diff --git a/doc/index.rst b/doc/index.rst
index b92db937..a9942113 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -13,6 +13,7 @@ Widgets
    widgets/kmeans
    widgets/univariatepolynomialregression
+   widgets/gradientdescent
    widgets/polynomialclassification

Indices and tables
diff --git a/doc/widgets/gradientdescent.rst b/doc/widgets/gradientdescent.rst
new file mode 100644
index 00000000..347674a7
--- /dev/null
+++ b/doc/widgets/gradientdescent.rst
@@ -0,0 +1,108 @@
+Gradient Descent
+================
+
+.. figure:: icons/gradient-descent.png
+
+An educational widget that demonstrates the gradient descent algorithm on logistic or linear regression.
+
+Signals
+-------
+
+**Inputs**:
+
+- **Data**
+
+Input data set.
+
+**Outputs**:
+
+- **Data**
+
+Data with the columns selected in the widget.
+
+- **Classifier**
+
+Model produced at the current step of the algorithm.
+
+- **Coefficients**
+
+Logistic or linear regression coefficients at the current step of the algorithm.
+
+Description
+-----------
+
+This widget shows the steps of `gradient descent <https://en.wikipedia.org/wiki/Gradient_descent>`__ for logistic and
+linear regression, one step at a time. Gradient descent is demonstrated on two attributes selected by the user.
+
+Gradient descent is performed on logistic regression if the class in the data set is discrete, and on linear
+regression if the class is continuous.
+
+.. figure:: images/gradient-descent-stamped.png
+
+1. Select the two attributes (**x** and **y**) on which the gradient descent algorithm is performed.
+   Select the **target class**, the class that is classified against all other classes.
+
+2. **Learning rate** is the step size of the gradient descent update.
+
+   With the **stochastic** checkbox you can select whether gradient descent is
+   `stochastic <https://en.wikipedia.org/wiki/Stochastic_gradient_descent>`__ or not.
+   If stochastic is checked, you can set the **step size**, which is the number of steps of stochastic
+   gradient descent performed on each press of the step button (see the sketch after this list).
+
+   **Restart**: start the algorithm from the beginning.
+
+3. **Step**: perform one step of the algorithm.
+
+   **Step back**: make a step back in the algorithm.
+
+4. **Run**: automatically perform steps until the algorithm converges.
+
+   **Speed**: set the speed of automatic stepping.
+
+5. **Save Image** saves the image to the computer in a .svg or .png
+   format.
+
+   **Report** includes widget parameters and visualization in the report.
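+
+Pressing **Step** applies one update of the form θ ← θ − α·∇J(θ), where α is the
+learning rate and J is the cost of the current model. A minimal NumPy sketch of the
+logistic-regression update (an illustration only, not the widget's exact
+implementation)::
+
+    import numpy as np
+
+    def sigmoid(z):
+        return 1 / (1 + np.exp(-z))
+
+    def gd_step(theta, X, y, alpha):
+        # batch gradient of the logistic cost; X holds the two selected attributes
+        grad = X.T.dot(sigmoid(X.dot(theta)) - y) / len(y)
+        return theta - alpha * grad
+
+For a continuous class, the same update is applied with the linear hypothesis
+h(x) = θᵀx in place of the sigmoid. With **stochastic** checked, the gradient is
+computed on a small slice of examples at a time instead of on the whole data set.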
+
+Example
+-------
+
+In Orange we connected the *File* widget with the *Iris* data set to the *Gradient Descent* widget. The Iris data
+set has a discrete class, so *logistic regression* is used this time.
+We connected the outputs of the widget to the *Predictions* widget, to see how the data are classified, and to the
+*Data Table* widget, where we inspect the coefficients of the logistic regression.
+
+.. figure:: images/gradient-descent-flow.png
+
+We opened the *Gradient Descent* widget and set *X* to *sepal width* and *Y* to *sepal length*. The target class is
+set to *Iris-virginica*, and the *learning rate* to 0.02. With a click in the graph we set the starting
+coefficients (red dot).
+
+.. figure:: images/gradient-descent1.png
+
+We perform a step of the algorithm by pressing the **Step** button. When we are done clicking, we can finish
+stepping by pressing the **Run** button.
+
+.. figure:: images/gradient-descent2.png
+
+If we want to go back in the algorithm, we can do so by pressing the **Step back** button. This also changes the
+model: the current model uses the position of the last coefficients (red-yellow dot).
+
+.. figure:: images/gradient-descent3.png
+
+In the end we want to see the predictions for the input data, so we open the *Predictions* widget. Predictions are
+listed in the left column. We can compare these predictions to the real classes.
+
+.. figure:: images/gradient-descent4.png
+
+If we want to demonstrate *linear regression*, we can change the data set to *Housing*, which has a continuous
+class variable. When using linear regression, only one feature can be selected, which means that the fitted
+function is linear. The other parameter plotted in the graph is the
+`intercept <https://en.wikipedia.org/wiki/Y-intercept>`__ of the
+`linear function <https://en.wikipedia.org/wiki/Linear_function>`__.
+
+This time we selected *INDUS* as the
+`independent variable <https://en.wikipedia.org/wiki/Dependent_and_independent_variables>`__.
+In the widget we can perform the same actions as before. In the end we can also check the predictions for each
+point with the *Predictions* widget, and the coefficients of the linear regression in a *Data Table*.
+
+.. figure:: images/gradient-descent-housing.png
diff --git a/doc/widgets/icons/gradient-descent.png b/doc/widgets/icons/gradient-descent.png
new file mode 100644
index 00000000..c26801cf
Binary files /dev/null and b/doc/widgets/icons/gradient-descent.png differ
diff --git a/doc/widgets/images/gradient-descent-flow.png b/doc/widgets/images/gradient-descent-flow.png
new file mode 100644
index 00000000..59ec28bd
Binary files /dev/null and b/doc/widgets/images/gradient-descent-flow.png differ
diff --git a/doc/widgets/images/gradient-descent-housing.png b/doc/widgets/images/gradient-descent-housing.png
new file mode 100644
index 00000000..1b306703
Binary files /dev/null and b/doc/widgets/images/gradient-descent-housing.png differ
diff --git a/doc/widgets/images/gradient-descent-stamped.png b/doc/widgets/images/gradient-descent-stamped.png
new file mode 100644
index 00000000..c386f84a
Binary files /dev/null and b/doc/widgets/images/gradient-descent-stamped.png differ
diff --git a/doc/widgets/images/gradient-descent-tags.txt b/doc/widgets/images/gradient-descent-tags.txt
new file mode 100644
index 00000000..4405a2ee
--- /dev/null
+++ b/doc/widgets/images/gradient-descent-tags.txt
@@ -0,0 +1,5 @@
+0 92 33
+1 100 189
+2 170 372
+3 90 500
+4 119 609
diff --git a/doc/widgets/images/gradient-descent.png b/doc/widgets/images/gradient-descent.png
new file mode 100644
index 00000000..e681f88f
Binary files /dev/null and b/doc/widgets/images/gradient-descent.png differ
diff --git a/doc/widgets/images/gradient-descent1.png b/doc/widgets/images/gradient-descent1.png
new file mode 100644
index 00000000..1d2b405b
Binary files /dev/null and b/doc/widgets/images/gradient-descent1.png differ
diff --git a/doc/widgets/images/gradient-descent2.png b/doc/widgets/images/gradient-descent2.png
new file mode 100644
index 00000000..9b8c550f
Binary files /dev/null and b/doc/widgets/images/gradient-descent2.png differ
diff --git a/doc/widgets/images/gradient-descent3.png b/doc/widgets/images/gradient-descent3.png
new file mode 100644
index 00000000..8b13b9a6
Binary files /dev/null and b/doc/widgets/images/gradient-descent3.png differ
diff --git a/doc/widgets/images/gradient-descent4.png b/doc/widgets/images/gradient-descent4.png
new file mode 100644
index 00000000..f75c03d4
Binary files /dev/null and b/doc/widgets/images/gradient-descent4.png differ
diff --git a/orangecontrib/educational/widgets/icons/GradientDescent.svg b/orangecontrib/educational/widgets/icons/GradientDescent.svg
new file mode 100644
index 00000000..cc773e74
--- /dev/null
+++ b/orangecontrib/educational/widgets/icons/GradientDescent.svg
@@ -0,0 +1,121 @@
+[121 lines of SVG/XML markup; only the text "image/svg+xml" survived extraction]
diff --git a/orangecontrib/educational/widgets/owgradientdescent.py b/orangecontrib/educational/widgets/owgradientdescent.py
new file mode 100644
index
00000000..2b515ea5
--- /dev/null
+++ b/orangecontrib/educational/widgets/owgradientdescent.py
@@ -0,0 +1,844 @@
+from Orange.canvas import report
+from os import path
+import time
+
+import numpy as np
+from PyQt4.QtCore import pyqtSlot, Qt, QThread, pyqtSignal
+from PyQt4.QtGui import QSizePolicy, QPixmap, QColor, QIcon
+
+from Orange.widgets.utils import itemmodels
+from Orange.classification import Model
+from Orange.data import Table, ContinuousVariable, Domain, DiscreteVariable, \
+    StringVariable
+from Orange.widgets import gui
+from Orange.widgets import highcharts
+from Orange.widgets import settings
+from Orange.widgets.widget import OWWidget, Msg
+from Orange.preprocess.preprocess import Normalize
+from scipy.interpolate import splprep, splev
+
+from orangecontrib.educational.widgets.utils.linear_regression import \
+    LinearRegression
+from orangecontrib.educational.widgets.utils.logistic_regression \
+    import LogisticRegression
+from orangecontrib.educational.widgets.utils.contour import Contour
+
+
+class Scatterplot(highcharts.Highchart):
+    """
+    Scatterplot extends Highchart and just defines some sane defaults:
+    * enables scroll-wheel zooming,
+    * sets a callback function for clicks in the chart,
+    * includes the highcharts-contour.js script for drawing contours
+    """
+
+    js_click_function = """/**/(function(e) {
+        window.pybridge.chart_clicked(e.xAxis[0].value, e.yAxis[0].value);
+    })
+    """
+
+    # replot counter, used by unit tests
+    count_replots = 0
+
+    def __init__(self, click_callback, **kwargs):
+
+        # read the javascript for contour plotting
+        with open(
+                path.join(path.dirname(__file__), 'resources',
+                          'highcharts-contour.js'), 'r') as f:
+            contours_js = f.read()
+
+        super().__init__(enable_zoom=True,
+                         bridge=self,
+                         enable_select='',
+                         chart_events_click=self.js_click_function,
+                         plotOptions_series_states_hover_enabled=False,
+                         chart_panning=False,
+                         javascript=contours_js,
+                         **kwargs)
+
+        self.click_callback = click_callback
+
+    def chart(self, *args, **kwargs):
+        self.count_replots += 1
+        super(Scatterplot, self).chart(*args, **kwargs)
+
+    @pyqtSlot(float, float)
+    def chart_clicked(self, x, y):
+        """
+        Called from JavaScript when a click event happens
+        """
+        self.click_callback(x, y)
+
+    def remove_series(self, idx):
+        """
+        Removes the series with id idx
+        """
+        self.evalJS("""
+            series = chart.get('{id}');
+            if (series != null)
+                series.remove(true);
+            """.format(id=idx))
+
+    def remove_last_point(self, idx):
+        """
+        Removes the last point from the series with id idx
+        """
+        self.evalJS("""
+            series = chart.get('{id}');
+            if (series != null)
+                series.removePoint(series.data.length - 1, true);
+            """.format(id=idx))
+
+    def add_series(self, series):
+        """
+        Adds the given series to the chart
+        """
+        for i, s in enumerate(series):
+            self.exposeObject('series%d' % i, series[i])
+            self.evalJS("chart.addSeries(series%d, true);" % i)
+
+    def add_point_to_series(self, idx, point):
+        """
+        Adds a point to the series with id idx
+        """
+        self.exposeObject('point', point)
+        self.evalJS("""
+            series = chart.get('{id}');
+            series.addPoint(point);
+            """.format(id=idx))
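+
+# Example of the series objects Scatterplot.add_series expects: plain dicts
+# that mirror the Highcharts series API (the values below are illustrative):
+#
+#     scatter.add_series([
+#         dict(id="path", type="scatter", color="#ff0000",
+#              showInLegend=False, data=[dict(x=0.5, y=-1.2)]),
+#     ])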
+
+class Autoplay(QThread):
+    """
+    Class used for a separate thread when using "Autoplay" for gradient descent
+
+    Parameters
+    ----------
+    ow_gradient_descent : OWGradientDescent
+        Instance of the OWGradientDescent class
+    """
+
+    def __init__(self, ow_gradient_descent):
+        QThread.__init__(self)
+        self.ow_gradient_descent = ow_gradient_descent
+
+    def __del__(self):
+        self.wait()
+
+    def run(self):
+        """
+        Steps through the algorithm until it converges or the user interrupts
+        """
+        while (not self.ow_gradient_descent.learner.converged and
+               self.ow_gradient_descent.auto_play_enabled and
+               self.ow_gradient_descent.learner.step_no <= 500):
+            self.ow_gradient_descent.step_trigger.emit()
+            time.sleep(2 - self.ow_gradient_descent.auto_play_speed)
+        self.ow_gradient_descent.stop_auto_play_trigger.emit()
+
+
+class OWGradientDescent(OWWidget):
+    """
+    Widget that demonstrates the gradient descent algorithm
+    """
+
+    name = "Gradient Descent"
+    description = "Widget shows the procedure of gradient descent " \
+                  "on logistic or linear regression."
+    icon = "icons/GradientDescent.svg"
+    want_main_area = True
+
+    inputs = [("Data", Table, "set_data")]
+    outputs = [("Model", Model),
+               ("Coefficients", Table),
+               ("Data", Table)]
+
+    graph_name = "Gradient descent graph"
+
+    # selected attributes in chart
+    attr_x = settings.Setting('')
+    attr_y = settings.Setting('')
+    target_class = settings.Setting('')
+    alpha = settings.Setting(0.1)
+    step_size = settings.Setting(30)  # step size for stochastic gds
+    auto_play_speed = settings.Setting(1)
+    stochastic = settings.Setting(False)
+
+    # models
+    x_var_model = None
+    y_var_model = None
+
+    # function used in gradient descent
+    learner_name = ""
+    learner = None
+    cost_grid = None
+    grid_size = 15
+    contour_color = "#aaaaaa"
+    min_x = None
+    max_x = None
+    min_y = None
+    max_y = None
+
+    # data
+    data = None
+    selected_data = None
+
+    # autoplay
+    auto_play_enabled = False
+    auto_play_button_text = ["Run", "Stop"]
+    auto_play_thread = None
+
+    # signals
+    step_trigger = pyqtSignal()
+    stop_auto_play_trigger = pyqtSignal()
+
+    class Error(OWWidget.Error):
+        """
+        Class used for widget errors.
+        """
+        to_few_features = Msg("Too few numeric features.")
+        no_class = Msg("Data must have a single class attribute.")
+        to_few_values = Msg("Class attribute must have at least two values.")
+        all_none = Msg("One of the features has no defined values.")
+
+    def __init__(self):
+        super().__init__()
+
+        # var models
+        self.x_var_model = itemmodels.VariableListModel()
+        self.y_var_model = itemmodels.VariableListModel()
+
+        # info box
+        self.info_box = gui.widgetBox(self.controlArea, "Info")
+        self.learner_label = gui.label(widget=self.info_box, master=self,
+                                       label="")
+
+        # options box
+        policy = QSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed)
+
+        self.options_box = gui.widgetBox(self.controlArea, "Data")
+        opts = dict(
+            widget=self.options_box, master=self, orientation=Qt.Horizontal,
+            callback=self.change_attributes, sendSelectedValue=True,
+            maximumContentsLength=15
+        )
+        self.cbx = gui.comboBox(value='attr_x', label='X:', **opts)
+        self.cby = gui.comboBox(value='attr_y', label='Y:', **opts)
+        self.target_class_combobox = gui.comboBox(
+            value='target_class', label='Target class: ', **opts)
+
+        self.cbx.setModel(self.x_var_model)
+        self.cby.setModel(self.y_var_model)
+
+        gui.separator(self.controlArea, 20, 20)
+
+        # properties box
+        self.properties_box = gui.widgetBox(self.controlArea, "Properties")
+        self.alpha_spin = gui.spin(
+            widget=self.properties_box, master=self,
+            callback=self.change_alpha, value="alpha",
+            label="Learning rate: ",
+            minv=0.001, maxv=100, step=0.001, spinType=float, decimals=3,
+            alignment=Qt.AlignRight, controlWidth=80)
+        self.stochastic_checkbox = gui.checkBox(
+            widget=self.properties_box, master=self,
+            callback=self.change_stochastic, value="stochastic",
+            label="Stochastic ")
+        self.step_size_spin = gui.spin(
+            widget=self.properties_box, master=self,
+            callback=self.change_step, value="step_size",
+            label="Step size: ",
+            minv=1, maxv=100, step=1, alignment=Qt.AlignRight,
+            controlWidth=80)
+        self.restart_button = gui.button(
+            widget=self.properties_box, master=self,
+            callback=self.restart, label="Restart")
+
+        self.alpha_spin.setSizePolicy(policy)
+        self.step_size_spin.setSizePolicy(policy)
+
+        gui.separator(self.controlArea, 20, 20)
+
+        # step box
+        self.step_box = gui.widgetBox(self.controlArea,
+                                      "Manually step through")
+        self.step_button = gui.button(
+            widget=self.step_box, master=self, callback=self.step,
+            label="Step", default=True)
+        self.step_back_button = gui.button(
+            widget=self.step_box, master=self, callback=self.step_back,
+            label="Step back")
+
+        gui.separator(self.controlArea, 20, 20)
+
+        # run box
+        self.run_box = gui.widgetBox(self.controlArea, "Run")
+        self.auto_play_button = gui.button(
+            widget=self.run_box, master=self,
+            label=self.auto_play_button_text[0], callback=self.auto_play)
+        self.auto_play_speed_spinner = gui.hSlider(
+            widget=self.run_box, master=self, value='auto_play_speed',
+            minValue=0, maxValue=1.91, step=0.1,
+            intOnly=False, createLabel=False, label='Speed:')
+
+        # graph in mainArea
+        self.scatter = Scatterplot(click_callback=self.change_theta,
+                                   xAxis_gridLineWidth=0,
+                                   yAxis_gridLineWidth=0,
+                                   title_text='',
+                                   tooltip_shared=False,
+                                   debug=True,
+                                   legend_symbolWidth=0,
+                                   legend_symbolHeight=0)
+        # TODO: set debug=False when development is finished
+        gui.rubber(self.controlArea)
+
+        # Just render an empty chart so it shows a nice 'No data to display'
+        self.scatter.chart()
+        self.mainArea.layout().addWidget(self.scatter)
+
+        self.step_size_lock()
+        self.step_back_button_lock()
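+
+    # Note: the Highchart wrapper used above translates underscore-separated
+    # keyword arguments into nested Highcharts options, e.g. (illustrative):
+    #
+    #     xAxis_gridLineWidth=0  ->  {"xAxis": {"gridLineWidth": 0}}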
+    def set_data(self, data):
+        """
+        Function receives data from the input and initializes the widget
+        if the data is suitable; otherwise it sets an empty plot and
+        notifies the user
+
+        Parameters
+        ----------
+        data : Table
+            Input data
+        """
+        d = data
+
+        def reset_combos():
+            self.x_var_model[:] = []
+            self.y_var_model[:] = []
+            self.target_class_combobox.clear()
+
+        def init_combos():
+            """
+            Initializes the combos with the attributes
+            """
+            reset_combos()
+
+            c_vars = [var for var in d.domain.attributes if var.is_continuous]
+
+            self.x_var_model[:] = c_vars
+            self.y_var_model[:] = c_vars if self.is_logistic else []
+
+            for i, var in (enumerate(d.domain.class_var.values)
+                           if d.domain.class_var.is_discrete else []):
+                pix_map = QPixmap(60, 60)
+                color = tuple(d.domain.class_var.colors[i].tolist())
+                pix_map.fill(QColor(*color))
+                self.target_class_combobox.addItem(QIcon(pix_map), var)
+
+            self.cby.setDisabled(not self.is_logistic)
+            self.target_class_combobox.setDisabled(not self.is_logistic)
+
+        self.Error.clear()
+
+        # clear variables
+        self.cost_grid = None
+        self.learner = None
+        self.selected_data = None
+        self.data = None
+        self.set_empty_plot()
+
+        self.send_output()
+
+        self.cby.setDisabled(False)
+        self.target_class_combobox.setDisabled(False)
+        self.learner_name = ""
+
+        if data is None or len(data) == 0:
+            reset_combos()
+        elif d.domain.class_var is None:
+            reset_combos()
+            self.Error.no_class()
+        elif d.domain.class_var.is_continuous:
+            if sum(True for var in d.domain.attributes
+                   if isinstance(var, ContinuousVariable)) < 1:
+                # not enough (1) continuous variables
+                reset_combos()
+                self.Error.to_few_features()
+            else:
+                self.data = data
+                self.learner_name = "Linear regression"
+                init_combos()
+                self.attr_x = self.cbx.itemText(0)
+                self.step_size_spin.setMaximum(len(d))
+                self.restart()
+        else:  # class is discrete, so logistic regression is used
+            if sum(True for var in d.domain.attributes
+                   if isinstance(var, ContinuousVariable)) < 2:
+                # not enough (2) continuous variables
+                reset_combos()
+                self.Error.to_few_features()
+            elif len(d.domain.class_var.values) < 2:
+                reset_combos()
+                self.Error.to_few_values()
+                self.set_empty_plot()
+            else:
+                self.data = data
+                self.learner_name = "Logistic regression"
+                init_combos()
+                self.attr_x = self.cbx.itemText(0)
+                self.attr_y = self.cbx.itemText(1)
+                self.target_class = self.target_class_combobox.itemText(0)
+                self.step_size_spin.setMaximum(len(d))
+                self.restart()
+
+        self.learner_label.setText("Learner: " + self.learner_name)
+
+    def set_empty_plot(self):
+        """
+        Renders an empty plot
+        """
+        self.scatter.clear()
+
+    def change_attributes(self):
+        """
+        Called when the user changes an attribute or the target
+        """
+        self.learner = None  # reset so the previous theta is not reused
+        self.restart()
+
+    def restart(self):
+        """
+        Restarts the algorithm
+        """
+        self.selected_data = self.select_data()
+        if self.selected_data is None:
+            self.set_empty_plot()
+            return
+
+        theta = (self.learner.history[0][0]
+                 if self.learner is not None else None)
+        selected_learner = (LogisticRegression
+                            if self.learner_name == "Logistic regression"
+                            else LinearRegression)
+        self.learner = selected_learner(
+            data=self.selected_data,
+            alpha=self.alpha, stochastic=self.stochastic,
+            theta=theta, step_size=self.step_size,
+            intercept=(self.learner_name == "Linear regression"))
+        self.replot()
+        if theta is None:  # no previous theta exists
+            self.change_theta(np.random.uniform(self.min_x, self.max_x),
+                              np.random.uniform(self.min_y, self.max_y))
+        else:
+            self.change_theta(theta[0], theta[1])
+        self.send_output()
+        self.step_back_button_lock()
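+
+    # Sketch of how the widget drives a learner, outside the GUI
+    # (illustrative only; it mirrors restart/step/step_back above):
+    #
+    #     learner = LogisticRegression(data=normalized_table, alpha=0.1)
+    #     learner.set_theta([0.0, 0.0])
+    #     learner.step()       # one gradient descent update
+    #     learner.step_back()  # history makes the step reversible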
""" + Function changes alpha parameter of the algorithm + """ + if self.learner is not None: + self.learner.set_alpha(self.alpha) + + def change_stochastic(self): + """ + Function changes switches between stochastic or usual algorithm + """ + if self.learner is not None: + self.learner.stochastic = self.stochastic + self.step_size_lock() + + def change_step(self): + if self.learner is not None: + self.learner.stochastic_step_size = self.step_size + + def change_theta(self, x, y): + """ + Function set new theta + """ + if self.learner is not None: + self.learner.set_theta([x, y]) + self.scatter.remove_series("path") + self.scatter.remove_series("last_point") + self.scatter.add_series([ + dict(id="last_point", + data=[dict( + x=x, y=y, dataLabels=dict( + enabled=True, + format=' {0:.2f} '.format( + self.learner.j(np.array([x, y]))), + verticalAlign='middle', + useHTML=True, + align="right", + style=dict( + fontWeight="normal", + textShadow=False + )) + )], + showInLegend=False, + type="scatter", enableMouseTracking=False, + color="#ffcc00", marker=dict(radius=4)), + dict(id="path", data=[dict( + x=x, y=y, h='{0:.2f}'.format( + self.learner.j(np.array([x, y]))))], + showInLegend=False, + type="scatter", lineWidth=1, + color="#ff0000", + marker=dict( + enabled=True, radius=2), + tooltip=dict( + pointFormat="Cost: {point.h}", + shared=False, + valueDecimals=2 + ))]) + self.send_output() + + def step(self): + """ + Function performs one step of the algorithm + """ + if self.data is None: + return + if self.learner.step_no > 500: # limit step no to avoid freezes + return + self.learner.step() + theta = self.learner.theta + self.plot_point(theta[0], theta[1]) + self.send_output() + self.step_back_button_lock() + + def step_back(self): + """ + Function performs step back + """ + if self.data is None: + return + if self.learner.step_no > 0: + self.learner.step_back() + self.scatter.remove_last_point("path") + theta = self.learner.theta + self.plot_last_point(theta[0], theta[1]) + self.send_output() + self.step_back_button_lock() + + def step_back_button_lock(self): + """ + Function lock or unlock step back button. 
+ """ + self.step_back_button.setDisabled( + self.learner is None or self.learner.step_no == 0) + + def step_size_lock(self): + self.step_size_spin.setDisabled(not self.stochastic) + + def plot_point(self, x, y): + """ + Function add point to the path + """ + self.scatter.add_point_to_series("path", dict( + x=x, y=y, h='{0:.2f}'.format(self.learner.j(np.array([x, y]))) + )) + self.plot_last_point(x, y) + + def plot_last_point(self, x, y): + self.scatter.remove_last_point("last_point") + self.scatter.add_point_to_series( + "last_point", + dict( + x=x, y=y, dataLabels=dict( + enabled=True, + format=' {0:.2f} '.format(self.learner.j(np.array([x, y]))), + useHTML=True, + verticalAlign='middle', + align="left" if self.label_right() else "right", + style=dict( + fontWeight="normal", + textShadow=False + )) + )) + + def label_right(self): + l = self.learner + return l.step_no == 0 or l.history[l.step_no - 1][0][0] < l.theta[0] + + def replot(self): + """ + This function performs complete replot of the graph + """ + if self.data is None or self.selected_data is None: + self.set_empty_plot() + return + + optimal_theta = self.learner.optimized() + self.min_x = optimal_theta[0] - 10 + self.max_x = optimal_theta[0] + 10 + self.min_y = optimal_theta[1] - 10 + self.max_y = optimal_theta[1] + 10 + + options = dict(series=[]) + + # gradient and contour + options['series'] += self.plot_gradient_and_contour( + self.min_x, self.max_x, self.min_y, self.max_y) + + # highcharts parameters + kwargs = dict( + xAxis_title_text="

θ{attr}

" + .format(attr=self.attr_x if self.is_logistic else 0), + xAxis_title_useHTML=True, + yAxis_title_text="θ{attr}". + format(attr=self.attr_y if self.is_logistic else self.attr_x), + yAxis_title_useHTML=True, + xAxis_min=self.min_x, + xAxis_max=self.max_x, + yAxis_min=self.min_y, + yAxis_max=self.max_y, + xAxis_startOnTick=False, + xAxis_endOnTick=False, + yAxis_startOnTick=False, + yAxis_endOnTick=False, + colorAxis=dict( + minColor="#ffffff", maxColor="#00BFFF", + endOnTick=False, startOnTick=False), + plotOptions_contour_colsize=(self.max_y - self.min_y) / 1000, + plotOptions_contour_rowsize=(self.max_x - self.min_x) / 1000, + tooltip_headerFormat="", + tooltip_pointFormat="%s: {point.x:.2f}
" + "%s: {point.y:.2f}" % + (self.attr_x, self.attr_y)) + + self.scatter.chart(options, **kwargs) + # to remove the colorAxis legend + self.scatter.evalJS("chart.colorAxis[0].axisParent.destroy();") + + def plot_gradient_and_contour(self, x_from, x_to, y_from, y_to): + """ + Function constructs series for gradient and contour + + Parameters + ---------- + x_from : float + Min grid x value + x_to : float + Max grid x value + y_from : float + Min grid y value + y_to : float + Max grid y value + + Returns + ------- + list + List containing series with background gradient and contour + """ + + # grid for gradient + x = np.linspace(x_from, x_to, self.grid_size) + y = np.linspace(y_from, y_to, self.grid_size) + xv, yv = np.meshgrid(x, y) + thetas = np.column_stack((xv.flatten(), yv.flatten())) + + # cost_values = np.vstack([self.learner.j(theta) for theta in thetas]) + cost_values = self.learner.j(thetas) + + # results + self.cost_grid = cost_values.reshape(xv.shape) + + return (self.plot_gradient(xv, yv, self.cost_grid) + + self.plot_contour(xv, yv, self.cost_grid)) + + def plot_gradient(self, x, y, grid): + """ + Function constructs background gradient + """ + return [dict(data=[[x[j, k], y[j, k], grid[j, k]] for j in range(len(x)) + for k in range(y.shape[1])], + grid_width=self.grid_size, + type="contour")] + + def plot_contour(self, xv, yv, cost_grid): + """ + Function constructs contour lines + """ + contour = Contour(xv, yv, cost_grid) + contour_lines = contour.contours( + np.linspace(np.min(cost_grid), np.max(cost_grid), 20)) + + series = [] + count = 0 + for key, value in contour_lines.items(): + for line in value: + if len(line) > 3: + tck, u = splprep(np.array(line).T, u=None, s=0.0, per=0) + u_new = np.linspace(u.min(), u.max(), 100) + x_new, y_new = splev(u_new, tck, der=0) + interpol_line = np.c_[x_new, y_new] + else: + interpol_line = line + + series.append(dict(data=interpol_line, + color=self.contour_color, + type="spline", + lineWidth=0.5, + showInLegend=False, + marker=dict(enabled=False), + name="%g" % round(key, 2), + enableMouseTracking=False + )) + count += 1 + return series + + def select_data(self): + """ + Function takes two selected columns from data table and merge them + in new Orange.data.Table + + Returns + ------- + Table + Table with selected columns + """ + if self.data is None: + return + + self.Error.clear() + + attr_x = self.data.domain[self.attr_x] + attr_y = self.data.domain[self.attr_y] if self.is_logistic else None + cols = [] + for attr in (attr_x, attr_y) if attr_y is not None else (attr_x, ): + subset = self.data[:, attr] + cols.append(subset.X) + x = np.column_stack(cols) + y_c = self.data.Y + + # remove nans + indices = ~np.isnan(x).any(axis=1) & ~np.isnan(y_c) + x = x[indices] + y_c = y_c[indices] + + if len(x) == 0: + self.Error.all_none() + return None + + if self.is_logistic: + two_classes = len(self.data.domain.class_var.values) == 2 + if two_classes: + domain = Domain([attr_x, attr_y], [self.data.domain.class_var]) + else: + domain = Domain( + [attr_x, attr_y], + [DiscreteVariable( + name=self.data.domain.class_var.name + "-bin", + values=[self.target_class, 'Others'])], + [self.data.domain.class_var]) + + y = [(0 if self.data.domain.class_var.values[int(d)] == + self.target_class else 1) + for d in y_c] + + return Normalize(Table(domain, x, y_c) if two_classes + else Table(domain, x, y, y_c[:, None])) + else: + domain = Domain([attr_x], self.data.domain.class_var) + return Normalize( + Table(domain, x, y_c), transform_class=True) + + def 
auto_play(self):
+        """
+        Called when the autoplay button is pressed
+        """
+        if self.data is not None:
+            self.auto_play_enabled = not self.auto_play_enabled
+            self.auto_play_button.setText(
+                self.auto_play_button_text[self.auto_play_enabled])
+            if self.auto_play_enabled:
+                self.disable_controls(self.auto_play_enabled)
+                self.auto_play_thread = Autoplay(self)
+                self.step_trigger.connect(self.step)
+                self.stop_auto_play_trigger.connect(self.stop_auto_play)
+                self.auto_play_thread.start()
+            else:
+                self.stop_auto_play()
+
+    def stop_auto_play(self):
+        """
+        Called when the stop autoplay button is pressed or at the end of
+        autoplay
+        """
+        self.auto_play_enabled = False
+        self.disable_controls(self.auto_play_enabled)
+        self.auto_play_button.setText(
+            self.auto_play_button_text[self.auto_play_enabled])
+
+    def disable_controls(self, disabled):
+        """
+        Disables or enables all controls except those in the Run box
+        """
+        self.step_box.setDisabled(disabled)
+        self.options_box.setDisabled(disabled)
+        self.properties_box.setDisabled(disabled)
+
+    def send_output(self):
+        """
+        Sends all outputs
+        """
+        self.send_model()
+        self.send_coefficients()
+        self.send_data()
+
+    def send_model(self):
+        """
+        Sends the model to the output.
+        """
+        if self.learner is not None and self.learner.theta is not None:
+            self.send("Model", self.learner.model)
+        else:
+            self.send("Model", None)
+
+    def send_coefficients(self):
+        """
+        Sends the regression coefficients to the output.
+        """
+        if self.learner is not None and self.learner.theta is not None:
+            domain = Domain(
+                [ContinuousVariable("Coefficients", number_of_decimals=7)],
+                metas=[StringVariable("Name")])
+            names = ["theta 0", "theta 1"]
+
+            coefficients_table = Table(
+                domain, list(zip(list(self.learner.theta), names)))
+            self.send("Coefficients", coefficients_table)
+        else:
+            self.send("Coefficients", None)
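+
+    # The Coefficients output is a two-row table with a "Name" meta column,
+    # for example (the values are illustrative):
+    #
+    #     Coefficients | Name
+    #     -------------+--------
+    #        0.8231250 | theta 0
+    #       -1.1004200 | theta 1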
+ """ + if self.selected_data is not None: + self.send("Data", self.selected_data) + else: + self.send("Data", None) + + key_actions = {(0, Qt.Key_Space): step} # space button for step + + def keyPressEvent(self, e): + """ + Handle default key actions in this widget + """ + if (int(e.modifiers()), e.key()) in self.key_actions: + fun = self.key_actions[(int(e.modifiers()), e.key())] + fun(self) + else: + super(OWGradientDescent, self).keyPressEvent(e) + + @property + def is_logistic(self): + return self.learner_name == "Logistic regression" + + def send_report(self): + if self.data is None: + return + caption = report.render_items_vert(( + ("Stochastic", str(self.stochastic)), + )) + self.report_plot(self.scatter) + self.report_caption(caption) \ No newline at end of file diff --git a/orangecontrib/educational/widgets/tests/test_owgradientdescent.py b/orangecontrib/educational/widgets/tests/test_owgradientdescent.py new file mode 100644 index 00000000..8f743bd9 --- /dev/null +++ b/orangecontrib/educational/widgets/tests/test_owgradientdescent.py @@ -0,0 +1,831 @@ +from numpy.testing import * +import numpy as np +from PyQt4.QtCore import Qt, QEvent +from PyQt4.QtGui import QKeyEvent + +from Orange.data import Table, Domain, ContinuousVariable, DiscreteVariable +from Orange.widgets.tests.base import WidgetTest + +from orangecontrib.educational.widgets.owgradientdescent import \ + OWGradientDescent + + +class TestOWGradientDescent(WidgetTest): + + def setUp(self): + self.widget = self.create_widget(OWGradientDescent) + self.iris = Table('iris') + self.housing = Table('housing') + + def test_set_data(self): + """ + Test set data + """ + w = self.widget + + # test on init + self.assertIsNone(w.data) + self.assertEqual(w.cbx.count(), 0) + self.assertEqual(w.cby.count(), 0) + self.assertEqual(w.target_class_combobox.count(), 0) + self.assertIsNone(w.learner) + self.assertIsNone(w.cost_grid) + + # call with none data + self.send_signal("Data", None) + self.assertIsNone(w.data) + self.assertEqual(w.cbx.count(), 0) + self.assertEqual(w.cby.count(), 0) + self.assertEqual(w.target_class_combobox.count(), 0) + self.assertIsNone(w.learner) + self.assertIsNone(w.cost_grid) + + # call with no class variable + table_no_class = Table( + Domain([ContinuousVariable("x"), ContinuousVariable("y")]), + [[1, 2], [2, 3]]) + self.send_signal("Data", table_no_class) + self.assertIsNone(w.data) + self.assertEqual(w.cbx.count(), 0) + self.assertEqual(w.cby.count(), 0) + self.assertEqual(w.target_class_combobox.count(), 0) + self.assertIsNone(w.learner) + self.assertIsNone(w.cost_grid) + self.assertTrue(w.Error.no_class.is_shown()) + + # with only one class value + table_one_class = Table( + Domain([ContinuousVariable("x"), ContinuousVariable("y")], + DiscreteVariable("a", values=["k"])), + [[1, 2], [2, 3]], [0, 0]) + self.send_signal("Data", table_one_class) + self.assertIsNone(w.data) + self.assertEqual(w.cbx.count(), 0) + self.assertEqual(w.cby.count(), 0) + self.assertEqual(w.target_class_combobox.count(), 0) + self.assertIsNone(w.learner) + self.assertIsNone(w.cost_grid) + self.assertTrue(w.Error.to_few_values.is_shown()) + + # not enough continuous variables + table_no_enough_cont = Table( + Domain( + [ContinuousVariable("x"), + DiscreteVariable("y", values=["a", "b"])], + DiscreteVariable("a", values=['a', 'b'])), + [[1, 0], [2, 1]], [0, 1]) + self.send_signal("Data", table_no_enough_cont) + self.assertIsNone(w.data) + self.assertEqual(w.cbx.count(), 0) + self.assertEqual(w.cby.count(), 0) + 
self.assertEqual(w.target_class_combobox.count(), 0) + self.assertIsNone(w.learner) + self.assertIsNone(w.cost_grid) + self.assertTrue(w.Error.to_few_features.is_shown()) + + # init with ok data, discrete class - logistic regression + num_continuous_attributes = sum( + True for var in self.iris.domain.attributes + if isinstance(var, ContinuousVariable)) + + self.send_signal("Data", self.iris) + self.assertEqual(w.cbx.count(), num_continuous_attributes) + self.assertEqual(w.cby.count(), num_continuous_attributes) + self.assertEqual( + w.target_class_combobox.count(), + len(self.iris.domain.class_var.values)) + self.assertEqual(w.cbx.currentText(), self.iris.domain[0].name) + self.assertEqual(w.cby.currentText(), self.iris.domain[1].name) + self.assertEqual( + w.target_class_combobox.currentText(), + self.iris.domain.class_var.values[0]) + + self.assertEqual(w.attr_x, self.iris.domain[0].name) + self.assertEqual(w.attr_y, self.iris.domain[1].name) + self.assertEqual(w.target_class, self.iris.domain.class_var.values[0]) + + # change showed attributes + w.attr_x = self.iris.domain[1].name + w.attr_y = self.iris.domain[2].name + w.target_class = self.iris.domain.class_var.values[1] + + self.assertEqual(w.cbx.currentText(), self.iris.domain[1].name) + self.assertEqual(w.cby.currentText(), self.iris.domain[2].name) + self.assertEqual( + w.target_class_combobox.currentText(), + self.iris.domain.class_var.values[1]) + + self.assertEqual(w.attr_x, self.iris.domain[1].name) + self.assertEqual(w.attr_y, self.iris.domain[2].name) + self.assertEqual(w.target_class, self.iris.domain.class_var.values[1]) + + # remove data + self.send_signal("Data", None) + self.assertIsNone(w.data) + self.assertEqual(w.cbx.count(), 0) + self.assertEqual(w.cby.count(), 0) + self.assertEqual(w.target_class_combobox.count(), 0) + self.assertIsNone(w.learner) + self.assertIsNone(w.cost_grid) + + # not enough continuous variables when continuous class + table_no_enough_cont = Table( + Domain( + [DiscreteVariable("y", values=["a", "b"])], + ContinuousVariable("a")), + [[1, 0], [2, 1]], [0, 1]) + self.send_signal("Data", table_no_enough_cont) + self.assertIsNone(w.data) + self.assertEqual(w.cbx.count(), 0) + self.assertEqual(w.cby.count(), 0) + self.assertEqual(w.target_class_combobox.count(), 0) + self.assertIsNone(w.learner) + self.assertIsNone(w.cost_grid) + self.assertTrue(w.Error.to_few_features.is_shown()) + + # init with ok data, discrete class - linear regression + num_continuous_attributes = sum( + True for var in self.housing.domain.attributes + if isinstance(var, ContinuousVariable)) + + self.send_signal("Data", self.housing) + self.assertEqual(w.cbx.count(), num_continuous_attributes) + self.assertEqual(w.cby.count(), 0) + self.assertEqual(w.target_class_combobox.count(), 0) + self.assertFalse(w.cby.isEnabled()) + self.assertFalse(w.target_class_combobox.isEnabled()) + self.assertEqual(w.cbx.currentText(), self.housing.domain[0].name) + + self.assertEqual(w.attr_x, self.housing.domain[0].name) + + # change showed attributes + w.attr_x = self.housing.domain[1].name + + self.assertEqual(w.cbx.currentText(), self.housing.domain[1].name) + + self.assertEqual(w.attr_x, self.housing.domain[1].name) + + def test_restart(self): + """ + Test if restart works fine + """ + w = self.widget + + # check if init is as expected + self.assertIsNone(w.selected_data) + self.assertIsNone(w.learner) + + # with logistic regression + self.send_signal("Data", self.iris) + self.assertEqual(len(w.selected_data), len(self.iris)) + 
assert_array_equal(w.learner.x, w.selected_data.X) + assert_array_equal(w.learner.y, w.selected_data.Y) + assert_array_equal(w.learner.domain, w.selected_data.domain) + self.assertEqual(w.learner.alpha, w.alpha) + self.assertEqual(w.learner.stochastic, False) + self.assertEqual(w.learner.stochastic_step_size, w.step_size) + + # with linear regression + self.send_signal("Data", self.housing) + self.assertEqual(len(w.selected_data), len(self.housing)) + assert_array_equal(w.learner.x[:, 1][:, None], w.selected_data.X) + # because of intercept + assert_array_equal(w.learner.y, w.selected_data.Y) + assert_array_equal(w.learner.domain, w.selected_data.domain) + self.assertEqual(w.learner.alpha, w.alpha) + self.assertEqual(w.learner.stochastic, False) + self.assertEqual(w.learner.stochastic_step_size, w.step_size) + + # click on restart + old_theta = np.copy(w.learner.theta) + w.restart_button.click() + assert_array_equal(w.learner.theta, old_theta) + + # again no data + self.send_signal("Data", None) + self.assertIsNone(w.selected_data) + self.assertIsNone(w.learner) + + def test_change_alpha(self): + """ + Function check if alpha is changing correctly + """ + w = self.widget + + # to define learner + self.send_signal("Data", self.iris) + + # check init alpha + self.assertEqual(w.learner.alpha, 0.1) + + # change alpha + w.alpha_spin.setValue(1) + self.assertEqual(w.learner.alpha, 1) + w.alpha_spin.setValue(0.3) + self.assertEqual(w.learner.alpha, 0.3) + + # just check if nothing happens when no learner + self.send_signal("Data", None) + self.assertIsNone(w.learner) + w.alpha_spin.setValue(5) + + def test_change_stochastic(self): + """ + Test changing stochastic + """ + w = self.widget + + # define learner + self.send_signal("Data", self.iris) + + # check init + self.assertFalse(w.learner.stochastic) + + # change stochastic + w.stochastic_checkbox.click() + self.assertTrue(w.learner.stochastic) + w.stochastic_checkbox.click() + self.assertFalse(w.learner.stochastic) + + # just check if nothing happens when no learner + self.send_signal("Data", None) + self.assertIsNone(w.learner) + w.stochastic_checkbox.click() + + def test_change_step(self): + """ + Function check if change step works correctly + """ + w = self.widget + + # to define learner + self.send_signal("Data", self.iris) + + # check init alpha + self.assertEqual(w.learner.stochastic_step_size, 30) + + # change alpha + w.step_size_spin.setValue(50) + self.assertEqual(w.learner.stochastic_step_size, 50) + w.step_size_spin.setValue(40) + self.assertEqual(w.learner.stochastic_step_size, 40) + + # just check if nothing happens when no learner + self.send_signal("Data", None) + self.assertIsNone(w.learner) + w.step_size_spin.setValue(30) + + def test_change_theta(self): + """ + Test setting theta + """ + w = self.widget + + # to define learner + self.send_signal("Data", self.iris) + + # check init theta + self.assertIsNotNone(w.learner.theta) + + # change theta + w.change_theta(1, 1) + assert_array_equal(w.learner.theta, [1, 1]) + w.scatter.chart_clicked(1, 2) + assert_array_equal(w.learner.theta, [1, 2]) + + # just check if nothing happens when no learner + self.send_signal("Data", None) + self.assertIsNone(w.learner) + w.change_theta(1, 1) + + def test_step(self): + """ + Test step + """ + w = self.widget + + # test function not crashes when no data and learner + w.step() + + self.send_signal("Data", self.iris) + + # test theta set after step if not set yet + w.step() + self.assertIsNotNone(w.learner.theta) + + # check theta is changing 
when step + old_theta = np.copy(w.learner.theta) + w.step() + self.assertNotEqual(sum(old_theta - w.learner.theta), 0) + + # with linear regression + self.send_signal("Data", self.housing) + + # test theta set after step if not set yet + w.step() + self.assertIsNotNone(w.learner.theta) + + # check theta is changing when step + old_theta = np.copy(w.learner.theta) + w.step() + self.assertNotEqual(sum(old_theta - w.learner.theta), 0) + + # check steps not allowed after 500 + for i in range(500): + w.step() + self.assertEqual(w.learner.step_no, 501) + + def test_step_space(self): + """ + Test step + """ + w = self.widget + + event = QKeyEvent( + QEvent.KeyPress, Qt.Key_Space, Qt.KeyboardModifiers(0)) + + # test function not crashes when no data and learner + w.keyPressEvent(event) + + self.send_signal("Data", self.iris) + + # test theta set after step if not set yet + w.keyPressEvent(event) + self.assertIsNotNone(w.learner.theta) + + # check theta is changing when step + old_theta = np.copy(w.learner.theta) + w.keyPressEvent(event) + self.assertNotEqual(sum(old_theta - w.learner.theta), 0) + + # with linear regression + self.send_signal("Data", self.housing) + + # test theta set after step if not set yet + w.keyPressEvent(event) + self.assertIsNotNone(w.learner.theta) + + # check theta is changing when step + old_theta = np.copy(w.learner.theta) + w.keyPressEvent(event) + self.assertNotEqual(sum(old_theta - w.learner.theta), 0) + + old_theta = np.copy(w.learner.theta) + # to cover else example and check not crashes + event = QKeyEvent( + QEvent.KeyPress, Qt.Key_Q, Qt.KeyboardModifiers(0)) + w.keyPressEvent(event) + + # check nothing changes + assert_array_equal(old_theta, w.learner.theta) + + def test_step_back(self): + """ + Test stepping back + """ + w = self.widget + + # test function not crashes when no data and learner + w.step_back() + + self.send_signal("Data", self.iris) + + # test step back not performed when step_no == 0 + old_theta = np.copy(w.learner.theta) + w.step_back() + assert_array_equal(w.learner.theta, old_theta) + + # test same theta when step performed + w.change_theta(1.0, 1.0) + theta = np.copy(w.learner.theta) + w.step() + w.step_back() + assert_array_equal(theta, w.learner.theta) + + w.change_theta(1.0, 1.0) + theta1 = np.copy(w.learner.theta) + w.step() + theta2 = np.copy(w.learner.theta) + w.step() + theta3 = np.copy(w.learner.theta) + w.step() + w.step_back() + assert_array_equal(theta3, w.learner.theta) + w.step_back() + assert_array_equal(theta2, w.learner.theta) + w.step_back() + assert_array_equal(theta1, w.learner.theta) + w.step_back() + assert_array_equal(theta1, w.learner.theta) + + # test for stochastic + w.stochastic_checkbox.click() + + w.change_theta(1.0, 1.0) + theta = np.copy(w.learner.theta) + w.step() + w.step_back() + assert_array_equal(theta, w.learner.theta) + + w.change_theta(1.0, 1.0) + theta1 = np.copy(w.learner.theta) + w.step() + theta2 = np.copy(w.learner.theta) + w.step() + theta3 = np.copy(w.learner.theta) + w.step() + w.step_back() + assert_array_equal(theta3, w.learner.theta) + w.step_back() + assert_array_equal(theta2, w.learner.theta) + w.step_back() + assert_array_equal(theta1, w.learner.theta) + w.step_back() + assert_array_equal(theta1, w.learner.theta) + + # test mix stochastic and normal + # now it is stochastic + + w.change_theta(1.0, 1.0) + theta1 = np.copy(w.learner.theta) + w.step() + theta2 = np.copy(w.learner.theta) + w.step() + w.stochastic_checkbox.click() + theta3 = np.copy(w.learner.theta) + w.step() + w.step_back() + 
assert_array_equal(theta3, w.learner.theta) + w.step_back() + assert_array_equal(theta2, w.learner.theta) + w.step_back() + w.stochastic_checkbox.click() + assert_array_equal(theta1, w.learner.theta) + w.step_back() + assert_array_equal(theta1, w.learner.theta) + + # with linear regression + self.send_signal("Data", self.housing) + + # test step back not performed when step_no == 0 + old_theta = np.copy(w.learner.theta) + w.step_back() + assert_array_equal(w.learner.theta, old_theta) + + # test same theta when step performed + w.change_theta(1.0, 1.0) + theta = np.copy(w.learner.theta) + w.step() + w.step_back() + assert_array_equal(theta, w.learner.theta) + + w.change_theta(1.0, 1.0) + theta1 = np.copy(w.learner.theta) + w.step() + theta2 = np.copy(w.learner.theta) + w.step() + theta3 = np.copy(w.learner.theta) + w.step() + w.step_back() + assert_array_equal(theta3, w.learner.theta) + w.step_back() + assert_array_equal(theta2, w.learner.theta) + w.step_back() + assert_array_equal(theta1, w.learner.theta) + w.step_back() + assert_array_equal(theta1, w.learner.theta) + + # test for stochastic + w.stochastic_checkbox.click() + + w.change_theta(1.0, 1.0) + theta = np.copy(w.learner.theta) + w.step() + w.step_back() + assert_array_equal(theta, w.learner.theta) + + w.change_theta(1.0, 1.0) + theta1 = np.copy(w.learner.theta) + w.step() + theta2 = np.copy(w.learner.theta) + w.step() + theta3 = np.copy(w.learner.theta) + w.step() + w.step_back() + assert_array_equal(theta3, w.learner.theta) + w.step_back() + assert_array_equal(theta2, w.learner.theta) + w.step_back() + assert_array_equal(theta1, w.learner.theta) + w.step_back() + assert_array_equal(theta1, w.learner.theta) + + # test mix stochastic and normal + # now it is stochastic + + w.change_theta(1.0, 1.0) + theta1 = np.copy(w.learner.theta) + w.step() + theta2 = np.copy(w.learner.theta) + w.step() + w.stochastic_checkbox.click() + theta3 = np.copy(w.learner.theta) + w.step() + w.step_back() + assert_array_equal(theta3, w.learner.theta) + w.step_back() + assert_array_equal(theta2, w.learner.theta) + w.step_back() + w.stochastic_checkbox.click() + assert_array_equal(theta1, w.learner.theta) + w.step_back() + assert_array_equal(theta1, w.learner.theta) + + def test_replot(self): + """ + Test replot function and all functions connected with it + """ + w = self.widget + # nothing happens when no data + w.replot() + + self.assertIsNone(w.cost_grid) + self.assertEqual(w.scatter.count_replots, 1) + + self.send_signal("Data", self.iris) + self.assertTupleEqual(w.cost_grid.shape, (w.grid_size, w.grid_size)) + self.assertEqual(w.scatter.count_replots, 2) + + # when step no new re-plots + w.step() + self.assertEqual(w.scatter.count_replots, 2) + + # triggered new re-plot + self.send_signal("Data", self.iris) + self.assertTupleEqual(w.cost_grid.shape, (w.grid_size, w.grid_size)) + self.assertEqual(w.scatter.count_replots, 3) + + # with linear regression + self.send_signal("Data", self.housing) + self.assertTupleEqual(w.cost_grid.shape, (w.grid_size, w.grid_size)) + self.assertEqual(w.scatter.count_replots, 4) + + # when step no new re-plots + w.step() + self.assertEqual(w.scatter.count_replots, 4) + + # triggered new re-plot + self.send_signal("Data", self.iris) + self.assertTupleEqual(w.cost_grid.shape, (w.grid_size, w.grid_size)) + self.assertEqual(w.scatter.count_replots, 5) + + def test_select_data(self): + """ + Test select data function + """ + w = self.widget + + # test for none data + self.send_signal("Data", None) + + 
self.assertIsNone(w.select_data())  # the result is None
+
+        # test on iris
+        self.send_signal("Data", self.iris)
+        self.assertEqual(len(w.select_data()), len(self.iris))
+        self.assertEqual(len(w.select_data().domain.attributes), 2)
+        self.assertEqual(len(w.select_data().domain.class_var.values), 2)
+        self.assertEqual(w.select_data().domain.class_var.values[1], 'Others')
+        self.assertEqual(w.select_data().domain.attributes[0].name, w.attr_x)
+        self.assertEqual(w.select_data().domain.attributes[1].name, w.attr_y)
+        self.assertEqual(
+            w.select_data().domain.class_var.values[0], w.target_class)
+
+        # test on housing - continuous class
+        self.send_signal("Data", self.housing)
+        self.assertEqual(len(w.select_data()), len(self.housing))
+        self.assertEqual(len(w.select_data().domain.attributes), 1)
+        self.assertEqual(w.select_data().domain.attributes[0].name, w.attr_x)
+        self.assertTrue(w.select_data().domain.class_var.is_continuous)
+
+        # test with a data set for logistic regression - discrete class;
+        # with exactly two class values no extra 'Others' value is added
+        domain = Domain([ContinuousVariable('a'), ContinuousVariable('b')],
+                        DiscreteVariable('c', values=['a', 'b']))
+        data = Table(domain, [[1, 2], [1, 2]], [0, 1])
+
+        self.send_signal("Data", data)
+        self.assertEqual(len(w.select_data()), len(data))
+        self.assertEqual(len(w.select_data().domain.attributes), 2)
+        self.assertEqual(len(w.select_data().domain.class_var.values), 2)
+        self.assertEqual(
+            w.select_data().domain.class_var.values[1],
+            data.domain.class_var.values[1])
+        self.assertEqual(
+            w.select_data().domain.class_var.values[0],
+            data.domain.class_var.values[0])
+        self.assertEqual(w.select_data().domain.attributes[0].name, w.attr_x)
+        self.assertEqual(w.select_data().domain.attributes[1].name, w.attr_y)
+        self.assertEqual(
+            w.select_data().domain.class_var.values[0], w.target_class)
+
+        # selected data is None when a column contains only Nones
+        data = Table(Domain([ContinuousVariable('a'), ContinuousVariable('b')],
+                            DiscreteVariable('c', values=['a', 'b'])),
+                     [[1, None], [1, None]], [0, 1])
+        self.send_signal("Data", data)
+        selected_data = w.select_data()
+        self.assertIsNone(selected_data)
+
+        data = Table(Domain([ContinuousVariable('a'), ContinuousVariable('b')],
+                            DiscreteVariable('c', values=['a', 'b'])),
+                     [[None, None], [None, None]], [0, 1])
+        self.send_signal("Data", data)
+        selected_data = w.select_data()
+        self.assertIsNone(selected_data)
+
+    def test_autoplay(self):
+        """
+        Test autoplay functionality
+        """
+        w = self.widget
+
+        # test that it does not crash when data is None
+        w.auto_play()
+
+        # set data
+        self.send_signal("Data", self.iris)
+
+        # check the initial state
+        self.assertFalse(w.auto_play_enabled)
+        self.assertEqual(w.auto_play_button.text(), w.auto_play_button_text[0])
+        self.assertTrue(w.step_box.isEnabled())
+        self.assertTrue(w.options_box.isEnabled())
+        self.assertTrue(w.properties_box.isEnabled())
+
+        # auto play on
+        w.auto_play()
+        self.assertTrue(w.auto_play_enabled)
+        self.assertEqual(w.auto_play_button.text(), w.auto_play_button_text[1])
+        self.assertFalse(w.step_box.isEnabled())
+        self.assertFalse(w.options_box.isEnabled())
+        self.assertFalse(w.properties_box.isEnabled())
+
+        # stop auto play
+        w.auto_play()
+        self.assertFalse(w.auto_play_enabled)
+        self.assertEqual(w.auto_play_button.text(), w.auto_play_button_text[0])
+        self.assertTrue(w.step_box.isEnabled())
+        self.assertTrue(w.options_box.isEnabled())
+        self.assertTrue(w.properties_box.isEnabled())
+
+    def test_disable_controls(self):
+        """
+        Test disabling controls
+ """ + w = self.widget + + # check init + self.assertTrue((w.step_box.isEnabled())) + self.assertTrue((w.options_box.isEnabled())) + self.assertTrue((w.properties_box.isEnabled())) + + # disable + w.disable_controls(True) + self.assertFalse((w.step_box.isEnabled())) + self.assertFalse((w.options_box.isEnabled())) + self.assertFalse((w.properties_box.isEnabled())) + + w.disable_controls(True) + self.assertFalse((w.step_box.isEnabled())) + self.assertFalse((w.options_box.isEnabled())) + self.assertFalse((w.properties_box.isEnabled())) + + # enable + w.disable_controls(False) + self.assertTrue((w.step_box.isEnabled())) + self.assertTrue((w.options_box.isEnabled())) + self.assertTrue((w.properties_box.isEnabled())) + + w.disable_controls(False) + self.assertTrue((w.step_box.isEnabled())) + self.assertTrue((w.options_box.isEnabled())) + self.assertTrue((w.properties_box.isEnabled())) + + def test_send_model(self): + """ + Test sending model + """ + w = self.widget + + # when no learner + self.assertIsNone(self.get_output("Model")) + + # when learner theta set automatically + self.send_signal("Data", self.iris) + self.assertIsNotNone(self.get_output("Model")) + + # when everything fine + w.change_theta(1., 1.) + assert_array_equal(self.get_output("Model").theta, [1., 1.]) + + # when data deleted + self.send_signal("Data", None) + self.assertIsNone(self.get_output("Model")) + + # when learner theta set automatically + self.send_signal("Data", self.housing) + self.assertIsNotNone(self.get_output("Model")) + + # when everything fine + w.change_theta(1., 1.) + assert_array_equal(self.get_output("Model").theta, [1., 1.]) + + # when data deleted + self.send_signal("Data", None) + self.assertIsNone(self.get_output("Model")) + + def test_send_coefficients(self): + w = self.widget + + # when no learner + self.assertIsNone(self.get_output("Coefficients")) + + # when learner but no theta + self.send_signal("Data", self.iris) + self.assertIsNotNone(self.get_output("Coefficients")) + + # when everything fine + w.change_theta(1., 1.) + coef_out = self.get_output("Coefficients") + self.assertEqual(len(coef_out), 2) + self.assertEqual(len(coef_out.domain.attributes), 1) + self.assertEqual(coef_out.domain.attributes[0].name, "Coefficients") + self.assertEqual(len(coef_out.domain.metas), 1) + self.assertEqual(coef_out.domain.metas[0].name, "Name") + + # when data deleted + self.send_signal("Data", None) + self.assertIsNone(self.get_output("Coefficients")) + + # for linear regression + # when learner but no theta + self.send_signal("Data", self.housing) + self.assertIsNotNone(self.get_output("Coefficients")) + + # when everything fine + w.change_theta(1., 1.) + coef_out = self.get_output("Coefficients") + self.assertEqual(len(coef_out), 2) + self.assertEqual(len(coef_out.domain.attributes), 1) + self.assertEqual(coef_out.domain.attributes[0].name, "Coefficients") + self.assertEqual(len(coef_out.domain.metas), 1) + self.assertEqual(coef_out.domain.metas[0].name, "Name") + + def test_send_data(self): + """ + Test sending selected data to output + """ + w = self.widget + + # when no data + self.assertIsNone(self.get_output("Data")) + + # when everything fine + self.send_signal("Data", self.iris) + w.change_theta(1., 1.) 
+        assert_array_equal(self.get_output("Data"), w.selected_data)
+
+        # when data is deleted
+        self.send_signal("Data", None)
+        self.assertIsNone(self.get_output("Data"))
+
+    def test_change_attributes(self):
+        """
+        Check that the reset works correctly
+        """
+        w = self.widget
+
+        # when everything is fine
+        self.send_signal("Data", self.iris)
+
+        w.change_attributes()
+        self.assertIsNotNone(w.learner)
+        self.assertIsNotNone(w.learner.theta)
+
+    def test_send_report(self):
+        """
+        Test that the report does not crash
+        """
+        w = self.widget
+
+        # when everything is fine
+        self.send_signal("Data", self.iris)
+
+        w.report_button.click()
+
+        # when no data
+        self.send_signal("Data", None)
+
+        w.report_button.click()
\ No newline at end of file
diff --git a/orangecontrib/educational/widgets/utils/contour.py b/orangecontrib/educational/widgets/utils/contour.py
index 009dd6de..287f341d 100644
--- a/orangecontrib/educational/widgets/utils/contour.py
+++ b/orangecontrib/educational/widgets/utils/contour.py
@@ -40,11 +40,25 @@ def __init__(self, x, y, z):
     def contours(self, thresholds):
         contours = {}
         for t in thresholds:
-            points = self.find_contours(t)
+            points = self.clean(self.find_contours(t))
             if len(points) > 0:
                 contours[t] = points
         return contours
 
+    def clean(self, contours):
+        """
+        Removes consecutive duplicated points from each contour
+        """
+        corrected_contours = []
+        for contour in contours:
+            points = np.array(contour)
+            to_delete = []
+            for i in range(len(points) - 1):
+                if np.array_equal(points[i, :], points[i + 1, :]):
+                    to_delete.append(i)
+            corrected_contours.append(np.delete(points, to_delete, axis=0))
+        return corrected_contours
+
     def find_contours(self, threshold):
         contours = []
         bitmap = (self.z > threshold).astype(int)
diff --git a/orangecontrib/educational/widgets/utils/gradient_descent.py b/orangecontrib/educational/widgets/utils/gradient_descent.py
new file mode 100644
index 00000000..c01695cf
--- /dev/null
+++ b/orangecontrib/educational/widgets/utils/gradient_descent.py
@@ -0,0 +1,181 @@
+import numpy as np
+from scipy.optimize import fmin_l_bfgs_b
+
+from Orange.classification import Model
+
+
+class GradientDescent:
+    """
+    Base class for gradient descent with custom cost and gradient functions,
+    which allows performing the algorithm step by step
+
+    Parameters
+    ----------
+    alpha : float
+        Learning rate
+    theta : array_like(float, ndim=1)
+        Model parameters
+    data : Orange.data.Table
+        Data
+    """
+
+    x = None
+    y = None
+    theta = None
+    alpha = None
+    domain = None
+    step_no = 0
+    stochastic_i = 0
+    stochastic_step_size = 30  # number of single-example updates per step
+    regularization_rate = 0.001
+    # very small regularization rate to avoid big parameters
+
+    def __init__(self, alpha=0.1, theta=None, data=None, stochastic=False,
+                 step_size=30, intercept=False):
+        self.history = []
+        self.intercept = intercept
+        self.set_alpha(alpha)
+        self.set_data(data)
+        self.set_theta(theta)
+        self.stochastic = stochastic
+        self.stochastic_step_size = step_size
+
+    def set_data(self, data):
+        """
+        Sets the data
+        """
+        if data is not None:
+            x = data.X
+            self.x = np.c_[np.ones(len(x)), x] if self.intercept else x
+            self.y = data.Y
+            self.domain = data.domain
+        else:
+            self.x = None
+            self.y = None
+            self.domain = None
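+
+    # Subclasses supply the model-specific pieces; roughly (illustrative):
+    #
+    #     class MyRegression(GradientDescent):
+    #         def j(self, theta): ...     # cost at theta
+    #         def dj(self, theta, stochastic=False): ...     # its gradient
+    #         @property
+    #         def model(self): ...     # Orange Model for the current theta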
+ """ + if isinstance(theta, (np.ndarray, np.generic)): + self.theta = theta + elif isinstance(theta, list): + self.theta = np.array(theta) + else: + self.theta = None + self.history = self.set_list( + self.history, 0, (np.copy(self.theta), 0, None)) + self.step_no = 0 + + def set_alpha(self, alpha): + """ + Function sets alpha and can be called from constructor or from outside. + """ + self.alpha = alpha + + @property + def model(self): + """ + Function returns model based on current parameters. + """ + raise NotImplementedError("Subclasses should implement this!") + + @property + def converged(self): + """ + Function returns True if gradient descent already converged. + """ + if self.step_no == 0: + return False + return (np.sum(np.abs(self.theta - self.history[self.step_no - 1][0])) / + len(self.theta) < (1e-2 if not self.stochastic else 1e-5)) + + def step(self): + """ + Function performs one step of the gradient descent + """ + self.step_no += 1 + + seed = None # seed that will be stored to revert the shuffle + # if we came around all data set index to zero and permute data + if self.stochastic_i + self.stochastic_step_size >= len(self.x): + self.stochastic_i = 0 + + # shuffle data + seed = np.random.randint(100) # random seed + np.random.seed(seed) # set seed of permutation used to shuffle + indices = np.random.permutation(len(self.x)) + self.x = self.x[indices] # permutation + self.y = self.y[indices] + + # calculates gradient and modify theta + grad = self.dj(self.theta, self.stochastic) + if self.stochastic: + self.theta -= np.sum(np.float64(self.alpha) * grad, axis=0) + else: + self.theta -= self.alpha * grad + + # increase index used by stochastic gradient descent + self.stochastic_i += self.stochastic_step_size + + # save history for step back + self.history = self.set_list( + self.history, self.step_no, + (np.copy(self.theta), self.stochastic_i, seed)) + + def step_back(self): + if self.step_no > 0: + self.step_no -= 1 + + # modify theta + self.theta = np.copy(self.history[self.step_no][0]) + + # modify index for stochastic gradient descent + self.stochastic_i = self.history[self.step_no][1] + + # if necessary restore data shuffle + seed = self.history[self.step_no + 1][2] + if seed is not None: # it means data had been permuted on this pos + np.random.seed(seed) # use same seed to revert + indices = np.random.permutation(len(self.x)) + indices_reverse = np.argsort(indices) + # indices of sorted indices gives us reversing shuffle list + self.x = self.x[indices_reverse] + self.y = self.y[indices_reverse] + + def j(self, theta): + """ + Cost function for logistic regression + """ + raise NotImplementedError("Subclasses should implement this!") + + def dj(self, theta, stochastic=False): + """ + Gradient of the cost function for logistic regression + """ + raise NotImplementedError("Subclasses should implement this!") + + def optimized(self): + """ + Function performs whole model training. Not step by step. + """ + + res = fmin_l_bfgs_b(self.j, + np.zeros(self.x.shape[1]), + self.dj) + return res[0] + + @staticmethod + def set_list(l, i, v): + """ + Function sets i-th value in list to v. 
diff --git a/orangecontrib/educational/widgets/utils/linear_regression.py b/orangecontrib/educational/widgets/utils/linear_regression.py
new file mode 100644
index 00000000..3ee1f32d
--- /dev/null
+++ b/orangecontrib/educational/widgets/utils/linear_regression.py
@@ -0,0 +1,67 @@
+import numpy as np
+
+from Orange.classification import Model
+
+from orangecontrib.educational.widgets.utils.gradient_descent import \
+    GradientDescent
+
+
+class LinearRegression(GradientDescent):
+    """
+    Linear regression algorithm with custom cost and gradient functions,
+    which allows the algorithm to be performed step by step
+    """
+
+    @property
+    def model(self):
+        """
+        Return the model for the current parameters
+        """
+        if self.theta is None or self.domain is None:
+            return None
+        else:
+            return LinearRegressionModel(
+                self.theta, self.domain, intercept=self.intercept)
+
+    def j(self, theta):
+        """
+        Mean squared error cost function for linear regression
+        """
+        h = self.h(self.x, theta)
+        return 1.0 / 2.0 * np.sum(np.square(h - self.y).T, axis=0) / len(self.y)
+
+    def dj(self, theta, stochastic=False):
+        """
+        Gradient of the cost function for linear regression
+        """
+        if stochastic:
+            ns = self.stochastic_step_size
+            x = self.x[self.stochastic_i: self.stochastic_i + ns]
+            y = self.y[self.stochastic_i: self.stochastic_i + ns]
+            h = self.h(x, theta)
+            # one gradient row per example in the mini-batch
+            return x * (h - y)[:, None] / len(y)
+        else:
+            x = self.x
+            y = self.y
+            h = self.h(x, theta)
+            return x.T.dot(h - y) / len(y)
+
+    @staticmethod
+    def h(x, theta):
+        """
+        The hypothesis: a linear combination of the attributes
+        """
+        return x.dot(theta.T).T
+
+
+class LinearRegressionModel(Model):
+
+    def __init__(self, theta, domain, intercept=False):
+        super().__init__(domain)
+        self.theta = theta
+        self.name = "Linear Regression"
+        self.intercept = intercept
+
+    def predict_storage(self, data):
+        x = np.c_[np.ones(len(data.X)), data.X] if self.intercept else data.X
+        return LinearRegression.h(x, self.theta)
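Since ``j`` and ``dj`` are hand-written, they are easy to cross-check numerically. A quick finite-difference sanity check (editorial illustration, not part of the patch; it assumes the standard Orange ``housing`` data set)::

    import numpy as np
    from Orange.data import Table
    from Orange.preprocess import Normalize
    from orangecontrib.educational.widgets.utils.linear_regression import \
        LinearRegression

    lr = LinearRegression()
    lr.set_data(Normalize(Table('housing')))
    theta = np.ones(lr.x.shape[1])

    eps = 1e-6
    # central differences of j, one coordinate at a time
    num_grad = np.array(
        [(lr.j(theta + eps * e) - lr.j(theta - eps * e)) / (2 * eps)
         for e in np.eye(len(theta))])
    print(np.allclose(num_grad, lr.dj(theta), atol=1e-4))  # expected: True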
+ """ + if self.theta is None or self.domain is None: + return None + else: + return LogisticRegressionModel(self.theta, self.domain) + + def j(self, theta): + """ + Cost function for logistic regression + """ + h = self.g(self.x.dot(theta.T)).T + y = self.y + return ((-np.sum((y * np.log(h) + (1 - y) * np.log(1 - h)).T, axis=0) + + self.regularization_rate * np.sum(np.square(theta.T), axis=0)) / + len(y)) + + def dj(self, theta, stochastic=False): + """ + Gradient of the cost function for logistic regression + """ + if stochastic: + ns = self.stochastic_step_size + x = self.x[self.stochastic_i: self.stochastic_i + ns] + y = self.y[self.stochastic_i: self.stochastic_i + ns] + return x * (self.g(x.dot(theta)) - y)[:, None] / len(y) + else: + return ((self.g(self.x.dot(theta)) - self.y).dot(self.x) + + self.regularization_rate * theta) / len(self.y) + + @staticmethod + def g(z): + """ + Sigmoid function + + Parameters + ---------- + z : array_like(float) + values to evaluate with function + """ + + # limit values in z to avoid log with 0 produced by values almost 0 + z_mod = np.minimum(z, 20) + z_mod = np.maximum(z_mod, -20) + + return 1.0 / (1 + np.exp(- z_mod)) + +class LogisticRegressionModel(Model): + + def __init__(self, theta, domain): + super().__init__(domain) + self.theta = theta + self.name = "Logistic Regression" + + def predict_storage(self, data): + probabilities = LogisticRegression.g(data.X.dot(self.theta)) + values = np.around(probabilities) + probabilities0 = 1 - probabilities + probabilities = np.column_stack((probabilities0, probabilities)) + return values, probabilities diff --git a/orangecontrib/educational/widgets/utils/tests/test_gradient_descent.py b/orangecontrib/educational/widgets/utils/tests/test_gradient_descent.py new file mode 100644 index 00000000..bf330e07 --- /dev/null +++ b/orangecontrib/educational/widgets/utils/tests/test_gradient_descent.py @@ -0,0 +1,131 @@ +import unittest +from Orange.data import Table +from orangecontrib.educational.widgets.utils.gradient_descent import \ + GradientDescent +from numpy.testing import * +import numpy as np + + +class TestGradientDescent(unittest.TestCase): + + def setUp(self): + self.iris = Table('iris') + # new_domain = Domain(self.data.domain.attributes[:2]) + # self.data = Table(new_domain, self.data) + self.gradient_descent = GradientDescent() + + def test_set_data(self): + """ + Test set data + """ + + # check if None on beginning + self.assertIsNone(self.gradient_descent.x, None) + self.assertIsNone(self.gradient_descent.y, None) + self.assertIsNone(self.gradient_descent.domain, None) + + # check if correct data are provided + self.gradient_descent.set_data(self.iris) + + assert_array_equal(self.gradient_descent.x, self.iris.X) + assert_array_equal(self.gradient_descent.y, self.iris.Y) + self.assertEqual(self.gradient_descent.domain, self.iris.domain) + + # check data remove + self.gradient_descent.set_data(None) + + self.assertIsNone(self.gradient_descent.x, None) + self.assertIsNone(self.gradient_descent.y, None) + self.assertIsNone(self.gradient_descent.domain, None) + + def test_set_theta(self): + """ + Check set theta + """ + gd = self.gradient_descent + + # theta must be none on beginning + self.assertIsNone(gd.theta, None) + + # check if theta set correctly + # theta from np array + gd.set_theta(np.array([1, 2])) + assert_array_equal(gd.theta, np.array([1, 2])) + # history of 0 have to be equal theta + assert_array_equal(gd.history[0][0], np.array([1, 2])) + # step no have to reset to 0 + 
diff --git a/orangecontrib/educational/widgets/utils/tests/test_gradient_descent.py b/orangecontrib/educational/widgets/utils/tests/test_gradient_descent.py
new file mode 100644
index 00000000..bf330e07
--- /dev/null
+++ b/orangecontrib/educational/widgets/utils/tests/test_gradient_descent.py
@@ -0,0 +1,131 @@
+import unittest
+from Orange.data import Table
+from orangecontrib.educational.widgets.utils.gradient_descent import \
+    GradientDescent
+from numpy.testing import *
+import numpy as np
+
+
+class TestGradientDescent(unittest.TestCase):
+
+    def setUp(self):
+        self.iris = Table('iris')
+        self.gradient_descent = GradientDescent()
+
+    def test_set_data(self):
+        """
+        Test setting the data
+        """
+        # x, y and domain are None before data is set
+        self.assertIsNone(self.gradient_descent.x)
+        self.assertIsNone(self.gradient_descent.y)
+        self.assertIsNone(self.gradient_descent.domain)
+
+        # check that data are set correctly
+        self.gradient_descent.set_data(self.iris)
+
+        assert_array_equal(self.gradient_descent.x, self.iris.X)
+        assert_array_equal(self.gradient_descent.y, self.iris.Y)
+        self.assertEqual(self.gradient_descent.domain, self.iris.domain)
+
+        # check data removal
+        self.gradient_descent.set_data(None)
+
+        self.assertIsNone(self.gradient_descent.x)
+        self.assertIsNone(self.gradient_descent.y)
+        self.assertIsNone(self.gradient_descent.domain)
+
+    def test_set_theta(self):
+        """
+        Test setting theta
+        """
+        gd = self.gradient_descent
+
+        # theta must be None at the beginning
+        self.assertIsNone(gd.theta)
+
+        # check that theta is set correctly
+        # theta from a numpy array
+        gd.set_theta(np.array([1, 2]))
+        assert_array_equal(gd.theta, np.array([1, 2]))
+        # history[0] has to equal theta
+        assert_array_equal(gd.history[0][0], np.array([1, 2]))
+        # step_no has to be reset to 0
+        self.assertEqual(gd.step_no, 0)
+
+        # theta from a list
+        gd.set_theta([2, 3])
+        assert_array_equal(gd.theta, np.array([2, 3]))
+        assert_array_equal(gd.history[0][0], np.array([2, 3]))
+        self.assertEqual(gd.step_no, 0)
+
+        # theta is None
+        gd.set_theta(None)
+        self.assertIsNone(gd.theta)
+
+        # anything else sets theta to None
+        gd.set_theta("abc")
+        self.assertIsNone(gd.theta)
+
+    def test_set_alpha(self):
+        """
+        Check that alpha is set correctly
+        """
+        gd = self.gradient_descent
+
+        # alpha is 0.1 by default
+        self.assertEqual(gd.alpha, 0.1)
+
+        # check that alpha is set correctly
+        gd.set_alpha(0.2)
+        self.assertEqual(gd.alpha, 0.2)
+
+        # check that alpha is removed correctly
+        gd.set_alpha(None)
+        self.assertIsNone(gd.alpha)
+
+    def test_model(self):
+        """
+        The model property is not implemented in the base class
+        """
+        self.assertRaises(
+            NotImplementedError, lambda: self.gradient_descent.model)
+
+    def test_j(self):
+        """
+        The cost function j is not implemented in the base class
+        """
+        self.assertRaises(NotImplementedError, self.gradient_descent.j, [1, 1])
+
+    def test_dj(self):
+        """
+        The gradient function is not implemented in the base class
+        """
+        self.assertRaises(NotImplementedError, self.gradient_descent.dj, [1, 1])
+
+    def test_optimized(self):
+        """
+        optimized cannot run in the base class because j and dj are abstract
+        """
+        self.gradient_descent.set_data(self.iris)
+        self.assertRaises(NotImplementedError, self.gradient_descent.optimized)
+
+    def test_set_list(self):
+        """
+        Test set_list
+        """
+        gd = self.gradient_descent
+
+        # pad with Nones if the list is too short
+        self.assertEqual(gd.set_list([], 2, 1), [None, None, 1])
+        self.assertEqual(gd.set_list([2], 2, 1), [2, None, 1])
+        # appending to the end
+        self.assertEqual(gd.set_list([2, 1], 2, 1), [2, 1, 1])
+        # changing the last element
+        self.assertEqual(gd.set_list([2, 1], 1, 3), [2, 3])
+        # changing an element in the middle
+        self.assertEqual(gd.set_list([2, 1, 3], 1, 3), [2, 3, 3])
diff --git a/orangecontrib/educational/widgets/utils/tests/test_linear_regression.py b/orangecontrib/educational/widgets/utils/tests/test_linear_regression.py
new file mode 100644
index 00000000..8e9f6932
--- /dev/null
+++ b/orangecontrib/educational/widgets/utils/tests/test_linear_regression.py
@@ -0,0 +1,382 @@
+import unittest
+from Orange.data import Table, Domain
+from Orange.preprocess import Normalize
+
+from orangecontrib.educational.widgets.utils.linear_regression import \
+    LinearRegression
+from numpy.testing import *
+import numpy as np
+
+
+class TestLinearRegression(unittest.TestCase):
+
+    def setUp(self):
+        self.housing = Normalize(Table('housing'))
+        self.linear_regression = LinearRegression()
+
+    def test_model(self):
+        """
+        Test if the model is correct
+        """
+        lr = self.linear_regression
+
+        # the model is None when there are no data
+        lr.set_theta([1, 2])
+        self.assertIsNone(lr.model)
+
+        # the model is None when there is no theta
+        lr.set_theta(None)
+        lr.set_data(self.housing)
+        self.assertIsNone(lr.model)
+
+        # the model is None when there are neither theta nor data
+        lr.set_data(None)
+        self.assertIsNone(lr.model)
+
+        theta = np.ones(len(self.housing.domain.attributes))
+        # the model exists when both are set
+        lr.set_data(self.housing)
+        lr.set_theta(theta)
+        model = lr.model
+
+        # check that parameters are set correctly
+        self.assertIsNotNone(model)
+        assert_array_equal(model.theta, theta)
+        self.assertEqual(model.name, "Linear Regression")
+
+        # the model returns a prediction for every instance
+        values = model(self.housing)
+        self.assertEqual(len(values), len(self.housing))
+
+        # the same with an intercept
+        theta = np.ones(len(self.housing.domain.attributes) + 1)
+        lr.set_theta(theta)
+        lr.intercept = True
+        model = lr.model
+
+        assert_array_equal(model.theta, theta)
+
+        # the model returns a prediction for every instance
+        values = model(self.housing)
+        self.assertEqual(len(values), len(self.housing))
+
+    def test_converged(self):
+        """
+        Test the convergence flag of the algorithm
+        """
+        lr = self.linear_regression
+        lr.set_data(self.housing)
+        theta = np.ones(len(self.housing.domain.attributes))
+        lr.set_theta(theta)
+        lr.set_alpha(0.1)
+        # convergence is faster with this alpha (found experimentally)
+
+        # it cannot have converged before the first step
+        self.assertFalse(lr.converged)
+
+        # it converges when the mean distance between the current and
+        # the previous theta falls below 1e-2
+        converge = False
+        while not converge:
+            lr.step()
+            converge = (np.sum(
+                np.abs(lr.theta - lr.history[lr.step_no - 1][0])) /
+                len(lr.theta) < 1e-2)
+            self.assertEqual(lr.converged, converge)
+
+    def test_step(self):
+        """
+        Test the step method
+        """
+        lr = self.linear_regression
+
+        theta = np.ones(len(self.housing.domain.attributes))
+        lr.set_theta(theta)
+        lr.set_data(self.housing)
+
+        # check the initial state
+        self.assertEqual(lr.step_no, 0)
+
+        # perform a step
+        lr.step()
+
+        # check that parameters are updated and recorded
+        self.assertEqual(len(lr.theta), len(theta))
+        assert_array_equal(lr.history[1][0], lr.theta)
+        self.assertEqual(lr.step_no, 1)
+
+        # perform another step
+        lr.step()
+
+        # check that parameters are updated and recorded
+        self.assertEqual(len(lr.theta), len(theta))
+        assert_array_equal(lr.history[2][0], lr.theta)
+        self.assertEqual(lr.step_no, 2)
+
+        # check the stochastic variant
+        lr.stochastic = True
+
+        # perform a step
+        lr.step()
+        self.assertEqual(len(lr.theta), len(theta))
+        assert_array_equal(lr.history[3][0], lr.theta)
+
+        # check that the stochastic index is recorded correctly
+        self.assertEqual(
+            lr.history[3][1], lr.stochastic_i)
+
+        # reset the algorithm
+        lr.set_data(self.housing)
+
+        # step until the data get shuffled and check that sizes are intact
+        shuffle = False
+        while not shuffle:
+            lr.step()
+            shuffle = lr.history[lr.step_no][2] is not None
+            if shuffle:
+                self.assertEqual(len(lr.x), len(self.housing))
+                self.assertEqual(len(lr.y), len(self.housing))
+
+        # the same checks with an intercept
+        theta = np.ones(len(self.housing.domain.attributes) + 1)
+        lr.set_theta(theta)
+        lr.intercept = True
+        lr.set_data(self.housing)
+
+        # check the initial state
+        self.assertEqual(lr.step_no, 0)
+
+        # perform a step
+        lr.step()
+
+        # check that parameters are updated and recorded
+        self.assertEqual(len(lr.theta), len(theta))
+        assert_array_equal(lr.history[1][0], lr.theta)
+        self.assertEqual(lr.step_no, 1)
+
+        # perform another step
+        lr.step()
+
+        # check that parameters are updated and recorded
+        self.assertEqual(len(lr.theta), len(theta))
+        assert_array_equal(lr.history[2][0], lr.theta)
+        self.assertEqual(lr.step_no, 2)
+
+        # check the stochastic variant
+        lr.stochastic = True
+
+        # perform a step
+        lr.step()
+        self.assertEqual(len(lr.theta), len(theta))
+        assert_array_equal(lr.history[3][0], lr.theta)
+
+        # check that the stochastic index is recorded correctly
+        self.assertEqual(
+            lr.history[3][1], lr.stochastic_i)
+
+        # reset the algorithm
+        lr.set_data(self.housing)
+
+        # step until the data get shuffled and check that sizes are intact
+        shuffle = False
+        while not shuffle:
+            lr.step()
+            shuffle = lr.history[lr.step_no][2] is not None
+            if shuffle:
+                self.assertEqual(len(lr.x), len(self.housing))
+                self.assertEqual(len(lr.y), len(self.housing))
+
+    def test_step_back(self):
+        """
+        Test the step_back method
+        """
+        lr = self.linear_regression
+        theta = np.ones(len(self.housing.domain.attributes))
+
+        lr.set_data(self.housing)
+        lr.set_theta(np.copy(theta))
+
+        # step back does nothing when no step was done before
+        lr.step_back()
+        assert_array_equal(lr.theta, theta)
+        self.assertEqual(lr.step_no, 0)
+
+        # perform a step and step back
+        lr.step()
+        lr.step_back()
+        assert_array_equal(lr.theta, theta)
+        self.assertEqual(lr.step_no, 0)
+
+        lr.step()
+        theta1 = np.copy(lr.theta)
+        lr.step()
+        lr.step_back()
+
+        assert_array_equal(lr.theta, theta1)
+        self.assertEqual(lr.step_no, 1)
+
+        lr.step_back()
+
+        assert_array_equal(lr.theta, theta)
+        self.assertEqual(lr.step_no, 0)
+
+        # the same for the stochastic variant
+        lr.stochastic = True
+
+        lr.step()
+        lr.step_back()
+        self.assertEqual(lr.stochastic_i, 0)
+        self.assertEqual(lr.step_no, 0)
+
+        lr.step()
+        lr.step()
+        lr.step_back()
+
+        self.assertEqual(lr.stochastic_i, lr.stochastic_step_size)
+        self.assertEqual(lr.step_no, 1)
+
+        lr.step_back()
+
+        self.assertEqual(lr.stochastic_i, 0)
+        self.assertEqual(lr.step_no, 0)
+
+        # step until the data get shuffled, then check that
+        # step_back restores the original order
+        shuffle = False
+        before = np.copy(lr.x)
+        while not shuffle:
+            lr.step()
+            shuffle = lr.history[lr.step_no][2] is not None
+
+        lr.step_back()
+        assert_array_equal(lr.x, before)
+
+        # the same checks with an intercept
+        lr = self.linear_regression
+        theta = np.ones(len(self.housing.domain.attributes) + 1)
+
+        lr.intercept = True
+        lr.set_data(self.housing)
+        lr.set_theta(np.copy(theta))
+
+        # step back does nothing when no step was done before
+        lr.step_back()
+        assert_array_equal(lr.theta, theta)
+        self.assertEqual(lr.step_no, 0)
+
+        # perform a step and step back
+        lr.step()
+        lr.step_back()
+        assert_array_equal(lr.theta, theta)
+        self.assertEqual(lr.step_no, 0)
+
+        lr.step()
+        theta1 = np.copy(lr.theta)
+        lr.step()
+        lr.step_back()
+
+        assert_array_equal(lr.theta, theta1)
+        self.assertEqual(lr.step_no, 1)
+
+        lr.step_back()
+
+        assert_array_equal(lr.theta, theta)
+        self.assertEqual(lr.step_no, 0)
+
+        # the same for the stochastic variant
+        lr.stochastic = True
+
+        lr.step()
+        lr.step_back()
+        self.assertEqual(lr.stochastic_i, 0)
+        self.assertEqual(lr.step_no, 0)
+
+        lr.step()
+        lr.step()
+        lr.step_back()
+
+        self.assertEqual(lr.stochastic_i, lr.stochastic_step_size)
+        self.assertEqual(lr.step_no, 1)
+
+        lr.step_back()
+
+        self.assertEqual(lr.stochastic_i, 0)
+        self.assertEqual(lr.step_no, 0)
+
+        # step until the data get shuffled, then check that
+        # step_back restores the original order
+        shuffle = False
+        before = np.copy(lr.x)
+        while not shuffle:
+            lr.step()
+            shuffle = lr.history[lr.step_no][2] is not None
+
+        lr.step_back()
+        assert_array_equal(lr.x, before)
+
+    def test_j(self):
+        """
+        Test the cost function j
+        """
+        lr = self.linear_regression
+
+        lr.set_data(self.housing)
+
+        theta = np.ones(len(self.housing.domain.attributes))
+        # test with a single theta and with a stack of thetas
+        self.assertEqual(type(lr.j(theta)), np.float64)
+        self.assertEqual(
+            len(lr.j(np.vstack((theta, theta * 2)))), 2)
+
+    def test_dj(self):
+        """
+        Test the gradient function
+        """
+        lr = self.linear_regression
+
+        theta = np.ones(len(self.housing.domain.attributes))
+        lr.set_data(self.housing)
+        # check gradient shapes for the batch and stochastic variants
+        self.assertEqual(len(lr.dj(theta)), len(theta))
+        self.assertTupleEqual(
+            lr.dj(theta, stochastic=True).shape, (30, len(theta)))
+
+        lr.stochastic_step_size = 2
+        self.assertTupleEqual(
+            lr.dj(theta, stochastic=True).shape, (2, len(theta)))
+
+        lr.stochastic_step_size = 1
+        self.assertTupleEqual(
+            lr.dj(theta, stochastic=True).shape, (1, len(theta)))
+
+    def test_optimized(self):
+        """
+        Test that optimized finds the minimum
+        """
+        lr = self.linear_regression
+
+        lr.set_data(self.housing)
+        op_theta = lr.optimized()
+        self.assertEqual(len(op_theta), len(self.housing.domain.attributes))
+
+        # check that this is really a minimum: the cost function is convex,
+        # so j should be higher everywhere around it
+        attr_x = self.housing.domain['CRIM']
+        attr_y = self.housing.domain['ZN']
+        cols = []
+        for attr in (attr_x, attr_y):
+            subset = self.housing[:, attr]
+            cols.append(subset.X)
+        x = np.column_stack(cols)
+
+        domain = Domain([attr_x, attr_y], self.housing.domain.class_var)
+        data = Normalize(Table(domain, x, self.housing.Y), transform_class=True)
+
+        lr.set_data(data)
+        op_theta = lr.optimized()
+
+        self.assertLessEqual(
+            lr.j(op_theta), lr.j(op_theta + np.array([1, 0])))
+        self.assertLessEqual(
+            lr.j(op_theta), lr.j(op_theta + np.array([0, 1])))
+        self.assertLessEqual(
+            lr.j(op_theta), lr.j(op_theta + np.array([-1, 0])))
+        self.assertLessEqual(
+            lr.j(op_theta), lr.j(op_theta + np.array([0, -1])))
diff --git a/orangecontrib/educational/widgets/utils/tests/test_logistic_regression.py b/orangecontrib/educational/widgets/utils/tests/test_logistic_regression.py
new file mode 100644
index 00000000..69753ed3
--- /dev/null
+++ b/orangecontrib/educational/widgets/utils/tests/test_logistic_regression.py
@@ -0,0 +1,289 @@
+import unittest
+from Orange.data import Table
+from orangecontrib.educational.widgets.utils.logistic_regression import \
+    LogisticRegression
+from numpy.testing import *
+import numpy as np
+
+
+class TestLogisticRegression(unittest.TestCase):
+
+    def setUp(self):
+        self.iris = Table('iris')
+        self.logistic_regression = LogisticRegression()
+
+    def test_model(self):
+        """
+        Test if the model is correct
+        """
+        lr = self.logistic_regression
+
+        # the model is None when there are no data
+        lr.set_theta([1, 2])
+        self.assertIsNone(lr.model)
+
+        # the model is None when there is no theta
+        lr.set_theta(None)
+        lr.set_data(self.iris)
+        self.assertIsNone(lr.model)
+
+        # the model is None when there are neither theta nor data
+        lr.set_data(None)
+        self.assertIsNone(lr.model)
+
+        # the model exists when both are set
+        lr.set_data(self.iris)
+        lr.set_theta([1, 1, 1, 1])
+        model = lr.model
+
+        # check that parameters are set correctly
+        self.assertIsNotNone(model)
+        assert_array_equal(model.theta, np.array([1, 1, 1, 1]))
+        self.assertEqual(model.name, "Logistic Regression")
+
+        # the model returns values and probabilities for every instance
+        values, probabilities = model(self.iris, ret=2)
+        self.assertEqual(len(values), len(self.iris))
+        self.assertEqual(len(probabilities), len(self.iris))
+        # values must be 0 where prob < 0.5 and 1 otherwise
+        assert_array_equal(values, np.around(probabilities)[:, 1])
+
+    def test_converged(self):
+        """
+        Test the convergence flag of the algorithm
+        """
+        lr = self.logistic_regression
+        lr.set_data(self.iris)
+        lr.set_theta([1., 1., 1., 1.])
+        lr.set_alpha(100)
+        # convergence is faster with this alpha (found experimentally)
+
+        # it cannot have converged before the first step
+        self.assertFalse(lr.converged)
+
+        # it converges when the mean distance between the current and
+        # the previous theta falls below 1e-2
+        converge = False
+        while not converge:
+            lr.step()
+            converge = (np.sum(
+                np.abs(lr.theta - lr.history[lr.step_no - 1][0])) /
+                len(lr.theta) < 1e-2)
+            self.assertEqual(lr.converged, converge)
+
+    def test_step(self):
+        """
+        Test the step method
+        """
+        lr = self.logistic_regression
+
+        lr.set_theta([1., 1., 1., 1.])
+        lr.set_data(self.iris)
+
+        # check the initial state
+        self.assertEqual(lr.step_no, 0)
+
+        # perform a step
+        lr.step()
+
+        # check that parameters are updated and recorded
+        self.assertEqual(len(lr.theta), 4)
+        assert_array_equal(lr.history[1][0], lr.theta)
+
+        # perform another step
+        lr.step()
+
+        # check that parameters are updated and recorded
+        self.assertEqual(len(lr.theta), 4)
+        assert_array_equal(lr.history[2][0], lr.theta)
+
+        # check the stochastic variant
+        lr.stochastic = True
+
+        # perform a step
+        lr.step()
+        self.assertEqual(len(lr.theta), 4)
+        assert_array_equal(lr.history[3][0], lr.theta)
+
+        # check that the stochastic index is recorded correctly
+        self.assertEqual(
+            lr.history[3][1], lr.stochastic_i)
+
+        # reset the algorithm
+        lr.set_data(self.iris)
+
+        # step until the data get shuffled and check that sizes are intact
+        shuffle = False
+        while not shuffle:
+            lr.step()
+            shuffle = lr.history[lr.step_no][2] is not None
+            if shuffle:
+                self.assertEqual(len(lr.x), len(self.iris))
+                self.assertEqual(len(lr.y), len(self.iris))
+
+    def test_step_back(self):
+        """
+        Test the step_back method
+        """
+        lr = self.logistic_regression
+        theta = [1., 1., 1., 1.]
+
+        lr.set_data(self.iris)
+        lr.set_theta(theta)
+
+        # step back does nothing when no step was done before
+        lr.step_back()
+        assert_array_equal(lr.theta, theta)
+        self.assertEqual(lr.step_no, 0)
+
+        # perform a step and step back
+        lr.step()
+        lr.step_back()
+        assert_array_equal(lr.theta, theta)
+        self.assertEqual(lr.step_no, 0)
+
+        lr.step()
+        theta1 = np.copy(lr.theta)
+        lr.step()
+        lr.step_back()
+
+        assert_array_equal(lr.theta, theta1)
+        self.assertEqual(lr.step_no, 1)
+
+        lr.step_back()
+
+        assert_array_equal(lr.theta, theta)
+        self.assertEqual(lr.step_no, 0)
+
+        # the same for the stochastic variant
+        lr.stochastic = True
+
+        lr.step()
+        lr.step_back()
+        self.assertEqual(lr.stochastic_i, 0)
+        self.assertEqual(lr.step_no, 0)
+
+        lr.step()
+        lr.step()
+        lr.step_back()
+
+        self.assertEqual(lr.stochastic_i, lr.stochastic_step_size)
+        self.assertEqual(lr.step_no, 1)
+
+        lr.step_back()
+
+        self.assertEqual(lr.stochastic_i, 0)
+        self.assertEqual(lr.step_no, 0)
+
+        # step until the data get shuffled, then check that
+        # step_back restores the original order
+        shuffle = False
+        before = np.copy(lr.x)
+        while not shuffle:
+            lr.step()
+            shuffle = lr.history[lr.step_no][2] is not None
+
+        lr.step_back()
+        assert_array_equal(lr.x, before)
+
+    def test_j(self):
+        """
+        Test the cost function j
+        """
+        lr = self.logistic_regression
+
+        lr.set_data(self.iris)
+
+        # test with a single theta and with a stack of thetas
+        self.assertEqual(type(lr.j(np.array([1., 1., 1., 1.]))), np.float64)
+        self.assertEqual(
+            len(lr.j(np.array([[1., 1., 1., 1.], [2, 2, 2, 2]]))), 2)
+
+    def test_dj(self):
+        """
+        Test the gradient function
+        """
+        lr = self.logistic_regression
+
+        lr.set_data(self.iris)
+        # check gradient shapes for the batch and stochastic variants
+        self.assertEqual(len(lr.dj(np.array([1, 1, 1, 1]))), 4)
+        self.assertTupleEqual(
+            lr.dj(np.array([1, 1, 1, 1]), stochastic=True).shape, (30, 4))
+
+        lr.stochastic_step_size = 2
+        self.assertTupleEqual(
+            lr.dj(np.array([1, 1, 1, 1]), stochastic=True).shape, (2, 4))
+
+        lr.stochastic_step_size = 1
+        self.assertTupleEqual(
+            lr.dj(np.array([1, 1, 1, 1]), stochastic=True).shape, (1, 4))
+
+    def test_optimized(self):
+        """
+        Test that optimized finds the minimum
+        """
+        lr = self.logistic_regression
+
+        lr.set_data(self.iris)
+        op_theta = lr.optimized()
+        self.assertEqual(len(op_theta), 4)
+
+        # check that this is really a minimum: the cost function is convex,
+        # so j should be higher everywhere around it
+        self.assertLessEqual(
+            lr.j(op_theta), lr.j(op_theta + np.array([1, 0, 0, 0])))
+        self.assertLessEqual(
+            lr.j(op_theta), lr.j(op_theta + np.array([0, 1, 0, 0])))
+        self.assertLessEqual(
+            lr.j(op_theta), lr.j(op_theta + np.array([0, 0, 1, 0])))
+        self.assertLessEqual(
+            lr.j(op_theta), lr.j(op_theta + np.array([0, 0, 0, 1])))
+
+    def test_g(self):
+        """
+        Test the sigmoid function
+        """
+        lr = self.logistic_regression
+
+        # test output types and lengths
+        self.assertEqual(type(lr.g(1)), np.float64)
+        self.assertEqual(len(lr.g(np.array([1, 1]))), 2)
+        self.assertEqual(len(lr.g(np.array([1, 1, 1]))), 3)
+        self.assertEqual(len(lr.g(np.array([1, 1, 1, 1]))), 4)
+
+        # test correctness: the function is bounded between 0 and 1
+        self.assertGreaterEqual(lr.g(-10000), 0)
+        self.assertGreaterEqual(lr.g(-1000), 0)
+        self.assertGreaterEqual(lr.g(-10), 0)
+        self.assertGreaterEqual(lr.g(-1), 0)
+        self.assertGreaterEqual(lr.g(0), 0)
+        self.assertGreaterEqual(lr.g(1), 0)
+        self.assertGreaterEqual(lr.g(10), 0)
+        self.assertGreaterEqual(lr.g(1000), 0)
+        self.assertGreaterEqual(lr.g(10000), 0)
+
+        self.assertLessEqual(lr.g(-10000), 1)
+        self.assertLessEqual(lr.g(-1000), 1)
+        self.assertLessEqual(lr.g(-10), 1)
+        self.assertLessEqual(lr.g(-1), 1)
+        self.assertLessEqual(lr.g(0), 1)
+        self.assertLessEqual(lr.g(1), 1)
+        self.assertLessEqual(lr.g(10), 1)
+        self.assertLessEqual(lr.g(1000), 1)
+        self.assertLessEqual(lr.g(10000), 1)
+
+    def test_set_list(self):
+        """
+        Test set_list
+        """
+        lr = self.logistic_regression
+
+        # pad with Nones if the list is too short
+        self.assertEqual(lr.set_list([], 2, 1), [None, None, 1])
+        self.assertEqual(lr.set_list([2], 2, 1), [2, None, 1])
+        # appending to the end
+        self.assertEqual(lr.set_list([2, 1], 2, 1), [2, 1, 1])
+        # changing the last element
+        self.assertEqual(lr.set_list([2, 1], 1, 3), [2, 3])
+        # changing an element in the middle
+        self.assertEqual(lr.set_list([2, 1, 3], 1, 3), [2, 3, 3])
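To tie the pieces together, a minimal end-to-end run mirroring what ``test_converged`` and ``test_optimized`` exercise (editorial sketch, not part of the patch; it assumes the standard Orange ``iris`` data set)::

    import numpy as np
    from Orange.data import Table
    from orangecontrib.educational.widgets.utils.logistic_regression import \
        LogisticRegression

    lr = LogisticRegression(alpha=100, theta=np.ones(4), data=Table('iris'))
    while not lr.converged:  # mean parameter change per step drops below 1e-2
        lr.step()
    # the stepped solution should end up close in cost to the L-BFGS-B optimum
    print(lr.step_no, lr.j(lr.theta), lr.j(lr.optimized()))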