diff --git a/doc/widgets/polynomial-regression.md b/doc/widgets/polynomial-regression.md
index 62051e3..0a54051 100644
--- a/doc/widgets/polynomial-regression.md
+++ b/doc/widgets/polynomial-regression.md
@@ -47,4 +47,4 @@ To observe different results, change **Linear Regression** to any other regressi
 
 ![](images/polyregressiontree1.png)
 
-![](images/polynomial-regression-tree-exp1.png)
+![](images/polynomial-regression-tree-exp1.png)
\ No newline at end of file
diff --git a/orangecontrib/educational/__init__.py b/orangecontrib/educational/__init__.py
index e69de29..643c882 100644
--- a/orangecontrib/educational/__init__.py
+++ b/orangecontrib/educational/__init__.py
@@ -0,0 +1,17 @@
+from Orange.data import Table
+
+
+# Remove this when we require Orange 3.34
+if not hasattr(Table, "get_column"):
+    import scipy.sparse as sp
+    import numpy as np
+
+    def get_column(self, column):
+        col, _ = self.get_column_view(column)
+        if sp.issparse(col):
+            col = col.toarray().reshape(-1)
+        if self.domain[column].is_primitive():
+            col = col.astype(np.float64)
+        return col
+
+    Table.get_column = get_column
\ No newline at end of file
diff --git a/orangecontrib/educational/widgets/owpolynomialregression.py b/orangecontrib/educational/widgets/owpolynomialregression.py
index 3b47909..34992e2 100644
--- a/orangecontrib/educational/widgets/owpolynomialregression.py
+++ b/orangecontrib/educational/widgets/owpolynomialregression.py
@@ -1,7 +1,7 @@
 import math
 
 from Orange.data.util import SharedComputeValue
-from Orange.evaluation import RMSE, TestOnTrainingData, MAE
+from Orange.evaluation import RMSE, MAE, Results
 
 from AnyQt.QtCore import Qt, QRectF, QPointF
 from AnyQt.QtGui import QColor, QPalette, QPen, QFont
@@ -16,7 +16,6 @@
 from Orange.data.variable import ContinuousVariable, StringVariable
 from Orange.regression.linear import RidgeRegressionLearner, LinearRegressionLearner
 from Orange.base import Learner
-from Orange.preprocess import PreprocessorList
 from Orange.regression.mean import MeanModel
 from Orange.statistics.distribution import Continuous
 from Orange.widgets import settings, gui
@@ -35,6 +34,14 @@ def __init__(self, compute_shared, idx):
     def compute(self, _, shared_data):
         return shared_data[:, self.idx]
 
+    def __eq__(self, other):
+        # Remove the first test after we require Orange 3.33
+        return type(self) is type(other) \
+            and super().__eq__(other) and self.idx == other.idx
+
+    def __hash__(self):
+        return hash((super().__hash__(), self.idx))
+
 
 class PolynomialFeatures:
    def __init__(self, degree=2, include_bias=True):
@@ -42,20 +49,27 @@ def __init__(self, degree=2, include_bias=True):
         self.include_bias = include_bias
 
     def __call__(self, data):
-        features = []
-        # PolynomialFeatures raises ValueError when degree=0 and include_bias=False
-        if self.degree > 0 or self.include_bias:
+        if self.degree == 0:
+            # A zero-degree expansion without an intercept shouldn't have a
+            # column of 1's; otherwise we would have an intercept. But most
+            # learners need some data, so we provide a column of zeros.
+            variables = [
+                ContinuousVariable(
+                    "x0",
+                    compute_value=TempMeanModel(int(self.include_bias)))]
+        else:
             pf = skl_preprocessing.PolynomialFeatures(
                 self.degree, include_bias=self.include_bias
             )
             pf.fit(data.X)
-            cv = lambda table: pf.transform(table.X)
+            cv = lambda table: pf.transform(table.transform(data.domain).X)
             features = pf.get_feature_names_out() if pf.n_output_features_ else []
-        domain = Domain(
-            [
+            variables = [
                 ContinuousVariable(f, compute_value=PolynomialFeatureSharedCV(cv, i))
                 for i, f in enumerate(features)
-            ],
+            ]
+        domain = Domain(
+            variables,
             class_vars=data.domain.class_vars,
             metas=data.domain.metas,
         )
@@ -68,6 +82,11 @@ class TempMeanModel(MeanModel):
     original domain space which produces empty X - and so the error is raised
     Here we bypass model's __call__
     """
+    InheritEq = True
+
+    def __init__(self, const):
+        distr = Continuous(np.array([[const], [1.0]]))
+        super().__init__(distr)
 
     def __call__(self, data, *args, **kwargs):
         return self.predict(data)
@@ -76,10 +95,58 @@ def __call__(self, data, *args, **kwargs):
 class RegressTo0(Learner):
     @staticmethod
     def __call__(data, *args, **kwargs):
-        model = TempMeanModel(Continuous(np.empty(0)))
+        model = TempMeanModel(0)
         return model
 
 
+class PolynomialLearnerWrapper(Learner):
+    def __init__(self, x_var, y_var, degree, learner, preprocessors, fit_intercept):
+        super().__init__()
+        self.x_var = x_var
+        self.y_var = y_var
+        self.degree = degree
+        self.learner = learner
+        self.preprocessors = preprocessors
+        self.fit_intercept = fit_intercept
+
+    def __call__(self, data: Table, progress_callback=None):
+        data = data.transform(Domain([self.x_var], self.y_var))
+        *_, model = self.data_and_model(data)
+        return model
+
+    def data_and_model(self, data: Table):
+        """
+        Trains the model, and also returns temporary tables
+
+        The function is used in the widget instead of __call__ to avoid
+        recomputing preprocessed and expanded data.
+        """
+        valid_mask = np.isfinite(data.get_column(self.x_var)) \
+                     & np.isfinite(data.get_column(self.y_var))
+        data_table = Table.from_table(
+            Domain([self.x_var], self.y_var), data[valid_mask]
+        )
+
+        # all lines have nan
+        if np.all(np.isnan(data_table.X.flatten()) | np.isnan(data_table.Y)):
+            return None, None, None, None, None
+
+        # apply preprocessors on the input first
+        preprocessed_table = (
+            self.preprocessors(data_table) if self.preprocessors else data_table
+        )
+
+        # use polynomial preprocessor after applying preprocessors from input
+        poly_preprocessor = PolynomialFeatures(
+            degree=self.degree, include_bias=self.fit_intercept
+        )
+
+        expanded_data = poly_preprocessor(preprocessed_table)
+        predictor = self.learner(expanded_data)
+        return (data_table, preprocessed_table, poly_preprocessor,
+                expanded_data, predictor)
+
+
 class OWPolynomialRegression(OWBaseLearner):
     name = "Polynomial Regression"
     description = "Univariate regression with polynomial expansion."
@@ -92,8 +159,9 @@ class Inputs(OWBaseLearner.Inputs):
         learner = Input("Learner", Learner)
 
     class Outputs(OWBaseLearner.Outputs):
-        coefficients = Output("Coefficients", Table, default=True)
-        data = Output("Data", Table)
+        coefficients = Output("Coefficients", Table, explicit=True)
+        data = Output("Data", Table, explicit=True)
+
 
     replaces = [
         "Orange.widgets.regression.owunivariateregression."
@@ -339,138 +407,121 @@ def _has_intercept(self):
         return self.learner is not None or self.fit_intercept
 
     def apply(self):
+        def error_and_clear(error=None):
+            if error:
+                error()
+            self.clear_plot()
+            self.Outputs.data.send(None)
+            self.Outputs.coefficients.send(None)
+            self.Outputs.learner.send(None)
+            self.Outputs.model.send(None)
+
+        self.Error.all_none.clear()
+        self.Error.same_dep_indepvar.clear()
+        if self.data is None:
+            error_and_clear()
+            return
+        if self.x_var is self.y_var:
+            error_and_clear(self.Error.same_dep_indepvar)
+            return
+
         degree = self.polynomialexpansion
-        if degree == 0 and not self.fit_intercept:
+        if degree == 0 and not self.fit_intercept and (
+                self.learner is None
+                or not getattr(self.learner, "fit_intercept", True)):
             learner = RegressTo0()
         else:
             # For LinearRegressionLearner, set fit_intercept to False:
             # the intercept is added as bias term in polynomial expansion
-            # If there is a learner on input, we have not control over this;
-            # we include_bias to have the placeholder for the coefficient
-            learner = self.learner or LinearRegressionLearner(fit_intercept=False)
+            learner = self.learner \
+                or LinearRegressionLearner(fit_intercept=False)
+
+        include_bias = self.learner is None and self.fit_intercept
+        poly_learner = PolynomialLearnerWrapper(
+            self.x_var, self.y_var, degree, learner,
+            self.preprocessors, include_bias)
+        poly_learner.name = self.learner_name
+
+        data_table, preprocessed_table, poly_preprocessor, \
+            expanded_data, predictor = \
+            poly_learner.data_and_model(self.data)
+        if preprocessed_table is None:
+            error_and_clear(self.Error.all_none)
+            return
 
-        learner.name = self.learner_name
-        predictor = None
         model = None
+        if hasattr(predictor, "model"):
+            model = predictor.model
+        elif hasattr(predictor, "skl_model"):
+            model = predictor.skl_model
 
-        self.Error.all_none.clear()
-        self.Error.same_dep_indepvar.clear()
-
-        if self.data is not None:
-            if self.x_var is self.y_var:
-                self.Error.same_dep_indepvar()
-                self.clear_plot()
-                return
-
-            valid_mask = ~np.isnan(self.data.X).any(axis=1)
-            data_table = Table.from_table(
-                Domain([self.x_var], self.y_var), self.data[valid_mask]
-            )
-
-            # all lines has nan
-            if np.all(np.isnan(data_table.X.flatten()) | np.isnan(data_table.Y)):
-                self.Error.all_none()
-                self.clear_plot()
-                return
-
-            # apply preprocessors on the input first
-            preprocessed_table = (
-                self.preprocessors(data_table) if self.preprocessors else data_table
-            )
-
-            # use polynomial preprocessor after applying preprocessors from input
-            poly_preprocessor = PolynomialFeatures(
-                degree=degree, include_bias=self.fit_intercept
-            )
-            predictor = learner(poly_preprocessor(preprocessed_table))
-
-            if hasattr(predictor, "model"):
-                model = predictor.model
-            elif hasattr(predictor, "skl_model"):
-                model = predictor.skl_model
-
-            x = preprocessed_table.X.ravel()
-            y = preprocessed_table.Y.ravel()
+        x = preprocessed_table.X.ravel()
+        y = preprocessed_table.Y.ravel()
 
-            linspace = Table.from_numpy(
-                Domain(data_table.domain.attributes),
-                np.linspace(np.nanmin(x), np.nanmax(x), 1000).reshape(-1, 1),
-            )
-            values = predictor(linspace, predictor.Value)
-
-            # calculate prediction for x from data for error bars and scores
-            validation = TestOnTrainingData()
-            pp = self.preprocessors
-            preprocessors = ([pp] if pp else []) + [poly_preprocessor]
-            predicted = validation(
-                data_table, [learner], preprocessor=PreprocessorList(preprocessors)
-            )
-            self.rmse = round(RMSE(predicted)[0], 6)
-            self.mae = round(MAE(predicted)[0], 6)
-
-            # plot error bars
-            self.plot_error_bars(x, predicted.actual, predicted.predicted.ravel())
-
-            # plot data points
-            self.plot_scatter_points(x, y)
-
-            # plot regression line
-            x_data, y_data = linspace.X.ravel(), values.ravel()
-            if self.polynomialexpansion == 0:
-                self.plot_infinite_line(x_data[0], y_data[0], 0)
-            elif self.polynomialexpansion == 1 and hasattr(model, "coef_"):
-                k = model.coef_[1 if self._has_intercept else 0]
-                self.plot_infinite_line(x_data[0], y_data[0],
-                                        math.degrees(math.atan(k)))
-            else:
-                self.plot_regression_line(x_data, y_data)
+        linspace = Table.from_numpy(
+            Domain(data_table.domain.attributes),
+            np.linspace(np.nanmin(x), np.nanmax(x), 1000).reshape(-1, 1),
+        )
+        values = predictor(linspace, predictor.Value)
+
+        predicted = predictor(data_table, predictor.Value)
+        results = Results(
+            domain=self.data.domain,
+            nrows=len(data_table), learners=[poly_learner],
+            row_indices=np.arange(len(data_table)),
+            folds=(Ellipsis,),
+            actual=data_table.Y,
+            predicted=predicted[None, :])
+        self.rmse = round(RMSE(results)[0], 6)
+        self.mae = round(MAE(results)[0], 6)
+
+        # plot error bars
+        self.plot_error_bars(x, results.actual, results.predicted.ravel())
+
+        # plot data points
+        self.plot_scatter_points(x, y)
+
+        # plot regression line
+        x_data, y_data = linspace.X.ravel(), values.ravel()
+        if self.polynomialexpansion == 0:
+            self.plot_infinite_line(x_data[0], y_data[0], 0)
+        elif self.polynomialexpansion == 1 and self.learner is None:
+            k = model.coef_[1 if self._has_intercept else 0]
+            self.plot_infinite_line(x_data[0], y_data[0],
+                                    math.degrees(math.atan(k)))
+        else:
+            self.plot_regression_line(x_data, y_data)
 
-            self.plot.getAxis("bottom").setLabel(self.x_var.name)
-            self.plot.getAxis("left").setLabel(self.y_var.name)
-            self.set_range(x, y)
+        self.plot.getAxis("bottom").setLabel(self.x_var.name)
+        self.plot.getAxis("left").setLabel(self.y_var.name)
+        self.set_range(x, y)
 
-        self.Outputs.learner.send(learner)
+        self.Outputs.learner.send(poly_learner)
         self.Outputs.model.send(predictor)
 
         # Send model coefficents
         if model is not None and hasattr(model, "coef_"):
+            if getattr(learner, "fit_intercept", True):
+                coefs = [model.intercept_]
+            else:
+                coefs = []
+            coefs += list(model.coef_)
+        elif self.learner is None \
+                and isinstance(predictor, MeanModel) \
+                and self.fit_intercept:
+            coefs = [predictor.mean]
+        else:
+            coefs = None
+        if coefs:
             domain = Domain([ContinuousVariable("coef")],
                             metas=[StringVariable("name")])
             names = self._varnames(self.x_var.name)
-            coefs = list(model.coef_)
-            if self._has_intercept:
-                coefs[0] += model.intercept_
             coef_table = Table.from_list(domain, list(zip(coefs, names)))
             self.Outputs.coefficients.send(coef_table)
         else:
             self.Outputs.coefficients.send(None)
 
-        self.send_data()
-
-    def send_data(self):
-        if self.data is not None:
-            data_table = Table.from_table(
-                Domain([self.x_var], self.y_var), self.data)
-            polyfeatures = skl_preprocessing.PolynomialFeatures(
-                self.polynomialexpansion, include_bias=self._has_intercept)
-
-            valid_mask = ~np.isnan(data_table.X).any(axis=1)
-            if not self._has_intercept and not self.polynomialexpansion:
-                x = np.empty((len(data_table), 0))
-            else:
-                x = data_table.X[valid_mask]
-                x = polyfeatures.fit_transform(x)
-
-            out_array = np.hstack((x, data_table.Y[np.newaxis].T[valid_mask]))
-
-            out_domain = Domain(
-                [ContinuousVariable(name)
-                 for name in self._varnames(self.x_var.name)],
-                self.y_var)
-            self.Outputs.data.send(Table.from_numpy(out_domain, out_array))
-            return
-
-        self.Outputs.data.send(None)
+        self.Outputs.data.send(expanded_data)
 
     def send_report(self):
         if self.data is None:
diff --git a/orangecontrib/educational/widgets/tests/test_owpolynomialregression.py b/orangecontrib/educational/widgets/tests/test_owpolynomialregression.py
index 6ac734e..37d290d 100644
--- a/orangecontrib/educational/widgets/tests/test_owpolynomialregression.py
+++ b/orangecontrib/educational/widgets/tests/test_owpolynomialregression.py
@@ -1,9 +1,12 @@
+import unittest
+
 import numpy as np
 
 from Orange.data import Table, Domain, ContinuousVariable
 from Orange.widgets.tests.base import WidgetTest
 from orangecontrib.educational.widgets.owpolynomialregression \
-    import OWPolynomialRegression
+    import OWPolynomialRegression, PolynomialFeatures, RegressTo0, \
+    TempMeanModel, PolynomialLearnerWrapper
 from Orange.regression import (LinearRegressionLearner,
                                RandomForestRegressionLearner)
 from Orange.regression.tree import TreeLearner as TreeRegressionLearner
@@ -207,28 +210,55 @@ def test_data_output(self):
         """
         Check if correct data on output
         """
         w = self.widget
+        spin = w.controls.polynomialexpansion
         self.assertIsNone(self.get_output(w.Outputs.data))
-        self.widget.set_data(self.data)
-        spin = self.widget.controls.polynomialexpansion
+
+        u, x, y, z = (ContinuousVariable(n) for n in "uxyz")
+        domain = Domain([u, x], y)
+        data = Table.from_numpy(
+            domain,
+            [[1, 1], [0, 2], [np.nan, 3], [-1, np.nan], [2, 4]],
+            [3, 5, 7, 7, np.nan])
+
+        spin.setValue(0)
+        self.send_signal(w.Inputs.data, data)
+        w.x_var = x
+        w.y_var = y
+
+        w.fit_intercept = False
 
         spin.setValue(1)
-        self.widget.send_data()
-        self.assertEqual(len(self.get_output(w.Outputs.data).domain.attributes), 2)
+        np.testing.assert_almost_equal(
+            self.get_output(w.Outputs.data).X.T,
+            [[1, 2, 3]])
 
         spin.setValue(2)
-        self.widget.send_data()
-        self.assertEqual(len(self.get_output(w.Outputs.data).domain.attributes), 3)
+        np.testing.assert_almost_equal(
+            self.get_output(w.Outputs.data).X.T,
+            [[1, 2, 3], [1, 4, 9]])
 
         spin.setValue(3)
-        self.widget.send_data()
-        self.assertEqual(len(self.get_output(w.Outputs.data).domain.attributes), 4)
+        np.testing.assert_almost_equal(
+            self.get_output(w.Outputs.data).X.T,
+            [[1, 2, 3], [1, 4, 9], [1, 8, 27]])
 
-        spin.setValue(4)
-        self.widget.send_data()
-        self.assertEqual(len(self.get_output(w.Outputs.data).domain.attributes), 5)
+        w.fit_intercept = True
+        spin.setValue(1)
+        np.testing.assert_almost_equal(
+            self.get_output(w.Outputs.data).X.T,
+            [[1, 1, 1], [1, 2, 3]])
 
-        self.widget.set_data(None)
-        self.widget.send_data()
+        spin.setValue(2)
+        np.testing.assert_almost_equal(
+            self.get_output(w.Outputs.data).X.T,
+            [[1, 1, 1], [1, 2, 3], [1, 4, 9]])
+
+        spin.setValue(3)
+        np.testing.assert_almost_equal(
+            self.get_output(w.Outputs.data).X.T,
+            [[1, 1, 1], [1, 2, 3], [1, 4, 9], [1, 8, 27]])
+
+        self.send_signal(w.Inputs.data, None)
         self.assertIsNone(self.get_output(w.Outputs.data))
@@ -244,24 +274,32 @@ def test_data_nan_row(self):
 
     def test_coefficients(self):
         w = self.widget
-        self.send_signal(w.Inputs.data, self.data)
+        u, x, y, z = (ContinuousVariable(n) for n in "uxyz")
+        domain = Domain([u, x], y)
+        data = Table.from_numpy(
+            domain,
+            [[1, 1], [0, 2], [np.nan, 3], [-1, np.nan], [2, 4]],
+            [3, 5, 7, 7, np.nan])
+
+        self.send_signal(w.Inputs.data, data)
+        w.x_var = x
+        w.y_var = y
+
         spin = self.widget.controls.polynomialexpansion
         intercept_cb = self.widget.controls.fit_intercept
+        intercept_cb.setChecked(True)
 
         spin.setValue(0)
         coef = self.get_output(w.Outputs.coefficients)
-        self.assertEqual(1, len(coef))
-        self.assertTrue(coef.X.all())  # all nonzero
+        np.testing.assert_almost_equal(coef.X, [[5]])
 
         spin.setValue(1)
         coef = self.get_output(w.Outputs.coefficients)
-        self.assertEqual(2, len(coef))
-        self.assertTrue(coef.X.all())  # all nonzero
+        np.testing.assert_almost_equal(coef.X.T, [[1, 2]])
 
         spin.setValue(2)
         coef = self.get_output(w.Outputs.coefficients)
-        self.assertEqual(3, len(coef))
-        self.assertTrue(coef.X.all())  # all nonzero
+        np.testing.assert_almost_equal(coef.X.T, [[1, 2, 0]])
 
         intercept_cb.setChecked(False)
         spin.setValue(0)
@@ -270,50 +308,234 @@
 
         spin.setValue(1)
         coef = self.get_output(w.Outputs.coefficients)
-        self.assertEqual(1, len(coef))
-        self.assertTrue(coef.X.all())  # all nonzero
+        # I haven't computed this value manually; I just copied it
+        np.testing.assert_almost_equal(coef.X.T, [[2.4285714]])
 
         spin.setValue(2)
         coef = self.get_output(w.Outputs.coefficients)
-        self.assertEqual(2, len(coef))
-        self.assertTrue(coef.X.all())  # all nonzero
+        # I haven't computed these values manually; I just copied them
+        np.testing.assert_almost_equal(coef.X.T, [[3.1052632, -0.2631579]])
 
-        self.send_signal(w.Inputs.learner, LinearRegressionLearner())
+        self.send_signal(w.Inputs.learner, LinearRegressionLearner(fit_intercept=True))
 
         spin.setValue(0)
         coef = self.get_output(w.Outputs.coefficients)
-        self.assertEqual(1, len(coef))
-        self.assertTrue(coef.X.all())  # all nonzero
+        np.testing.assert_almost_equal(coef.X.T, [[5]])
 
         spin.setValue(1)
         coef = self.get_output(w.Outputs.coefficients)
-        self.assertEqual(2, len(coef))
-        self.assertTrue(coef.X.all())  # all nonzero
+        np.testing.assert_almost_equal(coef.X.T, [[1, 2]])
 
         spin.setValue(2)
         coef = self.get_output(w.Outputs.coefficients)
-        self.assertEqual(3, len(coef))
-        self.assertTrue(coef.X.all())  # all nonzero
+        np.testing.assert_almost_equal(coef.X.T, [[1, 2, 0]])
 
-        # intercept is produced by PolynomialFeatures preprocessors
         self.send_signal(w.Inputs.learner, LinearRegressionLearner(fit_intercept=False))
 
         spin.setValue(0)
         coef = self.get_output(w.Outputs.coefficients)
-        self.assertEqual(1, len(coef))
-        self.assertTrue(coef.X.all())
+        self.assertIsNone(coef)
 
         spin.setValue(1)
         coef = self.get_output(w.Outputs.coefficients)
-        self.assertEqual(2, len(coef))
-        self.assertTrue(coef.X.all())  # all nonzero
+        # I haven't computed this value manually; I just copied it
+        np.testing.assert_almost_equal(coef.X.T, [[2.4285714]])
 
         spin.setValue(2)
         coef = self.get_output(w.Outputs.coefficients)
-        self.assertEqual(3, len(coef))
-        self.assertTrue(coef.X.all())  # all nonzero
+        # I haven't computed these values manually; I just copied them
+        np.testing.assert_almost_equal(coef.X.T, [[3.1052632, -0.2631579]])
+
+
+class PolynomialFeaturesTest(unittest.TestCase):
+    def test_1d(self):
+        x, y, z = (ContinuousVariable(n) for n in "xyz")
+        domain = Domain([x], y, [z])
+        data = Table.from_numpy(
+            domain,
+            [[1], [2], [3]], [1, 2, 3], [[1], [2], [3]])
+        data2 = Table.from_numpy(
+            domain,
+            [[3], [4], [5]], [1, 2, 3], [[1], [2], [3]])
+        tf = PolynomialFeatures(1, False)(data)
+        self.assertIs(tf.domain.class_var, y)
+        np.testing.assert_equal(tf.Y, [1, 2, 3])
+        self.assertEqual(tf.domain.metas, (z,))
+        np.testing.assert_equal(tf.metas.T, [[1, 2, 3]])
+
+        np.testing.assert_equal(tf.X.T, [[1, 2, 3]])
+        np.testing.assert_equal(data2.transform(tf.domain).X.T, [[3, 4, 5]])
+
+        tf = PolynomialFeatures(1, True)(data)
+        np.testing.assert_equal(
+            tf.X.T,
+            [[1, 1, 1], [1, 2, 3]])
+        np.testing.assert_equal(
+            data2.transform(tf.domain).X.T,
+            [[1, 1, 1], [3, 4, 5]])
+
+        tf = PolynomialFeatures(2, True)(data)
+        np.testing.assert_equal(
+            tf.X.T,
+            [[1, 1, 1], [1, 2, 3], [1, 4, 9]])
+        np.testing.assert_equal(
+            data2.transform(tf.domain).X.T,
+            [[1, 1, 1], [3, 4, 5], [9, 16, 25]])
+
+        tf = PolynomialFeatures(2, False)(data)
+        np.testing.assert_equal(
+            tf.X.T,
+            [[1, 2, 3], [1, 4, 9]])
+        np.testing.assert_equal(
+            data2.transform(tf.domain).X.T,
+            [[3, 4, 5], [9, 16, 25]])
+
+        tf = PolynomialFeatures(3, True)(data)
+        np.testing.assert_equal(
+            tf.X.T,
+            [[1, 1, 1], [1, 2, 3], [1, 4, 9], [1, 8, 27]])
+        np.testing.assert_equal(
+            data2.transform(tf.domain).X.T,
+            [[1, 1, 1], [3, 4, 5], [9, 16, 25], [27, 64, 125]])
+
+        tf = PolynomialFeatures(3, False)(data)
+        np.testing.assert_equal(
+            tf.X.T,
+            [[1, 2, 3], [1, 4, 9], [1, 8, 27]])
+        np.testing.assert_equal(
+            data2.transform(tf.domain).X.T,
+            [[3, 4, 5], [9, 16, 25], [27, 64, 125]])
+
+        tf = PolynomialFeatures(0, True)(data)
+        np.testing.assert_equal(
+            tf.X.T,
+            [[1, 1, 1]])
+        np.testing.assert_equal(
+            data2.transform(tf.domain).X.T,
+            [[1, 1, 1]])
+
+        tf = PolynomialFeatures(0, False)(data)
+        np.testing.assert_equal(
+            tf.X.T,
+            [[0, 0, 0]])
+        np.testing.assert_equal(
+            data2.transform(tf.domain).X.T,
+            [[0, 0, 0]])
+
+    def test_nd(self):
+        x, y, z = (ContinuousVariable(n) for n in "xyz")
+        domain = Domain([x, y, z])
+        data = Table.from_numpy(
+            domain,
+            [[1, 2, 3], [4, 5, 6]])
+        data2 = Table.from_numpy(
+            domain,
+            [[1, 3, 5]])
+
+        tf = PolynomialFeatures(1, False)(data)
+        np.testing.assert_equal(tf.X, data.X)
+        np.testing.assert_equal(data2.transform(tf.domain).X, data2.X)
+
+        tf = PolynomialFeatures(1, True)(data)
+        np.testing.assert_equal(tf.X, [[1, 1, 2, 3], [1, 4, 5, 6]])
+        np.testing.assert_equal(data2.transform(tf.domain).X, [[1, 1, 3, 5]])
+
+        tf = PolynomialFeatures(2, False)(data)
+        np.testing.assert_equal(
+            tf.X,
+            [[1, 2, 3, 1, 2, 3, 4, 6, 9],
+             [4, 5, 6, 16, 20, 24, 25, 30, 36]])
+        np.testing.assert_equal(
+            data2.transform(tf.domain).X,
+            [[1, 3, 5, 1, 3, 5, 9, 15, 25]])
+
+        tf = PolynomialFeatures(2, True)(data)
+        np.testing.assert_equal(
+            tf.X,
+            [[1, 1, 2, 3, 1, 2, 3, 4, 6, 9],
+             [1, 4, 5, 6, 16, 20, 24, 25, 30, 36]])
+        np.testing.assert_equal(
+            data2.transform(tf.domain).X,
+            [[1, 1, 3, 5, 1, 3, 5, 9, 15, 25]])
+
+        tf = PolynomialFeatures(0, True)(data)
+        np.testing.assert_equal(
+            tf.X.T,
+            [[1, 1]])
+        np.testing.assert_equal(
+            data2.transform(tf.domain).X.T,
+            [[1]])
+
+        tf = PolynomialFeatures(0, False)(data)
+        np.testing.assert_equal(
+            tf.X.T,
+            [[0, 0]])
+        np.testing.assert_equal(
+            data2.transform(tf.domain).X.T,
+            [[0]])
+
+
+class ModelsTest(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.data = Table("iris")[:10]
+
+    def test_regressto0(self):
+        model = RegressTo0()(self.data)
+        prediction = model(self.data[:5])
+        np.testing.assert_equal(prediction, [0] * 5)
+
+    def test_tempmeanmodel(self):
+        model = TempMeanModel(42)
+        prediction = model(self.data[:5])
+        np.testing.assert_equal(prediction, [42] * 5)
+
+    def test_polynomiallearnerwrapper(self):
+        u, x, y, z = (ContinuousVariable(n) for n in "uxyz")
+        domain = Domain([u, x], y)
+        data = Table.from_numpy(
+            domain,
+            [[1, 1], [0, 2], [np.nan, 3], [-1, np.nan], [2, 4]],
+            [3, 5, 7, 7, np.nan])
+        data2 = Table.from_numpy(
+            Domain([x, z]),
+            [[5, 6], [7, 8]])
+
+        learner = PolynomialLearnerWrapper(
+            x, y, 1, LinearRegressionLearner(fit_intercept=False),
+            fit_intercept=True, preprocessors=None)
+        model = learner(data)
+        np.testing.assert_almost_equal(model.coefficients, [1, 2])
+        np.testing.assert_almost_equal(model(data2), [11, 15])
+
+        learner = PolynomialLearnerWrapper(
+            x, y, 1, LinearRegressionLearner(fit_intercept=True),
+            fit_intercept=False, preprocessors=False)
+        model = learner(data)
+        np.testing.assert_almost_equal(model.coefficients, [2])
+        self.assertAlmostEqual(model.intercept, 1)
+        np.testing.assert_almost_equal(model(data2), [11, 15])
+
+        learner = PolynomialLearnerWrapper(
+            x, y, 1, LinearRegressionLearner(fit_intercept=False),
+            fit_intercept=False, preprocessors=None)
+        model = learner(data)
+        # I haven't computed this value manually, just copied the result,
+        # but it must be something a bit larger than 2, which it is
+        np.testing.assert_almost_equal(model.coefficients, [2.4285714])
+        self.assertEqual(model.intercept, 0)
+        np.testing.assert_almost_equal(model(data2), [12.1428571, 17])
+
+        learner = PolynomialLearnerWrapper(
+            x, y, 2, LinearRegressionLearner(fit_intercept=False),
+            fit_intercept=False, preprocessors=False)
+        model = learner(data)
+        # I haven't computed these values manually, just copied the result
+        np.testing.assert_almost_equal(model.coefficients,
+                                       [3.1052632, -0.2631579])
+        self.assertEqual(model.intercept, 0)
+        np.testing.assert_almost_equal(model(data2), [8.9473684, 8.8421053])
 
 
 if __name__ == "__main__":
-    import unittest
     unittest.main()