From 16e1703a5703558d1c202a8c36d20b1e1f55b969 Mon Sep 17 00:00:00 2001
From: Johann Faouzi
Date: Thu, 28 Oct 2021 15:30:17 +0200
Subject: [PATCH] Maintenance (#117)

---
 .readthedocs.yml                           |  2 +-
 azure-pipelines.yml                        | 12 ++++++------
 examples/classification/plot_tsbf.py       |  6 +++---
 pyts/base.py                               |  4 ++++
 .../tests/test_time_series_forest.py       |  2 +-
 pyts/classification/tests/test_tsbf.py     |  2 +-
 pyts/classification/time_series_forest.py  |  8 ++++++--
 pyts/classification/tsbf.py                | 16 +++++++++++++---
 8 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/.readthedocs.yml b/.readthedocs.yml
index 068a136..c9d1c09 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -4,7 +4,7 @@ formats:
   - none
 requirements_file: requirements.txt
 python:
-  version: 3.7
+  version: 3.8
   pip_install: true
   install:
     - requirements: docs/requirements.txt
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 1987739..7dad1e2 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -47,7 +47,7 @@ jobs:
     python.version: '3.9'
     numpy.version: '1.*'
     scipy.version: '1.*'
-    scikit-learn.version: '0.*'
+    scikit-learn.version: '1.*'
     joblib.version: '0.*'
     numba.version: '0.*'
 
@@ -77,7 +77,7 @@ jobs:
     python.version: '3.8'
     numpy.version: '1.*'
     scipy.version: '1.*'
-    scikit-learn.version: '0.*'
+    scikit-learn.version: '1.*'
     joblib.version: '0.*'
     numba.version: '0.*'
 
@@ -112,7 +112,7 @@ jobs:
     python.version: '3.9'
     numpy.version: '1.*'
     scipy.version: '1.*'
-    scikit-learn.version: '0.*'
+    scikit-learn.version: '1.*'
     joblib.version: '0.*'
     numba.version: '0.*'
 
@@ -142,7 +142,7 @@ jobs:
     python.version: '3.8'
     numpy.version: '1.*'
     scipy.version: '1.*'
-    scikit-learn.version: '0.*'
+    scikit-learn.version: '1.*'
     joblib.version: '0.*'
     numba.version: '0.*'
 
@@ -183,7 +183,7 @@ jobs:
     python.version: '3.9'
     numpy.version: '1.*'
     scipy.version: '1.*'
-    scikit-learn.version: '0.*'
+    scikit-learn.version: '1.*'
     joblib.version: '0.*'
     numba.version: '0.*'
 
@@ -213,7 +213,7 @@ jobs:
     python.version: '3.8'
     numpy.version: '1.*'
     scipy.version: '1.*'
-    scikit-learn.version: '0.*'
+    scikit-learn.version: '1.*'
     joblib.version: '0.*'
     numba.version: '0.*'
 
diff --git a/examples/classification/plot_tsbf.py b/examples/classification/plot_tsbf.py
index 9e0f777..e79345d 100644
--- a/examples/classification/plot_tsbf.py
+++ b/examples/classification/plot_tsbf.py
@@ -56,10 +56,10 @@
 clf = TSBF(random_state=43, bins=5)
 clf.fit(X_train, y_train)
 
-plt.bar(np.arange(clf.n_features_), clf.feature_importances_)
+plt.bar(np.arange(clf.n_features_in_), clf.feature_importances_)
 plt.title('Feature importance scores')
-plt.xticks(np.arange(clf.n_features_),
-           ['feature {}'.format(i) for i in range(clf.n_features_)],
+plt.xticks(np.arange(clf.n_features_in_),
+           ['feature {}'.format(i) for i in range(clf.n_features_in_)],
            rotation=90)
 plt.ylabel("Mean decrease in impurity")
 plt.tight_layout()
diff --git a/pyts/base.py b/pyts/base.py
index d2a17bd..4264fcc 100644
--- a/pyts/base.py
+++ b/pyts/base.py
@@ -77,6 +77,8 @@ def fit_transform(self, X, y=None, **fit_params):
 class UnivariateClassifierMixin:
     """Mixin class for all univariate classifiers in pyts."""
 
+    _estimator_type = "classifier"
+
     def score(self, X, y, sample_weight=None):
         """
         Return the mean accuracy on the given test data and labels.
@@ -104,6 +106,8 @@ def score(self, X, y, sample_weight=None):
 class MultivariateClassifierMixin:
     """Mixin class for all multivariate classifiers in pyts."""
 
+    _estimator_type = "classifier"
+
     def score(self, X, y, sample_weight=None):
         """
         Return the mean accuracy on the given test data and labels.
diff --git a/pyts/classification/tests/test_time_series_forest.py b/pyts/classification/tests/test_time_series_forest.py
index 6f816fc..d958bb8 100644
--- a/pyts/classification/tests/test_time_series_forest.py
+++ b/pyts/classification/tests/test_time_series_forest.py
@@ -144,7 +144,7 @@ def test_actual_results_fixed_indices(indices, arr_desired):
 def test_attributes_time_series_forest(params):
     """Test the attributes of a fitted instance of TimeSeriesForest."""
     real_attributes = ['base_estimator_', 'classes_', 'estimators_',
-                       'feature_importances_', 'indices_', 'n_features_',
+                       'feature_importances_', 'indices_', 'n_features_in_',
                        'oob_decision_function_', 'oob_score_']
     fake_attributes = ['yolo', 'whoopsy', 'mistake_were_made_']
 
diff --git a/pyts/classification/tests/test_tsbf.py b/pyts/classification/tests/test_tsbf.py
index 4d64aa8..a0cc116 100644
--- a/pyts/classification/tests/test_tsbf.py
+++ b/pyts/classification/tests/test_tsbf.py
@@ -350,7 +350,7 @@ def test_attributes_tsbf(X, y, params):
     assert clf.feature_importances_.shape == (n_features,)
     assert clf.interval_indices_.ndim == 2
     assert isinstance(clf.min_subsequence_size_, (int, np.integer))
-    assert clf.n_features_ == n_features
+    assert clf.n_features_in_ == n_features
     if params.get('oob_score', TSBF().get_params()['oob_score']):
         assert clf.oob_decision_function_.shape == (n_samples, n_classes)
         assert isinstance(clf.oob_score_, (float, np.floating))
diff --git a/pyts/classification/time_series_forest.py b/pyts/classification/time_series_forest.py
index ec53b23..9e35adf 100644
--- a/pyts/classification/time_series_forest.py
+++ b/pyts/classification/time_series_forest.py
@@ -331,7 +331,7 @@ class TimeSeriesForest(BaseEstimator, UnivariateClassifierMixin):
         of the windows. The second column consists of the ending indices
         (excluded) of the windows.
 
-    n_features_ : int
+    n_features_in_ : int
         The number of features when ``fit`` is performed.
 
     oob_decision_function_ : None or array, shape = (n_samples, n_classes)
@@ -516,7 +516,11 @@ def fit(self, X, y):
         self.estimators_ = self._pipeline['rfc'].estimators_
         self.feature_importances_ = self._pipeline['rfc'].feature_importances_
         self.indices_ = self._pipeline['fe'].indices_
-        self.n_features_ = self._pipeline['rfc'].n_features_
+        self.n_features_in_ = (
+            self._pipeline['rfc'].n_features_in_
+            if hasattr(self._pipeline['rfc'], 'n_features_in_')
+            else self._pipeline['rfc'].n_features_
+        )
         self.oob_decision_function_ = getattr(
             self._pipeline['rfc'], 'oob_decision_function_', None)
         self.oob_score_ = getattr(self._pipeline['rfc'], 'oob_score_', None)
diff --git a/pyts/classification/tsbf.py b/pyts/classification/tsbf.py
index 626603e..e85e6e1 100644
--- a/pyts/classification/tsbf.py
+++ b/pyts/classification/tsbf.py
@@ -446,7 +446,7 @@ class TSBF(BaseEstimator, UnivariateClassifierMixin):
     min_subsequence_size_ : int
         The actual minimum length of the subsequences.
 
-    n_features_ : int
+    n_features_in_ : int
        The number of features when ``fit`` is performed.
 
     oob_decision_function_ : None or array, shape = (n_samples, n_classes)
@@ -660,7 +660,13 @@ def fit(self, X, y):
         rfc.fit(X_features, y_features)
         X_oob_proba = rfc.oob_decision_function_.reshape(
             n_samples, n_subsequences, n_classes)
-        if np.isnan(X_oob_proba).any():
+
+        # Check for subsequences without OOB scores
+        no_oob_scores = (
+            (np.isnan(X_oob_proba).any() or
+             np.all(X_oob_proba == 0., axis=2).any())
+        )
+        if no_oob_scores:
             raise ValueError(
                 "At least one sample was never left out during the bootstrap. "
                 "Increase the number of trees (n_estimators)."
@@ -683,7 +689,11 @@ def fit(self, X, y):
         self.feature_importances_ = clf.feature_importances_
         self.interval_indices_ = feature_extractor.interval_indices_
         self.min_subsequence_size_ = feature_extractor.min_subsequence_size_
-        self.n_features_ = clf.n_features_
+        self.n_features_in_ = (
+            clf.n_features_in_
+            if hasattr(clf, 'n_features_in_')
+            else clf.n_features_
+        )
         self.oob_decision_function_ = getattr(
             clf, 'oob_decision_function_', None)
         self.oob_score_ = getattr(clf, 'oob_score_', None)
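
Illustrative sketch (not part of the patch, numpy only): the no_oob_scores condition added in TSBF.fit flags a subsequence as lacking an out-of-bag score when its class-probability row contains NaN or is entirely zero; depending on the scikit-learn version, RandomForestClassifier.oob_decision_function_ may mark samples that were never left out in either of these two ways, so the patch checks both. The toy array below uses the same (n_samples, n_subsequences, n_classes) layout that fit builds with the reshape call.

    import numpy as np

    # Toy OOB probabilities, shape (n_samples=3, n_subsequences=2, n_classes=2).
    X_oob_proba = np.array([
        [[0.2, 0.8], [0.6, 0.4]],          # every subsequence has an OOB score
        [[0.0, 0.0], [0.5, 0.5]],          # all-zero row: no OOB score
        [[np.nan, np.nan], [0.9, 0.1]],    # NaN row: no OOB score
    ])

    # Same condition as in the patch: any NaN, or any all-zero class-probability row.
    no_oob_scores = (
        np.isnan(X_oob_proba).any() or
        np.all(X_oob_proba == 0., axis=2).any()
    )
    print(no_oob_scores)  # True -> fit would raise the ValueError above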