Merge pull request #4961 from janezd/test-warnings

[MNT] Reduce the number of warnings in tests
biolab · Sep 11, 2020 · 32c5ca7
2 parents a9c7853 + 6e227ab
commit 32c5ca7
Show file tree

Hide file tree

Showing 50 changed files with 372 additions and 211 deletions.
diff --git a/Orange/base.py b/Orange/base.py
@@ -1,6 +1,6 @@
 import inspect
 import itertools
-from collections import Iterable
+from collections.abc import Iterable
 import re
 import warnings
 from typing import Callable

diff --git a/Orange/classification/calibration.py b/Orange/classification/calibration.py
@@ -74,7 +74,7 @@ def fit_storage(self, data):
                 or len(data.domain.class_var.values) != 2:
             raise ValueError("ThresholdLearner requires a binary class")
 
-        res = TestOnTrainingData(data, [self.base_learner], store_models=True)
+        res = TestOnTrainingData(store_models=True)(data, [self.base_learner])
         model = res.models[0, 0]
         curves = Curves.from_results(res)
         curve = [curves.ca, curves.f1][self.threshold_criterion]()
@@ -160,7 +160,7 @@ def fit_storage(self, data):
         on training data and use scipy's `_SigmoidCalibration` or
         `IsotonicRegression` to prepare calibrators.
         """
-        res = TestOnTrainingData(data, [self.base_learner], store_models=True)
+        res = TestOnTrainingData(store_models=True)(data, [self.base_learner])
         model = res.models[0, 0]
         probabilities = res.probabilities[0]
         return self.get_model(model, res.actual, probabilities)

diff --git a/Orange/classification/softmax_regression.py b/Orange/classification/softmax_regression.py
@@ -9,7 +9,7 @@
 
 
 class SoftmaxRegressionLearner(Learner):
-    """L2 regularized softmax regression classifier.
+    r"""L2 regularized softmax regression classifier.
     Uses the L-BFGS algorithm to minimize the categorical
     cross entropy cost with L2 regularization. This model is suitable
     when dealing with a multi-class classification problem.

diff --git a/Orange/data/domain.py b/Orange/data/domain.py
@@ -2,7 +2,7 @@
 import weakref
 
 from math import log
-from collections import Iterable
+from collections.abc import Iterable
 from itertools import chain
 from numbers import Integral
 
@@ -148,12 +148,12 @@ def __init__(self, attributes, class_vars=None, metas=None, source=None):
                         raise TypeError(
                             "descriptors must be instances of Variable, "
                             "not '%s'" % type(var).__name__)
-      
+
         names = [var.name for var in chain(attributes, class_vars, metas)]
         if len(names) != len(set(names)):
             raise Exception('All variables in the domain should have'
                             ' unique names.')
-  
+
         # Store everything
         self.attributes = tuple(attributes)
         self.class_vars = tuple(class_vars)

diff --git a/Orange/data/io.py b/Orange/data/io.py
@@ -300,7 +300,13 @@ class ExcelReader(_BaseExcelReader):
     @property
     def workbook(self) -> openpyxl.Workbook:
         if not self._workbook:
-            self._workbook = openpyxl.load_workbook(self.filename,
+            with warnings.catch_warnings():
+                # We don't care about extensions, but we hate warnings
+                warnings.filterwarnings(
+                    "ignore",
+                    ".*extension is not supported and will be removed.*",
+                    UserWarning)
+                self._workbook = openpyxl.load_workbook(self.filename,
                                                     data_only=True)
         return self._workbook
 

diff --git a/Orange/data/table.py b/Orange/data/table.py
@@ -4,7 +4,7 @@
 import warnings
 import weakref
 import zlib
-from collections import Iterable, Sequence, Sized
+from collections.abc import Iterable, Sequence, Sized
 from functools import reduce
 from itertools import chain
 from numbers import Real, Integral

diff --git a/Orange/data/tests/test_io_base.py b/Orange/data/tests/test_io_base.py
@@ -214,7 +214,8 @@ def test_adjust_data_width_shorten(self):
         header.names = names
         header.types = types
         header.flags = flags
-        adjusted, n = DataTableMixin.adjust_data_width(self.header0, header)
+        with self.assertWarns(UserWarning):
+            adjusted, n = DataTableMixin.adjust_data_width(self.header0, header)
         np.testing.assert_array_equal(
             adjusted, np.array(self.header0, dtype=object)[:, :3])
         self.assertEqual(adjusted.shape, (len(self.header0), 3))

diff --git a/Orange/data/tests/test_variable.py b/Orange/data/tests/test_variable.py
@@ -508,8 +508,7 @@ def test_remove_ordered(self):
         """
         self.assertLess(Orange.__version__, "3.29.0")
 
-    @staticmethod
-    def test_pickle_backward_compatibility():
+    def test_pickle_backward_compatibility(self):
         """
         Test that pickle made with an older version of Orange are correctly
         loaded after changes in DiscreteVariable
@@ -523,9 +522,11 @@ def test_pickle_backward_compatibility():
                 this_dir, "..", "..", "tests", "datasets"
             )
             # pickle with values as list
-            Table(os.path.join(datasets_dir, "sailing-orange-3-20.pkl"))
+            with self.assertWarns(OrangeDeprecationWarning):
+                Table(os.path.join(datasets_dir, "sailing-orange-3-20.pkl"))
             # pickle with values as tuple list
-            Table(os.path.join(datasets_dir, "iris-orange-3-25.pkl"))
+            with self.assertWarns(OrangeDeprecationWarning):
+                Table(os.path.join(datasets_dir, "iris-orange-3-25.pkl"))
 
 
 @variabletest(ContinuousVariable)

diff --git a/Orange/data/variable.py b/Orange/data/variable.py
@@ -1,6 +1,6 @@
 import re
 import warnings
-from collections import Iterable
+from collections.abc import Iterable
 
 from datetime import datetime, timedelta, timezone
 from numbers import Number, Real, Integral
@@ -579,7 +579,8 @@ def repr_val(self, val):
         """
         Return the value as a string with the prescribed number of decimals.
         """
-        if isnan(val):
+        # Table value can't be inf, but repr_val can be used to print any float
+        if not np.isfinite(val):
             return "?"
         if self.format_str != "%g" \
                 and abs(round(val, self._number_of_decimals) - val) \
@@ -647,9 +648,9 @@ def __init__(
     def ordered(self):
         warnings.warn(
             "ordered is deprecated. It will be removed in future versions.",
-            # FutureWarning warning is used instead of OrangeDeprecation
+            # DeprecationWarning warning is used instead of OrangeDeprecation
             # warning otherwise tests fail (__repr__ still asks for ordered)
-            FutureWarning
+            DeprecationWarning
         )
         return None
 

diff --git a/Orange/preprocess/discretize.py b/Orange/preprocess/discretize.py
@@ -681,7 +681,7 @@ def _entropy_discretize_sorted(cls, C, force=False):
         delta = np.log2(3 ** k - 2) - (k * ES - k1 * ES1 - k2 * ES2)
         N = float(np.sum(S_c))
 
-        if Gain > np.log2(N - 1) / N + delta / N:
+        if N > 1 and Gain > np.log2(N - 1) / N + delta / N:
             # Accept the cut point and recursively split the subsets.
             left, right = [], []
             if k1 > 1 and cut_index > 1:

diff --git a/Orange/projection/manifold.py b/Orange/projection/manifold.py
@@ -1,6 +1,6 @@
 import logging
 import warnings
-from collections import Iterable
+from collections.abc import Iterable
 from itertools import chain
 
 import numpy as np

diff --git a/Orange/regression/linear_bfgs.py b/Orange/regression/linear_bfgs.py
@@ -9,7 +9,7 @@
 
 
 class LinearRegressionLearner(Learner):
-    '''L2 regularized linear regression (a.k.a Ridge regression)
+    r'''L2 regularized linear regression (a.k.a Ridge regression)
 
     This model uses the L-BFGS algorithm to minimize the linear least
     squares penalty with L2 regularization. When using this model you

diff --git a/Orange/statistics/distribution.py b/Orange/statistics/distribution.py
@@ -1,4 +1,4 @@
-from collections import Iterable
+from collections.abc import Iterable
 from numbers import Real
 import zlib
 

diff --git a/Orange/tests/test_classification.py b/Orange/tests/test_classification.py
@@ -21,7 +21,8 @@
     MajorityLearner,
     RandomForestLearner, SimpleTreeLearner, SoftmaxRegressionLearner,
     SVMLearner, LinearSVMLearner, OneClassSVMLearner, TreeLearner, KNNLearner,
-    SimpleRandomForestLearner, EllipticEnvelopeLearner)
+    SimpleRandomForestLearner, EllipticEnvelopeLearner, ThresholdLearner,
+    CalibratedLearner)
 from Orange.classification.rules import _RuleLearner
 from Orange.data import (ContinuousVariable, DiscreteVariable,
                          Domain, Table)
@@ -214,14 +215,13 @@ def test_result_shape(self):
         """
         iris = Table('iris')
         for learner in all_learners():
+            # calibration, threshold learners' __init__ requires arguments
+            if learner in (ThresholdLearner, CalibratedLearner):
+                continue
+
             with self.subTest(learner.__name__):
                 # model trained on only one value (but three in the domain)
-                try:
-                    model = learner()(iris[0:100])
-                except TypeError as e:
-                    # calibration, threshold learners are skipped
-                    # they have some specifics regarding data
-                    continue
+                model = learner()(iris[0:100])
 
                 res = model(iris[0:50])
                 self.assertTupleEqual((50,), res.shape)
@@ -360,18 +360,18 @@ def test_unknown(self):
     def test_missing_class(self):
         table = Table(test_filename("datasets/adult_sample_missing"))
         for learner in all_learners():
-            try:
+            # calibration, threshold learners' __init__ require arguments
+            if learner in (ThresholdLearner, CalibratedLearner):
+                continue
+            # Skip slow tests
+            if isinstance(learner, _RuleLearner):
+                continue
+            with self.subTest(learner.__name__):
                 learner = learner()
                 if isinstance(learner, NuSVMLearner):
                     learner.params["nu"] = 0.01
-                # Skip slow tests
-                if isinstance(learner, _RuleLearner):
-                    continue
                 model = learner(table)
                 model(table)
-            except TypeError:
-                traceback.print_exc()
-                continue
 
 
 class LearnerAccessibility(unittest.TestCase):
@@ -389,48 +389,46 @@ def test_all_learners_accessible_in_Orange_classification_namespace(self):
     def test_all_models_work_after_unpickling(self):
         datasets = [Table('iris'), Table('titanic')]
         for learner in list(all_learners()):
-            try:
-                learner = learner()
-            except Exception:
-                print('%s cannot be used with default parameters' % learner.__name__)
-                traceback.print_exc()
+            # calibration, threshold learners' __init__ require arguments
+            if learner in (ThresholdLearner, CalibratedLearner):
                 continue
             # Skip slow tests
             if isinstance(learner, _RuleLearner):
                 continue
-
-            for ds in datasets:
-                model = learner(ds)
-                s = pickle.dumps(model, 0)
-                model2 = pickle.loads(s)
-
-                np.testing.assert_almost_equal(
-                    Table.from_table(model.domain, ds).X,
-                    Table.from_table(model2.domain, ds).X)
-                np.testing.assert_almost_equal(
-                    model(ds), model2(ds),
-                    err_msg='%s does not return same values when unpickled %s'
-                    % (learner.__class__.__name__, ds.name))
+            with self.subTest(learner.__name__):
+                learner = learner()
+                for ds in datasets:
+                    model = learner(ds)
+                    s = pickle.dumps(model, 0)
+                    model2 = pickle.loads(s)
+
+                    np.testing.assert_almost_equal(
+                        Table.from_table(model.domain, ds).X,
+                        Table.from_table(model2.domain, ds).X)
+                    np.testing.assert_almost_equal(
+                        model(ds), model2(ds),
+                        err_msg='%s does not return same values when unpickled %s'
+                        % (learner.__class__.__name__, ds.name))
 
     def test_adequacy_all_learners(self):
         for learner in all_learners():
-            try:
+            # calibration, threshold learners' __init__ requires arguments
+            if learner in (ThresholdLearner, CalibratedLearner):
+                continue
+            with self.subTest(learner.__name__):
                 learner = learner()
                 table = Table("housing")
                 self.assertRaises(ValueError, learner, table)
-            except TypeError:
-                traceback.print_exc()
-                continue
 
     def test_adequacy_all_learners_multiclass(self):
         for learner in all_learners():
-            try:
+            # calibration, threshold learners' __init__ require arguments
+            if learner in (ThresholdLearner, CalibratedLearner):
+                continue
+            with self.subTest(learner.__name__):
                 learner = learner()
                 table = Table(test_filename("datasets/test8.tab"))
                 self.assertRaises(ValueError, learner, table)
-            except TypeError:
-                traceback.print_exc()
-                continue
 
 
 class LearnerReprs(unittest.TestCase):

diff --git a/Orange/tests/test_io.py b/Orange/tests/test_io.py
@@ -165,20 +165,24 @@ def test_load_pickle(self):
         """
         with warnings.catch_warnings():
             # in unittests on travis/github actions OrangeDeprecationWarning
-            # is raised as an error. With this statement it si disabled only
+            # is raised as an error. With this statement it is disabled only
             # for this test - when unpickling pickle created with version older
             # than 3.27 ordered parameter in DiscreteVariable which is
             # deprecated still appears - which will raise deprecation warning
             warnings.simplefilter('default', OrangeDeprecationWarning)
             # load pickles created with Orange 3.20
             # in next version there is a change in variables.py - line 738
             # which broke back compatibility - tests introduced after the fix
-            data1 = Table("datasets/sailing-orange-3-20.pkl")
-            data2 = Table("datasets/sailing-orange-3-20.pkl.gz")
+            with self.assertWarns(OrangeDeprecationWarning):
+                data1 = Table("datasets/sailing-orange-3-20.pkl")
+            with self.assertWarns(OrangeDeprecationWarning):
+                data2 = Table("datasets/sailing-orange-3-20.pkl.gz")
 
             # load pickles created with Orange 3.21
-            data3 = Table("datasets/sailing-orange-3-21.pkl")
-            data4 = Table("datasets/sailing-orange-3-21.pkl.gz")
+            with self.assertWarns(OrangeDeprecationWarning):
+                data3 = Table("datasets/sailing-orange-3-21.pkl")
+            with self.assertWarns(OrangeDeprecationWarning):
+                data4 = Table("datasets/sailing-orange-3-21.pkl.gz")
 
             examples_count = 20
             self.assertEqual(examples_count, len(data1))

diff --git a/Orange/tests/test_naive_bayes.py b/Orange/tests/test_naive_bayes.py
@@ -58,7 +58,7 @@ def test_degenerate(self):
     def test_allnan_cv(self):
         # GH 2740
         data = Table(test_filename('datasets/lenses.tab'))
-        cv = CrossValidation()
+        cv = CrossValidation(stratified=False)
         results = cv(data, [self.learner])
         self.assertFalse(any(results.failed))
 

diff --git a/Orange/tests/test_util.py b/Orange/tests/test_util.py
@@ -143,7 +143,10 @@ def test_array_not_equal(self, array):
     def test_csc_array_equal(self):
         a1 = sp.csc_matrix(([1, 4, 5], ([0, 0, 1], [0, 2, 2])), shape=(2, 3))
         a2 = sp.csc_matrix(([5, 1, 4], ([1, 0, 0], [2, 0, 2])), shape=(2, 3))
-        a2[0, 1] = 0  # explicitly setting to 0
+        with warnings.catch_warnings():
+            # this is just inefficiency in tests, not the tested code
+            warnings.filterwarnings("ignore", ".*", sp.SparseEfficiencyWarning)
+            a2[0, 1] = 0  # explicitly setting to 0
         self.assertTrue(array_equal(a1, a2))
 
     def test_csc_scr_equal(self):

diff --git a/Orange/util.py b/Orange/util.py
@@ -384,13 +384,18 @@ def _reprable_omit_param(self, name, default, value):
             return False
 
     def _reprable_items(self):
-        for name, default in self._reprable_fields():
-            try:
-                value = getattr(self, name)
-            except AttributeError:
-                value = _undef
-            if not self._reprable_omit_param(name, default, value):
-                yield name, default, value
+        with warnings.catch_warnings():
+            warnings.simplefilter("error", DeprecationWarning)
+            warnings.simplefilter("error", PendingDeprecationWarning)
+            for name, default in self._reprable_fields():
+                try:
+                    value = getattr(self, name)
+                except (DeprecationWarning, PendingDeprecationWarning):
+                    continue
+                except AttributeError:
+                    value = _undef
+                if not self._reprable_omit_param(name, default, value):
+                    yield name, default, value
 
     def _repr_pretty_(self, p, cycle):
         """IPython pretty print hook."""

diff --git a/Orange/widgets/data/owcsvimport.py b/Orange/widgets/data/owcsvimport.py
@@ -1539,7 +1539,7 @@ def pandas_to_table(df):
     columns = []  # type: List[Tuple[Orange.data.Variable, np.ndarray]]
 
     for header, series in df.items():  # type: (Any, pd.Series)
-        if pdtypes.is_categorical(series):
+        if pdtypes.is_categorical_dtype(series):
             coldata = series.values  # type: pd.Categorical
             categories = [str(c) for c in coldata.categories]
             var = Orange.data.DiscreteVariable.make(