Updates to support pandas 0.23

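- Updated requirements.txt and setup.py to allow pandas 0.23 (upper bound raised from <0.22 to <0.24).
- Updated .travis.yml to test pandas 0.22 and 0.23.
  - Not updated to account for numpy 1.14; the new jobs still use numpy 1.13.
- Corrected imports of and references to is_categorical_dtype (and likewise is_object_dtype and is_numeric_dtype) from pandas.core.common to pandas.api.types.
  - pandas.api.types was already available in pandas 0.19, so no try/except is needed.
- Replaced pandas.DataFrame.from_items with pandas.DataFrame.from_dict applied to an OrderedDict, importing OrderedDict in tests/.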
badge · Jul 2, 2018 · 9f050bb · 9f050bb
1 parent 9d2ec18
commit 9f050bb
Show file tree

Hide file tree

Showing 11 changed files with 47 additions and 35 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -10,6 +10,8 @@ env:
     - PANDAS_VERSION=0.19 NUMPY_VERSION=1.12
     - PANDAS_VERSION=0.20 NUMPY_VERSION=1.13
     - PANDAS_VERSION=0.21 NUMPY_VERSION=1.13 NO_SLOW=false
+    - PANDAS_VERSION=0.22 NUMPY_VERSION=1.13 NO_SLOW=false
+    - PANDAS_VERSION=0.23 NUMPY_VERSION=1.13 NO_SLOW=false
 
 cache:
   directories:

diff --git a/requirements.txt b/requirements.txt
@@ -2,6 +2,6 @@ cvxopt
 cvxpy <1.0
 numexpr
 numpy
-pandas >=0.19,<0.22
+pandas >=0.19,<0.24
 scipy
 scikit-learn >=0.19.0,<0.20
diff --git a/setup.py b/setup.py
@@ -60,7 +60,7 @@ def setup_package():
                         'cvxpy <1.0',
                         'numexpr',
                         'numpy',
-                        'pandas >=0.19, <0.22',
+                        'pandas >=0.19, <0.24',
                         'scipy',
                         'scikit-learn >=0.19.0, <0.20'],
                     extras_require={

diff --git a/sksurv/column.py b/sksurv/column.py
@@ -15,7 +15,7 @@
 import numpy
 import pandas
 
-from pandas.core.common import is_categorical_dtype
+from pandas.api.types import is_categorical_dtype
 
 __all__ = ['categorical_to_numeric', 'encode_categorical', 'standardize']
 
@@ -198,4 +198,5 @@ def transform(column):
     if isinstance(table, pandas.Series):
         return pandas.Series(transform(table), name=table.name, index=table.index)
     else:
+        # Raises a deprecation warning in pandas 0.23
         return table.apply(transform, axis=0, reduce=False)
diff --git a/sksurv/io/arffwrite.py b/sksurv/io/arffwrite.py
@@ -68,7 +68,7 @@ def _write_header(data, fp, relation_name, index):
         name = attribute_names[column]
         fp.write("@attribute {0}\t".format(name))
 
-        if pandas.core.common.is_categorical_dtype(series) or pandas.core.common.is_object_dtype(series):
+        if pandas.api.types.is_categorical_dtype(series) or pandas.api.types.is_object_dtype(series):
             _write_attribute_categorical(series, fp)
         elif numpy.issubdtype(series.dtype, numpy.floating):
             fp.write("real")
@@ -110,7 +110,7 @@ def _check_str_value(x):
 
 def _write_attribute_categorical(series, fp):
     """Write categories of a categorical/nominal attribute"""
-    if pandas.core.common.is_categorical_dtype(series.dtype):
+    if pandas.api.types.is_categorical_dtype(series.dtype):
         categories = series.cat.categories
         string_values = _check_str_array(categories)
     else:

diff --git a/sksurv/kernels/clinical.py b/sksurv/kernels/clinical.py
@@ -162,15 +162,15 @@ def _prepare_by_column_dtype(self, X):
 
         for i, dt in enumerate(X.dtypes):
             col = X.iloc[:, i]
-            if pandas.core.common.is_categorical_dtype(dt):
+            if pandas.api.types.is_categorical_dtype(dt):
                 if col.cat.ordered:
                     numeric_ranges.append(col.cat.codes.max() - col.cat.codes.min())
                     numeric_columns.append(i)
                 else:
                     nominal_columns.append(i)
 
                 col = col.cat.codes
-            elif pandas.core.common.is_numeric_dtype(dt):
+            elif pandas.api.types.is_numeric_dtype(dt):
                 numeric_ranges.append(col.max() - col.min())
                 numeric_columns.append(i)
             else:

diff --git a/sksurv/util.py b/sksurv/util.py
@@ -161,7 +161,7 @@ def safe_concat(objs, *args, **kwargs):
     categories = {}
     for df in objs:
         if isinstance(df, pandas.Series):
-            if pandas.core.common.is_categorical_dtype(df.dtype):
+            if pandas.api.types.is_categorical_dtype(df.dtype):
                 categories[df.name] = {"categories": df.cat.categories, "ordered": df.cat.ordered}
         else:
             dfc = df.select_dtypes(include=["category"])

diff --git a/tests/test_column.py b/tests/test_column.py
@@ -3,6 +3,7 @@
 import pandas
 import numpy
 
+from collections import OrderedDict
 from sksurv import column
 
 NUMERIC_DATA_FRAME = pandas.DataFrame(numpy.arange(50).reshape(10, 5))
@@ -93,10 +94,10 @@ def test_series_categorical():
         input_series = pandas.Series(pandas.Categorical.from_codes([1, 1, 0, 2, 0, 1, 2, 1, 2, 0, 0, 1, 2, 2],
                                                                    ["small", "medium", "large"], ordered=False),
                                      name="a_series")
-        expected_df = pandas.DataFrame.from_items(
+        expected_df = pandas.DataFrame.from_dict(OrderedDict(
             [("a_series=medium", numpy.array([1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0], dtype=float)),
              ("a_series=large", numpy.array([0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1], dtype=float))
-            ])
+            ]))
 
         actual_df = column.encode_categorical(input_series)
 
@@ -159,8 +160,10 @@ def test_duplicate_index(self):
         c = rnd.randn(len(a))
 
         index = numpy.ceil(numpy.arange(0, len(a) // 2, 0.5))
-        df = pandas.DataFrame.from_items([("a_category", pandas.Series(a, index=index)),
-                                          ("a_number", pandas.Series(c, index=index, copy=True))])
+        df = pandas.DataFrame.from_dict(OrderedDict([
+            ("a_category", pandas.Series(a, index=index)),
+            ("a_number", pandas.Series(c, index=index, copy=True))
+        ]))
 
         actual_df = column.encode_categorical(df)
 

diff --git a/tests/test_io.py b/tests/test_io.py
@@ -7,6 +7,8 @@
 import pandas
 import pandas.util.testing as tm
 
+from collections import OrderedDict
+
 from sksurv.io import loadarff, writearff
 
 
@@ -40,7 +42,7 @@ def test_dataframe(self):
         with StringIO(contents) as fp:
             actual_df = loadarff(fp)
 
-        expected_df = pandas.DataFrame.from_items(
+        expected_df = pandas.DataFrame.from_dict(OrderedDict(
             [("attr_nominal",
               pandas.Series(pandas.Categorical.from_codes(
                   [1, 2, 0, -1, 2, 1],
@@ -50,7 +52,7 @@ def test_dataframe(self):
                   [2, 0, -1, 1, 0, 1],
                   ['"hard liquor"', 'mate', '"red wine"'])))
              ]
-        )
+        ))
 
         tm.assert_frame_equal(expected_df, actual_df, check_exact=True)
 

diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py
@@ -3,6 +3,8 @@
 from numpy.testing import run_module_suite, TestCase
 import pandas.util.testing as tm
 
+from collections import OrderedDict
+
 from sksurv.preprocessing import OneHotEncoder
 
 
@@ -40,7 +42,7 @@ def encoded_data(data):
         else:
             expected.append((nam, col))
 
-    expected_data = pd.DataFrame.from_items(expected)
+    expected_data = pd.DataFrame.from_dict(OrderedDict(expected))
     return expected_data
 
 

diff --git a/tests/test_util.py b/tests/test_util.py
@@ -3,6 +3,8 @@
 import pandas
 import numpy
 
+from collections import OrderedDict
+
 from sksurv.util import safe_concat
 
 
@@ -13,9 +15,9 @@ def test_concat_numeric():
         a = pandas.Series(rnd.randn(100), name="col_A")
         b = pandas.Series(rnd.randn(100), name="col_B")
 
-        expected_df = pandas.DataFrame.from_items(
+        expected_df = pandas.DataFrame.from_dict(OrderedDict(
             [(a.name, a), (b.name, b)]
-        )
+        ))
 
         actual_df = safe_concat((a, b), axis=1)
 
@@ -28,9 +30,9 @@ def test_concat_numeric_categorical():
         b = pandas.Series(pandas.Categorical.from_codes(
             rnd.binomial(4, 0.6, 100), ["C1", "C2", "C3", "C4", "C5"]), name="col_B")
 
-        expected_df = pandas.DataFrame.from_items(
+        expected_df = pandas.DataFrame.from_dict(OrderedDict(
             [(a.name, a), (b.name, b)]
-        )
+        ))
 
         actual_df = safe_concat((a, b), axis=1)
 
@@ -39,22 +41,22 @@ def test_concat_numeric_categorical():
     @staticmethod
     def test_concat_categorical():
         rnd = numpy.random.RandomState(14)
-        a = pandas.DataFrame.from_items([
+        a = pandas.DataFrame.from_dict(OrderedDict([
             ("col_A", pandas.Series(pandas.Categorical.from_codes(
                 rnd.binomial(2, 0.6, 100), ["C1", "C2", "C3"]), name="col_A")),
-            ("col_B", rnd.randn(100))])
-        b = pandas.DataFrame.from_items([
+            ("col_B", rnd.randn(100))]))
+        b = pandas.DataFrame.from_dict(OrderedDict([
             ("col_A", pandas.Series(pandas.Categorical.from_codes(
                 rnd.binomial(2, 0.2, 100), ["C1", "C2", "C3"]), name="col_A")),
-            ("col_B", rnd.randn(100))])
+            ("col_B", rnd.randn(100))]))
 
-        expected_series = pandas.DataFrame.from_items([
+        expected_series = pandas.DataFrame.from_dict(OrderedDict([
             ("col_A", pandas.Series(pandas.Categorical.from_codes(
                 numpy.concatenate((a.col_A.cat.codes.values, b.col_A.cat.codes.values)),
                 ["C1", "C2", "C3"]
             ))),
             ("col_B", numpy.concatenate((a.col_B.values, b.col_B.values)))
-        ])
+        ]))
         expected_series.index = pandas.Index(a.index.tolist() + b.index.tolist())
 
         actual_series = safe_concat((a, b), axis=0)
@@ -63,24 +65,24 @@ def test_concat_categorical():
 
     def test_concat_categorical_mismatch(self):
         rnd = numpy.random.RandomState(14)
-        a = pandas.DataFrame.from_items([
+        a = pandas.DataFrame.from_dict(OrderedDict([
             ("col_A", pandas.Series(pandas.Categorical.from_codes(
                 rnd.binomial(2, 0.6, 100), ["C1", "C2", "C3"]), name="col_A")),
-            ("col_B", rnd.randn(100))])
-        b = pandas.DataFrame.from_items([
+            ("col_B", rnd.randn(100))]))
+        b = pandas.DataFrame.from_dict(OrderedDict([
             ("col_A", pandas.Series(pandas.Categorical.from_codes(
                 rnd.binomial(3, 0.6, 100), ["C1", "C2", "C3", "C4"]), name="col_A")),
-            ("col_B", rnd.randn(100))])
+            ("col_B", rnd.randn(100))]))
 
         self.assertRaisesRegex(ValueError, "categories for column col_A do not match",
                                safe_concat, (a, b), axis=0)
 
     @staticmethod
     def test_concat_dataframe_numeric_categorical():
         rnd = numpy.random.RandomState(14)
-        numeric_df = pandas.DataFrame.from_items(
+        numeric_df = pandas.DataFrame.from_dict(OrderedDict(
             [("col_A", rnd.randn(100)), ("col_B", rnd.randn(100))]
-        )
+        ))
 
         cat_series = pandas.Series(pandas.Categorical.from_codes(
             rnd.binomial(4, 0.6, 100), ["C1", "C2", "C3", "C4", "C5"]), name="col_C")
@@ -94,18 +96,18 @@ def test_concat_dataframe_numeric_categorical():
 
     def test_concat_duplicate_columns(self):
         rnd = numpy.random.RandomState(14)
-        numeric_df = pandas.DataFrame.from_items([
+        numeric_df = pandas.DataFrame.from_dict(OrderedDict([
             ("col_N", rnd.randn(100)), ("col_B", rnd.randn(100)),
             ("col_A", pandas.Series(pandas.Categorical.from_codes(
                 rnd.binomial(4, 0.2, 100), ["C1", "C2", "C3", "C4", "C5"]), name="col_A")),
-        ])
+        ]))
 
-        cat_df = pandas.DataFrame.from_items([
+        cat_df = pandas.DataFrame.from_dict(OrderedDict([
             ("col_A", pandas.Series(pandas.Categorical.from_codes(
                 rnd.binomial(4, 0.6, 100), ["C1", "C2", "C3", "C4", "C5"]), name="col_A")),
             ("col_C", pandas.Series(pandas.Categorical.from_codes(
                 rnd.binomial(1, 0.6, 100), ["Yes", "No"]), name="col_C")),
-        ])
+        ]))
 
         self.assertRaisesRegex(ValueError, "duplicate columns col_A",
                                safe_concat, (numeric_df, cat_df), axis=1)