Merge 4a20e6c into a97c18b

holoviz · Oct 2, 2018 · 9cd70ce · 9cd70ce
2 parents a97c18b + 4a20e6c
commit 9cd70ce
Show file tree

Hide file tree

Showing 7 changed files with 308 additions and 1 deletion.
diff --git a/.travis.yml b/.travis.yml
@@ -63,6 +63,9 @@ jobs:
     - <<: *default
       env: TOX_ENV=with_numpy
 
+    - <<: *default
+      env: TOX_ENV=with_pandas
+
     - <<: *default
       env: TOX_ENV=coverage
 

diff --git a/param/__init__.py b/param/__init__.py
@@ -1287,6 +1287,122 @@ def __init__(self, **params):
         super(Array,self).__init__(ndarray, allow_None=True, **params)
 
 
+class DataFrame(ClassSelector):
+    """
+    Parameter whose value is a pandas DataFrame.
+
+    The structure of the DataFrame can be constrained by the rows and
+    columns arguments:
+
+    rows: If specified, may be a number or an integer bounds tuple to
+    constrain the allowable number of rows.
+
+    columns: If specified, may be a number, an integer bounds tuple, a
+    list or a set. If the argument is numeric, constrains the number of
+    columns using the same semantics as used for rows. If either a list
+    or set of strings, the column names will be validated. If a set is
+    used, the supplied DataFrame must contain the specified columns and
+    if a list is given, the supplied DataFrame must contain exactly the
+    same columns and in the same order and no other columns.
+
+    ordered: Only meaningful when columns is a list or a set. If None
+    (the default) it is resolved during validation to True for a list
+    and False for a set. Passing ordered=False with a list makes the
+    list behave like a set; ordered=True with a set raises ValueError.
+
+    A bounds tuple has the form (lower, upper); both ends are
+    inclusive and either may be None to leave that side unbounded.
+
+    The parameter is declared with allow_None=True; a value of None is
+    not checked against the rows or columns constraints.
+    """
+    __slots__ = ['rows','columns', 'ordered']
+
+    def __init__(self, rows=None, columns=None, ordered=None, **params):
+        # pandas is imported lazily so that it is only required when a
+        # DataFrame parameter is actually declared.
+        from pandas import DataFrame as pdDFrame
+        # The constraints must be set before the superclass constructor
+        # runs, as _check_value reads them.
+        self.rows = rows
+        self.columns = columns
+        self.ordered = ordered
+        super(DataFrame,self).__init__(pdDFrame, allow_None=True, **params)
+        self._check_value(self.default)
+
+    def _length_bounds_check(self, bounds, length, name):
+        """
+        Raise ValueError if length does not satisfy bounds.
+
+        bounds is either a single value that length must equal, or a
+        (lower, upper) tuple of inclusive limits where None disables
+        the corresponding limit. name is used in the error message.
+        """
+        message = '{name} length {length} does not match declared bounds of {bounds}'
+        if not isinstance(bounds, tuple):
+            if (bounds != length):
+                raise ValueError(message.format(name=name, length=length, bounds=bounds))
+            else:
+                return
+        (lower, upper) = bounds
+        failure = ((lower is not None and (length < lower))
+                   or (upper is not None and length > upper))
+        if failure:
+            raise ValueError(message.format(name=name,length=length, bounds=bounds))
+
+    def _check_value(self,val,obj=None):
+        """
+        Validate val against the declared rows/columns constraints.
+
+        Raises ValueError if the constraints are inconsistent (a set of
+        columns with ordered=True) or if val violates them. As a side
+        effect, an ordered value of None is resolved to True or False
+        when columns is a list or a set.
+        """
+        super(DataFrame, self)._check_value(val, obj)
+
+        if isinstance(self.columns, set) and self.ordered is True:
+            raise ValueError('Columns cannot be ordered when specified as a set')
+
+        named_columns = isinstance(self.columns, (list, set))
+        if named_columns and self.ordered is None:
+            # A list implies an exact, ordered match; a set does not.
+            self.ordered = isinstance(self.columns, list)
+
+        if val is None:
+            # None has neither rows nor columns to measure; without
+            # this guard any declared constraint would crash on
+            # len(None) or None.columns.
+            return
+
+        if self.columns is None:
+            pass
+        elif (isinstance(self.columns, tuple) and len(self.columns)==2
+              and all(isinstance(v, (type(None), numbers.Number)) for v in self.columns)): # Numeric bounds tuple
+            self._length_bounds_check(self.columns, len(val.columns), 'Columns')
+        elif named_columns:
+            # Column labels are compared by their string form here.
+            difference = set(self.columns) - {str(el) for el in val.columns}
+            if difference:
+                msg = 'Provided DataFrame columns {found} does not contain required columns {expected}'
+                raise ValueError(msg.format(found=list(val.columns), expected=sorted(self.columns)))
+        else:
+            self._length_bounds_check(self.columns, len(val.columns), 'Column')
+
+        # An exact-order comparison only makes sense against a list of
+        # names; for numeric constraints it would compare column labels
+        # with bounds (or fail trying to iterate a number).
+        if self.ordered and named_columns:
+            if list(val.columns) != list(self.columns):
+                msg = 'Provided DataFrame columns {found} must exactly match {expected}'
+                raise ValueError(msg.format(found=list(val.columns), expected=self.columns))
+
+        if self.rows is not None:
+            self._length_bounds_check(self.rows, len(val), 'Row')
+
+    def __set__(self,obj,val):
+        # Validate before delegating so that an invalid value is
+        # rejected before the superclass stores it.
+        self._check_value(val,obj)
+        super(DataFrame,self).__set__(obj,val)
+
+
+class Series(ClassSelector):
+    """
+    Parameter whose value is a pandas Series.
+
+    The structure of the Series can be constrained by the rows argument
+    which may be a number or an integer bounds tuple to constrain the
+    allowable number of rows.
+
+    A bounds tuple has the form (lower, upper); both ends are
+    inclusive and either may be None to leave that side unbounded.
+
+    The parameter is declared with allow_None=True; a value of None is
+    not checked against the rows constraint.
+    """
+    __slots__ = ['rows']
+
+    def _length_bounds_check(self, bounds, length, name):
+        """
+        Raise ValueError if length does not satisfy bounds.
+
+        bounds is either a single value that length must equal, or a
+        (lower, upper) tuple of inclusive limits where None disables
+        the corresponding limit. name is used in the error message.
+        """
+        message = '{name} length {length} does not match declared bounds of {bounds}'
+        if not isinstance(bounds, tuple):
+            if (bounds != length):
+                raise ValueError(message.format(name=name, length=length, bounds=bounds))
+            else:
+                return
+        (lower, upper) = bounds
+        failure = ((lower is not None and (length < lower))
+                   or (upper is not None and length > upper))
+        if failure:
+            raise ValueError(message.format(name=name,length=length, bounds=bounds))
+
+    def __init__(self, rows=None, **params):
+        # pandas is imported lazily so that it is only required when a
+        # Series parameter is actually declared.
+        from pandas import Series as pdSeries
+        # rows must be set before the superclass constructor runs, as
+        # _check_value reads it.
+        self.rows = rows
+        super(Series,self).__init__(pdSeries, allow_None=True, **params)
+        self._check_value(self.default)
+
+    def _check_value(self,val,obj=None):
+        """
+        Validate val against the declared rows constraint, raising
+        ValueError if its length is outside the allowed bounds.
+        """
+        super(Series, self)._check_value(val, obj)
+
+        # None has no length to measure; without this guard declaring
+        # rows together with a None value would crash on len(None).
+        if self.rows is not None and val is not None:
+            self._length_bounds_check(self.rows, len(val), 'Row')
+
+    def __set__(self,obj,val):
+        # Validate before delegating so that an invalid value is
+        # rejected before the superclass stores it.
+        self._check_value(val,obj)
+        super(Series,self).__set__(obj,val)
+
+
+
 # For portable code:
 #   - specify paths in unix (rather than Windows) style;
 #   - use resolve_file_path() for paths to existing files to be read,

diff --git a/setup.py b/setup.py
@@ -20,7 +20,7 @@ def get_setup_version(reponame):
     # (https://github.com/pypa/pip/issues/1197)
     'tests': [
         'nose',
-        'flake8',
+        'flake8'
     ]
 }
 

diff --git a/tests/API0/testdefaults.py b/tests/API0/testdefaults.py
@@ -22,6 +22,12 @@
 except ImportError:
     skip.append('Array')
 
+try:
+    import pandas
+except ImportError:
+    skip.append('DataFrame')
+    skip.append('Series')
+
 
 class TestDefaultsMetaclass(type):
     def __new__(mcs, name, bases, dict_):

diff --git a/tests/API1/testdefaults.py b/tests/API1/testdefaults.py
@@ -19,6 +19,11 @@
     import numpy
 except ImportError:
     skip.append('Array')
+try:
+    import pandas
+except ImportError:
+    skip.append('DataFrame')
+    skip.append('Series')
 
 
 class TestDefaultsMetaclass(type):

diff --git a/tests/API1/testpandas.py b/tests/API1/testpandas.py
@@ -0,0 +1,170 @@
+"""
+Test Parameters based on pandas
+"""
+import unittest
+import os
+
+import param
+from . import API1TestCase
+
+# pandas is an optional dependency: without it this module is skipped,
+# unless the environment explicitly demands that the pandas tests run.
+try:
+    import pandas
+except ImportError:
+    # PARAM_TEST_PANDAS=1 (set by the with_pandas tox environment)
+    # turns a missing pandas into a hard failure instead of a skip.
+    if os.getenv('PARAM_TEST_PANDAS','0') == '1':
+        raise ImportError("PARAM_TEST_PANDAS=1 but pandas not available.")
+    else:
+        raise unittest.SkipTest("pandas not available")
+
+
+class TestDataFrame(API1TestCase):
+    # Tests for param.DataFrame row/column constraints.
+    #
+    # Tests that only declare a Parameterized class pass when the
+    # declaration does not raise, i.e. when the default validates.
+    #
+    # assertRaisesRegexp is kept (rather than assertRaisesRegex)
+    # because the tox matrix still includes py27. Expected messages
+    # that contain regex escapes are written as raw strings.
+
+    def test_empty_dataframe_param_invalid_set(self):
+        empty = pandas.DataFrame()
+        class Test(param.Parameterized):
+            df = param.DataFrame(default=empty)
+
+        test = Test()
+        exception = "Parameter 'df' value must be an instance of DataFrame, not '3'"
+        with self.assertRaisesRegexp(ValueError, exception):
+            test.df = 3
+
+    def test_dataframe_unordered_column_set_valid(self):
+        valid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'c':[4,5]}, columns=['b', 'a', 'c'])
+        class Test(param.Parameterized):
+            df = param.DataFrame(default=valid_df, columns={'a', 'b'})
+
+    def test_dataframe_unordered_column_set_invalid(self):
+        valid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'd':[4,5]}, columns=['b', 'a', 'd'])
+        invalid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'c':[4,5]}, columns=['b', 'a', 'c'])
+
+        class Test(param.Parameterized):
+            df = param.DataFrame(default=valid_df, columns={'a', 'd'})
+
+        test = Test()
+        # A set of columns resolves ordered to False.
+        self.assertEqual(test.param.params('df').ordered, False)
+        exception = r"Provided DataFrame columns \['b', 'a', 'c'\] does not contain required columns \['a', 'd'\]"
+        with self.assertRaisesRegexp(ValueError, exception):
+            test.df = invalid_df
+
+    def test_dataframe_ordered_column_list_valid(self):
+        valid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'c':[4,5]}, columns=['b', 'a', 'c'])
+        class Test(param.Parameterized):
+            test = param.DataFrame(default=valid_df, columns=['b', 'a', 'c'])
+
+    def test_dataframe_ordered_column_list_invalid(self):
+        valid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'd':[4,5]}, columns=['b', 'a', 'd'])
+        # Same column names as the declaration but in a different order.
+        invalid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'd':[4,5]}, columns=['a', 'b', 'd'])
+
+        class Test(param.Parameterized):
+            df = param.DataFrame(default=valid_df, columns=['b', 'a', 'd'])
+
+        test = Test()
+        # A list of columns resolves ordered to True.
+        self.assertEqual(test.param.params('df').ordered, True)
+
+        exception = r"Provided DataFrame columns \['a', 'b', 'd'\] must exactly match \['b', 'a', 'd'\]"
+        with self.assertRaisesRegexp(ValueError, exception):
+            test.df = invalid_df
+
+    def test_dataframe_unordered_column_number_valid_df(self):
+        valid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'c':[4,5]}, columns=['b', 'a', 'c'])
+        class Test(param.Parameterized):
+            df = param.DataFrame(default=valid_df, columns=3)
+
+    def test_dataframe_unordered_column_number_invalid(self):
+        valid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'c':[4,5]}, columns=['b', 'a', 'c'])
+        invalid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3]}, columns=['b', 'a'])
+        class Test(param.Parameterized):
+            df = param.DataFrame(default=valid_df, columns=3)
+
+        test = Test()
+        # A numeric column constraint leaves ordered unresolved.
+        self.assertEqual(test.param.params('df').ordered, None)
+
+        exception = "Column length 2 does not match declared bounds of 3"
+        with self.assertRaisesRegexp(ValueError, exception):
+            test.df = invalid_df
+
+    def test_dataframe_unordered_column_tuple_valid(self):
+        valid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'c':[4,5]}, columns=['b', 'a', 'c'])
+        class Test(param.Parameterized):
+            df = param.DataFrame(default=valid_df, columns=(None,3))
+
+    def test_dataframe_unordered_column_tuple_invalid(self):
+        invalid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'c':[4,5]}, columns=['b', 'a', 'c'])
+
+        # The invalid default must be rejected at declaration time.
+        exception = r"Columns length 3 does not match declared bounds of \(None, 2\)"
+        with self.assertRaisesRegexp(ValueError, exception):
+            class Test(param.Parameterized):
+                df = param.DataFrame(default=invalid_df, columns=(None,2))
+
+    def test_dataframe_row_number_valid_df(self):
+        valid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'c':[4,5]}, columns=['b', 'a', 'c'])
+        class Test(param.Parameterized):
+            df = param.DataFrame(default=valid_df, rows=2)
+
+    def test_dataframe_row_number_invalid(self):
+        valid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3]}, columns=['b', 'a'])
+        invalid_df = pandas.DataFrame({'a':[1,2,4], 'b':[2,3,4]}, columns=['b', 'a'])
+        class Test(param.Parameterized):
+            df = param.DataFrame(default=valid_df, rows=2)
+
+        test = Test()
+        exception = "Row length 3 does not match declared bounds of 2"
+        with self.assertRaisesRegexp(ValueError, exception):
+            test.df = invalid_df
+
+    def test_dataframe_unordered_row_tuple_valid(self):
+        valid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'c':[4,5]}, columns=['b', 'a', 'c'])
+        class Test(param.Parameterized):
+            df = param.DataFrame(default=valid_df, rows=(None,3))
+
+    def test_dataframe_unordered_row_tuple_invalid(self):
+        invalid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'c':[4,5]}, columns=['b', 'a', 'c'])
+
+        # The invalid default must be rejected at declaration time.
+        exception = r"Row length 2 does not match declared bounds of \(5, 7\)"
+        with self.assertRaisesRegexp(ValueError, exception):
+            class Test(param.Parameterized):
+                df = param.DataFrame(default=invalid_df, rows=(5,7))
+
+
+class TestSeries(API1TestCase):
+    # Tests for the rows constraint of param.Series.
+    #
+    # The method names still say "dataframe" and are kept unchanged so
+    # that existing test selections keep working.
+    # Tests that only declare a Parameterized class pass when the
+    # declaration does not raise, i.e. when the default validates.
+
+    def test_dataframe_row_number_valid_df(self):
+        valid_series = pandas.Series([1,2])
+        class Test(param.Parameterized):
+            df = param.Series(default=valid_series, rows=2)
+
+    def test_dataframe_row_number_invalid(self):
+        valid_series = pandas.Series([1,2])
+        invalid_series = pandas.Series([1,2,3])
+        class Test(param.Parameterized):
+            df = param.Series(default=valid_series, rows=2)
+
+        test = Test()
+        exception = "Row length 3 does not match declared bounds of 2"
+        with self.assertRaisesRegexp(ValueError, exception):
+            test.df = invalid_series
+
+    def test_dataframe_unordered_row_tuple_valid(self):
+        valid_series = pandas.Series([1,2,3])
+        class Test(param.Parameterized):
+            df = param.Series(default=valid_series, rows=(None,3))
+
+    def test_dataframe_unordered_row_tuple_invalid(self):
+        invalid_series = pandas.Series([1,2])
+
+        # The invalid default must be rejected at declaration time.
+        # Raw string: the pattern contains regex escapes.
+        exception = r"Row length 2 does not match declared bounds of \(5, 7\)"
+        with self.assertRaisesRegexp(ValueError, exception):
+            class Test(param.Parameterized):
+                df = param.Series(default=invalid_series, rows=(5,7))
+
+# Allow this test module to be run directly as a script using nose.
+if __name__ == "__main__":
+    import nose
+    nose.runmodule()
diff --git a/tox.ini b/tox.ini
@@ -4,6 +4,7 @@ envlist =
     {py27,py36}-flakes,
     {py27,py36}-with_numpy,
     {py27,py36}-with_ipython
+    {py27,py35,py36,py37}-with_pandas
 
 [testenv]
 deps = .[tests]
@@ -25,6 +26,12 @@ deps = {[testenv]deps}
        numpy
 setenv = PARAM_TEST_NUMPY = 1
 
+[testenv:with_pandas]
+deps = {[testenv]deps}
+       pandas
+setenv = PARAM_TEST_PANDAS = 1
+
+
 [testenv:with_ipython]
 deps = {[testenv]deps}
        ipython