Skip to content

Commit

Permalink
Merge 4a20e6c into a97c18b
Browse files Browse the repository at this point in the history
  • Loading branch information
jlstevens committed Oct 2, 2018
2 parents a97c18b + 4a20e6c commit 9cd70ce
Show file tree
Hide file tree
Showing 7 changed files with 308 additions and 1 deletion.
3 changes: 3 additions & 0 deletions .travis.yml
Expand Up @@ -63,6 +63,9 @@ jobs:
- <<: *default
env: TOX_ENV=with_numpy

- <<: *default
env: TOX_ENV=with_pandas

- <<: *default
env: TOX_ENV=coverage

Expand Down
116 changes: 116 additions & 0 deletions param/__init__.py
Expand Up @@ -1287,6 +1287,122 @@ def __init__(self, **params):
super(Array,self).__init__(ndarray, allow_None=True, **params)


class DataFrame(ClassSelector):
    """
    Parameter whose value is a pandas DataFrame (or None).

    The structure of the DataFrame can be constrained by the rows and
    columns arguments:

    rows: If specified, may be a number or an integer bounds tuple to
    constrain the allowable number of rows.

    columns: If specified, may be a number, an integer bounds tuple, a
    list or a set. If the argument is numeric, constrains the number of
    columns using the same semantics as used for rows. If either a list
    or set of strings, the column names will be validated. If a set is
    used, the supplied DataFrame must contain the specified columns and
    if a list is given, the supplied DataFrame must contain exactly the
    same columns and in the same order and no other columns.

    ordered: If None (the default), it is resolved to True when columns
    is a list and to False when columns is a set. Passing ordered=True
    together with a set of columns raises a ValueError.

    A value of None is accepted (the parameter is declared with
    allow_None=True) and is exempt from the rows/columns constraints,
    since it has no structure to validate.
    """

    __slots__ = ['rows', 'columns', 'ordered']

    def __init__(self, rows=None, columns=None, ordered=None, **params):
        # Imported here rather than at module level, so pandas is only
        # needed once a DataFrame parameter is actually declared.
        from pandas import DataFrame as pdDFrame
        self.rows = rows
        self.columns = columns
        self.ordered = ordered
        super(DataFrame, self).__init__(pdDFrame, allow_None=True, **params)
        # Validate the default eagerly so that an invalid declaration
        # fails at class-definition time.
        self._check_value(self.default)

    def _length_bounds_check(self, bounds, length, name):
        """
        Raise ValueError unless length satisfies bounds.

        bounds is either a single value that length must equal, or a
        (lower, upper) tuple of inclusive limits in which None means
        "no limit" on that side. name is used as the prefix of the
        error message.
        """
        message = '{name} length {length} does not match declared bounds of {bounds}'
        if not isinstance(bounds, tuple):
            # Scalar bound: the length has to match exactly.
            if (bounds != length):
                raise ValueError(message.format(name=name, length=length, bounds=bounds))
            else:
                return
        (lower, upper) = bounds
        failure = ((lower is not None and (length < lower))
                   or (upper is not None and length > upper))
        if failure:
            raise ValueError(message.format(name=name, length=length, bounds=bounds))

    def _check_value(self, val, obj=None):
        """
        Validate val against the declared class, columns and rows.

        Raises ValueError if the declaration itself is inconsistent (a
        set of columns with ordered=True) or if val violates one of the
        declared constraints. As a side effect, resolves self.ordered
        from the type of self.columns when it was left as None.
        """
        super(DataFrame, self)._check_value(val, obj)

        if isinstance(self.columns, set) and self.ordered is True:
            raise ValueError('Columns cannot be ordered when specified as a set')

        # Resolve the default for `ordered` up front: it depends only on
        # how the columns were declared, not on the value being checked.
        named_columns = isinstance(self.columns, (list, set))
        if named_columns and self.ordered is None:
            self.ordered = isinstance(self.columns, list)

        # None has no rows or columns to inspect; without this guard the
        # checks below fail with AttributeError/TypeError on None.
        if val is None:
            return

        if self.columns is None:
            pass
        elif (isinstance(self.columns, tuple) and len(self.columns) == 2
              and all(isinstance(v, (type(None), numbers.Number)) for v in self.columns)):  # Numeric bounds tuple
            self._length_bounds_check(self.columns, len(val.columns), 'Columns')
        elif named_columns:
            # Column labels are compared by their string form.
            difference = set(self.columns) - set([str(el) for el in val.columns])
            if difference:
                msg = 'Provided DataFrame columns {found} does not contain required columns {expected}'
                raise ValueError(msg.format(found=list(val.columns), expected=sorted(self.columns)))
        else:
            self._length_bounds_check(self.columns, len(val.columns), 'Column')

        if self.ordered:
            if list(val.columns) != list(self.columns):
                msg = 'Provided DataFrame columns {found} must exactly match {expected}'
                raise ValueError(msg.format(found=list(val.columns), expected=self.columns))

        if self.rows is not None:
            self._length_bounds_check(self.rows, len(val), 'Row')

    def __set__(self, obj, val):
        # Validate before delegating so that invalid values are rejected
        # before the assignment is carried out.
        self._check_value(val, obj)
        super(DataFrame, self).__set__(obj, val)


class Series(ClassSelector):
    """
    Parameter whose value is a pandas Series (or None).

    The structure of the Series can be constrained by the rows argument
    which may be a number or an integer bounds tuple to constrain the
    allowable number of rows.

    A value of None is accepted (the parameter is declared with
    allow_None=True) and is exempt from the rows constraint, since it
    has no length to validate.
    """

    __slots__ = ['rows']

    def _length_bounds_check(self, bounds, length, name):
        """
        Raise ValueError unless length satisfies bounds.

        bounds is either a single value that length must equal, or a
        (lower, upper) tuple of inclusive limits in which None means
        "no limit" on that side. name is used as the prefix of the
        error message.
        """
        message = '{name} length {length} does not match declared bounds of {bounds}'
        if not isinstance(bounds, tuple):
            # Scalar bound: the length has to match exactly.
            if (bounds != length):
                raise ValueError(message.format(name=name, length=length, bounds=bounds))
            else:
                return
        (lower, upper) = bounds
        failure = ((lower is not None and (length < lower))
                   or (upper is not None and length > upper))
        if failure:
            raise ValueError(message.format(name=name, length=length, bounds=bounds))

    def __init__(self, rows=None, **params):
        # Imported here rather than at module level, so pandas is only
        # needed once a Series parameter is actually declared.
        from pandas import Series as pdSeries
        self.rows = rows
        super(Series, self).__init__(pdSeries, allow_None=True, **params)
        # Validate the default eagerly so that an invalid declaration
        # fails at class-definition time.
        self._check_value(self.default)

    def _check_value(self, val, obj=None):
        """
        Validate val against the declared class and row constraint.

        Raises ValueError if val has a number of rows outside the
        declared bounds.
        """
        super(Series, self)._check_value(val, obj)

        # None has no length to inspect; without this guard len(None)
        # raises TypeError whenever rows is declared.
        if val is None:
            return

        if self.rows is not None:
            self._length_bounds_check(self.rows, len(val), 'Row')

    def __set__(self, obj, val):
        # Validate before delegating so that invalid values are rejected
        # before the assignment is carried out.
        self._check_value(val, obj)
        super(Series, self).__set__(obj, val)



# For portable code:
# - specify paths in unix (rather than Windows) style;
# - use resolve_file_path() for paths to existing files to be read,
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -20,7 +20,7 @@ def get_setup_version(reponame):
# (https://github.com/pypa/pip/issues/1197)
'tests': [
'nose',
'flake8',
'flake8'
]
}

Expand Down
6 changes: 6 additions & 0 deletions tests/API0/testdefaults.py
Expand Up @@ -22,6 +22,12 @@
except ImportError:
skip.append('Array')

try:
import pandas
except ImportError:
skip.append('DataFrame')
skip.append('Series')


class TestDefaultsMetaclass(type):
def __new__(mcs, name, bases, dict_):
Expand Down
5 changes: 5 additions & 0 deletions tests/API1/testdefaults.py
Expand Up @@ -19,6 +19,11 @@
import numpy
except ImportError:
skip.append('Array')
try:
import pandas
except ImportError:
skip.append('DataFrame')
skip.append('Series')


class TestDefaultsMetaclass(type):
Expand Down
170 changes: 170 additions & 0 deletions tests/API1/testpandas.py
@@ -0,0 +1,170 @@
"""
Test Parameters based on pandas
"""
import unittest
import os

import param
from . import API1TestCase

try:
import pandas
except ImportError:
if os.getenv('PARAM_TEST_PANDAS','0') == '1':
raise ImportError("PARAM_TEST_PANDAS=1 but pandas not available.")
else:
raise unittest.SkipTest("pandas not available")


class TestDataFrame(API1TestCase):
    """
    Tests for param.DataFrame: class checking plus the rows, columns and
    ordered constraints.

    The "valid" tests pass simply by declaring the Parameterized class
    without an exception, because the default is validated when the
    parameter is constructed.
    """

    def test_empty_dataframe_param_invalid_set(self):
        empty = pandas.DataFrame()
        class Test(param.Parameterized):
            df = param.DataFrame(default=empty)

        test = Test()
        exception = "Parameter 'df' value must be an instance of DataFrame, not '3'"
        with self.assertRaisesRegexp(ValueError, exception):
            test.df = 3

    def test_dataframe_unordered_column_set_valid(self):
        valid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'c':[4,5]}, columns=['b', 'a', 'c'])
        class Test(param.Parameterized):
            df = param.DataFrame(default=valid_df, columns={'a', 'b'})

    def test_dataframe_unordered_column_set_invalid(self):
        valid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'd':[4,5]}, columns=['b', 'a', 'd'])
        invalid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'c':[4,5]}, columns=['b', 'a', 'c'])

        class Test(param.Parameterized):
            df = param.DataFrame(default=valid_df, columns={'a', 'd'})

        test = Test()
        # A set of columns resolves `ordered` to False.
        self.assertEquals(test.param.params('df').ordered, False)
        # Raw string: the pattern contains regex escapes (\[ and \]).
        exception = r"Provided DataFrame columns \['b', 'a', 'c'\] does not contain required columns \['a', 'd'\]"
        with self.assertRaisesRegexp(ValueError, exception):
            test.df = invalid_df

    def test_dataframe_ordered_column_list_valid(self):
        valid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'c':[4,5]}, columns=['b', 'a', 'c'])
        class Test(param.Parameterized):
            test = param.DataFrame(default=valid_df, columns=['b', 'a', 'c'])

    def test_dataframe_ordered_column_list_invalid(self):
        valid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'd':[4,5]}, columns=['b', 'a', 'd'])
        invalid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'c':[4,5]}, columns=['a', 'b', 'd'])

        class Test(param.Parameterized):
            df = param.DataFrame(default=valid_df, columns=['b', 'a', 'd'])

        test = Test()
        # A list of columns resolves `ordered` to True.
        self.assertEquals(test.param.params('df').ordered, True)

        # Raw string: the pattern contains regex escapes (\[ and \]).
        exception = r"Provided DataFrame columns \['a', 'b', 'd'\] must exactly match \['b', 'a', 'd'\]"
        with self.assertRaisesRegexp(ValueError, exception):
            test.df = invalid_df

    def test_dataframe_unordered_column_number_valid_df(self):
        valid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'c':[4,5]}, columns=['b', 'a', 'c'])
        class Test(param.Parameterized):
            df = param.DataFrame(default=valid_df, columns=3)

    def test_dataframe_unordered_column_number_invalid(self):
        valid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'c':[4,5]}, columns=['b', 'a', 'c'])
        invalid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3]}, columns=['b', 'a'])
        class Test(param.Parameterized):
            df = param.DataFrame(default=valid_df, columns=3)

        test = Test()
        # A numeric column constraint leaves `ordered` unresolved.
        self.assertEquals(test.param.params('df').ordered, None)

        exception = "Column length 2 does not match declared bounds of 3"
        with self.assertRaisesRegexp(ValueError, exception):
            test.df = invalid_df

    def test_dataframe_unordered_column_tuple_valid(self):
        valid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'c':[4,5]}, columns=['b', 'a', 'c'])
        class Test(param.Parameterized):
            df = param.DataFrame(default=valid_df, columns=(None,3))

    def test_dataframe_unordered_column_tuple_invalid(self):

        invalid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'c':[4,5]}, columns=['b', 'a', 'c'])

        # Raw string: the pattern contains regex escapes (\( and \)).
        exception = r"Columns length 3 does not match declared bounds of \(None, 2\)"
        # The invalid default is rejected while the class body is executed.
        with self.assertRaisesRegexp(ValueError, exception):
            class Test(param.Parameterized):
                df = param.DataFrame(default=invalid_df, columns=(None,2))

    def test_dataframe_row_number_valid_df(self):
        valid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'c':[4,5]}, columns=['b', 'a', 'c'])
        class Test(param.Parameterized):
            df = param.DataFrame(default=valid_df, rows=2)

    def test_dataframe_row_number_invalid(self):
        valid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3]}, columns=['b', 'a'])
        invalid_df = pandas.DataFrame({'a':[1,2,4], 'b':[2,3,4]}, columns=['b', 'a'])
        class Test(param.Parameterized):
            df = param.DataFrame(default=valid_df, rows=2)

        test = Test()
        exception = "Row length 3 does not match declared bounds of 2"
        with self.assertRaisesRegexp(ValueError, exception):
            test.df = invalid_df

    def test_dataframe_unordered_row_tuple_valid(self):
        valid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'c':[4,5]}, columns=['b', 'a', 'c'])
        class Test(param.Parameterized):
            df = param.DataFrame(default=valid_df, rows=(None,3))

    def test_dataframe_unordered_row_tuple_invalid(self):

        invalid_df = pandas.DataFrame({'a':[1,2], 'b':[2,3], 'c':[4,5]}, columns=['b', 'a', 'c'])

        # Raw string: the pattern contains regex escapes (\( and \)).
        exception = r"Row length 2 does not match declared bounds of \(5, 7\)"
        # The invalid default is rejected while the class body is executed.
        with self.assertRaisesRegexp(ValueError, exception):
            class Test(param.Parameterized):
                df = param.DataFrame(default=invalid_df, rows=(5,7))


class TestSeries(API1TestCase):
    """
    Tests for param.Series: the rows constraint given as an exact number
    and as a bounds tuple.

    The "valid" tests pass simply by declaring the Parameterized class
    without an exception, because the default is validated when the
    parameter is constructed.
    """

    def test_dataframe_row_number_valid_df(self):
        valid_df = pandas.Series([1,2])
        class Test(param.Parameterized):
            df = param.Series(default=valid_df, rows=2)

    def test_dataframe_row_number_invalid(self):
        valid_df = pandas.Series([1,2])
        invalid_df = pandas.Series([1,2,3])
        class Test(param.Parameterized):
            df = param.Series(default=valid_df, rows=2)

        test = Test()
        exception = "Row length 3 does not match declared bounds of 2"
        with self.assertRaisesRegexp(ValueError, exception):
            test.df = invalid_df

    def test_dataframe_unordered_row_tuple_valid(self):
        valid_df = pandas.Series([1,2,3])
        class Test(param.Parameterized):
            df = param.Series(default=valid_df, rows=(None,3))

    def test_dataframe_unordered_row_tuple_invalid(self):

        invalid_df = pandas.Series([1,2])

        # Raw string: the pattern contains regex escapes (\( and \)).
        exception = r"Row length 2 does not match declared bounds of \(5, 7\)"
        # The invalid default is rejected while the class body is executed.
        with self.assertRaisesRegexp(ValueError, exception):
            class Test(param.Parameterized):
                df = param.Series(default=invalid_df, rows=(5,7))

# Allow this test module to be executed directly, using nose as the runner.
if __name__ == "__main__":
    import nose
    nose.runmodule()
7 changes: 7 additions & 0 deletions tox.ini
Expand Up @@ -4,6 +4,7 @@ envlist =
{py27,py36}-flakes,
{py27,py36}-with_numpy,
{py27,py36}-with_ipython
{py27,py35,py36,py37}-with_pandas

[testenv]
deps = .[tests]
Expand All @@ -25,6 +26,12 @@ deps = {[testenv]deps}
numpy
setenv = PARAM_TEST_NUMPY = 1

[testenv:with_pandas]
deps = {[testenv]deps}
pandas
setenv = PARAM_TEST_PANDAS = 1


[testenv:with_ipython]
deps = {[testenv]deps}
ipython
Expand Down

0 comments on commit 9cd70ce

Please sign in to comment.