Skip to content

Commit

Permalink
Add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Tamar Grey committed Sep 14, 2020
1 parent 90750f8 commit e7f7d00
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 10 deletions.
13 changes: 3 additions & 10 deletions data_tables/data_table.py
Expand Up @@ -155,14 +155,11 @@ def to_pandas_dataframe(self):
return self.dataframe

def select_ltypes(self, include):
"""Include columns in the DataTable whose logical types are specified here. Will remove any
columns whose logical type is not specified from the DataTable.
"""Update DataTable to only include columns whose logical types are specified here.
Will lose any column, including indices, whose logical type is not specified.
Args:
--> should the lists be able to have both strings and class, or just one or the other?
-->should this be from the current datatable or from the original data frame?
include (str or LogicalType or list[str or LogicalType]): Logical types to include in
the DataTable
include (str or LogicalType or list[str or LogicalType]): Logical types to include in the DataTable
"""
if not isinstance(include, list):
include = [include]
Expand All @@ -182,12 +179,8 @@ def set_logical_type(ltype):
if col.logical_type in include:
cols_to_include.append(col_name)

# --> does the underlying dataframe ever get changed?
self.columns = {col_name: self.columns[col_name]for col_name in cols_to_include}

# --> are we allowed to get rid of index and time_index??
# --> Any error if no columns are removed or if given empty list?


def _validate_params(dataframe, name, index, time_index, logical_types, semantic_types):
"""Check that values supplied during DataTable initialization are valid"""
Expand Down
111 changes: 111 additions & 0 deletions data_tables/tests/data_table/test_datatable.py
Expand Up @@ -363,3 +363,114 @@ def test_replace_none_with_nan(none_df):
assert np.isnan(dt.df['all_none'].loc[1])
assert np.isnan(dt.df['all_none'].loc[2])
assert np.isnan(dt.df['some_none'].loc[1])


def test_select_ltypes(sample_df):
    """Exercise select_ltypes error handling and the all-types / no-types cases.

    Checks that an integer argument raises TypeError, that an unrecognized
    string raises ValueError, that selecting every LogicalType subclass keeps
    as many columns as the underlying dataframe has, and that selecting with
    an empty list leaves the DataTable without columns.
    """
    table = DataTable(sample_df)
    ltype_assignments = {
        'full_name': FullName,
        'email': EmailAddress,
        'phone_number': PhoneNumber,
        'age': Double,
        'signup_date': Double,
    }
    table.set_logical_types(ltype_assignments)

    # A value that is neither a string nor a logical type is rejected.
    with pytest.raises(TypeError, match="Invalid logical type specified: 1"):
        table.select_ltypes(1)

    # A string that does not name a logical type is rejected.
    with pytest.raises(ValueError, match="String test is not a valid logical type"):
        table.select_ltypes('test')

    every_ltype = LogicalType.__subclasses__()
    table.select_ltypes(every_ltype)
    assert len(table.columns) == len(table.df.columns)

    table.select_ltypes([])
    assert not table.columns

    # Selection is applied to the current (now empty) set of columns, so
    # asking for every ltype again must still yield no columns.
    table.select_ltypes(every_ltype)
    assert not table.columns


def test_select_ltypes_strings(sample_df):
    """Select columns by logical type using string identifiers.

    Selects first with a list of strings, then with a single string, and
    checks that the surviving 'full_name' column matches the column that
    was present before any selection took place.
    """
    table = DataTable(sample_df)
    ltype_assignments = {
        'full_name': FullName,
        'email': EmailAddress,
        'phone_number': PhoneNumber,
        'age': Double,
        'signup_date': Double,
    }
    table.set_logical_types(ltype_assignments)
    semantic_assignments = {
        'full_name': {'new_tag': {'additional': 'value'}},
        'age': 'numeric',
    }
    table.set_semantic_types(semantic_assignments)

    # Captured before any selection so it can be compared afterwards.
    full_name_before = table.columns['full_name']

    table.select_ltypes(['FullName', 'email_address', 'double'])
    assert len(table.columns) == 4
    assert 'phone_number' not in table.columns

    # A bare string (not wrapped in a list) is accepted as well.
    table.select_ltypes('full_name')
    assert len(table.columns) == 1
    full_name_after = table.columns['full_name']

    assert full_name_after.logical_type == full_name_before.logical_type
    assert full_name_after.series.equals(full_name_before.series)
    assert full_name_after.dtype == full_name_before.dtype
    assert full_name_after.semantic_types.keys() == full_name_before.semantic_types.keys()


def test_select_ltypes_objects(sample_df):
    """Select columns by logical type using LogicalType classes.

    Selects first with a list of classes, then with a single class, and
    checks that the surviving 'full_name' column matches the column that
    was present before any selection took place.
    """
    table = DataTable(sample_df)
    ltype_assignments = {
        'full_name': FullName,
        'email': EmailAddress,
        'phone_number': PhoneNumber,
        'age': Double,
        'signup_date': Double,
    }
    table.set_logical_types(ltype_assignments)
    semantic_assignments = {
        'full_name': {'new_tag': {'additional': 'value'}},
        'age': 'numeric',
    }
    table.set_semantic_types(semantic_assignments)

    # Captured before any selection so it can be compared afterwards.
    full_name_before = table.columns['full_name']

    table.select_ltypes([FullName, EmailAddress, Double])
    assert len(table.columns) == 4
    assert 'phone_number' not in table.columns

    # A bare class (not wrapped in a list) is accepted as well.
    table.select_ltypes(FullName)
    assert len(table.columns) == 1
    full_name_after = table.columns['full_name']

    assert full_name_after.logical_type == full_name_before.logical_type
    assert full_name_after.series.equals(full_name_before.series)
    assert full_name_after.dtype == full_name_before.dtype
    assert full_name_after.semantic_types.keys() == full_name_before.semantic_types.keys()


def test_select_ltypes_combined(sample_df):
    """Select columns using a list that mixes strings and LogicalType classes.

    Also checks that a follow-up selection for a logical type that was
    already filtered out leaves the DataTable without columns.
    """
    table = DataTable(sample_df)
    ltype_assignments = {
        'full_name': FullName,
        'email': EmailAddress,
        'phone_number': PhoneNumber,
        'age': Double,
        'signup_date': Double,
    }
    table.set_logical_types(ltype_assignments)

    mixed_selection = ['FullName', 'email_address', Double]
    table.select_ltypes(mixed_selection)
    assert len(table.columns) == 4
    assert 'phone_number' not in table.columns

    # PhoneNumber was dropped by the previous selection, so selecting it now
    # should result in an empty DataTable.
    table.select_ltypes(PhoneNumber)
    assert not table.columns
5 changes: 5 additions & 0 deletions data_tables/tests/logical_types/test_logical_types.py
@@ -1,6 +1,7 @@
from data_tables.logical_types import (
Boolean,
Categorical,
FullName,
LogicalType,
get_logical_types,
str_to_logical_type
Expand Down Expand Up @@ -37,3 +38,7 @@ def test_str_to_logical_type():
for logical_type in all_types:
assert str_to_logical_type(logical_type.__name__) == logical_type
assert str_to_logical_type(logical_type.type_string) == logical_type

assert str_to_logical_type('bOoLeAn') == Boolean
assert str_to_logical_type('full_NAME') == FullName
assert str_to_logical_type('FullnamE') == FullName

0 comments on commit e7f7d00

Please sign in to comment.