Skip to content

Commit

Permalink
Implement select_ltypes with implementation questions remaining
Browse files Browse the repository at this point in the history
  • Loading branch information
Tamar Grey committed Sep 14, 2020
1 parent 31c1015 commit 5e5afda
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 4 deletions.
7 changes: 3 additions & 4 deletions data_tables/data_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
NaturalLanguage,
Timedelta,
WholeNumber,
get_logical_types
str_to_logical_type
)


Expand Down Expand Up @@ -53,12 +53,11 @@ def __repr__(self):
return msg

def set_logical_type(self, logical_type):
logical_types_dict = get_logical_types()
if logical_type:
if logical_type in LogicalType.__subclasses__():
self._logical_type = logical_type
elif isinstance(logical_type, str) and logical_type in logical_types_dict:
self._logical_type = logical_types_dict[logical_type]
elif isinstance(logical_type, str):
self._logical_type = str_to_logical_type(logical_type)
else:
raise TypeError(f"Invalid logical type specified for '{self.series.name}'")
else:
Expand Down
35 changes: 35 additions & 0 deletions data_tables/data_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pandas as pd

from data_tables.data_column import DataColumn
from data_tables.logical_types import LogicalType, str_to_logical_type


class DataTable(object):
Expand Down Expand Up @@ -153,6 +154,40 @@ def df(self):
def to_pandas_dataframe(self):
    """Return the object stored on this instance's ``dataframe`` attribute.

    Returns:
        Whatever ``self.dataframe`` currently holds (presumably the
        underlying pandas DataFrame -- confirm against the constructor).
    """
    return self.dataframe

def select_ltypes(self, include):
    """Keep only the columns whose logical type is listed in ``include``.

    ``self.columns`` is replaced by a new dict containing just the matching
    columns, in their existing order. Nothing is returned.

    Args:
        include (str or LogicalType or list[str or LogicalType]): Logical
            type(s) to keep. A value that is not a list is treated as a
            one-element list. String entries are resolved through
            ``str_to_logical_type``.

    Raises:
        TypeError: If an entry is neither a member of
            ``LogicalType.__subclasses__()`` nor a string.
    """
    # TODO: decide whether a single list may mix strings and classes, or
    # must contain only one kind.
    # TODO: decide whether the selection should apply to the current
    # DataTable or to the original dataframe.
    requested = include if isinstance(include, list) else [include]

    # Resolve every entry before touching self.columns, so an invalid entry
    # raises without leaving the table partially modified.
    wanted_types = set()
    for entry in requested:
        if entry in LogicalType.__subclasses__():
            wanted_types.add(entry)
        elif isinstance(entry, str):
            wanted_types.add(str_to_logical_type(entry))
        else:
            raise TypeError(f"Invalid logical type specified: {entry}")

    # TODO: does the underlying dataframe ever need to change here? Only the
    # column mapping is narrowed at the moment.
    self.columns = {
        name: column
        for name, column in self.columns.items()
        if column.logical_type in wanted_types
    }

    # TODO: are we allowed to drop the index and time_index columns?
    # TODO: should it be an error if no columns are removed, or if an empty
    # list is given?


def _validate_params(dataframe, name, index, time_index, logical_types, semantic_types):
"""Check that values supplied during DataTable initialization are valid"""
Expand Down
9 changes: 9 additions & 0 deletions data_tables/logical_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,12 @@ def get_logical_types():
logical_types.update(class_name_dict)

return logical_types


def str_to_logical_type(logical_str):
    """Look up the logical type registered under ``logical_str``.

    Args:
        logical_str (str): Key to look up in the mapping returned by
            ``get_logical_types()``.

    Returns:
        The value stored under ``logical_str`` in that mapping.

    Raises:
        ValueError: If ``logical_str`` is not a key of the mapping.
    """
    known_types = get_logical_types()

    # Guard clause: reject unknown names first so the happy path is flat.
    if logical_str not in known_types:
        raise ValueError('String %s is not a valid logical type' % logical_str)

    return known_types[logical_str]

0 comments on commit 5e5afda

Please sign in to comment.