diff --git a/featuretools/entityset/entityset.py b/featuretools/entityset/entityset.py index 596587ba66..fb08e2534d 100644 --- a/featuretools/entityset/entityset.py +++ b/featuretools/entityset/entityset.py @@ -636,6 +636,7 @@ def entity_from_dataframe(self, make_index=False, time_index=None, secondary_time_index=None, + last_time_index=None, already_sorted=False): """ Load the data for a specified entity from a Pandas DataFrame. @@ -690,12 +691,21 @@ def entity_from_dataframe(self, es["transactions"].df """ - return self._import_from_dataframe(entity_id, dataframe.copy(), index=index, - make_index=make_index, - time_index=time_index, - secondary_time_index=secondary_time_index, - variable_types=variable_types, - already_sorted=already_sorted) + variable_types = variable_types or {} + entity = Entity( + entity_id, + dataframe, + self, + variable_types=variable_types, + index=index, + time_index=time_index, + secondary_time_index=secondary_time_index, + last_time_index=last_time_index, + already_sorted=already_sorted, + make_index=make_index) + self.entity_dict[entity.id] = entity + self.reset_metadata() + return self def normalize_entity(self, base_entity_id, new_entity_id, index, additional_variables=None, copy_variables=None, @@ -824,12 +834,14 @@ def normalize_entity(self, base_entity_id, new_entity_id, index, ti_cols = [c if c != old_ti_name else secondary_time_index for c in ti_cols] make_secondary_time_index = {secondary_time_index: ti_cols} - self._import_from_dataframe(new_entity_id, new_entity_df, - index, - time_index=new_entity_time_index, - secondary_time_index=make_secondary_time_index, - last_time_index=None, - variable_types=transfer_types) + self.entity_from_dataframe( + new_entity_id, + new_entity_df, + index, + time_index=new_entity_time_index, + secondary_time_index=make_secondary_time_index, + last_time_index=None, + variable_types=transfer_types) for v in additional_variables: self.entity_dict[base_entity_id].delete_variable(v) @@ -1083,52 +1095,6 @@ def related_instances(self, start_entity_id, final_entity_id, # Private methods ###################################################### ########################################################################### - def _import_from_dataframe(self, - entity_id, - dataframe, - index=None, - variable_types=None, - make_index=False, - time_index=None, - secondary_time_index=None, - last_time_index=None, - already_sorted=False): - """ - Load the data for a specified entity from a pandas dataframe. - - Args: - entity_id (str) : Unique id to associate with this entity. - dataframe (pd.DataFrame) : Pandas dataframe containing the data. - index (str, optional): Name of the variable used to index the entity. - If None, take the first column. - variable_types (dict[str -> dict[str -> type]]) : Optional mapping of - entity_id to variable_types dict with which to initialize an - entity's store. - make_index (bool, optional) : If True, assume index does not exist as a column in - dataframe, and create a new column of that name using integers the (0, len(dataframe)). - Otherwise, assume index exists in dataframe. - time_index (str, optional) : Name of column to use as a time index for this entity. Must be - a Datetime or Numeric dtype. - secondary_time_index (str, optional): Name of variable containing - time data to use a second time index for the entity. - already_sorted (bool, optional) : If True, assumes that input dataframe is already sorted by time. - Defaults to False. - """ - variable_types = variable_types or {} - entity = Entity(entity_id, - dataframe, - self, - variable_types=variable_types, - index=index, - time_index=time_index, - secondary_time_index=secondary_time_index, - last_time_index=last_time_index, - already_sorted=already_sorted, - make_index=make_index) - self.entity_dict[entity.id] = entity - self.reset_metadata() - return self - def _add_multigenerational_link_vars(self, frames, start_entity_id, end_entity_id=None, path=None): """ diff --git a/featuretools/tests/entityset_tests/test_es.py b/featuretools/tests/entityset_tests/test_es.py index 3d84448dc0..5a1086d91c 100644 --- a/featuretools/tests/entityset_tests/test_es.py +++ b/featuretools/tests/entityset_tests/test_es.py @@ -322,9 +322,12 @@ def test_converts_datetime(): 'time': variable_types.Datetime} entityset = EntitySet(id='test') - entityset._import_from_dataframe(entity_id='test_entity', index='id', - time_index="time", variable_types=vtypes, - dataframe=df) + entityset.entity_from_dataframe( + entity_id='test_entity', + index='id', + time_index="time", + variable_types=vtypes, + dataframe=df) pd_col = entityset['test_entity'].df['time'] # assert type(entityset['test_entity']['time']) == variable_types.Datetime assert type(pd_col[0]) == pd.Timestamp @@ -343,8 +346,11 @@ def test_handles_datetime_format(): 'time_no_format': variable_types.Datetime} entityset = EntitySet(id='test') - entityset._import_from_dataframe(entity_id='test_entity', index='id', - variable_types=vtypes, dataframe=df) + entityset.entity_from_dataframe( + entity_id='test_entity', + index='id', + variable_types=vtypes, + dataframe=df) col_format = entityset['test_entity'].df['time_format'] col_no_format = entityset['test_entity'].df['time_no_format']