Skip to content

Commit

Permalink
Entity Normalization Preserves Types of Copy & Additional Variables (#25)
Browse files Browse the repository at this point in the history

* make sure to copy types of variables over when normalizing an entity

* added to the wrong branch; now it is in the right file

* added test

* included copy variables in test

* removed copied code from other pr

* removed whitespace
  • Loading branch information
Ben Schreck authored and kmax12 committed Oct 31, 2017
1 parent 0310c8f commit edff572
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 0 deletions.
2 changes: 2 additions & 0 deletions featuretools/entityset/entityset.py
Expand Up @@ -710,6 +710,8 @@ def normalize_entity(self, base_entity_id, new_entity_id, index,

transfer_types = {}
transfer_types[new_index] = type(base_entity[index])
for v in additional_variables + copy_variables:
transfer_types[v] = type(base_entity[v])

# create and add new entity
new_entity_df = self.get_dataframe(base_entity_id)
Expand Down
19 changes: 19 additions & 0 deletions featuretools/tests/entityset_tests/test_pandas_es.py
Expand Up @@ -609,6 +609,25 @@ def test_normalize_entity(self, entityset):
assert 'device_name' not in entityset['sessions'].df.columns
assert 'device_type' in entityset['device_types'].df.columns

def test_normalize_entity_copies_variable_types(self, entityset):
# Regression test: after normalize_entity, the new entity should report the
# same variable types as the base entity for both the variables passed as
# additional_variables and those passed as copy_variables.
#
# Mark 'value' as Ordinal on the 'log' entity. convert_data=False is passed,
# so presumably only the type metadata changes, not the column data -- confirm.
entityset['log'].convert_variable_type('value', variable_types.Ordinal, convert_data=False)
assert entityset['log'].variable_types['value'] == variable_types.Ordinal
# No conversion is done for 'priority_level' in this test; it is asserted to
# already be Ordinal on the incoming fixture.
assert entityset['log'].variable_types['priority_level'] == variable_types.Ordinal
# Normalize 'log' into a new entity 'values_2' using 'value_2' as the index,
# passing 'priority_level' as an additional variable and 'value' as a copy
# variable.
entityset.normalize_entity('log', 'values_2', 'value_2',
additional_variables=['priority_level'],
copy_variables=['value'],
make_time_index=False)

# 'log' is expected to have three forward relationships afterwards, the last
# of which points at the new 'values_2' entity (presumably the fixture's
# existing relationships plus the new one -- confirm against the fixture).
assert len(entityset.get_forward_relationships('log')) == 3
assert entityset.get_forward_relationships('log')[2].parent_entity.id == 'values_2'
# Column placement: 'priority_level' must appear in 'values_2' and no longer
# in 'log'; 'value' must appear in both; the index 'value_2' must be a column
# of 'values_2'.
assert 'priority_level' in entityset['values_2'].df.columns
assert 'value' in entityset['values_2'].df.columns
assert 'priority_level' not in entityset['log'].df.columns
assert 'value' in entityset['log'].df.columns
assert 'value_2' in entityset['values_2'].df.columns
# The point of the test: both variables keep their Ordinal type on the new
# entity.
assert entityset['values_2'].variable_types['priority_level'] == variable_types.Ordinal
assert entityset['values_2'].variable_types['value'] == variable_types.Ordinal

def test_make_time_index_keeps_original_sorting(self):
trips = {
'trip_id': [999 - i for i in xrange(1000)],
Expand Down

0 comments on commit edff572

Please sign in to comment.