Patch: accept an optional ``make_index`` flag in the ``entities`` dict of ``EntitySet.__init__``.
NOTE(review): whitespace of context/removed lines (indentation, trailing spaces) was
reconstructed from a collapsed copy; confirm against the target files before applying.
Blob ids on the ``index`` lines are those of the original commit.

diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index f3ec462381..ec5b2f4132 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -4,11 +4,13 @@ Changelog
 ---------
 .. **Future Release**
     * Enhancements
+        * Add ``make_index`` when initializing an EntitySet by passing in an ``entities`` dictionary (:pr:`1010`)
     * Fixes
     * Changes
     * Documentation Changes
     * Testing Changes
     Thanks to the following people for contributing to this release:
+    :user:`gsheni`
 
 **v0.15.0 May 29, 2020**
     * Enhancements
@@ -16,11 +18,11 @@ Changelog
         * Allow cutoff time dataframe columns to be in any order (:pr:`969`, :pr:`995`)
         * Add Age primitive, and make it a default transform primitive for DFS (:pr:`987`)
         * Add ``include_cutoff_time`` arg - control whether data at cutoff times are included in feature calculations (:pr:`959`)
-        * Allow ``variables_types`` to be referenced by their ``type_string`` 
+        * Allow ``variable_types`` to be referenced by their ``type_string``
           for the ``entity_from_dataframe`` function (:pr:`988`)
     * Fixes
         * Fix errors with Equals and NotEquals primitives when comparing categoricals or different dtypes (:pr:`968`)
-        * Normalized type_strings of ``Variable`` classes so that the ``find_variable_types`` function produces a 
+        * Normalized type_strings of ``Variable`` classes so that the ``find_variable_types`` function produces a
           dictionary with a clear key to name transition (:pr:`982`, :pr:`996`)
         * Remove pandas.datetime in test_calculate_feature_matrix due to deprecation (:pr:`998`)
     * Documentation Changes
diff --git a/featuretools/entityset/entityset.py b/featuretools/entityset/entityset.py
index 96c7181b2a..346c3cffe5 100644
--- a/featuretools/entityset/entityset.py
+++ b/featuretools/entityset/entityset.py
@@ -39,8 +39,8 @@ def __init__(self, id=None, entities=None, relationships=None):
 
                 entities (dict[str -> tuple(pd.DataFrame, str, str, dict[str -> Variable])]): dictionary of
                     entities. Entries take the format
-                    {entity id -> (dataframe, id column, (time_column), (variable_types))}.
-                    Note that time_column and variable_types are optional.
+                    {entity id -> (dataframe, id column, (time_index), (variable_types), (make_index))}.
+                    Note that time_index, variable_types and make_index are optional.
 
                 relationships (list[(str, str, str, str)]): List of relationships
                     between entities. List items are a tuple with the format
@@ -69,17 +69,21 @@ def __init__(self, id=None, entities=None, relationships=None):
         for entity in entities:
             df = entities[entity][0]
             index_column = entities[entity][1]
-            time_column = None
+            time_index = None
             variable_types = None
+            make_index = None
             if len(entities[entity]) > 2:
-                time_column = entities[entity][2]
+                time_index = entities[entity][2]
             if len(entities[entity]) > 3:
                 variable_types = entities[entity][3]
+            if len(entities[entity]) > 4:
+                make_index = entities[entity][4]
             self.entity_from_dataframe(entity_id=entity,
                                        dataframe=df,
                                        index=index_column,
-                                       time_index=time_column,
-                                       variable_types=variable_types)
+                                       time_index=time_index,
+                                       variable_types=variable_types,
+                                       make_index=make_index)
 
         for relationship in relationships:
             parent_variable = self[relationship[0]][relationship[1]]
diff --git a/featuretools/tests/entityset_tests/test_es.py b/featuretools/tests/entityset_tests/test_es.py
index 0aa5068ed1..03ce16a27d 100644
--- a/featuretools/tests/entityset_tests/test_es.py
+++ b/featuretools/tests/entityset_tests/test_es.py
@@ -1013,3 +1013,45 @@ def test_normalize_with_invalid_time_index(es):
                             index="cancel_reason",
                             copy_variables=['upgrade_date'])
     es['customers'].convert_variable_type('signup_date', variable_types.DatetimeTimeIndex)
+
+
+def test_entityset_init():
+    """Entities built from an ``entities`` dict match ones added one at a time."""
+    cards_df = pd.DataFrame({"id": [1, 2, 3, 4, 5]})
+    transactions_df = pd.DataFrame({"id": [1, 2, 3, 4, 5, 6],
+                                    "card_id": [1, 2, 1, 3, 4, 5],
+                                    "transaction_time": [10, 12, 13, 20, 21, 20],
+                                    "upgrade_date": [51, 23, 45, 12, 22, 53],
+                                    "fraud": [True, False, False, False, True, True]})
+    variable_types = {
+        'fraud': 'boolean',
+        'card_id': 'categorical'
+    }
+    # Tuple layout: (dataframe, index, time_index, variable_types, make_index).
+    entities = {
+        "cards": (cards_df, "id"),
+        "transactions": (transactions_df, 'id', 'transaction_time',
+                         variable_types, False)
+    }
+    relationships = [('cards', 'id', 'transactions', 'card_id')]
+    es = ft.EntitySet(id="fraud_data",
+                      entities=entities,
+                      relationships=relationships)
+    assert es['transactions'].index == 'id'
+    assert es['transactions'].time_index == 'transaction_time'
+    # Build the same EntitySet one entity at a time to compare against.
+    es_copy = ft.EntitySet(id="fraud_data")
+    es_copy.entity_from_dataframe(entity_id='cards',
+                                  dataframe=cards_df,
+                                  index='id')
+    es_copy.entity_from_dataframe(entity_id='transactions',
+                                  dataframe=transactions_df,
+                                  index='id',
+                                  variable_types=variable_types,
+                                  make_index=False,
+                                  time_index='transaction_time')
+    relationship = ft.Relationship(es_copy["cards"]["id"],
+                                   es_copy["transactions"]["card_id"])
+    es_copy.add_relationship(relationship)
+    assert es['cards'].__eq__(es_copy['cards'], deep=True)
+    assert es['transactions'].__eq__(es_copy['transactions'], deep=True)