Add make index to init of EntitySet (#1010)

* added to init and test
* added to changelog
* fix based on comments
* fix pr number
alteryx · Jun 3, 2020 · d42a52d
1 parent e8c9709
commit d42a52d
Show file tree

Hide file tree

Showing 3 changed files with 53 additions and 8 deletions.
diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
@@ -4,23 +4,25 @@ Changelog
 ---------
 .. **Future Release**
     * Enhancements
+        * Add support for ``make_index`` when initializing an EntitySet by passing in an ``entities`` dictionary (:pr:`1010`)
     * Fixes
     * Changes
     * Documentation Changes
     * Testing Changes
     Thanks to the following people for contributing to this release:
+    :user:`gsheni`
 
 **v0.15.0 May 29, 2020**
     * Enhancements
         * Add ``get_default_aggregation_primitives`` and ``get_default_transform_primitives`` (:pr:`945`)
         * Allow cutoff time dataframe columns to be in any order (:pr:`969`, :pr:`995`)
         * Add Age primitive, and make it a default transform primitive for DFS (:pr:`987`)
         * Add ``include_cutoff_time`` arg - control whether data at cutoff times are included in feature calculations (:pr:`959`)
-        * Allow ``variables_types`` to be referenced by their ``type_string`` 
+        * Allow ``variables_types`` to be referenced by their ``type_string``
           for the ``entity_from_dataframe`` function (:pr:`988`)
     * Fixes
         * Fix errors with Equals and NotEquals primitives when comparing categoricals or different dtypes (:pr:`968`)
-        * Normalized type_strings of ``Variable`` classes so that the ``find_variable_types`` function produces a 
+        * Normalized type_strings of ``Variable`` classes so that the ``find_variable_types`` function produces a
           dictionary with a clear key to name transition (:pr:`982`, :pr:`996`)
         * Remove pandas.datetime in test_calculate_feature_matrix due to deprecation (:pr:`998`)
     * Documentation Changes

diff --git a/featuretools/entityset/entityset.py b/featuretools/entityset/entityset.py
@@ -39,8 +39,8 @@ def __init__(self, id=None, entities=None, relationships=None):
 
                 entities (dict[str -> tuple(pd.DataFrame, str, str, dict[str -> Variable])]): dictionary of
                     entities. Entries take the format
-                    {entity id -> (dataframe, id column, (time_column), (variable_types))}.
-                    Note that time_column and variable_types are optional.
+                    {entity id -> (dataframe, id column, (time_index), (variable_types), (make_index))}.
+                    Note that time_index, variable_types and make_index are optional.
 
                 relationships (list[(str, str, str, str)]): List of relationships
                     between entities. List items are a tuple with the format
@@ -69,17 +69,21 @@ def __init__(self, id=None, entities=None, relationships=None):
         for entity in entities:
             df = entities[entity][0]
             index_column = entities[entity][1]
-            time_column = None
+            time_index = None
             variable_types = None
+            make_index = None
             if len(entities[entity]) > 2:
-                time_column = entities[entity][2]
+                time_index = entities[entity][2]
             if len(entities[entity]) > 3:
                 variable_types = entities[entity][3]
+            if len(entities[entity]) > 4:
+                make_index = entities[entity][4]
             self.entity_from_dataframe(entity_id=entity,
                                        dataframe=df,
                                        index=index_column,
-                                       time_index=time_column,
-                                       variable_types=variable_types)
+                                       time_index=time_index,
+                                       variable_types=variable_types,
+                                       make_index=make_index)
 
         for relationship in relationships:
             parent_variable = self[relationship[0]][relationship[1]]

diff --git a/featuretools/tests/entityset_tests/test_es.py b/featuretools/tests/entityset_tests/test_es.py
@@ -1013,3 +1013,42 @@ def test_normalize_with_invalid_time_index(es):
                             index="cancel_reason",
                             copy_variables=['upgrade_date'])
     es['customers'].convert_variable_type('signup_date', variable_types.DatetimeTimeIndex)
+
+
+def test_entityset_init():
+    """Check that EntitySet(entities=..., relationships=...) matches manual construction.
+
+    The ``transactions`` entry uses the full five-element tuple
+    ``(dataframe, index, time_index, variable_types, make_index)`` while
+    ``cards`` uses the minimal two-element ``(dataframe, index)`` form. The
+    resulting entities are compared against an EntitySet assembled step by
+    step with ``entity_from_dataframe`` and ``add_relationship``.
+    """
+    cards_df = pd.DataFrame({"id": [1, 2, 3, 4, 5]})
+    transactions_df = pd.DataFrame({"id": [1, 2, 3, 4, 5, 6],
+                                    "card_id": [1, 2, 1, 3, 4, 5],
+                                    "transaction_time": [10, 12, 13, 20, 21, 20],
+                                    "upgrade_date": [51, 23, 45, 12, 22, 53],
+                                    "fraud": [True, False, False, False, True, True]})
+    variable_types = {
+        'fraud': 'boolean',
+        'card_id': 'categorical'
+    }
+    entities = {
+        "cards": (cards_df, "id"),
+        "transactions": (transactions_df, 'id', 'transaction_time',
+                         variable_types, False)
+    }
+    # The child side of the relationship is the foreign key ``card_id``,
+    # not the transactions' own primary key ``id``.
+    relationships = [('cards', 'id', 'transactions', 'card_id')]
+    es = ft.EntitySet(id="fraud_data",
+                      entities=entities,
+                      relationships=relationships)
+    assert es['transactions'].index == 'id'
+    assert es['transactions'].time_index == 'transaction_time'
+
+    # Build the same EntitySet explicitly, passing each tuple element as the
+    # corresponding keyword argument.
+    es_copy = ft.EntitySet(id="fraud_data")
+    es_copy.entity_from_dataframe(entity_id='cards',
+                                  dataframe=cards_df,
+                                  index='id')
+    es_copy.entity_from_dataframe(entity_id='transactions',
+                                  dataframe=transactions_df,
+                                  index='id',
+                                  variable_types=variable_types,
+                                  make_index=False,
+                                  time_index='transaction_time')
+    relationship = ft.Relationship(es_copy["cards"]["id"],
+                                   es_copy["transactions"]["card_id"])
+    es_copy.add_relationship(relationship)
+
+    # ``__eq__`` is called explicitly because ``==`` cannot pass ``deep=True``.
+    assert es['cards'].__eq__(es_copy['cards'], deep=True)
+    assert es['transactions'].__eq__(es_copy['transactions'], deep=True)