alteryx · CJStadler · Jun 27, 2019 · Jun 25, 2019 · Jun 25, 2019 · Jun 25, 2019
diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
@@ -10,6 +10,7 @@ Changelog
         * Select columns of dataframe using a list (:pr:`615`)
         * Change type of features calculated on Index features to Categorical (:pr:`602`)
         * Specify Dask version in requirements for python 2 (:pr:`627`)
+        * Keep dataframe sorted by time during feature calculation (:pr:`626`)
     * Changes
         * Remove unused variance_selection.py file (:pr:`613`)
         * Remove Timedelta data param (:pr:`619`)

diff --git a/featuretools/computational_backends/feature_set_calculator.py b/featuretools/computational_backends/feature_set_calculator.py
@@ -118,6 +118,10 @@ def run(self, instance_ids):
 
         df.index.name = self.entityset[self.feature_set.target_eid].index
         column_list = []
+
+        # Order by instance_ids
+        df = df.reindex(list(pd.unique(instance_ids)))
+
         for feat in self.feature_set.target_features:
             column_list.extend(feat.get_feature_names())
         return df[column_list]

diff --git a/featuretools/entityset/entity.py b/featuretools/entityset/entity.py
@@ -235,6 +235,9 @@ def query_by_values(self, instance_vals, variable_id=None, columns=None,
         Returns:
             pd.DataFrame : instances that match constraints with ids in order of underlying dataframe
         """
+        if not variable_id:
+            variable_id = self.index
+
         instance_vals = self._vals_to_series(instance_vals, variable_id)
 
         training_window = _check_timedelta(training_window)
@@ -249,10 +252,6 @@ def query_by_values(self, instance_vals, variable_id=None, columns=None,
         elif instance_vals.shape[0] == 0:
             df = self.df.head(0)
 
-        elif variable_id is None or variable_id == self.index:
-            df = self.df.reindex(instance_vals)
-            df.dropna(subset=[self.index], inplace=True)
-
         else:
             df = self.df[self.df[variable_id].isin(instance_vals)]
 

diff --git a/featuretools/tests/computational_backend/test_feature_set_calculator.py b/featuretools/tests/computational_backend/test_feature_set_calculator.py
@@ -499,12 +499,15 @@ def test_topn(es):
         ['taco clock', np.nan]
     ])
     assert ([name in df.columns for name in topn.get_feature_names()])
+
     for i in range(df.shape[0]):
+        true = true_results.loc[i]
+        actual = df.loc[i]
         if i == 0:
             # coke zero and toothpase have same number of occurrences
-            assert set(true_results.loc[i].values) == set(df.loc[i].values)
+            assert set(true.values) == set(actual.values)
         else:
-            for i1, i2 in zip(true_results.loc[i], df.iloc[i]):
+            for i1, i2 in zip(true, actual):
                 assert (pd.isnull(i1) and pd.isnull(i2)) or (i1 == i2)
 
 
@@ -746,3 +749,17 @@ def get_function(self):
     assert all(fm[f5.get_name()].sort_index() == value_sum)
     assert all(fm[f6.get_name()].sort_index() == value_sum)
     assert all(fm[f7.get_name()].sort_index() == value_sum)
+
+
+def test_returns_order_of_instance_ids(es):
+    feature_set = FeatureSet([ft.Feature(es['customers']['age'])])
+    calculator = FeatureSetCalculator(es,
+                                      time_last=None,
+                                      feature_set=feature_set)
+
+    instance_ids = [0, 1, 2]
+    assert list(es['customers'].df['id']) != instance_ids
+
+    df = calculator.run(instance_ids)
+
+    assert list(df.index) == instance_ids
diff --git a/featuretools/tests/entityset_tests/test_entity.py b/featuretools/tests/entityset_tests/test_entity.py
@@ -113,8 +113,7 @@ def test_query_by_values_secondary_time_index(es):
     result = es['customers'].query_by_values(all_instances, time_last=end)
 
     for col in ["cancel_date", "cancel_reason"]:
-        nulls = result.iloc[all_instances][col].isnull() == [
-            False, True, True]
+        nulls = result.loc[all_instances][col].isnull() == [False, True, True]
         assert nulls.all(), "Some instance has data it shouldn't for column %s" % col