Add 'search_results' and 'search_order' #260

Merged: 9 commits, Dec 10, 2019
Changes from 8 commits
7 changes: 7 additions & 0 deletions docs/source/changelog.rst
@@ -6,6 +6,7 @@ Changelog
* Enhancements
* Added ability to create a plot of feature importances :pr:`133`
* Added ROC and confusion matrix metrics and plot for classification problems and introduce PipelineSearchPlots class :pr:`242`
* Enhanced AutoML results with search order :pr:`260`
Review comment (Contributor):
I think we should add a "breaking change" warning to the changelog, since the results dictionary changed.

* Fixes
* Lower botocore requirement :pr:`235`
* Fixed decision_function calculation for FraudCost objective :pr:`254`
@@ -21,6 +22,12 @@ Changelog
* Testing Changes
* Added support for testing on Windows with CircleCI :pr:`226`
* Added support for doctests :pr:`233`

**Breaking Changes**
* `autoclassifier.results` and `autoregressor.results` are now dictionaries
Review comment (Contributor):
Should be `AutoClassifier` and `AutoRegressor`.

with `pipeline_results` and `search_order` keys. `pipeline_results` can be used
to access a dictionary identical to the old `.results` dictionary, while
`search_order` returns the pipeline ids in the order they were searched.

**v0.5.2 Nov. 18, 2019**
* Enhancements
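
The breaking change above amounts to the following migration (a minimal sketch; the import path and dataset are assumptions, not part of this diff):

    from evalml import AutoClassifier  # assumed top-level import path

    clf = AutoClassifier(max_pipelines=3)
    clf.fit(X, y)  # any binary classification dataset

    # Before this PR, per-pipeline results were keyed directly by pipeline id:
    # first_result = clf.results[0]

    # After this PR, the same entries live under the 'pipeline_results' key,
    # and 'search_order' records the order in which pipelines were evaluated:
    first_result = clf.results['pipeline_results'][0]
    search_order = clf.results['search_order']  # [0, 1, 2] for this run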
2 changes: 1 addition & 1 deletion docs/source/index.ipynb
@@ -259,7 +259,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
"version": "3.7.4"
}
},
"nbformat": 4,
20 changes: 13 additions & 7 deletions evalml/models/auto_base.py
@@ -58,7 +58,11 @@ def __init__(self, problem_type, tuner, cv, objective, max_pipelines, max_time,
self.max_time = convert_to_seconds(max_time)
else:
raise TypeError("max_time must be a float, int, or string. Received a {}.".format(type(max_time)))
-self.results = {}
+
+self.results = {
+    'pipeline_results': {},
+    'search_order': []
+}
self.trained_pipelines = {}
self.random_state = random_state
random.seed(self.random_state)
@@ -251,9 +255,9 @@ def _add_result(self, trained_pipeline, parameters, training_time, cv_data):

pipeline_class_name = trained_pipeline.__class__.__name__
pipeline_name = trained_pipeline.name
-pipeline_id = len(self.results)
+pipeline_id = len(self.results['pipeline_results'])

-self.results[pipeline_id] = {
+self.results['pipeline_results'][pipeline_id] = {
"id": pipeline_id,
"pipeline_class_name": pipeline_class_name,
"pipeline_name": pipeline_name,
@@ -264,8 +268,10 @@ def _add_result(self, trained_pipeline, parameters, training_time, cv_data):
"cv_data": cv_data
}

+self.results['search_order'].append(pipeline_id)

if self.add_result_callback:
-self.add_result_callback(self.results[pipeline_id], trained_pipeline)
+self.add_result_callback(self.results['pipeline_results'][pipeline_id], trained_pipeline)

self._save_pipeline(pipeline_id, trained_pipeline)
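
As this hunk shows, `add_result_callback` now receives a single entry of `results['pipeline_results']` together with the trained pipeline. A usage sketch (it assumes `AutoClassifier` forwards an `add_result_callback` argument to `AutoBase`; only keys visible in this diff are used):

    def log_result(pipeline_result, trained_pipeline):
        # pipeline_result is results['pipeline_results'][pipeline_id]
        print(pipeline_result["id"], pipeline_result["pipeline_name"])

    # assumed constructor parameter, suggested by the attribute above
    clf = AutoClassifier(max_pipelines=3, add_result_callback=log_result)
    clf.fit(X, y)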

@@ -290,11 +296,11 @@ def describe_pipeline(self, pipeline_id, return_dict=False):
Description of specified pipeline. Includes information such as
type of pipeline components, problem, training time, cross validation, etc.
"""
-if pipeline_id not in self.results:
+if pipeline_id not in self.results['pipeline_results']:
raise RuntimeError("Pipeline not found")

pipeline = self.get_pipeline(pipeline_id)
-pipeline_results = self.results[pipeline_id]
+pipeline_results = self.results['pipeline_results'][pipeline_id]

pipeline.describe()
self.logger.log_subtitle("Training")
@@ -340,7 +346,7 @@ def rankings(self):
if self.objective.greater_is_better:
ascending = False

-rankings_df = pd.DataFrame(self.results.values())
+rankings_df = pd.DataFrame(self.results['pipeline_results'].values())
rankings_df = rankings_df[["id", "pipeline_class_name", "score", "high_variance_cv", "parameters"]]
rankings_df.sort_values("score", ascending=ascending, inplace=True)
rankings_df.reset_index(drop=True, inplace=True)
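
Note the distinction this enables: `rankings` sorts pipelines by score, while `search_order` preserves the order in which they were evaluated. An illustrative check, reusing the `clf` from the earlier sketch:

    ranked_ids = list(clf.rankings["id"])            # best score first
    chronological_ids = clf.results['search_order']  # evaluation order
    assert sorted(ranked_ids) == sorted(chronological_ids)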
8 changes: 4 additions & 4 deletions evalml/models/pipeline_search_plots.py
@@ -27,7 +27,7 @@ def get_roc_data(self, pipeline_id):
if self.data.problem_type != ProblemTypes.BINARY:
raise RuntimeError("ROC plots can only be generated for binary classification problems.")

-results = self.data.results
+results = self.data.results['pipeline_results']
if len(results) == 0:
raise RuntimeError("You must first call fit() to generate ROC data.")

@@ -78,7 +78,7 @@ def generate_roc_plot(self, pipeline_id):
mean_auc = roc_data["mean_auc"]
std_auc = roc_data["std_auc"]

-results = self.data.results
+results = self.data.results['pipeline_results']
pipeline_name = results[pipeline_id]["pipeline_name"]

layout = go.Layout(title={'text': 'Receiver Operating Characteristic of<br>{} w/ ID={}'.format(pipeline_name, pipeline_id)},
@@ -112,7 +112,7 @@ def get_confusion_matrix_data(self, pipeline_id):
if self.data.problem_type not in [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]:
raise RuntimeError("Confusion matrix plots can only be generated for classification problems.")

-results = self.data.results
+results = self.data.results['pipeline_results']
if len(results) == 0:
raise RuntimeError("You must first call fit() to generate confusion matrix data.")

@@ -135,7 +135,7 @@ def generate_confusion_matrix(self, pipeline_id, fold_num=None):

"""
data = self.get_confusion_matrix_data(pipeline_id)
-results = self.data.results
+results = self.data.results['pipeline_results']
pipeline_name = results[pipeline_id]["pipeline_name"]
# defaults to last fold if none specified. May need to think of better approach.
if fold_num is None:
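
Each plot method above looks a pipeline up by id, which now indexes `results['pipeline_results']`. A usage sketch, assuming `clf.plot` is the `PipelineSearchPlots` instance the tests below use:

    roc_fig = clf.plot.generate_roc_plot(0)         # binary problems only
    cm_fig = clf.plot.generate_confusion_matrix(0)  # defaults to the last fold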
8 changes: 8 additions & 0 deletions evalml/tests/automl_tests/test_autobase.py
@@ -58,3 +58,11 @@ def test_generate_confusion_matrix(X_y):

fig = clf.plot.generate_confusion_matrix(0)
assert isinstance(fig, type(go.Figure()))


+def test_search_order(X_y):
+    X, y = X_y
+    clf = AutoClassifier(max_pipelines=3)
+    clf.fit(X, y)
+    correct_order = [0, 1, 2]
+    assert clf.results['search_order'] == correct_order
4 changes: 2 additions & 2 deletions evalml/tests/automl_tests/test_autoclassifier.py
@@ -47,13 +47,13 @@ def test_cv(X_y):
clf.fit(X, y, raise_errors=True)

assert isinstance(clf.rankings, pd.DataFrame)
-assert len(clf.results[0]["cv_data"]) == cv_folds
+assert len(clf.results['pipeline_results'][0]["cv_data"]) == cv_folds

clf = AutoClassifier(cv=TimeSeriesSplit(cv_folds), max_pipelines=1)
clf.fit(X, y, raise_errors=True)

assert isinstance(clf.rankings, pd.DataFrame)
-assert len(clf.results[0]["cv_data"]) == cv_folds
+assert len(clf.results['pipeline_results'][0]["cv_data"]) == cv_folds


def test_init_select_model_types():
11 changes: 7 additions & 4 deletions evalml/tests/automl_tests/test_pipeline_search_plots.py
@@ -19,6 +19,7 @@ def test_generate_roc(X_y):
class MockAuto(AutoBase):
def __init__(self):
self.results = {}
+self.results['pipeline_results'] = {}
self.problem_type = ProblemTypes.BINARY

def fit(self):
@@ -45,8 +46,7 @@ def fit(self):
ordered_scores.update({"# Testing": len(y_test)})
cv_data.append({"all_objective_scores": ordered_scores, "score": score})

-self.results.update({0: {"cv_data": cv_data,
-                         "pipeline_name": pipeline.name}})
+self.results['pipeline_results'].update({0: {"cv_data": cv_data, "pipeline_name": pipeline.name}})

mock_clf = MockAuto()
search_plots = PipelineSearchPlots(mock_clf)
@@ -75,6 +75,7 @@ def test_generate_roc_multi_raises_errors(X_y):
class MockAutoMulti(AutoBase):
def __init__(self):
self.results = {}
+self.results['pipeline_results'] = {}
self.problem_type = ProblemTypes.MULTICLASS

mock_clf = MockAutoMulti()
@@ -94,6 +95,7 @@ def test_generate_confusion_matrix(X_y):
class MockAutoClassifier(AutoBase):
def __init__(self):
self.results = {}
+self.results['pipeline_results'] = {}
self.problem_type = ProblemTypes.BINARY

def fit(self):
@@ -123,8 +125,8 @@ def fit(self):
ordered_scores.update({"# Testing": len(y_test)})
cv_data.append({"all_objective_scores": ordered_scores, "score": score})

-self.results.update({0: {"cv_data": cv_data,
-                         "pipeline_name": pipeline.name}})
+self.results['pipeline_results'].update({0: {"cv_data": cv_data,
+                                             "pipeline_name": pipeline.name}})

mock_clf = MockAutoClassifier()
search_plots = PipelineSearchPlots(mock_clf)
@@ -155,6 +157,7 @@ def test_confusion_matrix_regression_throws_error():
class MockAutoRegressor(AutoBase):
def __init__(self):
self.results = {}
+self.results['pipeline_results'] = {}
self.problem_type = ProblemTypes.REGRESSION

mock_clf = MockAutoRegressor()