From 433a0128af3f810f032e8b7a48f9abf31cab0185 Mon Sep 17 00:00:00 2001 From: Jeremy Shih Date: Fri, 13 Sep 2019 12:49:17 -0400 Subject: [PATCH 01/18] Added categorical support for regression --- evalml/pipelines/regression/random_forest.py | 11 ++++++++--- evalml/tests/test_autoregressor.py | 7 +++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/evalml/pipelines/regression/random_forest.py b/evalml/pipelines/regression/random_forest.py index ad93a8756c..c91c8493eb 100644 --- a/evalml/pipelines/regression/random_forest.py +++ b/evalml/pipelines/regression/random_forest.py @@ -1,9 +1,11 @@ +import category_encoders as ce import numpy as np import pandas as pd from sklearn.ensemble import RandomForestRegressor from sklearn.feature_selection import SelectFromModel from sklearn.impute import SimpleImputer from sklearn.pipeline import Pipeline +from sklearn.preprocessing import OneHotEncoder from skopt.space import Integer, Real from evalml.pipelines import PipelineBase @@ -20,13 +22,15 @@ class RFRegressionPipeline(PipelineBase): "n_estimators": Integer(10, 1000), "max_depth": Integer(1, 32), "impute_strategy": ["mean", "median", "most_frequent"], + "drop_invariant": [True, False], "percent_features": Real(.01, 1) } - def __init__(self, objective, n_estimators, max_depth, impute_strategy, percent_features, + def __init__(self, objective, n_estimators, max_depth, impute_strategy, drop_invariant, percent_features, number_features, n_jobs=1, random_state=0): imputer = SimpleImputer(strategy=impute_strategy) + enc = ce.OneHotEncoder(drop_invariant=drop_invariant, return_df=False) estimator = RandomForestRegressor(random_state=random_state, n_estimators=n_estimators, @@ -40,9 +44,10 @@ def __init__(self, objective, n_estimators, max_depth, impute_strategy, percent_ ) self.pipeline = Pipeline( - [("imputer", imputer), + [("encoder", enc), + ("imputer", imputer), ("feature_selection", feature_selection), - ("estimator", estimator)] + ("estimator", estimator)], ) super().__init__(objective=objective, random_state=random_state) diff --git a/evalml/tests/test_autoregressor.py b/evalml/tests/test_autoregressor.py index c1d86b5d75..50a6d344de 100644 --- a/evalml/tests/test_autoregressor.py +++ b/evalml/tests/test_autoregressor.py @@ -53,11 +53,10 @@ def test_random_state(X_y): def test_categorical(X_y_categorical_regression): X, y = X_y_categorical_regression - clf = AutoRegressor(objective="R2", max_pipelines=5, random_state=0) + clf = AutoRegressor(objective="R2", max_pipelines=5, random_state=0, model_types=["random_forest"]) error_msg = 'contains non-numerical data' - with pytest.raises(ValueError, match=error_msg): - clf.fit(X, y, raise_errors=True) - + clf.fit(X.values, y, raise_errors=True) + assert clf.rankings['score'].isnull().any() == False def test_callback(X_y): X, y = X_y From 7121f63f42dfdd5f1f883f0746a3f30ab02dea3f Mon Sep 17 00:00:00 2001 From: Jeremy Shih Date: Fri, 13 Sep 2019 14:51:38 -0400 Subject: [PATCH 02/18] Added categorical support for classification --- .../pipelines/classification/logistic_regression.py | 8 ++++++-- evalml/pipelines/classification/random_forest.py | 12 ++++++++---- evalml/pipelines/classification/xgboost.py | 10 +++++++--- evalml/tests/conftest.py | 8 ++++++++ evalml/tests/test_autoclassifier.py | 8 ++++++++ evalml/tests/test_autoregressor.py | 2 +- 6 files changed, 38 insertions(+), 10 deletions(-) diff --git a/evalml/pipelines/classification/logistic_regression.py b/evalml/pipelines/classification/logistic_regression.py index e064a33030..4e33ab9ca6 100644 
--- a/evalml/pipelines/classification/logistic_regression.py +++ b/evalml/pipelines/classification/logistic_regression.py @@ -1,3 +1,4 @@ +import category_encoders as ce import numpy as np import pandas as pd from sklearn.impute import SimpleImputer @@ -20,11 +21,13 @@ class LogisticRegressionPipeline(PipelineBase): "penalty": ["l2"], "C": Real(.01, 10), "impute_strategy": ["mean", "median", "most_frequent"], + "drop_invariant": [True, False] } - def __init__(self, objective, penalty, C, impute_strategy, + def __init__(self, objective, penalty, C, impute_strategy, drop_invariant, number_features, n_jobs=1, random_state=0): imputer = SimpleImputer(strategy=impute_strategy) + enc = ce.OneHotEncoder(drop_invariant=drop_invariant, return_df=False) estimator = LogisticRegression(random_state=random_state, penalty=penalty, @@ -34,7 +37,8 @@ def __init__(self, objective, penalty, C, impute_strategy, n_jobs=-1) self.pipeline = Pipeline( - [("imputer", imputer), + [("encoder", enc), + ("imputer", imputer), ("scaler", StandardScaler()), ("estimator", estimator)] ) diff --git a/evalml/pipelines/classification/random_forest.py b/evalml/pipelines/classification/random_forest.py index 06ea8cb1b1..7f707b3475 100644 --- a/evalml/pipelines/classification/random_forest.py +++ b/evalml/pipelines/classification/random_forest.py @@ -1,3 +1,4 @@ +import category_encoders as ce import numpy as np import pandas as pd from sklearn.ensemble import RandomForestClassifier @@ -20,12 +21,14 @@ class RFClassificationPipeline(PipelineBase): "n_estimators": Integer(10, 1000), "max_depth": Integer(1, 32), "impute_strategy": ["mean", "median", "most_frequent"], - "percent_features": Real(.01, 1) + "percent_features": Real(.01, 1), + "drop_invariant": [True, False] } - def __init__(self, objective, n_estimators, max_depth, impute_strategy, percent_features, - number_features, n_jobs=1, random_state=0): + def __init__(self, objective, n_estimators, max_depth, impute_strategy, drop_invariant, + percent_features, number_features, n_jobs=1, random_state=0): imputer = SimpleImputer(strategy=impute_strategy) + enc = ce.OneHotEncoder(drop_invariant=drop_invariant, return_df=False) estimator = RandomForestClassifier(random_state=random_state, n_estimators=n_estimators, @@ -39,7 +42,8 @@ def __init__(self, objective, n_estimators, max_depth, impute_strategy, percent_ ) self.pipeline = Pipeline( - [("imputer", imputer), + [("encoder", enc), + ("imputer", imputer), ("feature_selection", feature_selection), ("estimator", estimator)] ) diff --git a/evalml/pipelines/classification/xgboost.py b/evalml/pipelines/classification/xgboost.py index ae2bc63afa..b15e6659a3 100644 --- a/evalml/pipelines/classification/xgboost.py +++ b/evalml/pipelines/classification/xgboost.py @@ -1,3 +1,4 @@ +import category_encoders as ce import numpy as np import pandas as pd from sklearn.feature_selection import SelectFromModel @@ -21,12 +22,14 @@ class XGBoostPipeline(PipelineBase): "min_child_weight": Real(1, 10), "max_depth": Integer(1, 20), "impute_strategy": ["mean", "median", "most_frequent"], + "drop_invariant": [True, False], "percent_features": Real(.01, 1) } - def __init__(self, objective, eta, min_child_weight, max_depth, impute_strategy, percent_features, - number_features, n_jobs=1, random_state=0): + def __init__(self, objective, eta, min_child_weight, max_depth, impute_strategy, drop_invariant, + percent_features,number_features, n_jobs=1, random_state=0): imputer = SimpleImputer(strategy=impute_strategy) + enc = 
ce.OneHotEncoder(drop_invariant=drop_invariant, return_df=False) estimator = XGBClassifier( random_state=random_state, @@ -42,7 +45,8 @@ def __init__(self, objective, eta, min_child_weight, max_depth, impute_strategy, ) self.pipeline = Pipeline( - [("imputer", imputer), + [("encoder", enc), + ("imputer", imputer), ("feature_selection", feature_selection), ("estimator", estimator)] ) diff --git a/evalml/tests/conftest.py b/evalml/tests/conftest.py index 84eacec411..0782b960d4 100644 --- a/evalml/tests/conftest.py +++ b/evalml/tests/conftest.py @@ -28,6 +28,14 @@ def X_y_categorical_regression(): return X, y +@pytest.fixture +def X_y_categorical_classification(): + titanic = pd.read_csv('https://featuretools-static.s3.amazonaws.com/evalml/Titanic/train.csv') + y = titanic['Survived'] + X = titanic.drop('Survived', axis=1) + return X, y + + @pytest.fixture def trained_model(X_y): X, y = X_y diff --git a/evalml/tests/test_autoclassifier.py b/evalml/tests/test_autoclassifier.py index 5f5b06ae0a..540f735171 100644 --- a/evalml/tests/test_autoclassifier.py +++ b/evalml/tests/test_autoclassifier.py @@ -119,6 +119,14 @@ def test_multi_auto(X_y_multi): assert clf.default_objectives == get_objectives('multiclass') +def test_categorical_auto(X_y_categorical_classification): + X, y = X_y_categorical_classification + clf = AutoClassifier(objective="recall", max_pipelines=5, multiclass=False) + clf.fit(X.values, y, raise_errors=True) + print(clf.rankings) + assert clf.rankings['score'].isnull().any() == False + + def test_random_state(X_y): X, y = X_y diff --git a/evalml/tests/test_autoregressor.py b/evalml/tests/test_autoregressor.py index 50a6d344de..4b92f75a6e 100644 --- a/evalml/tests/test_autoregressor.py +++ b/evalml/tests/test_autoregressor.py @@ -54,10 +54,10 @@ def test_random_state(X_y): def test_categorical(X_y_categorical_regression): X, y = X_y_categorical_regression clf = AutoRegressor(objective="R2", max_pipelines=5, random_state=0, model_types=["random_forest"]) - error_msg = 'contains non-numerical data' clf.fit(X.values, y, raise_errors=True) assert clf.rankings['score'].isnull().any() == False + def test_callback(X_y): X, y = X_y From 175ef93842bd7947dab7799b3b9d91d39437e8b6 Mon Sep 17 00:00:00 2001 From: Jeremy Shih Date: Fri, 13 Sep 2019 14:52:25 -0400 Subject: [PATCH 03/18] Remove error --- evalml/models/auto_base.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/evalml/models/auto_base.py b/evalml/models/auto_base.py index 6cacb3dcd6..91b41b6003 100644 --- a/evalml/models/auto_base.py +++ b/evalml/models/auto_base.py @@ -5,7 +5,6 @@ import numpy as np import pandas as pd from colorama import Style -from pandas.api.types import is_numeric_dtype from tqdm import tqdm from evalml import preprocessing @@ -97,10 +96,6 @@ def fit(self, X, y, feature_types=None, raise_errors=False): if not isinstance(y, pd.Series): y = pd.Series(y) - for col in X.columns: - if not is_numeric_dtype(X[col]): - raise ValueError("Input column '{}' contains non-numerical data".format(col)) - self._log_title("Beginning pipeline search") self._log("Optimizing for %s. 
" % self.objective.name, new_line=False) @@ -158,6 +153,9 @@ def _do_iteration(self, X, y, pbar, raise_errors): pbar.set_description("Testing %s" % (pipeline_class.name)) start = time.time() + # print(X) + # print(y) + # print(pipeline.pipeline) scores = [] all_objective_scores = [] for train, test in self.cv.split(X, y): From bde1cd851c89bd2725d6a93ce1f3099219539075 Mon Sep 17 00:00:00 2001 From: Jeremy Shih Date: Fri, 13 Sep 2019 14:52:34 -0400 Subject: [PATCH 04/18] Added requiremtns --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index cf43ddcd84..3818470237 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,4 @@ scikit-optimize[plots] colorama s3fs==0.2.2 joblib>=0.10.3 +category_encoders From 058cf4638721ee496e39995fa04d6ee58a9424fd Mon Sep 17 00:00:00 2001 From: Jeremy Shih Date: Fri, 13 Sep 2019 14:54:26 -0400 Subject: [PATCH 05/18] lint --- evalml/pipelines/classification/random_forest.py | 2 +- evalml/pipelines/classification/xgboost.py | 2 +- evalml/pipelines/regression/random_forest.py | 1 - evalml/tests/test_autoclassifier.py | 2 +- evalml/tests/test_autoregressor.py | 2 +- 5 files changed, 4 insertions(+), 5 deletions(-) diff --git a/evalml/pipelines/classification/random_forest.py b/evalml/pipelines/classification/random_forest.py index 7f707b3475..ffd3249afb 100644 --- a/evalml/pipelines/classification/random_forest.py +++ b/evalml/pipelines/classification/random_forest.py @@ -26,7 +26,7 @@ class RFClassificationPipeline(PipelineBase): } def __init__(self, objective, n_estimators, max_depth, impute_strategy, drop_invariant, - percent_features, number_features, n_jobs=1, random_state=0): + percent_features, number_features, n_jobs=1, random_state=0): imputer = SimpleImputer(strategy=impute_strategy) enc = ce.OneHotEncoder(drop_invariant=drop_invariant, return_df=False) diff --git a/evalml/pipelines/classification/xgboost.py b/evalml/pipelines/classification/xgboost.py index b15e6659a3..99bc2826b6 100644 --- a/evalml/pipelines/classification/xgboost.py +++ b/evalml/pipelines/classification/xgboost.py @@ -27,7 +27,7 @@ class XGBoostPipeline(PipelineBase): } def __init__(self, objective, eta, min_child_weight, max_depth, impute_strategy, drop_invariant, - percent_features,number_features, n_jobs=1, random_state=0): + percent_features, number_features, n_jobs=1, random_state=0): imputer = SimpleImputer(strategy=impute_strategy) enc = ce.OneHotEncoder(drop_invariant=drop_invariant, return_df=False) diff --git a/evalml/pipelines/regression/random_forest.py b/evalml/pipelines/regression/random_forest.py index c91c8493eb..491d11dacf 100644 --- a/evalml/pipelines/regression/random_forest.py +++ b/evalml/pipelines/regression/random_forest.py @@ -5,7 +5,6 @@ from sklearn.feature_selection import SelectFromModel from sklearn.impute import SimpleImputer from sklearn.pipeline import Pipeline -from sklearn.preprocessing import OneHotEncoder from skopt.space import Integer, Real from evalml.pipelines import PipelineBase diff --git a/evalml/tests/test_autoclassifier.py b/evalml/tests/test_autoclassifier.py index 540f735171..c03d8fc913 100644 --- a/evalml/tests/test_autoclassifier.py +++ b/evalml/tests/test_autoclassifier.py @@ -124,7 +124,7 @@ def test_categorical_auto(X_y_categorical_classification): clf = AutoClassifier(objective="recall", max_pipelines=5, multiclass=False) clf.fit(X.values, y, raise_errors=True) print(clf.rankings) - assert clf.rankings['score'].isnull().any() == False + assert 
clf.rankings['score'].isnull().any() is False def test_random_state(X_y): diff --git a/evalml/tests/test_autoregressor.py b/evalml/tests/test_autoregressor.py index 4b92f75a6e..2c33a4fb81 100644 --- a/evalml/tests/test_autoregressor.py +++ b/evalml/tests/test_autoregressor.py @@ -55,7 +55,7 @@ def test_categorical(X_y_categorical_regression): X, y = X_y_categorical_regression clf = AutoRegressor(objective="R2", max_pipelines=5, random_state=0, model_types=["random_forest"]) clf.fit(X.values, y, raise_errors=True) - assert clf.rankings['score'].isnull().any() == False + assert clf.rankings['score'].isnull().any() is False def test_callback(X_y): From bb85d62f8fdec55b598df2d82107863ffd4f0a18 Mon Sep 17 00:00:00 2001 From: Jeremy Shih Date: Fri, 13 Sep 2019 14:56:11 -0400 Subject: [PATCH 06/18] Remove prints --- evalml/models/auto_base.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/evalml/models/auto_base.py b/evalml/models/auto_base.py index 91b41b6003..8cd450cab0 100644 --- a/evalml/models/auto_base.py +++ b/evalml/models/auto_base.py @@ -153,9 +153,6 @@ def _do_iteration(self, X, y, pbar, raise_errors): pbar.set_description("Testing %s" % (pipeline_class.name)) start = time.time() - # print(X) - # print(y) - # print(pipeline.pipeline) scores = [] all_objective_scores = [] for train, test in self.cv.split(X, y): From f323a4333e1f94abae31c36e0a09157773c14474 Mon Sep 17 00:00:00 2001 From: Jeremy Shih Date: Fri, 13 Sep 2019 16:03:04 -0400 Subject: [PATCH 07/18] Fix tests --- evalml/pipelines/classification/logistic_regression.py | 2 +- evalml/pipelines/classification/random_forest.py | 2 +- evalml/pipelines/classification/xgboost.py | 2 +- evalml/pipelines/regression/random_forest.py | 2 +- evalml/tests/test_autoclassifier.py | 3 +-- evalml/tests/test_autoregressor.py | 2 +- evalml/tests/test_logistic_regression.py | 2 +- evalml/tests/test_objectives.py | 2 +- evalml/tests/test_pipelines.py | 6 +++--- evalml/tests/test_rf.py | 2 +- evalml/tests/test_xgboost.py | 2 +- 11 files changed, 13 insertions(+), 14 deletions(-) diff --git a/evalml/pipelines/classification/logistic_regression.py b/evalml/pipelines/classification/logistic_regression.py index 4e33ab9ca6..fb6bd70873 100644 --- a/evalml/pipelines/classification/logistic_regression.py +++ b/evalml/pipelines/classification/logistic_regression.py @@ -27,7 +27,7 @@ class LogisticRegressionPipeline(PipelineBase): def __init__(self, objective, penalty, C, impute_strategy, drop_invariant, number_features, n_jobs=1, random_state=0): imputer = SimpleImputer(strategy=impute_strategy) - enc = ce.OneHotEncoder(drop_invariant=drop_invariant, return_df=False) + enc = ce.OneHotEncoder(drop_invariant=drop_invariant, return_df=True) estimator = LogisticRegression(random_state=random_state, penalty=penalty, diff --git a/evalml/pipelines/classification/random_forest.py b/evalml/pipelines/classification/random_forest.py index ffd3249afb..cfd8497406 100644 --- a/evalml/pipelines/classification/random_forest.py +++ b/evalml/pipelines/classification/random_forest.py @@ -28,7 +28,7 @@ class RFClassificationPipeline(PipelineBase): def __init__(self, objective, n_estimators, max_depth, impute_strategy, drop_invariant, percent_features, number_features, n_jobs=1, random_state=0): imputer = SimpleImputer(strategy=impute_strategy) - enc = ce.OneHotEncoder(drop_invariant=drop_invariant, return_df=False) + enc = ce.OneHotEncoder(drop_invariant=drop_invariant, return_df=True) estimator = RandomForestClassifier(random_state=random_state, n_estimators=n_estimators, 
diff --git a/evalml/pipelines/classification/xgboost.py b/evalml/pipelines/classification/xgboost.py index 99bc2826b6..eeefc68d47 100644 --- a/evalml/pipelines/classification/xgboost.py +++ b/evalml/pipelines/classification/xgboost.py @@ -29,7 +29,7 @@ class XGBoostPipeline(PipelineBase): def __init__(self, objective, eta, min_child_weight, max_depth, impute_strategy, drop_invariant, percent_features, number_features, n_jobs=1, random_state=0): imputer = SimpleImputer(strategy=impute_strategy) - enc = ce.OneHotEncoder(drop_invariant=drop_invariant, return_df=False) + enc = ce.OneHotEncoder(drop_invariant=drop_invariant, return_df=True) estimator = XGBClassifier( random_state=random_state, diff --git a/evalml/pipelines/regression/random_forest.py b/evalml/pipelines/regression/random_forest.py index 491d11dacf..ae5cb3eddd 100644 --- a/evalml/pipelines/regression/random_forest.py +++ b/evalml/pipelines/regression/random_forest.py @@ -29,7 +29,7 @@ def __init__(self, objective, n_estimators, max_depth, impute_strategy, drop_inv number_features, n_jobs=1, random_state=0): imputer = SimpleImputer(strategy=impute_strategy) - enc = ce.OneHotEncoder(drop_invariant=drop_invariant, return_df=False) + enc = ce.OneHotEncoder(drop_invariant=drop_invariant, return_df=True) estimator = RandomForestRegressor(random_state=random_state, n_estimators=n_estimators, diff --git a/evalml/tests/test_autoclassifier.py b/evalml/tests/test_autoclassifier.py index c03d8fc913..ab49cebeee 100644 --- a/evalml/tests/test_autoclassifier.py +++ b/evalml/tests/test_autoclassifier.py @@ -123,8 +123,7 @@ def test_categorical_auto(X_y_categorical_classification): X, y = X_y_categorical_classification clf = AutoClassifier(objective="recall", max_pipelines=5, multiclass=False) clf.fit(X.values, y, raise_errors=True) - print(clf.rankings) - assert clf.rankings['score'].isnull().any() is False + assert not clf.rankings['score'].isnull().all() def test_random_state(X_y): diff --git a/evalml/tests/test_autoregressor.py b/evalml/tests/test_autoregressor.py index 2c33a4fb81..633990cfc1 100644 --- a/evalml/tests/test_autoregressor.py +++ b/evalml/tests/test_autoregressor.py @@ -55,7 +55,7 @@ def test_categorical(X_y_categorical_regression): X, y = X_y_categorical_regression clf = AutoRegressor(objective="R2", max_pipelines=5, random_state=0, model_types=["random_forest"]) clf.fit(X.values, y, raise_errors=True) - assert clf.rankings['score'].isnull().any() is False + assert not clf.rankings['score'].isnull().all() def test_callback(X_y): diff --git a/evalml/tests/test_logistic_regression.py b/evalml/tests/test_logistic_regression.py index d98e41016e..44209ac880 100644 --- a/evalml/tests/test_logistic_regression.py +++ b/evalml/tests/test_logistic_regression.py @@ -7,7 +7,7 @@ def test_lr_multi(X_y_multi): X, y = X_y_multi objective = PrecisionMicro() - clf = LogisticRegressionPipeline(objective=objective, penalty='l2', C=1.0, impute_strategy='mean', number_features=len(X[0])) + clf = LogisticRegressionPipeline(objective=objective, penalty='l2', C=1.0, impute_strategy='mean', drop_invariant=False, number_features=len(X[0])) clf.fit(X, y) clf.score(X, y) y_pred = clf.predict(X) diff --git a/evalml/tests/test_objectives.py b/evalml/tests/test_objectives.py index 35df777b6a..a0d459bff5 100644 --- a/evalml/tests/test_objectives.py +++ b/evalml/tests/test_objectives.py @@ -26,7 +26,7 @@ def test_binary_average(X_y): X = pd.DataFrame(X) y = pd.Series(y) - pipeline = LogisticRegressionPipeline(objective=Precision(), penalty='l2', C=1.0, 
impute_strategy='mean', number_features=0) + pipeline = LogisticRegressionPipeline(objective=Precision(), penalty='l2', C=1.0, impute_strategy='mean', drop_invariant=False, number_features=0) pipeline.fit(X, y) y_pred = pipeline.predict(X) diff --git a/evalml/tests/test_pipelines.py b/evalml/tests/test_pipelines.py index b549a1fdb9..3813b0c26e 100644 --- a/evalml/tests/test_pipelines.py +++ b/evalml/tests/test_pipelines.py @@ -43,7 +43,7 @@ def test_serialization(X_y, trained_model, path_management): path = os.path.join(path_management, 'pipe.pkl') objective = Precision() - pipeline = LogisticRegressionPipeline(objective=objective, penalty='l2', C=1.0, impute_strategy='mean', number_features=len(X[0])) + pipeline = LogisticRegressionPipeline(objective=objective, penalty='l2', C=1.0, impute_strategy='mean', drop_invariant=False, number_features=len(X[0])) pipeline.fit(X, y) save_pipeline(pipeline, path) assert pipeline.score(X, y) == load_pipeline(path).score(X, y) @@ -60,10 +60,10 @@ def test_reproducibility(X_y): amount_col=10 ) - clf = LogisticRegressionPipeline(objective=objective, penalty='l2', C=1.0, impute_strategy='mean', number_features=len(X[0]), random_state=0) + clf = LogisticRegressionPipeline(objective=objective, penalty='l2', C=1.0, impute_strategy='mean', drop_invariant=False, number_features=len(X[0]), random_state=0) clf.fit(X, y) - clf_1 = LogisticRegressionPipeline(objective=objective, penalty='l2', C=1.0, impute_strategy='mean', number_features=len(X[0]), random_state=0) + clf_1 = LogisticRegressionPipeline(objective=objective, penalty='l2', C=1.0, impute_strategy='mean', drop_invariant=False, number_features=len(X[0]), random_state=0) clf_1.fit(X, y) assert clf_1.score(X, y) == clf.score(X, y) diff --git a/evalml/tests/test_rf.py b/evalml/tests/test_rf.py index 2e3ec42c59..c49f43c014 100644 --- a/evalml/tests/test_rf.py +++ b/evalml/tests/test_rf.py @@ -7,7 +7,7 @@ def test_rf_multi(X_y_multi): X, y = X_y_multi objective = PrecisionMicro() - clf = RFClassificationPipeline(objective=objective, n_estimators=10, max_depth=3, impute_strategy='mean', percent_features=1.0, number_features=len(X[0])) + clf = RFClassificationPipeline(objective=objective, n_estimators=10, max_depth=3, impute_strategy='mean', drop_invariant=False, percent_features=1.0, number_features=len(X[0])) clf.fit(X, y) clf.score(X, y) y_pred = clf.predict(X) diff --git a/evalml/tests/test_xgboost.py b/evalml/tests/test_xgboost.py index cc005f9302..05fed92641 100644 --- a/evalml/tests/test_xgboost.py +++ b/evalml/tests/test_xgboost.py @@ -7,7 +7,7 @@ def test_xg_multi(X_y_multi): X, y = X_y_multi objective = PrecisionMicro() - clf = XGBoostPipeline(objective=objective, eta=0.1, min_child_weight=1, max_depth=3, impute_strategy='mean', percent_features=1.0, number_features=len(X[0])) + clf = XGBoostPipeline(objective=objective, eta=0.1, min_child_weight=1, max_depth=3, impute_strategy='mean', drop_invariant=False, percent_features=1.0, number_features=len(X[0])) clf.fit(X, y) clf.score(X, y) y_pred = clf.predict(X) From 9301e894bbb3cb2a00b8ce6d220b048dc8cc6983 Mon Sep 17 00:00:00 2001 From: Jeremy Shih Date: Fri, 13 Sep 2019 16:12:37 -0400 Subject: [PATCH 08/18] Clean up --- evalml/tests/test_autoclassifier.py | 2 +- evalml/tests/test_autoregressor.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/evalml/tests/test_autoclassifier.py b/evalml/tests/test_autoclassifier.py index ab49cebeee..def93c5a28 100644 --- a/evalml/tests/test_autoclassifier.py +++ 
b/evalml/tests/test_autoclassifier.py @@ -122,7 +122,7 @@ def test_multi_auto(X_y_multi): def test_categorical_auto(X_y_categorical_classification): X, y = X_y_categorical_classification clf = AutoClassifier(objective="recall", max_pipelines=5, multiclass=False) - clf.fit(X.values, y, raise_errors=True) + clf.fit(X.values, y) assert not clf.rankings['score'].isnull().all() diff --git a/evalml/tests/test_autoregressor.py b/evalml/tests/test_autoregressor.py index 633990cfc1..6ef7dd93d7 100644 --- a/evalml/tests/test_autoregressor.py +++ b/evalml/tests/test_autoregressor.py @@ -53,8 +53,8 @@ def test_random_state(X_y): def test_categorical(X_y_categorical_regression): X, y = X_y_categorical_regression - clf = AutoRegressor(objective="R2", max_pipelines=5, random_state=0, model_types=["random_forest"]) - clf.fit(X.values, y, raise_errors=True) + clf = AutoRegressor(objective="R2", max_pipelines=5, random_state=0) + clf.fit(X.values, y) assert not clf.rankings['score'].isnull().all() From c38fb437fececcc6e8f46e1a01498743f05c6e2f Mon Sep 17 00:00:00 2001 From: Jeremy Shih Date: Mon, 16 Sep 2019 17:44:24 -0400 Subject: [PATCH 09/18] Cleanup, fix feature_importance, and get rid of unnecessary tuning --- evalml/pipelines/classification/logistic_regression.py | 5 ++--- evalml/pipelines/classification/random_forest.py | 5 ++--- evalml/pipelines/classification/xgboost.py | 5 ++--- evalml/pipelines/pipeline_base.py | 2 +- evalml/pipelines/regression/random_forest.py | 6 +++--- evalml/tests/test_autoregressor.py | 2 ++ evalml/tests/test_logistic_regression.py | 2 +- evalml/tests/test_objectives.py | 2 +- evalml/tests/test_pipelines.py | 6 +++--- evalml/tests/test_rf.py | 2 +- evalml/tests/test_xgboost.py | 2 +- 11 files changed, 19 insertions(+), 20 deletions(-) diff --git a/evalml/pipelines/classification/logistic_regression.py b/evalml/pipelines/classification/logistic_regression.py index fb6bd70873..d77484fbb5 100644 --- a/evalml/pipelines/classification/logistic_regression.py +++ b/evalml/pipelines/classification/logistic_regression.py @@ -21,13 +21,12 @@ class LogisticRegressionPipeline(PipelineBase): "penalty": ["l2"], "C": Real(.01, 10), "impute_strategy": ["mean", "median", "most_frequent"], - "drop_invariant": [True, False] } - def __init__(self, objective, penalty, C, impute_strategy, drop_invariant, + def __init__(self, objective, penalty, C, impute_strategy, number_features, n_jobs=1, random_state=0): imputer = SimpleImputer(strategy=impute_strategy) - enc = ce.OneHotEncoder(drop_invariant=drop_invariant, return_df=True) + enc = ce.OneHotEncoder(use_cat_names=True, return_df=True) estimator = LogisticRegression(random_state=random_state, penalty=penalty, diff --git a/evalml/pipelines/classification/random_forest.py b/evalml/pipelines/classification/random_forest.py index cfd8497406..1827a886af 100644 --- a/evalml/pipelines/classification/random_forest.py +++ b/evalml/pipelines/classification/random_forest.py @@ -22,13 +22,12 @@ class RFClassificationPipeline(PipelineBase): "max_depth": Integer(1, 32), "impute_strategy": ["mean", "median", "most_frequent"], "percent_features": Real(.01, 1), - "drop_invariant": [True, False] } - def __init__(self, objective, n_estimators, max_depth, impute_strategy, drop_invariant, + def __init__(self, objective, n_estimators, max_depth, impute_strategy, percent_features, number_features, n_jobs=1, random_state=0): imputer = SimpleImputer(strategy=impute_strategy) - enc = ce.OneHotEncoder(drop_invariant=drop_invariant, return_df=True) + enc = 
ce.OneHotEncoder(use_cat_names=True, return_df=True) estimator = RandomForestClassifier(random_state=random_state, n_estimators=n_estimators, diff --git a/evalml/pipelines/classification/xgboost.py b/evalml/pipelines/classification/xgboost.py index eeefc68d47..242fd0ea73 100644 --- a/evalml/pipelines/classification/xgboost.py +++ b/evalml/pipelines/classification/xgboost.py @@ -22,14 +22,13 @@ class XGBoostPipeline(PipelineBase): "min_child_weight": Real(1, 10), "max_depth": Integer(1, 20), "impute_strategy": ["mean", "median", "most_frequent"], - "drop_invariant": [True, False], "percent_features": Real(.01, 1) } - def __init__(self, objective, eta, min_child_weight, max_depth, impute_strategy, drop_invariant, + def __init__(self, objective, eta, min_child_weight, max_depth, impute_strategy, percent_features, number_features, n_jobs=1, random_state=0): imputer = SimpleImputer(strategy=impute_strategy) - enc = ce.OneHotEncoder(drop_invariant=drop_invariant, return_df=True) + enc = ce.OneHotEncoder(use_cat_names=True, return_df=True) estimator = XGBClassifier( random_state=random_state, diff --git a/evalml/pipelines/pipeline_base.py b/evalml/pipelines/pipeline_base.py index d2dac014be..2906202f70 100644 --- a/evalml/pipelines/pipeline_base.py +++ b/evalml/pipelines/pipeline_base.py @@ -31,11 +31,11 @@ def fit(self, X, y, objective_fit_size=.2): if not isinstance(y, pd.Series): y = pd.Series(y) - self.input_feature_names = X.columns.tolist() if self.objective.needs_fitting: X, X_objective, y, y_objective = train_test_split(X, y, test_size=objective_fit_size, random_state=self.random_state) self.pipeline.fit(X, y) + self.input_feature_names = self.pipeline['encoder'].get_feature_names() if self.objective.needs_fitting: if self.objective.fit_needs_proba: diff --git a/evalml/pipelines/regression/random_forest.py b/evalml/pipelines/regression/random_forest.py index ae5cb3eddd..4c91f445f9 100644 --- a/evalml/pipelines/regression/random_forest.py +++ b/evalml/pipelines/regression/random_forest.py @@ -21,15 +21,14 @@ class RFRegressionPipeline(PipelineBase): "n_estimators": Integer(10, 1000), "max_depth": Integer(1, 32), "impute_strategy": ["mean", "median", "most_frequent"], - "drop_invariant": [True, False], "percent_features": Real(.01, 1) } - def __init__(self, objective, n_estimators, max_depth, impute_strategy, drop_invariant, percent_features, + def __init__(self, objective, n_estimators, max_depth, impute_strategy, percent_features, number_features, n_jobs=1, random_state=0): imputer = SimpleImputer(strategy=impute_strategy) - enc = ce.OneHotEncoder(drop_invariant=drop_invariant, return_df=True) + enc = ce.OneHotEncoder(use_cat_names=True, return_df=True) estimator = RandomForestRegressor(random_state=random_state, n_estimators=n_estimators, @@ -55,6 +54,7 @@ def __init__(self, objective, n_estimators, max_depth, impute_strategy, drop_inv def feature_importances(self): """Return feature importances. 
Features dropped by feature selection are excluded""" indices = self.pipeline["feature_selection"].get_support(indices=True) + # need to fix input_feature_names as it takes from original columns feature_names = list(map(lambda i: self.input_feature_names[i], indices)) importances = list(zip(feature_names, self.pipeline["estimator"].feature_importances_)) # note: this only works for binary importances.sort(key=lambda x: -abs(x[1])) diff --git a/evalml/tests/test_autoregressor.py b/evalml/tests/test_autoregressor.py index 6ef7dd93d7..b1ba3fbc8b 100644 --- a/evalml/tests/test_autoregressor.py +++ b/evalml/tests/test_autoregressor.py @@ -56,6 +56,8 @@ def test_categorical(X_y_categorical_regression): clf = AutoRegressor(objective="R2", max_pipelines=5, random_state=0) clf.fit(X.values, y) assert not clf.rankings['score'].isnull().all() + assert not clf.get_pipeline(0).feature_importances.isnull().all().all() + def test_callback(X_y): X, y = X_y diff --git a/evalml/tests/test_logistic_regression.py b/evalml/tests/test_logistic_regression.py index 44209ac880..d98e41016e 100644 --- a/evalml/tests/test_logistic_regression.py +++ b/evalml/tests/test_logistic_regression.py @@ -7,7 +7,7 @@ def test_lr_multi(X_y_multi): X, y = X_y_multi objective = PrecisionMicro() - clf = LogisticRegressionPipeline(objective=objective, penalty='l2', C=1.0, impute_strategy='mean', drop_invariant=False, number_features=len(X[0])) + clf = LogisticRegressionPipeline(objective=objective, penalty='l2', C=1.0, impute_strategy='mean', number_features=len(X[0])) clf.fit(X, y) clf.score(X, y) y_pred = clf.predict(X) diff --git a/evalml/tests/test_objectives.py b/evalml/tests/test_objectives.py index a0d459bff5..35df777b6a 100644 --- a/evalml/tests/test_objectives.py +++ b/evalml/tests/test_objectives.py @@ -26,7 +26,7 @@ def test_binary_average(X_y): X = pd.DataFrame(X) y = pd.Series(y) - pipeline = LogisticRegressionPipeline(objective=Precision(), penalty='l2', C=1.0, impute_strategy='mean', drop_invariant=False, number_features=0) + pipeline = LogisticRegressionPipeline(objective=Precision(), penalty='l2', C=1.0, impute_strategy='mean', number_features=0) pipeline.fit(X, y) y_pred = pipeline.predict(X) diff --git a/evalml/tests/test_pipelines.py b/evalml/tests/test_pipelines.py index 3813b0c26e..b549a1fdb9 100644 --- a/evalml/tests/test_pipelines.py +++ b/evalml/tests/test_pipelines.py @@ -43,7 +43,7 @@ def test_serialization(X_y, trained_model, path_management): path = os.path.join(path_management, 'pipe.pkl') objective = Precision() - pipeline = LogisticRegressionPipeline(objective=objective, penalty='l2', C=1.0, impute_strategy='mean', drop_invariant=False, number_features=len(X[0])) + pipeline = LogisticRegressionPipeline(objective=objective, penalty='l2', C=1.0, impute_strategy='mean', number_features=len(X[0])) pipeline.fit(X, y) save_pipeline(pipeline, path) assert pipeline.score(X, y) == load_pipeline(path).score(X, y) @@ -60,10 +60,10 @@ def test_reproducibility(X_y): amount_col=10 ) - clf = LogisticRegressionPipeline(objective=objective, penalty='l2', C=1.0, impute_strategy='mean', drop_invariant=False, number_features=len(X[0]), random_state=0) + clf = LogisticRegressionPipeline(objective=objective, penalty='l2', C=1.0, impute_strategy='mean', number_features=len(X[0]), random_state=0) clf.fit(X, y) - clf_1 = LogisticRegressionPipeline(objective=objective, penalty='l2', C=1.0, impute_strategy='mean', drop_invariant=False, number_features=len(X[0]), random_state=0) + clf_1 = LogisticRegressionPipeline(objective=objective, 
penalty='l2', C=1.0, impute_strategy='mean', number_features=len(X[0]), random_state=0) clf_1.fit(X, y) assert clf_1.score(X, y) == clf.score(X, y) diff --git a/evalml/tests/test_rf.py b/evalml/tests/test_rf.py index c49f43c014..2e3ec42c59 100644 --- a/evalml/tests/test_rf.py +++ b/evalml/tests/test_rf.py @@ -7,7 +7,7 @@ def test_rf_multi(X_y_multi): X, y = X_y_multi objective = PrecisionMicro() - clf = RFClassificationPipeline(objective=objective, n_estimators=10, max_depth=3, impute_strategy='mean', drop_invariant=False, percent_features=1.0, number_features=len(X[0])) + clf = RFClassificationPipeline(objective=objective, n_estimators=10, max_depth=3, impute_strategy='mean', percent_features=1.0, number_features=len(X[0])) clf.fit(X, y) clf.score(X, y) y_pred = clf.predict(X) diff --git a/evalml/tests/test_xgboost.py b/evalml/tests/test_xgboost.py index 05fed92641..cc005f9302 100644 --- a/evalml/tests/test_xgboost.py +++ b/evalml/tests/test_xgboost.py @@ -7,7 +7,7 @@ def test_xg_multi(X_y_multi): X, y = X_y_multi objective = PrecisionMicro() - clf = XGBoostPipeline(objective=objective, eta=0.1, min_child_weight=1, max_depth=3, impute_strategy='mean', drop_invariant=False, percent_features=1.0, number_features=len(X[0])) + clf = XGBoostPipeline(objective=objective, eta=0.1, min_child_weight=1, max_depth=3, impute_strategy='mean', percent_features=1.0, number_features=len(X[0])) clf.fit(X, y) clf.score(X, y) y_pred = clf.predict(X) From 5728f32dcccb3c9017ae2b460f59656301253b4e Mon Sep 17 00:00:00 2001 From: Jeremy Shih Date: Tue, 17 Sep 2019 10:45:08 -0400 Subject: [PATCH 10/18] set requirement ver --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 3818470237..e4f436c2e5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,4 +9,4 @@ scikit-optimize[plots] colorama s3fs==0.2.2 joblib>=0.10.3 -category_encoders +category_encoders==2.0.0 From c5d3b2d5c370b0140d975c0421906013bb98387f Mon Sep 17 00:00:00 2001 From: Jeremy Shih Date: Tue, 17 Sep 2019 10:53:07 -0400 Subject: [PATCH 11/18] Lint --- evalml/pipelines/classification/random_forest.py | 2 +- evalml/tests/test_autoregressor.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/evalml/pipelines/classification/random_forest.py b/evalml/pipelines/classification/random_forest.py index 1827a886af..4f56eb13a2 100644 --- a/evalml/pipelines/classification/random_forest.py +++ b/evalml/pipelines/classification/random_forest.py @@ -21,7 +21,7 @@ class RFClassificationPipeline(PipelineBase): "n_estimators": Integer(10, 1000), "max_depth": Integer(1, 32), "impute_strategy": ["mean", "median", "most_frequent"], - "percent_features": Real(.01, 1), + "percent_features": Real(.01, 1) } def __init__(self, objective, n_estimators, max_depth, impute_strategy, diff --git a/evalml/tests/test_autoregressor.py b/evalml/tests/test_autoregressor.py index b1ba3fbc8b..33989d5cf7 100644 --- a/evalml/tests/test_autoregressor.py +++ b/evalml/tests/test_autoregressor.py @@ -59,7 +59,6 @@ def test_categorical(X_y_categorical_regression): assert not clf.get_pipeline(0).feature_importances.isnull().all().all() - def test_callback(X_y): X, y = X_y From 3703fe1e7d2658301eaa48b7aab453fc8451da52 Mon Sep 17 00:00:00 2001 From: Jeremy Shih Date: Tue, 17 Sep 2019 13:27:54 -0400 Subject: [PATCH 12/18] Fix tests --- evalml/models/auto_base.py | 1 - evalml/pipelines/pipeline_base.py | 2 +- evalml/tests/test_autoclassifier.py | 3 ++- evalml/tests/test_autoregressor.py | 2 +- 4 files 
changed, 4 insertions(+), 4 deletions(-) diff --git a/evalml/models/auto_base.py b/evalml/models/auto_base.py index 8cd450cab0..e181790dbc 100644 --- a/evalml/models/auto_base.py +++ b/evalml/models/auto_base.py @@ -160,7 +160,6 @@ def _do_iteration(self, X, y, pbar, raise_errors): X_train, X_test = X.iloc[train], X.iloc[test] else: X_train, X_test = X[train], X[test] - if isinstance(y, pd.Series): y_train, y_test = y.iloc[train], y.iloc[test] else: diff --git a/evalml/pipelines/pipeline_base.py b/evalml/pipelines/pipeline_base.py index 2906202f70..2170a04e26 100644 --- a/evalml/pipelines/pipeline_base.py +++ b/evalml/pipelines/pipeline_base.py @@ -35,7 +35,7 @@ def fit(self, X, y, objective_fit_size=.2): X, X_objective, y, y_objective = train_test_split(X, y, test_size=objective_fit_size, random_state=self.random_state) self.pipeline.fit(X, y) - self.input_feature_names = self.pipeline['encoder'].get_feature_names() + self.input_feature_names = self.pipeline['encoder'].feature_names if self.objective.needs_fitting: if self.objective.fit_needs_proba: diff --git a/evalml/tests/test_autoclassifier.py b/evalml/tests/test_autoclassifier.py index def93c5a28..a3ee845ddb 100644 --- a/evalml/tests/test_autoclassifier.py +++ b/evalml/tests/test_autoclassifier.py @@ -122,8 +122,9 @@ def test_multi_auto(X_y_multi): def test_categorical_auto(X_y_categorical_classification): X, y = X_y_categorical_classification clf = AutoClassifier(objective="recall", max_pipelines=5, multiclass=False) - clf.fit(X.values, y) + clf.fit(X, y) assert not clf.rankings['score'].isnull().all() + assert not clf.get_pipeline(0).feature_importances.isnull().all().all() def test_random_state(X_y): diff --git a/evalml/tests/test_autoregressor.py b/evalml/tests/test_autoregressor.py index 33989d5cf7..79cce08c12 100644 --- a/evalml/tests/test_autoregressor.py +++ b/evalml/tests/test_autoregressor.py @@ -54,7 +54,7 @@ def test_random_state(X_y): def test_categorical(X_y_categorical_regression): X, y = X_y_categorical_regression clf = AutoRegressor(objective="R2", max_pipelines=5, random_state=0) - clf.fit(X.values, y) + clf.fit(X, y) assert not clf.rankings['score'].isnull().all() assert not clf.get_pipeline(0).feature_importances.isnull().all().all() From 00932be0d5bdeb72022042a23fc744f617c76ef5 Mon Sep 17 00:00:00 2001 From: Jeremy Shih Date: Tue, 17 Sep 2019 13:33:13 -0400 Subject: [PATCH 13/18] Remove comment --- evalml/pipelines/regression/random_forest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/evalml/pipelines/regression/random_forest.py b/evalml/pipelines/regression/random_forest.py index 4c91f445f9..4a46c3ac1f 100644 --- a/evalml/pipelines/regression/random_forest.py +++ b/evalml/pipelines/regression/random_forest.py @@ -54,7 +54,6 @@ def __init__(self, objective, n_estimators, max_depth, impute_strategy, percent_ def feature_importances(self): """Return feature importances. 
Features dropped by feature selection are excluded""" indices = self.pipeline["feature_selection"].get_support(indices=True) - # need to fix input_feature_names as it takes from original columns feature_names = list(map(lambda i: self.input_feature_names[i], indices)) importances = list(zip(feature_names, self.pipeline["estimator"].feature_importances_)) # note: this only works for binary importances.sort(key=lambda x: -abs(x[1])) From 690f14ee34f0c8fbff90c17f227ed48cf0864f63 Mon Sep 17 00:00:00 2001 From: Jeremy Shih Date: Tue, 17 Sep 2019 13:34:39 -0400 Subject: [PATCH 14/18] Change test names --- evalml/tests/test_autoclassifier.py | 2 +- evalml/tests/test_autoregressor.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/evalml/tests/test_autoclassifier.py b/evalml/tests/test_autoclassifier.py index a3ee845ddb..933d739ffd 100644 --- a/evalml/tests/test_autoclassifier.py +++ b/evalml/tests/test_autoclassifier.py @@ -119,7 +119,7 @@ def test_multi_auto(X_y_multi): assert clf.default_objectives == get_objectives('multiclass') -def test_categorical_auto(X_y_categorical_classification): +def test_categorical_classification(X_y_categorical_classification): X, y = X_y_categorical_classification clf = AutoClassifier(objective="recall", max_pipelines=5, multiclass=False) clf.fit(X, y) diff --git a/evalml/tests/test_autoregressor.py b/evalml/tests/test_autoregressor.py index 79cce08c12..738a2f8db8 100644 --- a/evalml/tests/test_autoregressor.py +++ b/evalml/tests/test_autoregressor.py @@ -51,7 +51,7 @@ def test_random_state(X_y): assert pd.testing.assert_frame_equal(clf.rankings, clf_1.rankings) is None -def test_categorical(X_y_categorical_regression): +def test_categorical_regression(X_y_categorical_regression): X, y = X_y_categorical_regression clf = AutoRegressor(objective="R2", max_pipelines=5, random_state=0) clf.fit(X, y) From ef6e3c899d78f00efef4413d966a1c58567f4b66 Mon Sep 17 00:00:00 2001 From: Jeremy Shih Date: Tue, 17 Sep 2019 16:34:57 -0400 Subject: [PATCH 15/18] Changed data to include categorical type --- evalml/tests/conftest.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/evalml/tests/conftest.py b/evalml/tests/conftest.py index 0782b960d4..29740382c0 100644 --- a/evalml/tests/conftest.py +++ b/evalml/tests/conftest.py @@ -25,6 +25,9 @@ def X_y_categorical_regression(): flights = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv') y = flights['tip'] X = flights.drop('tip', axis=1) + + # add categorical dtype + X['smoker'] = X['smoker'].astype('category') return X, y From c0f1006df037ea7f70f25c90f21a764ba4d66119 Mon Sep 17 00:00:00 2001 From: Jeremy Shih Date: Tue, 17 Sep 2019 17:28:30 -0400 Subject: [PATCH 16/18] set to min --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e4f436c2e5..e15bb34e8d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,4 +9,4 @@ scikit-optimize[plots] colorama s3fs==0.2.2 joblib>=0.10.3 -category_encoders==2.0.0 +category_encoders>=2.0.0 From 02a072d6fab8dab206d4369cef30828d7f5f6834 Mon Sep 17 00:00:00 2001 From: Jeremy Shih Date: Wed, 18 Sep 2019 10:09:05 -0400 Subject: [PATCH 17/18] Updated docs --- docs/source/automl/pipeline_search.ipynb | 2 +- docs/source/automl/regression_example.ipynb | 2 +- docs/source/automl/search_results.ipynb | 2 +- docs/source/demos/fraud.ipynb | 244 ++++++++++---------- docs/source/roadmap.rst | 3 + 5 files changed, 125 insertions(+), 128 deletions(-) diff --git 
a/docs/source/automl/pipeline_search.ipynb b/docs/source/automl/pipeline_search.ipynb index e1b0e6e79d..845902ec96 100644 --- a/docs/source/automl/pipeline_search.ipynb +++ b/docs/source/automl/pipeline_search.ipynb @@ -20,7 +20,7 @@ "source": [ "## How it works\n", "\n", - "EvalML selects and tunes machine learning pipelines built of numerous steps. This includes missing value imputation, feature selection, feature scaling, and finally machine learning. As EvalML tunes pipelines, it uses the objective function selected and configured by the user to guide its search. \n", + "EvalML selects and tunes machine learning pipelines built of numerous steps. This includes encoding categorical data, missing value imputation, feature selection, feature scaling, and finally machine learning. As EvalML tunes pipelines, it uses the objective function selected and configured by the user to guide its search. \n", "\n", "\n", "At each iteration, EvalML uses cross-validation to generate an estimate of the pipeline's performances. If a pipeline has high variance across cross-validation folds, it will provide a warning. In this case, the pipeline may not perform reliably in the future.\n", diff --git a/docs/source/automl/regression_example.ipynb b/docs/source/automl/regression_example.ipynb index afe3fb1ebe..9c47f21cfc 100644 --- a/docs/source/automl/regression_example.ipynb +++ b/docs/source/automl/regression_example.ipynb @@ -245,7 +245,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.7.4" } }, "nbformat": 4, diff --git a/docs/source/automl/search_results.ipynb b/docs/source/automl/search_results.ipynb index 407f7cdec3..9ccc9b627b 100644 --- a/docs/source/automl/search_results.ipynb +++ b/docs/source/automl/search_results.ipynb @@ -811,7 +811,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.7.4" } }, "nbformat": 4, diff --git a/docs/source/demos/fraud.ipynb b/docs/source/demos/fraud.ipynb index 4332065931..28fd8517e7 100644 --- a/docs/source/demos/fraud.ipynb +++ b/docs/source/demos/fraud.ipynb @@ -90,16 +90,38 @@ "X, y = evalml.demos.load_fraud()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "EvalML natively supports one-hot encoding. Here we keep 1 out of the 6 categorical columns to decrease computation time." + ] + }, { "cell_type": "code", "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "card_id int64\n", + "store_id int64\n", + "amount int64\n", + "currency object\n", + "customer_present bool\n", + "lat float64\n", + "lng float64\n", + "dtype: object\n" + ] + } + ], "source": [ - "# select numeric data before running AutoClassifer\n", - "numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64', 'bool']\n", - "X = X.select_dtypes(include=numerics)\n", - "X_train, X_holdout, y_train, y_holdout = evalml.preprocessing.split_data(X, y, test_size=.8, random_state=0)" + "X = X.drop(['datetime', 'expiration_date', 'country', 'region', 'provider'], axis=1)\n", + "X_train, X_holdout, y_train, y_holdout = evalml.preprocessing.split_data(X, y, test_size=0.2, random_state=0)\n", + "\n", + "print(X.dtypes)" ] }, { @@ -124,11 +146,11 @@ "\n", "Optimizing for Fraud Cost. Lower score is better.\n", "\n", - "Searching up to 10 pipelines. No time limit is set. Set one using max_time parameter.\n", + "Searching up to 5 pipelines. No time limit is set. 
Set one using max_time parameter.\n", "\n", "Possible model types: xgboost, linear_model, random_forest\n", "\n", - "Testing XGBoost w/ imputation: 100%|██████████| 10/10 [01:05<00:00, 6.53s/it] \n", + "Testing LogisticRegression w/ imputation + scaling: 100%|██████████| 5/5 [18:10<00:00, 218.05s/it]\n", "\n", "✔ Optimization finished\n" ] @@ -136,7 +158,8 @@ ], "source": [ "clf = evalml.AutoClassifier(objective=fraud_objective,\n", - " max_pipelines=10)\n", + " additional_objectives=['auc', 'recall', 'precision'],\n", + " max_pipelines=5)\n", "\n", "clf.fit(X_train, y_train)" ] @@ -185,43 +208,63 @@ " \n", " \n", " \n", - " 0\n", - " 9\n", + " 0\n", + " 1\n", " XGBoostPipeline\n", - " 0.007614\n", + " 0.007623\n", + " False\n", + " {'eta': 0.38438170729269994, 'min_child_weight...\n", + " \n", + " \n", + " 1\n", + " 4\n", + " LogisticRegressionPipeline\n", + " 0.007623\n", " False\n", - " {'eta': 0.6481718720511973, 'min_child_weight'...\n", + " {'penalty': 'l2', 'C': 8.444214828324364, 'imp...\n", " \n", " \n", - " 1\n", " 2\n", + " 0\n", " XGBoostPipeline\n", - " 0.007614\n", + " 0.007623\n", " False\n", " {'eta': 0.5928446182250184, 'min_child_weight'...\n", " \n", " \n", - " 2\n", - " 8\n", + " 3\n", + " 3\n", + " XGBoostPipeline\n", + " 0.007623\n", + " False\n", + " {'eta': 0.5288949197529046, 'min_child_weight'...\n", + " \n", + " \n", + " 4\n", + " 2\n", " RFClassificationPipeline\n", - " 0.007614\n", + " 0.007623\n", " False\n", - " {'n_estimators': 369, 'max_depth': 10, 'impute...\n", + " {'n_estimators': 569, 'max_depth': 22, 'impute...\n", " \n", " \n", "\n", "" ], "text/plain": [ - " id pipeline_name score high_variance_cv \\\n", - "0 9 XGBoostPipeline 0.007614 False \n", - "1 2 XGBoostPipeline 0.007614 False \n", - "2 8 RFClassificationPipeline 0.007614 False \n", + " id pipeline_name score high_variance_cv \\\n", + "0 1 XGBoostPipeline 0.007623 False \n", + "1 4 LogisticRegressionPipeline 0.007623 False \n", + "2 0 XGBoostPipeline 0.007623 False \n", + "3 3 XGBoostPipeline 0.007623 False \n", + "4 2 RFClassificationPipeline 0.007623 False \n", "\n", " parameters \n", - "0 {'eta': 0.6481718720511973, 'min_child_weight'... \n", - "1 {'eta': 0.5928446182250184, 'min_child_weight'... \n", - "2 {'n_estimators': 369, 'max_depth': 10, 'impute... " + "0 {'eta': 0.38438170729269994, 'min_child_weight... \n", + "1 {'penalty': 'l2', 'C': 8.444214828324364, 'imp... \n", + "2 {'eta': 0.5928446182250184, 'min_child_weight'... \n", + "3 {'eta': 0.5288949197529046, 'min_child_weight'... \n", + "4 {'n_estimators': 569, 'max_depth': 22, 'impute... 
" ] }, "execution_count": 6, @@ -230,7 +273,7 @@ } ], "source": [ - "clf.rankings.head(3)" + "clf.rankings" ] }, { @@ -274,25 +317,25 @@ "Pipeline Name: XGBoost w/ imputation\n", "Model type: xgboost\n", "Objective: Fraud Cost (lower is better)\n", - "Total training time (including CV): 6.1 seconds\n", + "Total training time (including CV): 383.7 seconds\n", "\n", "Parameters\n", "==========\n", - "• eta: 0.6481718720511973\n", - "• min_child_weight: 4.314173858564932\n", - "• max_depth: 6\n", - "• impute_strategy: most_frequent\n", - "• percent_features: 0.871312026764351\n", + "• eta: 0.38438170729269994\n", + "• min_child_weight: 3.677811458900251\n", + "• max_depth: 13\n", + "• impute_strategy: median\n", + "• percent_features: 0.793807787701838\n", "\n", "Cross Validation\n", "=================\n", - " F1 Precision Recall AUC Log Loss Fraud Cost # Training # Testing\n", - "0 0.264 0.152 0.264 0.841 0.192 0.008 13332.000 6666.000\n", - "1 0.264 0.152 0.264 0.845 0.191 0.008 13332.000 6666.000\n", - "2 0.264 0.152 0.264 0.834 0.202 0.008 13332.000 6666.000\n", - "mean 0.264 0.152 0.264 0.840 0.195 0.008 - -\n", - "std 0.000 0.000 0.000 0.006 0.006 0.000 - -\n", - "coef of var 0.000 0.000 0.000 0.007 0.029 0.002 - -\n" + " AUC Recall Precision Fraud Cost # Training # Testing\n", + "0 0.831 0.264 0.152 0.008 53328.000 26665.000\n", + "1 0.833 0.264 0.152 0.008 53328.000 26665.000\n", + "2 0.836 0.264 0.152 0.008 53330.000 26663.000\n", + "mean 0.834 0.264 0.152 0.008 - -\n", + "std 0.003 0.000 0.000 0.000 - -\n", + "coef of var 0.003 0.000 0.000 0.003 - -\n" ] } ], @@ -317,7 +360,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 9, @@ -344,8 +387,8 @@ { "data": { "text/plain": [ - "(0.007625590107402798,\n", - " {'AUC': 0.8497570761248703, 'Fraud Cost': 0.007625590107402798})" + "(0.007626457064581641,\n", + " {'AUC': 0.8336438887334185, 'Fraud Cost': 0.007626457064581641})" ] }, "execution_count": 10, @@ -381,11 +424,11 @@ "\n", "Optimizing for AUC. Greater score is better.\n", "\n", - "Searching up to 10 pipelines. No time limit is set. Set one using max_time parameter.\n", + "Searching up to 5 pipelines. No time limit is set. 
Set one using max_time parameter.\n", "\n", "Possible model types: xgboost, linear_model, random_forest\n", "\n", - "Testing XGBoost w/ imputation: 100%|██████████| 10/10 [01:29<00:00, 8.96s/it] \n", + "Testing LogisticRegression w/ imputation + scaling: 100%|██████████| 5/5 [19:16<00:00, 231.26s/it]\n", "\n", "✔ Optimization finished\n" ] @@ -393,7 +436,8 @@ ], "source": [ "clf_auc = evalml.AutoClassifier(objective='auc',\n", - " max_pipelines=10)\n", + " additional_objectives=['recall', 'precision'],\n", + " max_pipelines=5)\n", "\n", "clf_auc.fit(X_train, y_train)" ] @@ -440,113 +484,63 @@ " \n", " \n", " \n", - " 0\n", - " 1\n", + " 0\n", + " 2\n", " RFClassificationPipeline\n", - " 0.863238\n", + " 0.873053\n", " False\n", " {'n_estimators': 569, 'max_depth': 22, 'impute...\n", " \n", " \n", - " 1\n", - " 9\n", + " 1\n", + " 0\n", " XGBoostPipeline\n", - " 0.852239\n", + " 0.849826\n", " False\n", - " {'eta': 0.38438170729269994, 'min_child_weight...\n", - " \n", - " \n", - " 2\n", - " 3\n", - " RFClassificationPipeline\n", - " 0.847514\n", - " False\n", - " {'n_estimators': 369, 'max_depth': 10, 'impute...\n", - " \n", - " \n", - " 3\n", - " 8\n", - " RFClassificationPipeline\n", - " 0.846346\n", - " False\n", - " {'n_estimators': 715, 'max_depth': 7, 'impute_...\n", + " {'eta': 0.5928446182250184, 'min_child_weight'...\n", " \n", " \n", - " 4\n", " 2\n", + " 1\n", " XGBoostPipeline\n", - " 0.845902\n", + " 0.840634\n", " False\n", - " {'eta': 0.5928446182250184, 'min_child_weight'...\n", + " {'eta': 0.38438170729269994, 'min_child_weight...\n", " \n", " \n", - " 5\n", - " 5\n", - " RFClassificationPipeline\n", - " 0.842745\n", + " 3\n", + " 3\n", + " XGBoostPipeline\n", + " 0.839091\n", " False\n", - " {'n_estimators': 609, 'max_depth': 7, 'impute_...\n", + " {'eta': 0.5288949197529046, 'min_child_weight'...\n", " \n", " \n", - " 6\n", + " 4\n", " 4\n", " LogisticRegressionPipeline\n", - " 0.838806\n", - " False\n", - " {'penalty': 'l2', 'C': 6.239401330891865, 'imp...\n", - " \n", - " \n", - " 7\n", - " 0\n", - " LogisticRegressionPipeline\n", - " 0.838806\n", + " 0.831181\n", " False\n", " {'penalty': 'l2', 'C': 8.444214828324364, 'imp...\n", " \n", - " \n", - " 8\n", - " 7\n", - " LogisticRegressionPipeline\n", - " 0.838806\n", - " False\n", - " {'penalty': 'l2', 'C': 8.123565600467177, 'imp...\n", - " \n", - " \n", - " 9\n", - " 6\n", - " LogisticRegressionPipeline\n", - " 0.838806\n", - " False\n", - " {'penalty': 'l2', 'C': 0.5765626434012575, 'im...\n", - " \n", " \n", "\n", "" ], "text/plain": [ " id pipeline_name score high_variance_cv \\\n", - "0 1 RFClassificationPipeline 0.863238 False \n", - "1 9 XGBoostPipeline 0.852239 False \n", - "2 3 RFClassificationPipeline 0.847514 False \n", - "3 8 RFClassificationPipeline 0.846346 False \n", - "4 2 XGBoostPipeline 0.845902 False \n", - "5 5 RFClassificationPipeline 0.842745 False \n", - "6 4 LogisticRegressionPipeline 0.838806 False \n", - "7 0 LogisticRegressionPipeline 0.838806 False \n", - "8 7 LogisticRegressionPipeline 0.838806 False \n", - "9 6 LogisticRegressionPipeline 0.838806 False \n", + "0 2 RFClassificationPipeline 0.873053 False \n", + "1 0 XGBoostPipeline 0.849826 False \n", + "2 1 XGBoostPipeline 0.840634 False \n", + "3 3 XGBoostPipeline 0.839091 False \n", + "4 4 LogisticRegressionPipeline 0.831181 False \n", "\n", " parameters \n", "0 {'n_estimators': 569, 'max_depth': 22, 'impute... \n", - "1 {'eta': 0.38438170729269994, 'min_child_weight... \n", - "2 {'n_estimators': 369, 'max_depth': 10, 'impute... 
\n", - "3 {'n_estimators': 715, 'max_depth': 7, 'impute_... \n", - "4 {'eta': 0.5928446182250184, 'min_child_weight'... \n", - "5 {'n_estimators': 609, 'max_depth': 7, 'impute_... \n", - "6 {'penalty': 'l2', 'C': 6.239401330891865, 'imp... \n", - "7 {'penalty': 'l2', 'C': 8.444214828324364, 'imp... \n", - "8 {'penalty': 'l2', 'C': 8.123565600467177, 'imp... \n", - "9 {'penalty': 'l2', 'C': 0.5765626434012575, 'im... " + "1 {'eta': 0.5928446182250184, 'min_child_weight'... \n", + "2 {'eta': 0.38438170729269994, 'min_child_weight... \n", + "3 {'eta': 0.5288949197529046, 'min_child_weight'... \n", + "4 {'penalty': 'l2', 'C': 8.444214828324364, 'imp... " ] }, "execution_count": 12, @@ -566,7 +560,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 13, @@ -589,8 +583,8 @@ { "data": { "text/plain": [ - "(0.8619958322554153,\n", - " {'AUC': 0.8619958322554153, 'Fraud Cost': 0.03432590219090485})" + "(0.8745605699827037,\n", + " {'AUC': 0.8745605699827037, 'Fraud Cost': 0.03273490785793763})" ] }, "execution_count": 14, @@ -629,7 +623,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.7.4" } }, "nbformat": 4, diff --git a/docs/source/roadmap.rst b/docs/source/roadmap.rst index 69bd353677..fe25b6d3ef 100644 --- a/docs/source/roadmap.rst +++ b/docs/source/roadmap.rst @@ -12,4 +12,7 @@ There are numerous new features and functionality planned for EvalML, some of wh * Instructions for adding your own modeling pipelines for EvalML to tune * Add additional hyperparameter tuning methods * Handle categorical data natively within EvalML + + * One-hot encoding has been incorparted natively within EvalMl + * Visualizations for understanding model search From fd321555b5394bc3d45a4550cea2ea06361db88b Mon Sep 17 00:00:00 2001 From: Jeremy Shih Date: Wed, 18 Sep 2019 10:51:40 -0400 Subject: [PATCH 18/18] Remove from road map --- docs/source/roadmap.rst | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/source/roadmap.rst b/docs/source/roadmap.rst index fe25b6d3ef..29c6e48889 100644 --- a/docs/source/roadmap.rst +++ b/docs/source/roadmap.rst @@ -11,8 +11,4 @@ There are numerous new features and functionality planned for EvalML, some of wh * Ability to warm start from a previous pipeline search * Instructions for adding your own modeling pipelines for EvalML to tune * Add additional hyperparameter tuning methods -* Handle categorical data natively within EvalML - - * One-hot encoding has been incorparted natively within EvalMl - * Visualizations for understanding model search