
wip: addressing comments #159

Merged 14 commits on Oct 31, 2019
1 change: 0 additions & 1 deletion evalml/pipelines/classification/logistic_regression.py
@@ -36,5 +36,4 @@ def __init__(self, objective, penalty, C, impute_strategy,

        super().__init__(objective=objective,
                         name=self.name,
-                        problem_type=self.problem_types,
                         component_list=[enc, imputer, scaler, estimator])
1 change: 0 additions & 1 deletion evalml/pipelines/classification/random_forest.py
@@ -44,5 +44,4 @@ def __init__(self, objective, n_estimators, max_depth, impute_strategy,

        super().__init__(objective=objective,
                         name=self.name,
-                        problem_type=self.problem_types,
                         component_list=[enc, imputer, feature_selection, estimator])
1 change: 0 additions & 1 deletion evalml/pipelines/classification/xgboost.py
@@ -47,5 +47,4 @@ def __init__(self, objective, eta, min_child_weight, max_depth, impute_strategy,

        super().__init__(objective=objective,
                         name=self.name,
-                        problem_type=self.problem_types,
                         component_list=[enc, imputer, feature_selection, estimator])
5 changes: 4 additions & 1 deletion evalml/pipelines/components/component_base.py
@@ -36,4 +36,7 @@ def describe(self, print_name=False, return_dict=False):

            parameter_str = ("\t * {} : {}").format(parameter, self.parameters[parameter])
            self.logger.log(parameter_str)
        if return_dict:
-            return self.parameters
+            component_dict = {}
+            component_dict.update({"name": self.name})
+            component_dict.update({"parameters": self.parameters})
+            return component_dict
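
The new return shape is easiest to see in isolation. A minimal usage sketch, assuming SimpleImputer is importable from evalml.pipelines.components as the updated tests below suggest:

from evalml.pipelines.components import SimpleImputer

imputer = SimpleImputer("mean")
# describe(return_dict=True) now wraps the parameters in a dict that
# also carries the component's display name, instead of returning the
# bare parameter dict.
assert imputer.describe(return_dict=True) == {
    "name": "Simple Imputer",
    "parameters": {"impute_strategy": "mean"},
}
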
1 change: 0 additions & 1 deletion evalml/pipelines/components/estimators/estimator.py
@@ -1,4 +1,3 @@
-
 from evalml.pipelines.components import ComponentBase

@@ -7,3 +7,15 @@ class FeatureSelector(Transformer):
    def get_indices(self):
        indices = self._component_obj.get_support(indices=True)
        return indices
+
+    def get_names(self, all_feature_names):
+        """Get names of selected features.
+
+        Args:
+            all_feature_names: feature names
+
+        Returns:
+            list of the names of features selected
+        """
+        indices = self.get_indices()
+        return list(map(lambda i: all_feature_names[i], indices))
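
Since get_names is just an index-to-name mapping over get_indices, a plain-Python sketch (column names illustrative) shows the behavior:

# Suppose the fitted selector kept columns 0 and 2 of a three-column frame;
# get_indices() would return those positions.
all_feature_names = ["age", "zipcode", "income"]
selected_indices = [0, 2]

# get_names maps each surviving index back to its original column name.
selected_names = list(map(lambda i: all_feature_names[i], selected_indices))
assert selected_names == ["age", "income"]
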
48 changes: 20 additions & 28 deletions evalml/pipelines/pipeline_base.py
@@ -3,23 +3,21 @@
import pandas as pd
from sklearn.model_selection import train_test_split

-from .components import Encoder, Estimator, FeatureSelector, handle_component
+from .components import Estimator, handle_component

from evalml.objectives import get_objective
from evalml.utils import Logger


class PipelineBase:
-    def __init__(self, name, objective, component_list, problem_type=None, n_jobs=-1, random_state=0):
+    def __init__(self, name, objective, component_list, n_jobs=-1, random_state=0):
        """Machine learning pipeline made out of transformers and an estimator.

        Arguments:
            objective (Object): the objective to optimize

            component_list (list): List of components in order

-            problem_type (ProblemTypes): Machine learning problem associated with the pipeline
-
            random_state (int): random seed/state

            n_jobs (int): Number of jobs to run in parallel
@@ -29,18 +27,17 @@ def __init__(self, objective, component_list, problem_type=None, n_jobs=-1
        self.random_state = random_state
        self.component_list = [handle_component(component) for component in component_list]
        self.component_names = [comp.name for comp in self.component_list]
-        self.name = self._generate_name()  # autogenerated

+        self.input_feature_names = {}
        # check if one and only estimator in pipeline is the last element in component_list
-        estimator = next((component for component in self.component_list if (isinstance(component, Estimator))), None)
-        if estimator is not None:
-            self.estimator = estimator
-            self.problem_types = estimator.problem_types
-            self.model_type = estimator.model_type
-            estimator_index = self.component_list.index(estimator)
+        self.estimator = next((component for component in self.component_list if (isinstance(component, Estimator))), None)
+        if self.estimator is not None:
+            self.problem_types = self.estimator.problem_types
+            self.model_type = self.estimator.model_type
+            estimator_index = self.component_list.index(self.estimator)
            if estimator_index != len(self.component_list) - 1:
                raise RuntimeError("Estimator must be the last component in the pipeline.")

+        self.name = self._generate_name()  # autogenerated
        self.results = {}
        self.n_jobs = n_jobs
        self.parameters = {}
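
With problem_type gone from the signature, problem_types and model_type now come from the estimator itself. A sketch of the resulting behavior, assuming these component import paths and that string objectives resolve through get_objective:

from evalml.pipelines import PipelineBase
from evalml.pipelines.components import (
    LogisticRegressionClassifier,
    OneHotEncoder,
    SimpleImputer,
)

pipeline = PipelineBase(
    name="ignored",  # overwritten by _generate_name()
    objective="precision",
    component_list=[OneHotEncoder(), SimpleImputer("mean"),
                    LogisticRegressionClassifier()],
)
# problem_types and model_type mirror the final estimator component.
assert pipeline.problem_types == pipeline.estimator.problem_types
assert pipeline.model_type == pipeline.estimator.model_type
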
@@ -64,7 +61,10 @@ def __setitem__(self, index, value):
        raise NotImplementedError('Setting pipeline components is not supported.')

    def _generate_name(self):
-        name = "{}".format(self.component_list[-1].name)
+        if self.estimator is not None:
+            name = "{}".format(self.estimator.name)
+        else:
+            name = "Pipeline"
        for index, component in enumerate(self.component_list[:-1]):
            if index == 0:
                name += " w/ {}".format(component.name)
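
The join for transformers after the first is collapsed in this diff; assuming it is " + ", a standalone sketch of the naming logic:

def generate_name(estimator_name, transformer_names):
    # Mirrors the new _generate_name: lead with the estimator (or a
    # generic fallback), then append the transformers in pipeline order.
    name = estimator_name if estimator_name is not None else "Pipeline"
    for index, transformer in enumerate(transformer_names):
        if index == 0:
            name += " w/ {}".format(transformer)
        else:
            name += " + {}".format(transformer)  # assumed separator
    return name

assert generate_name("Logistic Regression Classifier",
                     ["One Hot Encoder", "Simple Imputer"]) == \
    "Logistic Regression Classifier w/ One Hot Encoder + Simple Imputer"
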
@@ -113,11 +113,13 @@ def _fit(self, X, y):
        X_t = X
        y_t = y
        for component in self.component_list[:-1]:
+            self.input_feature_names.update({component.name: list(pd.DataFrame(X_t))})
            if component._needs_fitting:
                X_t = component.fit_transform(X_t, y_t)
            else:
                X_t = component.transform(X_t, y_t)
-        self.component_list[-1].fit(X_t, y_t)
+        self.input_feature_names.update({self.estimator.name: list(pd.DataFrame(X_t))})
+        self.estimator.fit(X_t, y_t)
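
After _fit, input_feature_names maps each component (estimator included) to the column names of the frame it saw. An illustrative sketch of the bookkeeping, independent of evalml:

import pandas as pd

input_feature_names = {}
X_t = pd.DataFrame({"age": [25, 40], "income": [30_000, 82_000]})

# Same trick as _fit: list(pd.DataFrame(X_t)) yields the column names.
input_feature_names.update({"Simple Imputer": list(pd.DataFrame(X_t))})
assert input_feature_names == {"Simple Imputer": ["age", "income"]}
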

    def fit(self, X, y, objective_fit_size=.2):
        """Build a model
@@ -146,12 +148,6 @@ def fit(self, X, y, objective_fit_size=.2):

        self._fit(X, y)

-        encoder = next((component for component in self.component_list if (isinstance(component, Encoder))), None)
-        if encoder is not None:
-            self.input_feature_names = encoder.get_feature_names()
-        else:
-            self.input_feature_names = X.columns.tolist()
-
        if self.objective.needs_fitting:
            if self.objective.fit_needs_proba:
                y_predicted = self.predict_proba(X_objective)
@@ -181,14 +177,14 @@ def predict(self, X):
            y_predicted = self.predict_proba(X)
        else:
            X_t = self._transform(X)
-            y_predicted = self.component_list[-1].predict(X_t)
+            y_predicted = self.estimator.predict(X_t)

        if self.objective.uses_extra_columns:
            return self.objective.predict(y_predicted, X)

        return self.objective.predict(y_predicted)

-        return self.component_list[-1].predict(X_t)
+        return self.estimator.predict(X_t)

    def predict_proba(self, X):
        """Make probability estimates for labels.
@@ -200,7 +196,7 @@ def predict_proba(self, X):
            DataFrame : probability estimates
        """
        X = self._transform(X)
-        proba = self.component_list[-1].predict_proba(X)
+        proba = self.estimator.predict_proba(X)
        if proba.shape[1] <= 2:
            return proba[:, 1]
        else:
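
The [:, 1] slice relies on the usual sklearn-style layout where column 1 of a binary predict_proba result is the positive class; a quick numpy check:

import numpy as np

proba = np.array([[0.8, 0.2],
                  [0.3, 0.7]])  # rows: samples; cols: [negative, positive]
assert np.allclose(proba[:, 1], [0.2, 0.7])
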
@@ -247,11 +243,7 @@ def score(self, X, y, other_objectives=None):
    @property
    def feature_importances(self):
        """Return feature importances. Features dropped by feature selection are excluded"""
-        feature_selector = next((component for component in self.component_list if (isinstance(component, FeatureSelector))), None)
-        feature_names = self.input_feature_names
-        if feature_selector is not None:
-            indices = feature_selector.get_indices()
-            feature_names = list(map(lambda i: self.input_feature_names[i], indices))
+        feature_names = self.input_feature_names[self.estimator.name]
        importances = list(zip(feature_names, self.estimator.feature_importances))  # note: this only works for binary
        importances.sort(key=lambda x: -abs(x[1]))
        df = pd.DataFrame(importances, columns=["feature", "importance"])
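
Because input_feature_names is recorded per component during _fit, looking up the estimator's entry already reflects any columns a selector dropped, so the old FeatureSelector scan is unnecessary. Output sketch, where fitted_pipeline is a placeholder for any fitted PipelineBase and the values are illustrative:

# After fitting a pipeline whose selector dropped 'zipcode':
print(fitted_pipeline.feature_importances)
#     feature  importance
# 0    income        0.61
# 1       age       -0.23
# Rows are sorted by absolute importance, largest first.
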
1 change: 0 additions & 1 deletion evalml/pipelines/regression/linear_regression.py
@@ -30,5 +30,4 @@ def __init__(self, objective, random_state, number_features, impute_strategy, no

        super().__init__(objective=objective,
                         name=self.name,
-                        problem_type=self.problem_types,
                         component_list=[enc, imputer, scaler, estimator])
1 change: 0 additions & 1 deletion evalml/pipelines/regression/random_forest.py
@@ -42,5 +42,4 @@ def __init__(self, objective, n_estimators, max_depth, impute_strategy, percent_

        super().__init__(objective=objective,
                         name=self.name,
-                        problem_type=self.problem_types,
                         component_list=[enc, imputer, feature_selection, estimator])
1 change: 0 additions & 1 deletion evalml/problem_types/utils.py
@@ -10,7 +10,6 @@ def handle_problem_types(problem_type):

    Returns:
        ProblemTypes
    """
-
    if isinstance(problem_type, str):
        try:
            tpe = ProblemTypes[problem_type.upper()]
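
A usage sketch of the string path through handle_problem_types (import location assumed from the module path above):

from evalml.problem_types import ProblemTypes, handle_problem_types

# Strings are upper-cased and matched against the enum members.
assert handle_problem_types("binary") == ProblemTypes.BINARY
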
18 changes: 9 additions & 9 deletions evalml/tests/component_tests/test_components.py
@@ -45,22 +45,22 @@ def test_describe_component():

    imputer = SimpleImputer("mean")
    scaler = StandardScaler()
    feature_selection = RFClassifierSelectFromModel(n_estimators=10, number_features=5, percent_features=0.3, threshold=10)
-    assert enc.describe(return_dict=True) == {}
-    assert imputer.describe(return_dict=True) == {"impute_strategy": "mean"}
-    assert scaler.describe(return_dict=True) == {}
-    assert feature_selection.describe(return_dict=True) == {"percent_features": 0.3, "threshold": 10}
+    assert enc.describe(return_dict=True) == {'name': 'One Hot Encoder', 'parameters': {}}
+    assert imputer.describe(return_dict=True) == {'name': 'Simple Imputer', 'parameters': {'impute_strategy': 'mean'}}
+    assert scaler.describe(return_dict=True) == {'name': 'Standard Scaler', 'parameters': {}}
+    assert feature_selection.describe(return_dict=True) == {'name': 'RF Select From Model', 'parameters': {'percent_features': 0.3, 'threshold': 10}}

    # testing estimators
    lr_classifier = LogisticRegressionClassifier()
    rf_classifier = RandomForestClassifier(n_estimators=10, max_depth=3)
    xgb_classifier = XGBoostClassifier(eta=0.1, min_child_weight=1, max_depth=3)
    rf_regressor = RandomForestRegressor(n_estimators=10, max_depth=3)
    linear_regressor = LinearRegressor()
-    assert lr_classifier.describe(return_dict=True) == {"penalty": "l2", "C": 1.0}
-    assert rf_classifier.describe(return_dict=True) == {"n_estimators": 10, "max_depth": 3}
-    assert xgb_classifier.describe(return_dict=True) == {"eta": 0.1, "max_depth": 3, "min_child_weight": 1}
-    assert rf_regressor.describe(return_dict=True) == {"n_estimators": 10, "max_depth": 3}
-    assert linear_regressor.describe(return_dict=True) == {"fit_intercept": True, 'normalize': False}
+    assert lr_classifier.describe(return_dict=True) == {'name': 'Logistic Regression Classifier', 'parameters': {'C': 1.0, 'penalty': 'l2'}}
+    assert rf_classifier.describe(return_dict=True) == {'name': 'Random Forest Classifier', 'parameters': {'max_depth': 3, 'n_estimators': 10}}
+    assert xgb_classifier.describe(return_dict=True) == {'name': 'XGBoost Classifier', 'parameters': {'eta': 0.1, 'max_depth': 3, 'min_child_weight': 1}}
+    assert rf_regressor.describe(return_dict=True) == {'name': 'Random Forest Regressor', 'parameters': {'max_depth': 3, 'n_estimators': 10}}
+    assert linear_regressor.describe(return_dict=True) == {'name': 'Linear Regressor', 'parameters': {'fit_intercept': True, 'normalize': False}}


def test_missing_methods_on_components(X_y):
2 changes: 1 addition & 1 deletion evalml/tests/pipeline_tests/test_pipelines.py
@@ -131,7 +131,7 @@ def __init__(self, objective, penalty, C, impute_strategy,

                                          penalty=penalty,
                                          C=C,
                                          n_jobs=-1)
-        super().__init__(objective=objective, name=self.name, problem_type=[ProblemTypes.BINARY, ProblemTypes.MULTICLASS], component_list=[enc, imputer, estimator, scaler])
+        super().__init__(objective=objective, name=self.name, component_list=[enc, imputer, estimator, scaler])

    err_msg = "Estimator must be the last component in the pipeline."
    with pytest.raises(RuntimeError, match=err_msg):