
Remove _needs_fitting attribute from Components #398

Merged Feb 26, 2020 · 31 commits
5231bd6
remove _needs_fitting
jeremyliweishih Feb 19, 2020
f560ae8
Edit changelog
jeremyliweishih Feb 19, 2020
2e10e1e
Switch to RunTimeError from Transformer
jeremyliweishih Feb 19, 2020
f079c83
Merge branch 'master' into 366_needs_fitting
jeremyliweishih Feb 19, 2020
6d139f6
Fit then transform
jeremyliweishih Feb 19, 2020
266e869
Add y to transform
jeremyliweishih Feb 19, 2020
44a0515
Remmove last _needs_fitting
jeremyliweishih Feb 19, 2020
978fc26
Merge branch '366_needs_fitting' of https://github.com/FeatureLabs/ev…
jeremyliweishih Feb 19, 2020
d00fc98
Merge branch 'master' into 366_needs_fitting
jeremyliweishih Feb 19, 2020
f0cc076
Refactor all transformers to have optional y
jeremyliweishih Feb 20, 2020
3686cbe
Merge branch '366_needs_fitting' of https://github.com/FeatureLabs/ev…
jeremyliweishih Feb 20, 2020
47a4639
Cap xgboost for now
jeremyliweishih Feb 20, 2020
af324ab
Add test for fit_transform
jeremyliweishih Feb 20, 2020
f1a171b
Merge branch 'master' into 366_needs_fitting
jeremyliweishih Feb 20, 2020
8b49718
Add specialized error for missing method/attributes
jeremyliweishih Feb 24, 2020
2a666de
Merge branch '366_needs_fitting' of https://github.com/FeatureLabs/ev…
jeremyliweishih Feb 24, 2020
3e59341
Reword y param docstring
jeremyliweishih Feb 24, 2020
22b224d
Edit test to also use Y
jeremyliweishih Feb 24, 2020
604b39e
Try both fit_transform and fit then transform
jeremyliweishih Feb 24, 2020
6d09a2c
lint
jeremyliweishih Feb 24, 2020
6a8d24e
Fix tests with fit than transform
jeremyliweishih Feb 24, 2020
f2908a0
Merge branch 'master' into 366_needs_fitting
jeremyliweishih Feb 24, 2020
9635d59
lint
jeremyliweishih Feb 24, 2020
431995c
Merge branch '366_needs_fitting' of https://github.com/FeatureLabs/ev…
jeremyliweishih Feb 24, 2020
52024b9
Add docstring to suggestions
jeremyliweishih Feb 25, 2020
ad0d50d
Add all cases for fit_transform tests
jeremyliweishih Feb 25, 2020
2cb5e98
Merge branch 'master' into 366_needs_fitting
jeremyliweishih Feb 25, 2020
b568531
Merge branch 'master' into 366_needs_fitting
jeremyliweishih Feb 25, 2020
3750fc2
Merge branch 'master' into 366_needs_fitting
jeremyliweishih Feb 26, 2020
5ed2f49
Edit docstring
jeremyliweishih Feb 26, 2020
e4e161d
Merge branch 'master' into 366_needs_fitting
jeremyliweishih Feb 26, 2020
1 change: 1 addition & 0 deletions docs/source/changelog.rst
@@ -15,6 +15,7 @@ Changelog
* Remove unused parameter ObjectiveBase.fit_needs_proba :pr:`320`
* Remove extraneous parameter component_type from all components :pr:`361`
* Remove unused rankings.csv file :pr:`397`
* Remove `_needs_fitting` attribute from Components :pr:`398`
* Changed plot.feature_importance to show only non-zero feature importances by default, added optional parameter to show all :pr:`413`
* Documentation Changes
* Update release.md with instructions to release to internal license key :pr:`354`
2 changes: 2 additions & 0 deletions evalml/exceptions/__init__.py
@@ -0,0 +1,2 @@
# flake8:noqa
from .exceptions import MethodPropertyNotFoundError
3 changes: 3 additions & 0 deletions evalml/exceptions/exceptions.py
@@ -0,0 +1,3 @@
class MethodPropertyNotFoundError(Exception):
"""Exception to raise when a class does not have an expected method or property."""
pass
5 changes: 3 additions & 2 deletions evalml/pipelines/components/component_base.py
@@ -1,3 +1,4 @@
from evalml.exceptions import MethodPropertyNotFoundError
from evalml.utils import Logger


@@ -8,7 +9,7 @@ def __init__(self, parameters, component_obj, random_state):
self.parameters = parameters
self.logger = Logger()

attributes_to_check = ['_needs_fitting', "name"]
attributes_to_check = ["name"]

for attribute in attributes_to_check:
if not hasattr(self, attribute):
@@ -28,7 +29,7 @@ def fit(self, X, y=None):
self._component_obj.fit(X, y)
return self
except AttributeError:
raise RuntimeError("Component requires a fit method or a component_obj that implements fit")
raise MethodPropertyNotFoundError("Component requires a fit method or a component_obj that implements fit")

def describe(self, print_name=False, return_dict=False):
"""Describe a component and its parameters
@@ -17,7 +17,6 @@ class CatBoostClassifier(Estimator):
For more information, check out https://catboost.ai/
"""
name = "CatBoost Classifier"
_needs_fitting = True
hyperparameter_ranges = {
"n_estimators": Integer(10, 1000),
"eta": Real(0, 1),
@@ -12,7 +12,6 @@ class LogisticRegressionClassifier(Estimator):
Logistic Regression Classifier
"""
name = "Logistic Regression Classifier"
_needs_fitting = True
hyperparameter_ranges = {
"penalty": ["l2"],
"C": Real(.01, 10),
@@ -9,7 +9,6 @@
class RandomForestClassifier(Estimator):
"""Random Forest Classifier"""
name = "Random Forest Classifier"
_needs_fitting = True
hyperparameter_ranges = {
"n_estimators": Integer(10, 1000),
"max_depth": Integer(1, 32),
@@ -9,7 +9,6 @@
class XGBoostClassifier(Estimator):
"""XGBoost Classifier"""
name = "XGBoost Classifier"
_needs_fitting = True
hyperparameter_ranges = {
"eta": Real(0, 1),
"max_depth": Integer(1, 20),
7 changes: 4 additions & 3 deletions evalml/pipelines/components/estimators/estimator.py
@@ -1,3 +1,4 @@
from evalml.exceptions import MethodPropertyNotFoundError
from evalml.pipelines.components import ComponentBase


@@ -16,7 +17,7 @@ def predict(self, X):
try:
return self._component_obj.predict(X)
except AttributeError:
raise RuntimeError("Estimator requires a predict method or a component_obj that implements predict")
raise MethodPropertyNotFoundError("Estimator requires a predict method or a component_obj that implements predict")

def predict_proba(self, X):
"""Make probability estimates for labels.
@@ -30,11 +31,11 @@ def predict_proba(self, X):
try:
return self._component_obj.predict_proba(X)
except AttributeError:
raise RuntimeError("Estimator requires a predict_proba method or a component_obj that implements predict_proba")
raise MethodPropertyNotFoundError("Estimator requires a predict_proba method or a component_obj that implements predict_proba")

@property
def feature_importances(self):
try:
return self._component_obj.feature_importances_
except AttributeError:
raise RuntimeError("Estimator requires a feature_importances property or a component_obj that implements feature_importances_")
raise MethodPropertyNotFoundError("Estimator requires a feature_importances property or a component_obj that implements feature_importances_")
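The delegation-and-raise pattern this PR introduces across `ComponentBase` and `Estimator` can be sketched in isolation. `ComponentSketch` and `NoFit` below are hypothetical names used only for illustration and are not part of evalml:

```python
class MethodPropertyNotFoundError(Exception):
    """Raised when a class does not have an expected method or property."""


class ComponentSketch:
    """Hypothetical stand-in for ComponentBase: delegates to a wrapped component_obj."""

    def __init__(self, component_obj):
        self._component_obj = component_obj

    def fit(self, X, y=None):
        try:
            self._component_obj.fit(X, y)
            return self
        except AttributeError:
            # The wrapped object has no fit, so surface the specialized error
            # instead of a generic RuntimeError
            raise MethodPropertyNotFoundError(
                "Component requires a fit method or a component_obj that implements fit")


class NoFit:
    """Hypothetical component_obj that deliberately lacks a fit method."""


caught = False
try:
    ComponentSketch(NoFit()).fit([[1], [2]], [0, 1])
except MethodPropertyNotFoundError:
    caught = True
```

The specialized exception lets callers (such as `fit_transform` below) distinguish "the method is missing" from a method that exists but fails at runtime.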
@@ -14,7 +14,6 @@ class CatBoostRegressor(Estimator):
For more information, check out https://catboost.ai/
"""
name = "CatBoost Regressor"
_needs_fitting = True
hyperparameter_ranges = {
"n_estimators": Integer(10, 1000),
"eta": Real(0, 1),
@@ -8,7 +8,6 @@
class LinearRegressor(Estimator):
"""Linear Regressor"""
name = "Linear Regressor"
_needs_fitting = True
hyperparameter_ranges = {
'fit_intercept': [True, False],
'normalize': [True, False]
@@ -9,7 +9,6 @@
class RandomForestRegressor(Estimator):
"""Random Forest Regressor"""
name = "Random Forest Regressor"
_needs_fitting = True
hyperparameter_ranges = {
"n_estimators": Integer(10, 1000),
"max_depth": Integer(1, 32),
@@ -7,7 +7,6 @@ class OneHotEncoder(CategoricalEncoder):

"""Creates one-hot encoding for non-numeric data"""
name = 'One Hot Encoder'
_needs_fitting = True
hyperparameter_ranges = {}

def __init__(self):
@@ -24,12 +24,12 @@ def get_names(self):
selected_masks = self._component_obj.get_support()
return [feature_name for (selected, feature_name) in zip(selected_masks, self.input_feature_names) if selected]

def transform(self, X):
"""Transforms data X
def transform(self, X, y=None):
"""Transforms data X by selecting features

Arguments:
X (pd.DataFrame): Data to transform

y (pd.Series, optional): Input Labels
Returns:
pd.DataFrame: Transformed X
"""
@@ -50,11 +50,11 @@ def transform(self, X):
raise RuntimeError("Transformer requires a transform method or a component_obj that implements transform")

def fit_transform(self, X, y=None):
"""Fits on X and transforms X
"""Fits feature selector on data X then transforms X by selecting features

Arguments:
X (pd.DataFrame): Data to fit and transform

y (pd.Series): Labels to fit and transform
Returns:
pd.DataFrame: Transformed X
"""
@@ -9,7 +9,6 @@
class RFClassifierSelectFromModel(FeatureSelector):
"""Selects top features based on importance weights using a Random Forest classifier"""
name = 'RF Classifier Select From Model'
_needs_fitting = True
hyperparameter_ranges = {
"percent_features": Real(.01, 1),
"threshold": ['mean', -np.inf]
@@ -9,7 +9,6 @@
class RFRegressorSelectFromModel(FeatureSelector):
"""Selects top features based on importance weights using a Random Forest regressor"""
name = 'RF Regressor Select From Model'
_needs_fitting = True
hyperparameter_ranges = {
"percent_features": Real(.01, 1),
"threshold": ['mean', -np.inf]
@@ -7,7 +7,6 @@
class SimpleImputer(Transformer):
"""Imputes missing data using mean, median, or most_frequent for numerical data, and most_frequent for categorical data"""
name = 'Simple Imputer'
_needs_fitting = True
hyperparameter_ranges = {"impute_strategy": ["mean", "median", "most_frequent"]}

def __init__(self, impute_strategy="most_frequent"):
@@ -17,14 +16,30 @@ def __init__(self, impute_strategy="most_frequent"):
component_obj=imputer,
random_state=0)

def transform(self, X):
def transform(self, X, y=None):
"""Transforms data X by imputing missing values

Arguments:
X (pd.DataFrame): Data to transform
y (pd.Series, optional): Input Labels
[Review thread]
Contributor: I think the optional should go somewhere after the colon?
jeremyliweishih (Author): @dsherry optional inside the parentheses is currently our convention in the codebase. It would be easier to keep this as is and keep track of the convention in a separate issue if we want to change it.
Contributor: Ah cool, didn't know.
Returns:
pd.DataFrame: Transformed X
"""
X_t = self._component_obj.transform(X)
if not isinstance(X_t, pd.DataFrame) and isinstance(X, pd.DataFrame):
# skLearn's SimpleImputer loses track of column type, so we need to restore
X_t = pd.DataFrame(X_t, columns=X.columns, index=X.index).astype(X.dtypes.to_dict())
return X_t

def fit_transform(self, X, y=None):
"""Fits imputer on data X then imputes missing values in X

Arguments:
X (pd.DataFrame): Data to fit and transform
y (pd.Series): Labels to fit and transform
Returns:
pd.DataFrame: Transformed X
"""
X_t = self._component_obj.fit_transform(X, y)
if not isinstance(X_t, pd.DataFrame) and isinstance(X, pd.DataFrame):
# skLearn's SimpleImputer loses track of column type, so we need to restore
Expand Down
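The re-wrapping above exists because scikit-learn's `SimpleImputer` returns a plain NumPy array, discarding column names, index, and dtypes. A minimal sketch of the restoration (the sample data is illustrative, not from the PR):

```python
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer

X = pd.DataFrame({"a": pd.Series([1.0, np.nan, 3.0], dtype="float64"),
                  "b": pd.Series([0, 1, 1], dtype="int64")})

# sklearn returns an ndarray: column names, index, and dtypes are lost
raw = SimpleImputer(strategy="most_frequent").fit_transform(X)

# Restore DataFrame structure and the original column dtypes, as the component does
X_t = pd.DataFrame(raw, columns=X.columns, index=X.index).astype(X.dtypes.to_dict())
```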
@@ -6,7 +6,6 @@
class StandardScaler(Transformer):
"""Standardize features: removes mean and scales to unit variance"""
name = "Standard Scaler"
_needs_fitting = True
hyperparameter_ranges = {}

def __init__(self):
22 changes: 14 additions & 8 deletions evalml/pipelines/components/transformers/transformer.py
@@ -1,5 +1,6 @@
import pandas as pd

from evalml.exceptions import MethodPropertyNotFoundError
from evalml.pipelines.components import ComponentBase


@@ -8,12 +9,12 @@ class Transformer(ComponentBase):
These components are used before an estimator.
"""

def transform(self, X):
def transform(self, X, y=None):
"""Transforms data X

Arguments:
X (pd.DataFrame): Data to transform

y (pd.Series, optional): Input Labels
Returns:
pd.DataFrame: Transformed X
"""
@@ -23,21 +24,26 @@ def transform(self, X):
X_t = pd.DataFrame(X_t, columns=X.columns, index=X.index)
return X_t
except AttributeError:
raise RuntimeError("Transformer requires a transform method or a component_obj that implements transform")
raise MethodPropertyNotFoundError("Transformer requires a transform method or a component_obj that implements transform")

def fit_transform(self, X, y=None):
"""Fits on X and transforms X

Arguments:
X (pd.DataFrame): Data to fit and transform

y (pd.DataFrame): Labels to fit and transform
Returns:
pd.DataFrame: Transformed X
"""
try:
X_t = self._component_obj.fit_transform(X, y)
if not isinstance(X_t, pd.DataFrame) and isinstance(X, pd.DataFrame):
X_t = pd.DataFrame(X_t, columns=X.columns, index=X.index)
return X_t
except AttributeError:
raise RuntimeError("Transformer requires a fit_transform method or a component_obj that implements fit_transform")
try:
self.fit(X, y)
X_t = self.transform(X, y)
except MethodPropertyNotFoundError as e:
raise e
[Review thread]
Contributor: +1 to this. Let's make sure we have test coverage for:
  • Component has valid fit_transform method
  • Component has fit_transform method but it throws an exception, like a RuntimeError
  • Component has no fit_transform method, but fit and transform work
  • Component has no fit_transform method, and fit or transform throw
if not isinstance(X_t, pd.DataFrame) and isinstance(X, pd.DataFrame):
X_t = pd.DataFrame(X_t, columns=X.columns, index=X.index)
return X_t
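The new `fit_transform` first tries the wrapped object's own `fit_transform`, then falls back to `fit` followed by `transform`. A simplified sketch of that fallback (the real method also re-raises `MethodPropertyNotFoundError` from the fallback path and re-wraps DataFrames; `FallbackTransformer` and `FitAndTransformOnly` are hypothetical illustrations):

```python
class FallbackTransformer:
    """Sketch of Transformer.fit_transform's fallback behavior."""

    def __init__(self, component_obj):
        self._component_obj = component_obj

    def fit(self, X, y=None):
        self._component_obj.fit(X, y)
        return self

    def transform(self, X, y=None):
        return self._component_obj.transform(X)

    def fit_transform(self, X, y=None):
        try:
            # Preferred path: the wrapped object implements fit_transform itself
            return self._component_obj.fit_transform(X, y)
        except AttributeError:
            # Fallback path: fit, then transform
            self.fit(X, y)
            return self.transform(X, y)


class FitAndTransformOnly:
    """Hypothetical component_obj with fit and transform but no fit_transform."""

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return [row + ["t"] for row in X]


out = FallbackTransformer(FitAndTransformOnly()).fit_transform([["a"], ["b"]])
```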
6 changes: 2 additions & 4 deletions evalml/pipelines/pipeline_base.py
@@ -134,10 +134,8 @@ def _fit(self, X, y):
y_t = y
for component in self.component_list[:-1]:
self.input_feature_names.update({component.name: list(pd.DataFrame(X_t))})
if component._needs_fitting:
X_t = component.fit_transform(X_t, y_t)
else:
X_t = component.transform(X_t, y_t)
X_t = component.fit_transform(X_t, y_t)

self.input_feature_names.update({self.estimator.name: list(pd.DataFrame(X_t))})
self.estimator.fit(X_t, y_t)

Expand Down
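With `_needs_fitting` gone, `_fit` can call `fit_transform` unconditionally on every intermediate component instead of branching. A toy sketch of the simplified loop (`AddOne`, `Double`, and `SumEstimator` are hypothetical components, not evalml classes):

```python
class AddOne:
    def fit_transform(self, X, y=None):
        return [x + 1 for x in X]


class Double:
    def fit_transform(self, X, y=None):
        return [x * 2 for x in X]


class SumEstimator:
    def fit(self, X, y=None):
        self.total_ = sum(X)
        return self


component_list = [AddOne(), Double(), SumEstimator()]
X_t, y_t = [1, 2, 3], None

# Every transformer is fit via fit_transform; no _needs_fitting branch remains
for component in component_list[:-1]:
    X_t = component.fit_transform(X_t, y_t)

estimator = component_list[-1]
estimator.fit(X_t, y_t)
```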