alteryx · jeremyliweishih · Oct 21, 2021 · Oct 21, 2021 · Oct 21, 2021 · Oct 21, 2021
diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
@@ -10,6 +10,7 @@ Release Notes
     * Fixes
         * Fixed bug where partial dependence was not respecting the ww schema :pr:`2929`
         * Fixed ``calculate_permutation_importance`` for datetimes on ``StandardScaler`` :pr:`2938`
+        * Fixed ``SelectColumns`` to select only the columns that are present in the input data, for feature selection in ``DefaultAlgorithm`` :pr:`2944`
     * Changes
         * Changed ``make_pipeline`` function to place the ``DateTimeFeaturizer`` prior to the ``Imputer`` so that ``NaN`` dates can be imputed :pr:`2909`
     * Documentation Changes

diff --git a/evalml/pipelines/components/transformers/column_selectors.py b/evalml/pipelines/components/transformers/column_selectors.py
@@ -101,7 +101,7 @@ class SelectColumns(ColumnSelector):
     """Selects specified columns in input data.
 
     Args:
-        columns (list(string)): List of column names, used to determine which columns to select.
+        columns (list(string)): List of column names, used to determine which columns to select. Columns not present in the input data are skipped instead of raising an error.
         random_seed (int): Seed for the random number generator. Defaults to 0.
     """
 
@@ -110,20 +110,26 @@ class SelectColumns(ColumnSelector):
     """{}"""
     needs_fitting = False
 
-    def _modify_columns(self, cols, X, y=None):
-        return X.ww[cols]
+    def _check_input_for_columns(self, X):
+        pass
 
-    def transform(self, X, y=None):
-        """Transforms data X by selecting columns.
+    def fit(self, X, y=None):
+        """No-op fit: returns self without checking whether the column names are present in the dataset.
 
         Args:
-            X (pd.DataFrame): Data to transform.
+            X (pd.DataFrame): Input data. Not inspected by this method.
             y (pd.Series, optional): Targets.
 
         Returns:
-            pd.DataFrame: Transformed X.
+            self
         """
-        return super().transform(X, y)
+        return self
+
+    def _modify_columns(self, cols, X, y=None):
+        column_intersection = list(
+            sorted(set(cols).intersection(X.columns), key=cols.index)
+        )
+        return X.ww[column_intersection]
 
 
 class SelectByType(ColumnSelector):

diff --git a/evalml/tests/component_tests/test_column_selector_transformers.py b/evalml/tests/component_tests/test_column_selector_transformers.py
@@ -50,8 +50,9 @@ def test_column_transformer_empty_X(class_to_test):
         transformer = class_to_test(column_types=["not in data"])
     else:
         transformer = class_to_test(columns=["not in data"])
-    with pytest.raises(ValueError, match="not found in input data"):
-        transformer.fit(X)
+    if class_to_test is not SelectColumns:
+        with pytest.raises(ValueError, match="not found in input data"):
+            transformer.fit(X)
 
     transformer = class_to_test(columns=list(X.columns))
     assert transformer.transform(X).empty
@@ -187,10 +188,10 @@ def test_column_transformer_fit_transform(class_to_test, checking_functions):
         assert check3(X, class_to_test(columns=list(X.columns)).fit_transform(X))
 
 
-@pytest.mark.parametrize("class_to_test", [DropColumns, SelectColumns])
-def test_drop_column_transformer_input_invalid_col_name(class_to_test):
+def test_drop_column_transformer_input_invalid_col_name():
     X = pd.DataFrame({"one": [1, 2, 3, 4], "two": [2, 3, 4, 5], "three": [1, 2, 3, 4]})
-    transformer = class_to_test(columns=["not in data"])
+    transformer = DropColumns(columns=["not in data"])
+
     with pytest.raises(ValueError, match="not found in input data"):
         transformer.fit(X)
     with pytest.raises(ValueError, match="not found in input data"):
@@ -199,7 +200,7 @@ def test_drop_column_transformer_input_invalid_col_name(class_to_test):
         transformer.fit_transform(X)
 
     X = np.arange(12).reshape(3, 4)
-    transformer = class_to_test(columns=[5])
+    transformer = DropColumns(columns=[5])
     with pytest.raises(ValueError, match="not found in input data"):
         transformer.fit(X)
     with pytest.raises(ValueError, match="not found in input data"):
@@ -282,3 +283,11 @@ def test_typeortag_column_transformer_ww_logical_and_semantic_types():
 
     X_t = SelectByType(column_types=["numeric"]).fit_transform(X)
     assert X_t.astype(str).equals(X[["three", "four"]].astype(str))
+
+
+def test_column_selector_missing_columns():
+    selector = SelectColumns(columns=["A", "B", "C", "D"])
+    X = pd.DataFrame(columns=["A", "C", "F", "G"])
+
+    X_t = selector.fit_transform(X)
+    assert (X_t.columns == ["A", "C"]).all()