Added utility function to create pipeline instance from a list of component instances #1176

Merged (6 commits) on Sep 17, 2020
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
@@ -13,6 +13,7 @@ Release Notes
        * Added the corresponding probability threshold for each point displayed in `graph_roc_curve` :pr:`1161`
        * Added support for multiclass classification for `roc_curve` :pr:`1164`
        * Added `categories` accessor to `OneHotEncoder` for listing the categories associated with a feature :pr:`1182`
        * Added utility function to create pipeline instances from a list of component instances :pr:`1176`
    * Fixes
        * Fixed XGBoost column names for partial dependence methods :pr:`1104`
        * Removed dead code validating column type from `TextFeaturizer` :pr:`1122`
51 changes: 41 additions & 10 deletions evalml/pipelines/utils.py
@@ -12,6 +12,7 @@
    CatBoostRegressor,
    DateTimeFeaturizer,
    DropNullColumns,
    Estimator,
    Imputer,
    OneHotEncoder,
    StandardScaler
@@ -60,6 +61,16 @@ def _get_preprocessing_components(X, y, problem_type, estimator_class):
    return pp_components


def _get_pipeline_base_class(problem_type):
    """Returns pipeline base class for problem_type"""
    if problem_type == ProblemTypes.BINARY:
        return BinaryClassificationPipeline
    elif problem_type == ProblemTypes.MULTICLASS:
        return MulticlassClassificationPipeline
    elif problem_type == ProblemTypes.REGRESSION:
        return RegressionPipeline


def make_pipeline(X, y, estimator, problem_type):
    """Given input data, target data, an estimator class and the problem type,
    generates a pipeline class with a preprocessing chain which was recommended based on the inputs.

@@ -85,20 +96,40 @@ def make_pipeline(X, y, estimator, problem_type):
     if not isinstance(X, pd.DataFrame):
         X = pd.DataFrame(X)

-    def get_pipeline_base_class(problem_type):
-        """Returns pipeline base class for problem_type"""
-        if problem_type == ProblemTypes.BINARY:
-            return BinaryClassificationPipeline
-        elif problem_type == ProblemTypes.MULTICLASS:
-            return MulticlassClassificationPipeline
-        elif problem_type == ProblemTypes.REGRESSION:
-            return RegressionPipeline
-
-    base_class = get_pipeline_base_class(problem_type)
+    base_class = _get_pipeline_base_class(problem_type)

     class GeneratedPipeline(base_class):
         custom_name = f"{estimator.name} w/ {' + '.join([component.name for component in preprocessing_components])}"
         component_graph = complete_component_graph
         custom_hyperparameters = hyperparameters

     return GeneratedPipeline


def make_pipeline_from_components(component_instances, problem_type, custom_name=None):
Contributor: This looks great!

Contributor: What happens if fitted components are passed in instead of unfitted components?

Contributor: @christopherbunn one more thing I just noticed: this doesn't show up in the API ref.

Author (christopherbunn): Just checked, fitted components that are passed into this function remain fitted. However, the resulting pipeline doesn't show as fitted even if all of the components are fitted. Should it show as fitted? [screenshot]

Author (christopherbunn): RE: the API ref, not sure why it's not showing up, but I'll wrap it up into the docs improvement PR.
    """Given a list of component instances and the problem type, a pipeline instance is generated with the component instances.
    The pipeline will be a subclass of the appropriate pipeline base class for the specified problem_type. A custom name for
    the pipeline can optionally be specified; otherwise the default pipeline name will be 'Templated Pipeline'.

    Arguments:
        component_instances (list): a list of all of the components to include in the pipeline
        problem_type (str or ProblemTypes): problem type for the pipeline to generate
        custom_name (string): a name for the new pipeline

    Returns:
        Pipeline instance with component instances and specified estimator
    """

Contributor: Might be useful to say that the default name is Templated Pipeline.
Contributor: @christopherbunn could you please include an example usage here? I think that'll help people understand what this does.

Author (christopherbunn): I'll put up a new PR with an example use 👍

    if not isinstance(component_instances[-1], Estimator):
        raise ValueError("Pipeline needs to have an estimator at the last position of the component list")
Author (christopherbunn), on lines +123 to +124: I'm going to leave this check in for the last component to be an estimator. I know that in #1162 it said that there is the possibility that we will need to be able to build a pipeline without an estimator. That should be addressed when making a PR to resolve #712.

    pipeline_name = custom_name
    problem_type = handle_problem_types(problem_type)

    class TemplatedPipeline(_get_pipeline_base_class(problem_type)):
        custom_name = pipeline_name
        component_graph = [c.__class__ for c in component_instances]

    pipeline_instance = TemplatedPipeline({})
    pipeline_instance.component_graph = component_instances
Contributor: @christopherbunn yeah this works. I think we should update this impl though. Technically, setting the component_graph directly is bad.

    class TemplatedPipeline(_get_pipeline_base_class(problem_type)):
        custom_name = pipeline_name
        component_graph = [c.__class__ for c in component_instances]
    return TemplatedPipeline({c.name: c.parameters for c in component_instances})

Author (christopherbunn): I see, I'll update the implementation in the new PR.
    return pipeline_instance
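The mechanism above — deriving a TemplatedPipeline subclass whose class-level component_graph lists the component classes, then seeding the instance with the already-built component instances — can be sketched in plain Python. Everything below (Component, BinaryPipeline, and the toy Imputer/RandomForestClassifier) is a simplified stand-in for illustration, not the real evalml API:

```python
# Minimal stand-ins for evalml's component and pipeline base classes.
class Component:
    name = "Component"

    def __init__(self, **parameters):
        self.parameters = parameters


class Imputer(Component):
    name = "Imputer"


class RandomForestClassifier(Component):
    name = "Random Forest Classifier"


class BinaryPipeline:
    custom_name = None
    component_graph = []  # class-level: component *classes*

    def __init__(self, parameters):
        self.parameters = parameters


def make_pipeline_from_components(component_instances, custom_name=None):
    pipeline_name = custom_name

    # Dynamically derive a pipeline subclass; the class body reads
    # pipeline_name and component_instances from the enclosing scope.
    class TemplatedPipeline(BinaryPipeline):
        custom_name = pipeline_name
        component_graph = [c.__class__ for c in component_instances]

    # Build the instance from the instances' own parameters, then swap
    # in the already-constructed component instances.
    pipeline = TemplatedPipeline({c.name: c.parameters for c in component_instances})
    pipeline.component_graph = component_instances
    return pipeline


pipeline = make_pipeline_from_components(
    [Imputer(numeric_impute_strategy="median"), RandomForestClassifier()],
    custom_name="My Pipeline",
)
print(pipeline.custom_name)                        # My Pipeline
print([c.name for c in pipeline.component_graph])  # ['Imputer', 'Random Forest Classifier']
```

Constructing the instance from `{c.name: c.parameters ...}`, as the reviewer suggests, keeps the class-level component_graph (classes) and the instance parameters consistent instead of overwriting the attribute after construction.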
44 changes: 43 additions & 1 deletion evalml/tests/pipeline_tests/test_pipelines.py
@@ -27,6 +27,7 @@
    DropNullColumns,
    ElasticNetClassifier,
    ElasticNetRegressor,
    Estimator,
    Imputer,
    LinearRegressor,
    LogisticRegressionClassifier,
@@ -41,7 +42,11 @@
    _all_estimators_used_in_search,
    allowed_model_families
)
-from evalml.pipelines.utils import get_estimators, make_pipeline
+from evalml.pipelines.utils import (
+    get_estimators,
+    make_pipeline,
+    make_pipeline_from_components
+)
from evalml.problem_types import ProblemTypes
from evalml.utils.gen_utils import (
    categorical_dtypes,

@@ -240,6 +245,43 @@ def test_make_pipeline_problem_type_mismatch():
        make_pipeline(pd.DataFrame(), pd.Series(), Transformer, ProblemTypes.MULTICLASS)


def test_make_pipeline_from_components():
Author (christopherbunn): There is already an existing function that is called make_pipeline, so I split this off into its own name. We could potentially overload the previous function, but it seemed cleaner to me to separate it off.
    with pytest.raises(ValueError, match="Pipeline needs to have an estimator at the last position of the component list"):
        make_pipeline_from_components([Imputer], problem_type='binary')

    imp = Imputer(numeric_impute_strategy='median')
    est = RandomForestClassifier()
    pipeline = make_pipeline_from_components([imp, est], ProblemTypes.BINARY, custom_name='My Pipeline')
    components_list = pipeline.component_graph
    assert components_list == [imp, est]
    assert pipeline.problem_type == ProblemTypes.BINARY
    assert pipeline.custom_name == 'My Pipeline'
    expected_parameters = {
        'Imputer': {
            'categorical_impute_strategy': 'most_frequent',
            'numeric_impute_strategy': 'median',
            'categorical_fill_value': None,
            'numeric_fill_value': None},
        'Random Forest Classifier': {
            'n_estimators': 100,
            'max_depth': 6,
            'n_jobs': -1}
    }
    assert pipeline.parameters == expected_parameters

    class DummyEstimator(Estimator):
        name = "Dummy!"
        model_family = "foo"
        supported_problem_types = [ProblemTypes.BINARY]
        parameters = {'bar': 'baz'}

    pipeline = make_pipeline_from_components([DummyEstimator()], ProblemTypes.BINARY)
    components_list = pipeline.component_graph
    assert len(components_list) == 1
    assert isinstance(components_list[0], DummyEstimator)
    expected_parameters = {'Dummy!': {'bar': 'baz'}}
    assert pipeline.parameters == expected_parameters
Contributor: We should also check that you can fit/predict with this pipeline instance.

Additionally, I'd like to see a test which a) creates a pipeline normally, fits it on some data and generates predictions, b) uses make_pipeline_from_components with the component graph from the first pipeline, fits that instance on the same data and generates predictions on the same data, and c) asserts the predictions are identical.


def test_required_fields():
    class TestPipelineWithoutComponentGraph(PipelineBase):
        pass