From afd369da7e5e26f33cb1dfda37fcdeafe060acfe Mon Sep 17 00:00:00 2001
From: Jeremy <jeremyliweishih@gmail.com>
Date: Tue, 21 Feb 2023 15:04:24 -0500
Subject: [PATCH 1/5] first pass with test

---
 evalml/pipelines/utils.py                     | 69 +++++++++++++++++++
 .../pipeline_tests/test_pipeline_utils.py     | 24 +++++++
 2 files changed, 93 insertions(+)

diff --git a/evalml/pipelines/utils.py b/evalml/pipelines/utils.py
index e4d212e218..bfdbffb0dd 100644
--- a/evalml/pipelines/utils.py
+++ b/evalml/pipelines/utils.py
@@ -638,6 +638,75 @@ def generate_pipeline_code(element):
     return "\n".join(code_strings)
 
 
+def generate_pipeline_example(
+    pipeline,
+    path_to_train,
+    path_to_holdout,
+    target,
+    path_to_mapping="",
+    output_file_path=None,
+):
+    """Creates and returns a string containing a runnable Python example script that rebuilds the EvalML pipeline, fits it on the training data, and predicts on the holdout data.
+
+    Args:
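+        pipeline (pipeline instance): The pipeline instance to generate the example Python code for.
+        path_to_train (str): Path to the training data CSV file.
+        path_to_holdout (str): Path to the holdout data CSV file.
+        target (str): Name of the target column; it must be present in both the training and holdout files.
+        path_to_mapping (str): Path to the column mapping JSON file. Defaults to "".
+        output_file_path (str): Path of the Python file to write the example to. If None or empty, no file is written. Defaults to None.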
+
+    Returns:
+        str: String representation of Python code that can be run separately in order to recreate the pipeline instance.
+        Does not include code for custom component implementation.
+
+    """
+    output_str = f"""
+import evalml
+import woodwork
+import pandas as pd
+
+PATH_TO_TRAIN = "{path_to_train}"
+PATH_TO_HOLDOUT = "{path_to_holdout}"
+TARGET = "{target}"
+column_mapping = "{path_to_mapping}"
+
+# This is the machine learning pipeline you have exported.
+# By running this code you will fit the pipeline on the files provided
+# and you can then use this pipeline for prediction and model understanding.
+{generate_pipeline_code(pipeline)}
+
+print(pipeline.name)
+print(pipeline.parameters)
+pipeline.describe()
+
+df = pd.read_csv(PATH_TO_TRAIN)
+y_train = df[TARGET]
+X_train = df.drop(TARGET, axis=1)
+
+pipeline.fit(X_train, y_train)
+
+# You can now generate predictions as well as run model understanding.
+df = pd.read_csv(PATH_TO_HOLDOUT)
+y_holdout = df[TARGET]
+X_holdout= df.drop(TARGET, axis=1)
+
+pipeline.predict(X_holdout)
+
+# Note: to predict on new data you have on hand
+# Map the column names to AML internal names and run prediction
+# X_test = X_test.rename(column_mapping, axis=1)
+# pipeline.predict(X_test)
+
+# For more info please check out:
+# https://evalml.alteryx.com/en/stable/user_guide/automl.html
+  """
+    if output_file_path:
+        with open(output_file_path, "w") as text_file:
+            text_file.write(output_str)
+    return output_str
+
+
 def _make_stacked_ensemble_pipeline(
     input_pipelines,
     problem_type,
diff --git a/evalml/tests/pipeline_tests/test_pipeline_utils.py b/evalml/tests/pipeline_tests/test_pipeline_utils.py
index 9bc8c82755..fdd06f83e6 100644
--- a/evalml/tests/pipeline_tests/test_pipeline_utils.py
+++ b/evalml/tests/pipeline_tests/test_pipeline_utils.py
@@ -1,3 +1,4 @@
+import os
 from unittest.mock import patch
 
 import numpy as np
@@ -45,6 +46,7 @@
     _get_preprocessing_components,
     _make_pipeline_from_multiple_graphs,
     generate_pipeline_code,
+    generate_pipeline_example,
     get_estimators,
     is_classification,
     is_regression,
@@ -827,6 +829,28 @@ def __init__(self, random_arg=False, random_seed=0):
     assert pipeline == expected_code
 
 
+def test_generate_pipeline_example(tmpdir, breast_cancer_local):
+    path = os.path.join(str(tmpdir), "train.csv")
+    X, y = breast_cancer_local
+
+    from evalml import AutoMLSearch
+
+    aml = AutoMLSearch(X_train=X, y_train=y, problem_type="binary")
+    aml.search()
+    binary_pipeline = aml.best_pipeline
+    # binary_pipeline = BinaryClassificationPipeline(
+    #     ["Imputer", "Random Forest Classifier"],
+    # )
+
+    X["target"] = y
+    X.to_csv(path)
+
+    pipeline_example = generate_pipeline_example(binary_pipeline, path, path, "target")
+    print(pipeline_example)
+    exec(pipeline_example)
+    assert pipeline_example
+
+
 def test_rows_of_interest_errors(X_y_binary):
     pipeline = BinaryClassificationPipeline(
         component_graph=["Logistic Regression Classifier"],

From b3c3315cfe164ae2d7c7064f3af30d821f800add Mon Sep 17 00:00:00 2001
From: Jeremy <jeremyliweishih@gmail.com>
Date: Tue, 21 Feb 2023 15:36:28 -0500
Subject: [PATCH 2/5] Cover all problem_types

---
 evalml/pipelines/utils.py                     | 19 +++++-
 .../pipeline_tests/test_pipeline_utils.py     | 63 ++++++++++++++++---
 2 files changed, 71 insertions(+), 11 deletions(-)

diff --git a/evalml/pipelines/utils.py b/evalml/pipelines/utils.py
index bfdbffb0dd..9a59424277 100644
--- a/evalml/pipelines/utils.py
+++ b/evalml/pipelines/utils.py
@@ -690,7 +690,9 @@ def generate_pipeline_example(
 df = pd.read_csv(PATH_TO_HOLDOUT)
 y_holdout = df[TARGET]
 X_holdout= df.drop(TARGET, axis=1)
-
+"""
+    if not is_time_series(pipeline.problem_type):
+        output_str += """
 pipeline.predict(X_holdout)
 
 # Note: to predict on new data you have on hand
@@ -700,7 +702,20 @@ def generate_pipeline_example(
 
 # For more info please check out:
 # https://evalml.alteryx.com/en/stable/user_guide/automl.html
-  """
+"""
+    else:
+        output_str += """
+pipeline.predict(X_holdout, X_train=X_train, y_train=y_train)
+
+# Note: to predict on new data you have on hand
+# Map the column names to AML internal names and run prediction
+# X_test = X_test.rename(column_mapping, axis=1)
+# pipeline.predict(X_test, X_train=X_train, y_train=y_train)
+
+# For more info please check out:
+# https://evalml.alteryx.com/en/stable/user_guide/automl.html
+"""
+
     if output_file_path:
         with open(output_file_path, "w") as text_file:
             text_file.write(output_str)
diff --git a/evalml/tests/pipeline_tests/test_pipeline_utils.py b/evalml/tests/pipeline_tests/test_pipeline_utils.py
index fdd06f83e6..0d56be6bbe 100644
--- a/evalml/tests/pipeline_tests/test_pipeline_utils.py
+++ b/evalml/tests/pipeline_tests/test_pipeline_utils.py
@@ -829,23 +829,68 @@ def __init__(self, random_arg=False, random_seed=0):
     assert pipeline == expected_code
 
 
-def test_generate_pipeline_example(tmpdir, breast_cancer_local):
+@pytest.mark.parametrize(
+    "automl_type",
+    [
+        ProblemTypes.BINARY,
+        ProblemTypes.MULTICLASS,
+        ProblemTypes.REGRESSION,
+        ProblemTypes.TIME_SERIES_REGRESSION,
+        ProblemTypes.TIME_SERIES_MULTICLASS,
+        ProblemTypes.TIME_SERIES_BINARY,
+    ],
+)
+def test_generate_pipeline_example(
+    automl_type,
+    tmpdir,
+    AutoMLTestEnv,
+    X_y_binary,
+    X_y_multi,
+    X_y_regression,
+    ts_data,
+):
     path = os.path.join(str(tmpdir), "train.csv")
-    X, y = breast_cancer_local
+    if automl_type == ProblemTypes.BINARY:
+        X, y = X_y_binary
+    elif automl_type == ProblemTypes.MULTICLASS:
+        X, y = X_y_multi
+    elif automl_type == ProblemTypes.REGRESSION:
+        X, y = X_y_regression
+    elif (
+        automl_type == ProblemTypes.TIME_SERIES_MULTICLASS
+        or automl_type == ProblemTypes.TIME_SERIES_BINARY
+    ):
+        X, _, y = ts_data(problem_type=automl_type)
+    else:
+        X, _, y = ts_data(problem_type=automl_type)
 
     from evalml import AutoMLSearch
 
-    aml = AutoMLSearch(X_train=X, y_train=y, problem_type="binary")
-    aml.search()
-    binary_pipeline = aml.best_pipeline
-    # binary_pipeline = BinaryClassificationPipeline(
-    #     ["Imputer", "Random Forest Classifier"],
-    # )
+    aml = AutoMLSearch(
+        X_train=X,
+        y_train=y,
+        problem_type=automl_type,
+        optimize_thresholds=False,
+        max_time=1,
+        max_iterations=5,
+        problem_configuration={
+            "time_index": "date",
+            "gap": 1,
+            "max_delay": 1,
+            "forecast_horizon": 3,
+        }
+        if is_time_series(automl_type)
+        else None,
+    )
+    env = AutoMLTestEnv(automl_type)
+    with env.test_context(score_return_value={aml.objective.name: 1.0}):
+        aml.search()
+    pipeline = aml.best_pipeline
 
     X["target"] = y
     X.to_csv(path)
 
-    pipeline_example = generate_pipeline_example(binary_pipeline, path, path, "target")
+    pipeline_example = generate_pipeline_example(pipeline, path, path, "target")
     print(pipeline_example)
     exec(pipeline_example)
     assert pipeline_example

From 574156a38a528d37c0cb46c7421f58f3a2deacbe Mon Sep 17 00:00:00 2001
From: Jeremy <jeremyliweishih@gmail.com>
Date: Tue, 21 Feb 2023 15:37:42 -0500
Subject: [PATCH 3/5] RL

---
 docs/source/release_notes.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
index 47478f4b08..9b155759dd 100644
--- a/docs/source/release_notes.rst
+++ b/docs/source/release_notes.rst
@@ -2,6 +2,7 @@ Release Notes
 -------------
 **Future Releases**
     * Enhancements
+        * Implement ``generate_pipeline_example`` :pr:`4023`
     * Fixes
     * Changes
         * Increase min catboost to 1.1.1 and xgboost to 1.7.0 to add nullable type support for those estimators :pr:`3996`

From a0b3d75ca56bb2bef8ade0c991a0c8fd5aa60146 Mon Sep 17 00:00:00 2001
From: Jeremy <jeremyliweishih@gmail.com>
Date: Wed, 22 Feb 2023 10:38:57 -0500
Subject: [PATCH 4/5] Assert file exists

---
 evalml/tests/pipeline_tests/test_pipeline_utils.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/evalml/tests/pipeline_tests/test_pipeline_utils.py b/evalml/tests/pipeline_tests/test_pipeline_utils.py
index 0d56be6bbe..4a2446f5fd 100644
--- a/evalml/tests/pipeline_tests/test_pipeline_utils.py
+++ b/evalml/tests/pipeline_tests/test_pipeline_utils.py
@@ -889,11 +889,16 @@ def test_generate_pipeline_example(
 
     X["target"] = y
     X.to_csv(path)
-
-    pipeline_example = generate_pipeline_example(pipeline, path, path, "target")
-    print(pipeline_example)
+    output_path = os.path.join(str(tmpdir), "example.py")
+    pipeline_example = generate_pipeline_example(
+        pipeline=pipeline,
+        path_to_train=path,
+        path_to_holdout=path,
+        target="target",
+        output_file_path=output_path,
+    )
     exec(pipeline_example)
-    assert pipeline_example
+    assert os.path.exists(output_path)
 
 
 def test_rows_of_interest_errors(X_y_binary):

From 6523ce7935512516badc49cbda6c9df430f3df6f Mon Sep 17 00:00:00 2001
From: Jeremy <jeremyliweishih@gmail.com>
Date: Fri, 24 Feb 2023 11:05:31 -0500
Subject: [PATCH 5/5] Add asserts

---
 evalml/pipelines/utils.py                          |  8 ++++----
 evalml/tests/pipeline_tests/test_pipeline_utils.py | 11 +++++++++++
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/evalml/pipelines/utils.py b/evalml/pipelines/utils.py
index 3f6cf1fb34..a475c14ea6 100644
--- a/evalml/pipelines/utils.py
+++ b/evalml/pipelines/utils.py
@@ -703,8 +703,8 @@ def generate_pipeline_example(
         output_str += """
 pipeline.predict(X_holdout)
 
-# Note: to predict on new data you have on hand
-# Map the column names to AML internal names and run prediction
+# Note: if you have a column mapping, to predict on new data you have on hand
+# Map the column names and run prediction
 # X_test = X_test.rename(column_mapping, axis=1)
 # pipeline.predict(X_test)
 
@@ -715,8 +715,8 @@ def generate_pipeline_example(
         output_str += """
 pipeline.predict(X_holdout, X_train=X_train, y_train=y_train)
 
-# Note: to predict on new data you have on hand
-# Map the column names to AML internal names and run prediction
+# Note: if you have a column mapping, to predict on new data you have on hand
+# Map the column names and run prediction
 # X_test = X_test.rename(column_mapping, axis=1)
 # pipeline.predict(X_test, X_train=X_train, y_train=y_train)
 
diff --git a/evalml/tests/pipeline_tests/test_pipeline_utils.py b/evalml/tests/pipeline_tests/test_pipeline_utils.py
index fa4d435365..652c7fbb18 100644
--- a/evalml/tests/pipeline_tests/test_pipeline_utils.py
+++ b/evalml/tests/pipeline_tests/test_pipeline_utils.py
@@ -904,6 +904,17 @@ def test_generate_pipeline_example(
         target="target",
         output_file_path=output_path,
     )
+    assert f'PATH_TO_TRAIN = "{path}"' in pipeline_example
+    assert f'PATH_TO_HOLDOUT = "{path}"' in pipeline_example
+    assert 'TARGET = "target"' in pipeline_example
+    assert 'column_mapping = ""' in pipeline_example
+    assert generate_pipeline_code(pipeline) in pipeline_example
+
+    if is_time_series(automl_type):
+        assert "predict(X_test, X_train=X_train, y_train=y_train)" in pipeline_example
+    else:
+        assert "predict(X_test)" in pipeline_example
+
     exec(pipeline_example)
     assert os.path.exists(output_path)