alteryx · freddyaboulton · Jun 24, 2021 · Jun 22, 2021 · Jun 22, 2021 · Jun 23, 2021
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -101,6 +101,8 @@
 html_theme_options = {
     "github_url": "https://github.com/alteryx/evalml",
     "twitter_url": "https://twitter.com/AlteryxOSS",
+    "collapse_navigation": True,
+    "navigation_depth": 2,
 }
 
 # The name of an image file (relative to this directory) to place at the top

diff --git a/docs/source/demos/cost_benefit_matrix.ipynb b/docs/source/demos/cost_benefit_matrix.ipynb
@@ -122,7 +122,8 @@
    "outputs": [],
    "source": [
     "from evalml import AutoMLSearch\n",
-    "automl = AutoMLSearch(X_train=X_train, y_train=y_train, problem_type='binary', objective='log loss binary')\n",
+    "automl = AutoMLSearch(X_train=X_train, y_train=y_train, problem_type='binary', objective='log loss binary',\n",
+    "                      max_iterations=5)\n",
     "automl.search()\n",
     "\n",
     "ll_pipeline = automl.best_pipeline\n",
@@ -172,7 +173,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "automl = AutoMLSearch(X_train=X_train, y_train=y_train, problem_type='binary', objective=cost_benefit_matrix)\n",
+    "automl = AutoMLSearch(X_train=X_train, y_train=y_train, problem_type='binary', objective=cost_benefit_matrix,\n",
+    "                      max_iterations=5)\n",
     "automl.search()\n",
     "\n",
     "cbm_pipeline = automl.best_pipeline"

diff --git a/docs/source/demos/fraud.ipynb b/docs/source/demos/fraud.ipynb
@@ -110,6 +110,7 @@
     "                      problem_type='binary', \n",
     "                      objective=fraud_objective,\n",
     "                      additional_objectives=['auc', 'f1', 'precision'],\n",
+    "                      allowed_model_families=[\"random_forest\", \"linear_model\"],\n",
     "                      max_batches=1,\n",
     "                      optimize_thresholds=True)\n",
     "\n",
@@ -213,6 +214,7 @@
     "                          objective='auc',\n",
     "                          additional_objectives=['f1', 'precision'],\n",
     "                          max_batches=1,\n",
+    "                          allowed_model_families=[\"random_forest\", \"linear_model\"],\n",
     "                          optimize_thresholds=True)\n",
     "\n",
     "automl_auc.search()"

diff --git a/docs/source/demos/lead_scoring.ipynb b/docs/source/demos/lead_scoring.ipynb
@@ -133,7 +133,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Because the lead scoring labels are binary, we will use `AutoMLSearch(X_train=X_train, y_train=y_train, problem_type='binary')`. When we call `.search()`, the search for the best pipeline will begin. "
+    "Because the lead scoring labels are binary, we will set the problem type to \"binary\". When we call `.search()`, the search for the best pipeline will begin. "
    ]
   },
   {
@@ -146,6 +146,7 @@
     "                      problem_type='binary',\n",
     "                      objective=lead_scoring_objective,\n",
     "                      additional_objectives=['auc'],\n",
+    "                      allowed_model_families=[\"catboost\", \"random_forest\", \"linear_model\"],\n",
     "                      max_batches=1)\n",
     "\n",
     "automl.search()"
@@ -241,6 +242,7 @@
     "                                 problem_type='binary',\n",
     "                                 objective='auc',\n",
     "                                 additional_objectives=[lead_scoring_objective],\n",
+    "                                 allowed_model_families=[\"catboost\", \"random_forest\", \"linear_model\"],\n",
     "                                 max_batches=1)\n",
     "\n",
     "automl_auc.search()"

diff --git a/docs/source/demos/text_input.ipynb b/docs/source/demos/text_input.ipynb
@@ -38,7 +38,7 @@
     "import pandas as pd\n",
     "\n",
     "input_data = urlopen('https://featurelabs-static.s3.amazonaws.com/spam_text_messages_modified.csv')\n",
-    "data = pd.read_csv(input_data)\n",
+    "data = pd.read_csv(input_data)[:750]\n",
     "\n",
     "X = data.drop(['Category'], axis=1)\n",
     "y = data['Category']\n",

diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
@@ -7,6 +7,7 @@ Release Notes
     * Changes
         * Updated psutils minimum version in requirements :pr:`2438`
     * Documentation Changes
+        * Sped up docs :pr:`2430`
     * Testing Changes
 
 .. warning::

diff --git a/docs/source/start.ipynb b/docs/source/start.ipynb
@@ -38,7 +38,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "X, y = evalml.demos.load_fraud(n_rows=1000)"
+    "X, y = evalml.demos.load_fraud(n_rows=250)"
    ]
   },
   {

diff --git a/docs/source/user_guide/automl.ipynb b/docs/source/user_guide/automl.ipynb
@@ -53,7 +53,7 @@
    "source": [
     "import evalml\n",
     "from evalml.utils import infer_feature_types\n",
-    "X, y = evalml.demos.load_fraud(n_rows=1000)"
+    "X, y = evalml.demos.load_fraud(n_rows=250)"
    ]
   },
   {
@@ -607,10 +607,11 @@
    "outputs": [],
    "source": [
     "X, y = evalml.demos.load_breast_cancer()\n",
+    "\n",
     "automl_with_ensembling = AutoMLSearch(X_train=X, y_train=y,\n",
     "                                      problem_type=\"binary\",\n",
-    "                                      allowed_model_families=[ModelFamily.RANDOM_FOREST, ModelFamily.LINEAR_MODEL],\n",
-    "                                      max_batches=5,\n",
+    "                                      allowed_model_families=[ModelFamily.LINEAR_MODEL],\n",
+    "                                      max_batches=4,\n",
     "                                      ensembling=True)\n",
     "automl_with_ensembling.search()"
    ]

diff --git a/docs/source/user_guide/model_understanding.ipynb b/docs/source/user_guide/model_understanding.ipynb
@@ -40,9 +40,13 @@
     "from evalml.pipelines import BinaryClassificationPipeline\n",
     "X, y = evalml.demos.load_breast_cancer()\n",
     "\n",
+    "X_train, X_holdout, y_train, y_holdout = evalml.preprocessing.split_data(X, y, problem_type='binary',\n",
+    "                                                                         test_size=0.2, random_seed=0)\n",
+    "\n",
+    "\n",
     "pipeline_binary = BinaryClassificationPipeline(['Simple Imputer', 'Random Forest Classifier'])\n",
-    "pipeline_binary.fit(X, y)\n",
-    "print(pipeline_binary.score(X, y, objectives=['log loss binary']))"
+    "pipeline_binary.fit(X_train, y_train)\n",
+    "print(pipeline_binary.score(X_holdout, y_holdout, objectives=['log loss binary']))"
    ]
   },
   {
@@ -95,7 +99,7 @@
    "outputs": [],
    "source": [
     "from evalml.model_understanding import calculate_permutation_importance\n",
-    "calculate_permutation_importance(pipeline_binary, X, y, 'log loss binary')"
+    "calculate_permutation_importance(pipeline_binary, X_holdout, y_holdout, 'log loss binary')"
    ]
   },
   {
@@ -105,7 +109,7 @@
    "outputs": [],
    "source": [
     "from evalml.model_understanding import graph_permutation_importance\n",
-    "graph_permutation_importance(pipeline_binary, X, y, 'log loss binary')"
+    "graph_permutation_importance(pipeline_binary, X_holdout, y_holdout, 'log loss binary')"
    ]
   },
   {
@@ -123,7 +127,7 @@
    "outputs": [],
    "source": [
     "from evalml.model_understanding.graphs import partial_dependence\n",
-    "partial_dependence(pipeline_binary, X, features='mean radius')"
+    "partial_dependence(pipeline_binary, X_holdout, features='mean radius', grid_resolution=5)"
    ]
   },
   {
@@ -133,7 +137,7 @@
    "outputs": [],
    "source": [
     "from evalml.model_understanding.graphs import graph_partial_dependence\n",
-    "graph_partial_dependence(pipeline_binary, X, features='mean radius')"
+    "graph_partial_dependence(pipeline_binary, X_holdout, features='mean radius', grid_resolution=5)"
    ]
   },
   {
@@ -171,7 +175,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "partial_dependence(pipeline_binary, X, features=('worst perimeter', 'worst radius'), grid_resolution=10)"
+    "partial_dependence(pipeline_binary, X_holdout, features=('worst perimeter', 'worst radius'), grid_resolution=5)"
    ]
   },
   {
@@ -180,7 +184,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "graph_partial_dependence(pipeline_binary, X, features=('worst perimeter', 'worst radius'), grid_resolution=10)"
+    "graph_partial_dependence(pipeline_binary, X_holdout, features=('worst perimeter', 'worst radius'), grid_resolution=5)"
    ]
   },
   {
@@ -199,8 +203,8 @@
    "outputs": [],
    "source": [
     "from evalml.model_understanding.graphs import confusion_matrix\n",
-    "y_pred = pipeline_binary.predict(X)\n",
-    "confusion_matrix(y, y_pred)"
+    "y_pred = pipeline_binary.predict(X_holdout)\n",
+    "confusion_matrix(y_holdout, y_pred)"
    ]
   },
   {
@@ -210,8 +214,8 @@
    "outputs": [],
    "source": [
     "from evalml.model_understanding.graphs import graph_confusion_matrix\n",
-    "y_pred = pipeline_binary.predict(X)\n",
-    "graph_confusion_matrix(y, y_pred)"
+    "y_pred = pipeline_binary.predict(X_holdout)\n",
+    "graph_confusion_matrix(y_holdout, y_pred)"
    ]
   },
   {
@@ -232,8 +236,8 @@
     "from evalml.model_understanding.graphs import graph_precision_recall_curve\n",
     "# get the predicted probabilities associated with the \"true\" label\n",
     "import woodwork as ww\n",
-    "y_encoded = y.ww.map({'benign': 0, 'malignant': 1})\n",
-    "y_pred_proba = pipeline_binary.predict_proba(X)[\"malignant\"]\n",
+    "y_encoded = y_holdout.ww.map({'benign': 0, 'malignant': 1})\n",
+    "y_pred_proba = pipeline_binary.predict_proba(X_holdout)[\"malignant\"]\n",
     "graph_precision_recall_curve(y_encoded, y_pred_proba)"
    ]
   },
@@ -254,7 +258,7 @@
    "source": [
     "from evalml.model_understanding.graphs import graph_roc_curve\n",
     "# get the predicted probabilities associated with the \"malignant\" label\n",
-    "y_pred_proba = pipeline_binary.predict_proba(X)[\"malignant\"]\n",
+    "y_pred_proba = pipeline_binary.predict_proba(X_holdout)[\"malignant\"]\n",
     "graph_roc_curve(y_encoded, y_pred_proba)"
    ]
   },
@@ -297,7 +301,7 @@
    "outputs": [],
    "source": [
     "from evalml.model_understanding.graphs import binary_objective_vs_threshold\n",
-    "binary_objective_vs_threshold(pipeline_binary, X, y, 'f1', steps=100)"
+    "binary_objective_vs_threshold(pipeline_binary, X_holdout, y_holdout, 'f1', steps=10)"
    ]
   },
   {
@@ -307,7 +311,7 @@
    "outputs": [],
    "source": [
     "from evalml.model_understanding.graphs import graph_binary_objective_vs_threshold\n",
-    "graph_binary_objective_vs_threshold(pipeline_binary, X, y, 'f1', steps=100)"
+    "graph_binary_objective_vs_threshold(pipeline_binary, X_holdout, y_holdout, 'f1', steps=100)"
    ]
   },
   {
@@ -352,7 +356,7 @@
    "outputs": [],
    "source": [
     "pipeline_dt = BinaryClassificationPipeline(['Simple Imputer', 'Decision Tree Classifier'])\n",
-    "pipeline_dt.fit(X, y)"
+    "pipeline_dt.fit(X_train, y_train)"
    ]
   },
   {
@@ -401,7 +405,7 @@
    "source": [
     "from evalml.model_understanding.prediction_explanations import explain_predictions\n",
     "\n",
-    "table = explain_predictions(pipeline=pipeline_binary, input_features=X, y=None, indices_to_explain=[3],\n",
+    "table = explain_predictions(pipeline=pipeline_binary, input_features=X_holdout, y=None, indices_to_explain=[3],\n",
     "                           top_k_features=6, include_shap_values=True)\n",
     "print(table)"
    ]
@@ -423,8 +427,9 @@
    "source": [
     "from evalml.model_understanding.prediction_explanations import explain_predictions\n",
     "\n",
-    "report = explain_predictions(pipeline=pipeline_binary, input_features=X, y=y, indices_to_explain=[0, 4, 9], include_shap_values=True,\n",
-    "                            output_format='text')\n",
+    "report = explain_predictions(pipeline=pipeline_binary,\n",
+    "                             input_features=X_holdout, y=y_holdout, indices_to_explain=[0, 4, 9], include_shap_values=True,\n",
+    "                             output_format='text')\n",
     "print(report)"
    ]
   },
@@ -451,7 +456,7 @@
    "source": [
     "from evalml.model_understanding.prediction_explanations import explain_predictions_best_worst\n",
     "\n",
-    "report = explain_predictions_best_worst(pipeline=pipeline_binary, input_features=X, y_true=y,\n",
+    "report = explain_predictions_best_worst(pipeline=pipeline_binary, input_features=X_holdout, y_true=y_holdout,\n",
     "                                        include_shap_values=True, top_k_features=6, num_to_explain=2)\n",
     "\n",
     "print(report)"
@@ -503,9 +508,9 @@
    "outputs": [],
    "source": [
     "import json\n",
-    "single_prediction_report = explain_predictions(pipeline=pipeline_binary, input_features=X, indices_to_explain=[3],\n",
-    "                                              y=y, top_k_features=6, include_shap_values=True,\n",
-    "                                              output_format=\"dict\")\n",
+    "single_prediction_report = explain_predictions(pipeline=pipeline_binary, input_features=X_holdout, indices_to_explain=[3],\n",
+    "                                               y=y_holdout, top_k_features=6, include_shap_values=True,\n",
+    "                                               output_format=\"dict\")\n",
     "print(json.dumps(single_prediction_report, indent=2))"
    ]
   },
@@ -522,9 +527,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "single_prediction_report = explain_predictions(pipeline=pipeline_binary, input_features=X, indices_to_explain=[3],\n",
-    "                                              y=y, top_k_features=6, include_shap_values=True,\n",
-    "                                              output_format=\"dataframe\")\n",
+    "single_prediction_report = explain_predictions(pipeline=pipeline_binary, input_features=X_holdout,\n",
+    "                                               indices_to_explain=[3],\n",
+    "                                               y=y_holdout, top_k_features=6, include_shap_values=True,\n",
+    "                                               output_format=\"dataframe\")\n",
     "single_prediction_report"
    ]
   },
@@ -560,7 +566,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "report = explain_predictions_best_worst(pipeline=pipeline_binary, input_features=X, y_true=y,\n",
+    "report = explain_predictions_best_worst(pipeline=pipeline_binary, input_features=X_holdout, y_true=y_holdout,\n",
     "                                        num_to_explain=1, top_k_features=6,\n",
     "                                        include_shap_values=True, output_format=\"dataframe\")\n",
     "report"
@@ -587,7 +593,7 @@
     "rows_to_explain = [0] # Should be a list of integer indices of the rows to explain.\n",
     "\n",
     "results = graph_force_plot(pipeline_binary, rows_to_explain=rows_to_explain, \n",
-    "                           training_data=X, y=y)\n",
+    "                           training_data=X_holdout, y=y_holdout)\n",
     "\n",
     "for result in results:\n",
     "    for cls in result:\n",
@@ -620,13 +626,6 @@
     "        print(\"Class:\", cls)\n",
     "        display(result[cls][\"plot\"])"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {