
adds NotebookCollection tutorial
edublancas committed Dec 22, 2020
1 parent 8746694 commit 78af03b
Showing 8 changed files with 85 additions and 1,115 deletions.
6 changes: 5 additions & 1 deletion .gitignore
@@ -1,9 +1,13 @@
.nox
*.ipynb
.vscode
.DS_Store
.coverage*

#### joe made this: https://goel.io/joe

# ploomber-generated files when building docs
*.source


# Do not include test output
sklearn_evaluation/tests/result_images/
6 changes: 6 additions & 0 deletions docs/source/hooks.py
@@ -20,4 +20,10 @@ def config_init(app, config):
                   dag=dag,
                   kernelspec_name='python3')

    NotebookRunner(base_path / 'nbs/NotebookCollection.py',
                   File(base_path / 'user_guide/NotebookCollection.ipynb'),
                   dag=dag,
                   kernelspec_name='python3',
                   local_execution=True)

    dag.build()
1 change: 0 additions & 1 deletion docs/source/nbs/.gitignore

This file was deleted.

88 changes: 60 additions & 28 deletions docs/source/nbs/NotebookCollection.py
@@ -1,6 +1,3 @@
# %load_ext autoreload
# %autoreload 2

# # Analyzing results from notebooks
#
# The `.ipynb` format can hold tables and charts in a standalone file, which makes it a great choice for model evaluation reports. `NotebookCollection` lets you retrieve results from previously executed notebooks to quickly compare them.
@@ -9,7 +6,7 @@
import papermill as pm
import jupytext

from sklearn_evaluation import NotebookIntrospector, NotebookCollection
from sklearn_evaluation import NotebookCollection
# -

# Let's first generate a few notebooks. We have a `train.py` script that contains code for training a model; let's convert it to a Jupyter notebook using jupytext:
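# The conversion cell itself is collapsed in this diff; a minimal sketch of
# what it likely does with jupytext's read/write API (file names are assumed
# from the surrounding text):

nb = jupytext.read('train.py')     # parse the percent-format script
jupytext.write(nb, 'train.ipynb')  # write it out as a regular notebook
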
@@ -21,70 +18,105 @@

# +
# models with their corresponding parameters
params = [
{'model': 'sklearn.ensemble.RandomForestRegressor', 'params': {'n_estimators': 50}},
{'model': 'sklearn.ensemble.RandomForestRegressor', 'params': {'n_estimators': 100}},
{'model': 'sklearn.linear_model.LinearRegression', 'params': {'normalize': True}},
{'model': 'sklearn.svm.LinearSVR', 'params': {}},
]
params = [{
'model': 'sklearn.ensemble.RandomForestRegressor',
'params': {
'n_estimators': 50
}
}, {
'model': 'sklearn.ensemble.RandomForestRegressor',
'params': {
'n_estimators': 100
}
}, {
'model': 'sklearn.linear_model.LinearRegression',
'params': {
'normalize': True
}
}, {
'model': 'sklearn.svm.LinearSVR',
'params': {}
}]

# ids to identify each experiment
keys = ['random_forest_1', 'random_forest_2', 'linear_regression', 'support_vector_regression']
ids = [
'random_forest_1', 'random_forest_2', 'linear_regression',
'support_vector_regression'
]

# output files
files = [f'{k}.ipynb' for k in keys]
files = [f'{i}.ipynb' for i in ids]

# execute notebooks using papermill
for f, p in zip(files, params):
    pm.execute_notebook('train.ipynb', output_path=f, parameters=p, progress_bar=False)
    pm.execute_notebook('train.ipynb',
                        output_path=f,
                        parameters=p,
                        progress_bar=False)
# -

# Let's now use `NotebookCollection` to compare results, we just have to pass a list of files to use, and optionally, ids for each notebook (uses the path by default). The only requirement for this to work is to tag cells in the notebooks, each tag then becomes a key in the notebook collection. (To activate the tags menu click on `View -> Cell toolbar -> Tags`).
# Let's now use `NotebookCollection` to compare results.
#
# We just have to pass a list of files to use and, optionally, ids for each notebook (the path is used by default).
#
# The only requirement for this to work is to tag cells in the notebooks; each tag then becomes a key in the notebook collection (see the example below). To activate the tags menu click on `View -> Cell toolbar -> Tags`.
#
# Extracted tables color cells to make it easier to spot the best and worst experiment. By default, it assumes that metrics are errors (smaller is better); if you only have score metrics (larger is better), pass `scores=True`; if you have both errors and scores, pass a list with the names of the score columns:
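# For reference, this is how a cell is tagged in `train.py` (see the diff for
# that file later in this commit), using jupytext's percent-format cell
# markers; papermill preserves the tags when it executes the notebook:
#
#     # + tags=["feature_names"]
#     d['feature_names']
#     # -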

nbs = NotebookCollection(paths=files, keys=keys)
nbs = NotebookCollection(paths=files, ids=ids, scores=['r2'])

# To get a list of tags available:

list(nbs)

# We can get dictionaries with model parameters, click on the tabs to switch:
# `model_params` contains a dictionary with model parameters; let's get them (click on the tabs to switch):

# Pro-tip: when typing the tag, press the "Tab" key for autocompletion!
nbs['model_params']

# And plots:
# `plot` has a `y_true` vs `y_pred` chart:

nbs['plot']

# Tables are also available. A "Summary" tab is added showing all results at once with colors to identify the minimum (green) error value and maximum (red):
# If any cell outputs a pandas DataFrame, we can also retrieve it. `metrics` outputs a single-row DataFrame whose columns are mean absolute error (mae), mean squared error (mse), and r2.
#
# For single-row tables, a "Compare" tab is added showing all results at once, with colors to identify the minimum (green) and maximum (red) error values (and the reverse for scores):

nbs['metrics']

# If instead of single-row tables, each experiment generates tables with multiple rows, the summary is not generated:
# We can quickly see that the first random forest is performing the best in both metrics.
#
# Multi-row tables can also be retrieved. `river` contains a table with the error metrics and the r2 score broken down by the `CHAS` indicator feature. Multi-row tables *do not* display the "Compare" tab:

nbs['river']

# When comparing two notebooks, there are a few differences:
# If we only compare two notebooks, the output is a bit different:

# only compare two notebooks
nbs = NotebookCollection(paths=files[:2], keys=keys)
nbs_two = NotebookCollection(paths=files[:2], ids=ids[:2], scores=['r2'])

# The summary that compares single-row tables includes a diff column:
# The output that compares single-row tables includes a diff column with the error difference between experiments. Error reductions are shown in green, increases in red:

nbs['metrics']
nbs_two['metrics']

# When comparing multi-row tables, the Summary tab appears, showing the difference between the tables:
# When comparing multi-row tables, the "Compare" tab appears, showing the difference between the tables:

nbs['river']
nbs_two['river']

# Dictionaries generate a Summary tab with the differences:
# When displaying dictionaries, a "Compare" tab appears with a diff view:

nbs['model_params']
nbs_two['model_params']

# Lists (and sets) are compared depending on their elements:

nbs_two['feature_names']

# ## Using the mapping interface
#
# `NotebookCollection` objects have a dict-like interface, you can retrieve data from individual notebooks:
# `NotebookCollection` has a dict-like interface; you can retrieve data from individual notebooks:

nbs['model_params']['random_forest_1']

nbs['plot']['random_forest_2']


15 changes: 12 additions & 3 deletions docs/source/nbs/train.py
@@ -39,6 +39,10 @@
name, module = tokens[-1], '.'.join(tokens[:-1])
class_ = getattr(importlib.import_module(module), name)

# + tags=["feature_names"]
d['feature_names']
# -

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

model = class_(**params)
@@ -61,7 +65,8 @@

metrics_ = {
'mae': metrics.mean_absolute_error(y_test, y_pred),
'mse': metrics.mean_squared_error(y_test, y_pred)
'mse': metrics.mean_squared_error(y_test, y_pred),
'r2': metrics.r2_score(y_test, y_pred),
}

# + tags=["metrics"]
@@ -79,7 +84,11 @@
# CHAS: Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
error_river = df.groupby('CHAS')[['error_abs', 'error_sq']].mean()
error_river.columns = ['mae', 'mse']
error_river
# -

def r2_score(df):
    return metrics.r2_score(df.y_true, df.y_pred)

r2 = pd.DataFrame(df.groupby('CHAS').apply(r2_score))
r2.columns = ['r2']

error_river.merge(r2, on='CHAS')
3 changes: 2 additions & 1 deletion docs/source/user_guide.rst
@@ -9,4 +9,5 @@ User Guide
user_guide/func_vs_oop
user_guide/grid_search
user_guide/matplotlib
user_guide/SQLiteTracker
user_guide/SQLiteTracker
user_guide/NotebookCollection
