Skip to content

Commit

Permalink
DOC started on example.
Browse files Browse the repository at this point in the history
  • Loading branch information
amueller committed Sep 2, 2012
1 parent a2eae0a commit 6513593
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 7 deletions.
46 changes: 46 additions & 0 deletions examples/plot_grid_search.py
@@ -0,0 +1,46 @@
"""
=====================================================
Visualizing results of high dimensional grid searches
=====================================================
Often one is faced with combining feature extraction, feature selection
and classification into a complex pipeline.
Each individual step usually has many tunable parameters. Finding the
important parameters for a given task and picking robust settings is often
hard.
This example show how to visualize results of a grid search with
many interacting parameters.
The ``DecisionTreeClassifier`` is a good model for a complex pipeline as there
are many parameters to tweak, but only few have significant influence.
"""
print __doc__

import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import load_digits
from sklearn.grid_search import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

iris = load_digits()
X, y = iris.data, iris.target

param_grid = {'max_depth': np.arange(1, 10, 2), 'min_samples_leaf': [1, 5, 10],
'min_samples_split': [1, 5, 10],
'max_features': [1, 10, 30, 40, 64]}

grid_search = GridSearchCV(DecisionTreeClassifier(), param_grid=param_grid,
cv=3)
grid_search.fit(X, y)

results = grid_search.scores_

fig, axes = plt.subplots(2, 2)
axes = axes.ravel()

for ax, param in zip(axes, results.params):
ax.errorbar(results.values[param], results.accumulated_mean(param, 'max'),
yerr=results.accumulated_std(param, 'max'))
ax.set_title(param)
plt.show()
7 changes: 1 addition & 6 deletions examples/svm/plot_rbf_parameters.py
Expand Up @@ -105,12 +105,7 @@
pl.axis('tight')

# plot the scores of the grid
# grid_scores_ contains parameter settings and scores
score_dict = grid.grid_scores_

# We extract just the scores
scores = [x[1] for x in score_dict]
scores = np.array(scores).reshape(len(C_range), len(gamma_range))
scores = grid.scores_.mean()

# draw heatmap of accuracy as a function of gamma and C
pl.figure(figsize=(8, 6))
Expand Down
5 changes: 4 additions & 1 deletion sklearn/grid_search.py
Expand Up @@ -76,7 +76,6 @@ def accumulated_mean(self, param, kind="mean"):
1d array of scores corresponding to the different settings
of ``param``.
"""

return self._accumulate(self.mean(), param, kind)

def accumulated_std(self, param, kind="mean"):
Expand Down Expand Up @@ -368,6 +367,10 @@ class GridSearchCV(BaseEstimator, MetaEstimatorMixin):
`best_params_` : dict
Parameter setting that gave the best results on the hold out data.
`scores_` : list of ResultGrid
For each dict in ``param_grid`` this holds a ``ResultGrid`` that
provides easy analysis of the grid search scores.
Notes
-----
The parameters selected are those that maximize the score of the left out
Expand Down

0 comments on commit 6513593

Please sign in to comment.