Skip to content

Commit

Permalink
DOC started on example.
Browse files Browse the repository at this point in the history
  • Loading branch information
amueller committed Sep 2, 2012
1 parent a2eae0a commit 6513593
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 7 deletions.
46 changes: 46 additions & 0 deletions examples/plot_grid_search.py
@@ -0,0 +1,46 @@
"""
=====================================================
Visualizing results of high dimensional grid searches
=====================================================
Often one is faced with combining feature extraction, feature selection
and classification into a complex pipeline.
Each individual step usually has many tunable parameters. Finding the
important parameters for a given task and picking robust settings is often
hard.
This example show how to visualize results of a grid search with
many interacting parameters.
The ``DecisionTreeClassifier`` is a good model for a complex pipeline as there
are many parameters to tweak, but only few have significant influence.
"""
print __doc__

import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import load_digits
from sklearn.grid_search import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

iris = load_digits()
X, y = iris.data, iris.target

param_grid = {'max_depth': np.arange(1, 10, 2), 'min_samples_leaf': [1, 5, 10],
'min_samples_split': [1, 5, 10],
'max_features': [1, 10, 30, 40, 64]}

grid_search = GridSearchCV(DecisionTreeClassifier(), param_grid=param_grid,
cv=3)
grid_search.fit(X, y)

results = grid_search.scores_

fig, axes = plt.subplots(2, 2)
axes = axes.ravel()

for ax, param in zip(axes, results.params):
ax.errorbar(results.values[param], results.accumulated_mean(param, 'max'),
yerr=results.accumulated_std(param, 'max'))
ax.set_title(param)
plt.show()
7 changes: 1 addition & 6 deletions examples/svm/plot_rbf_parameters.py
Expand Up @@ -105,12 +105,7 @@
pl.axis('tight')

# plot the scores of the grid
# grid_scores_ contains parameter settings and scores
score_dict = grid.grid_scores_

# We extract just the scores
scores = [x[1] for x in score_dict]
scores = np.array(scores).reshape(len(C_range), len(gamma_range))
scores = grid.scores_.mean()

# draw heatmap of accuracy as a function of gamma and C
pl.figure(figsize=(8, 6))
Expand Down
5 changes: 4 additions & 1 deletion sklearn/grid_search.py
Expand Up @@ -76,7 +76,6 @@ def accumulated_mean(self, param, kind="mean"):
1d array of scores corresponding to the different settings
of ``param``.
"""

return self._accumulate(self.mean(), param, kind)

def accumulated_std(self, param, kind="mean"):
Expand Down Expand Up @@ -368,6 +367,10 @@ class GridSearchCV(BaseEstimator, MetaEstimatorMixin):
`best_params_` : dict
Parameter setting that gave the best results on the hold out data.
`scores_` : list of ResultGrid
For each dict in ``param_grid`` this holds a ``ResultGrid`` that
provides easy analysis of the grid search scores.
Notes
-----
The parameters selected are those that maximize the score of the left out
Expand Down

0 comments on commit 6513593

Please sign in to comment.