# Import Modules

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Load Data Set

In [2]:
from sklearn.datasets import load_boston

# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older release (or a replacement dataset).
boston = load_boston()
# Feature matrix and regression target, respectively.
X, y = boston.data, boston.target

# Split into Train and Test Sets

In [3]:
# train_test_split lives in sklearn.model_selection; the old
# sklearn.cross_validation module was deprecated in scikit-learn 0.18 and
# removed in 0.20.
from sklearn.model_selection import train_test_split

# Hold out a test split (library default proportions); random_state=0 makes
# the shuffle reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Scale and Fit a Model

In [4]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge

# Standardise the features. The scaler is fitted on the training split only
# and then applied unchanged to both splits, so no test data leaks into it.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Ridge regression with default hyper-parameters, trained on the scaled data.
rgr = Ridge()
rgr.fit(X_train_scaled, y_train)

# Score of the fitted model on the held-out (scaled) test split.
rgr.score(X_test_scaled, y_test)

0.63448846877867415

# Create a Pipeline

In [5]:
from sklearn.pipeline import Pipeline

# Chain scaling and ridge regression into a single estimator, so the manual
# fit/transform bookkeeping from the previous section is handled internally.
pipe = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("ridge", Ridge()),
])
pipe.fit(X_train, y_train)

# Raw (unscaled) test data goes in; the pipeline applies the fitted scaler.
pipe.score(X_test, y_test)

0.63448846877867415

In [7]:
# Each step of the pipeline can be looked up by the name it was given.
for step_name in ('scaler', 'ridge'):
    print(pipe.named_steps[step_name])

StandardScaler(copy=True, with_mean=True, with_std=True)
Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, solver='auto', tol=0.001)


# Grid Search

In [9]:
# GridSearchCV lives in sklearn.model_selection; the old sklearn.grid_search
# module was deprecated in scikit-learn 0.18 and removed in 0.20.
from sklearn.model_selection import GridSearchCV

# Fresh, unfitted pipeline to be tuned by the search.
pipe = Pipeline([("scaler", StandardScaler()), ("ridge", Ridge())])
# "<step name>__<parameter>" routes a value to that pipeline step; alpha is
# tried over powers of ten from 1e-3 up to 1e4.
param_grid = {'ridge__alpha': 10. ** np.arange(-3, 5)}
# 10-fold cross-validation over every candidate alpha, on the training split.
grid = GridSearchCV(pipe, param_grid, cv=10)
grid.fit(X_train, y_train)

GridSearchCV(cv=10, error_score='raise',
       estimator=Pipeline(steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('ridge', Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, solver='auto', tol=0.001))]),
       fit_params={}, iid=True, loss_func=None, n_jobs=1,
       param_grid={'ridge__alpha': array([  1.00000e-03,   1.00000e-02,   1.00000e-01,   1.00000e+00,
         1.00000e+01,   1.00000e+02,   1.00000e+03,   1.00000e+04])},
       pre_dispatch='2*n_jobs', refit=True, score_func=None, scoring=None,
       verbose=0)

In [10]:
# Display the pipeline (with its chosen ridge alpha) selected by the search.
grid.best_estimator_

Pipeline(steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('ridge', Ridge(alpha=10.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, solver='auto', tol=0.001))])

In [11]:
# Best cross-validation score found across the alpha candidates.
grid.best_score_

0.71776384977791918

In [12]:
# Parameter setting (keyed as "<step>__<param>") that achieved the best score.
grid.best_params_

{'ridge__alpha': 10.0}

# Pickle the Fitted Grid Search

In [21]:
import pickle

# Serialise the fitted grid search to disk. The `with` block guarantees the
# file is closed even if pickle.dump raises part-way through.
with open('pickle_test.pickle', 'wb') as f:
    pickle.dump(grid, f)

In [23]:
# Reload the grid search saved earlier. Pickle data is binary, so the file
# must be opened with 'rb': text mode fails under Python 3 and can corrupt
# the byte stream on Windows.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('pickle_test.pickle', 'rb') as f:
    grid2 = pickle.load(f)

In [24]:
# Sanity check: the reloaded object should report the same best score as `grid`.
grid2.best_score_

0.71776384977791918