In [55]:
import numpy as np

from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import Imputer, StandardScaler, MinMaxScaler

In [56]:
# Toy design matrix: 4 samples (rows) by 5 features (columns), with three
# entries set to NaN to stand in for missing measurements.
X = np.array(
    [
        [1, 2, 3, 4, 5],
        [2, 3, 4, np.nan, 6],
        [3, 4, np.nan, 6, 7],
        [4, 5, np.nan, 7, 8],
    ],
    dtype=float,
)
# One 0/1 target per row of X.
Y = np.array([0, 1] * 2)

In [57]:
# Echo the raw matrix so the NaN positions are visible before any preprocessing.
X

array([[  1.,   2.,   3.,   4.,   5.],
       [  2.,   3.,   4.,  nan,   6.],
       [  3.,   4.,  nan,   6.,   7.],
       [  4.,   5.,  nan,   7.,   8.]])

In [58]:
# Replace every NaN with the mean of the observed values in its column
# (axis=0 means the statistic is computed per column).
imp = Imputer(missing_values=np.nan, strategy="mean", axis=0)
# Learn the column means from X, then apply them to the same matrix.
X_imp = imp.fit(X).transform(X)
X_imp

array([[ 1.        ,  2.        ,  3.        ,  4.        ,  5.        ],
       [ 2.        ,  3.        ,  4.        ,  5.66666667,  6.        ],
       [ 3.        ,  4.        ,  3.5       ,  6.        ,  7.        ],
       [ 4.        ,  5.        ,  3.5       ,  7.        ,  8.        ]])

In [72]:
# Standardise the imputed matrix column by column using the scaler's
# default settings (centre on the mean, divide by the standard deviation).
scale = StandardScaler()
X_scale = scale.fit(X_imp).transform(X_imp)
X_scale

array([[-1.34164079, -1.34164079, -1.41421356, -1.5430335 , -1.34164079],
       [-0.4472136 , -0.4472136 ,  1.41421356,  0.        , -0.4472136 ],
       [ 0.4472136 ,  0.4472136 ,  0.        ,  0.3086067 ,  0.4472136 ],
       [ 1.34164079,  1.34164079,  0.        ,  1.2344268 ,  1.34164079]])

In [77]:
# Rescale each imputed column linearly so its minimum maps to 0 and its
# maximum maps to 1.
# NOTE: this rebinds `scale`; the StandardScaler created in the earlier
# cell is no longer reachable under that name once this cell has run.
scale = MinMaxScaler()
X_mm = scale.fit(X_imp).transform(X_imp)
X_mm

array([[ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.33333333,  0.33333333,  1.        ,  0.55555556,  0.33333333],
       [ 0.66666667,  0.66666667,  0.5       ,  0.66666667,  0.66666667],
       [ 1.        ,  1.        ,  0.5       ,  1.        ,  1.        ]])

In [78]:
# Fit one ordinary least-squares model per preprocessing variant so their
# coefficients and predictions can be compared side by side.
# `fit` returns the estimator it was called on, so each *_model name is an
# alias of the matching *_reg object rather than a separate model.

# Imputed only.
imp_reg = LinearRegression()
imp_model = imp_reg.fit(X_imp, Y)

# Imputed, then standardised.
scale_reg = LinearRegression()
scale_model = scale_reg.fit(X_scale, Y)

# Imputed, then min-max scaled.
mm_reg = LinearRegression()
mm_model = mm_reg.fit(X_mm, Y)

In [79]:
# Show the learned coefficients and intercept of each regression, one model
# per line.
# The Python 2 statement form `print a, b` is a SyntaxError on Python 3.
# Formatting both values into one string and calling print with parentheses
# is valid on both versions and emits the same "str(a) str(b)" text on
# Python 2.
print("%s %s" % (imp_model.coef_, imp_model.intercept_))
print("%s %s" % (scale_model.coef_, scale_model.intercept_))
print("%s %s" % (mm_model.coef_, mm_model.intercept_))

[-1.66666667 -1.66666667 -4.          6.         -1.66666667] 1.33333333333
[-1.86338998 -1.86338998 -1.41421356  6.4807407  -1.86338998] 0.5
[ -5.  -5.  -4.  18.  -5.] -1.7763568394e-15


In [80]:
# Predict on the training matrices themselves, feeding each model the same
# representation of the data it was fitted on.
# print(...) with a single argument behaves identically on Python 2 and is
# also valid Python 3, unlike the bare print statement.
print(imp_model.predict(X_imp))
print(scale_model.predict(X_scale))
print(mm_model.predict(X_mm))

[ -5.32907052e-15   1.00000000e+00  -1.06581410e-14   1.00000000e+00]
[ -3.33066907e-16   1.00000000e+00  -4.44089210e-16   1.00000000e+00]
[ -1.77635684e-15   1.00000000e+00  -2.66453526e-15   1.00000000e+00]


In [81]:
# Chain the preprocessing and the regression into a single estimator:
# mean imputation -> standardisation -> linear regression. The pipeline
# takes the raw matrix (NaNs included) and applies the steps in order.
estimator = Pipeline(steps=[
    ("imputer", Imputer(missing_values=np.nan, strategy="mean", axis=0)),
    ("scaler", StandardScaler()),
    ("lr", LinearRegression()),
])

In [82]:
# Fit every pipeline step in one call on the raw matrix, NaNs included.
mdl = estimator.fit(X, Y)

In [83]:
# Predict straight from the raw matrix; the pipeline imputes and scales
# internally before the regression step.
# print(...) with a single argument works on both Python 2 and Python 3,
# unlike the bare print statement.
print(mdl.predict(X))

[ -3.33066907e-16   1.00000000e+00  -4.44089210e-16   1.00000000e+00]


In [84]:
from sklearn.externals import joblib

# Persist the fitted pipeline to disk. The call is left as the cell's last
# expression so the list of files it wrote is displayed.
joblib.dump(value=mdl, filename='model.pkl')

['model.pkl',
 'model.pkl_01.npy',
 'model.pkl_02.npy',
 'model.pkl_03.npy',
 'model.pkl_04.npy',
 'model.pkl_05.npy',
 'model.pkl_06.npy',
 'model.pkl_07.npy']

In [85]:
# Reload the persisted pipeline and predict with it, as a round-trip check
# against the predictions of the in-memory model.
# joblib.load unpickles the file, so only point it at files from a trusted
# source (here, the file this notebook wrote itself).
bob = joblib.load(filename='model.pkl')
bob.predict(X)

array([ -3.33066907e-16,   1.00000000e+00,  -4.44089210e-16,
         1.00000000e+00])

In [87]:
from sklearn.linear_model import Perceptron

# Same imputation + standardisation front end as the regression pipeline,
# with a Perceptron classifier (default settings) as the final step.
percep_estimator = Pipeline(steps=[
    ("imputer", Imputer(missing_values=np.nan, strategy="mean", axis=0)),
    ("scaler", StandardScaler()),
    ("perceptron", Perceptron()),
])

In [89]:
# Train the perceptron pipeline on the raw data. The bare call leaves the
# fitted Pipeline as the cell's displayed value.
percep_estimator.fit(X, Y)

Pipeline(steps=[('imputer', Imputer(axis=0, copy=True, missing_values=nan, strategy='mean', verbose=0)), ('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('perceptron', Perceptron(alpha=0.0001, class_weight=None, eta0=1.0, fit_intercept=True,
      n_iter=5, n_jobs=1, penalty=None, random_state=0, shuffle=True,
      verbose=0, warm_start=False))])

In [94]:
# Inspect the last pipeline step: index 2 of `steps` is the
# ("perceptron", estimator) tuple, whose repr lists the hyper-parameters.
# print(...) with a single argument works on both Python 2 and Python 3,
# unlike the bare print statement.
print(percep_estimator.steps[2])

('perceptron', Perceptron(alpha=0.0001, class_weight=None, eta0=1.0, fit_intercept=True,
      n_iter=5, n_jobs=1, penalty=None, random_state=0, shuffle=True,
      verbose=0, warm_start=False))
