In [1]:
from sklearn import datasets
import numpy as np

In [26]:
# Generate a random 3x3 symmetric positive-definite matrix to use as toy data,
# then show its type followed by its contents.
mat = datasets.make_spd_matrix(n_dim=3)
print(type(mat), mat, sep="\n")

<class 'numpy.ndarray'>
[[ 0.93048419  0.54920872 -0.21064148]
 [ 0.54920872  3.28137269 -1.01849041]
 [-0.21064148 -1.01849041  1.26375107]]


In [27]:
# Boolean mask with the same shape as `mat`: every entry is an independent
# single-trial binomial draw with success probability 0.1, so True marks the
# entries that will be blanked out.
masking_array = np.random.binomial(n=1, p=0.1, size=mat.shape).astype(bool)
print(masking_array)

[[False False False]
 [False False  True]
 [False False False]]


In [28]:
# Overwrite, in place, every entry of `mat` selected by the boolean mask with
# NaN to simulate missing values.
np.putmask(mat, masking_array, np.nan)
print(mat)

[[ 0.93048419  0.54920872 -0.21064148]
 [ 0.54920872  3.28137269         nan]
 [-0.21064148 -1.01849041  1.26375107]]


Non-Pipeline method

In [29]:
from sklearn import preprocessing

In [35]:
# Imputation transformer for completing missing values.
# `preprocessing.Imputer` was deprecated in scikit-learn 0.20 and removed in
# 0.22; `sklearn.impute.SimpleImputer` is its replacement. The old class is
# used only as a fallback when running on a pre-0.20 installation.
try:
    from sklearn.impute import SimpleImputer as Imputer
except ImportError:
    from sklearn.preprocessing import Imputer

# Default settings: each NaN entry is replaced by the mean of its column.
impute = Imputer()
impute

Imputer(axis=0, copy=True, missing_values='NaN', strategy='mean', verbose=0)

In [36]:
# StandardScaler centres each feature on zero (mean removed) and rescales it
# to unit variance; both behaviours are spelled out explicitly here.
scaler = preprocessing.StandardScaler(with_mean=True, with_std=True)
scaler

StandardScaler(copy=True, with_mean=True, with_std=True)

In [32]:
# Step 1 of the manual approach: learn the per-column fill values from `mat`,
# then apply them to that same array to replace its NaN entries.
mat_imputed = impute.fit(mat).transform(mat)
print(mat_imputed)

[[ 0.93048419  0.54920872 -0.21064148]
 [ 0.54920872  3.28137269  0.5265548 ]
 [-0.21064148 -1.01849041  1.26375107]]


In [34]:
# Step 2 of the manual approach: learn the scaling statistics from the imputed
# matrix, then standardize that same matrix with them.
mat_imp_and_scaled = scaler.fit(mat_imputed).transform(mat_imputed)
print(mat_imp_and_scaled)

[[ 1.06985795 -0.21846477 -1.22474487]
 [ 0.26604105  1.31927571  0.        ]
 [-1.335899   -1.10081094  1.22474487]]


Pipeline method

In [43]:
from sklearn import pipeline

# Chain the imputer and the scaler created above into a single estimator.
# Each step is a (name, estimator) pair, listed in the order it should run.
pipe = pipeline.Pipeline(
    steps=[
        ('impute0', impute),
        ('scaler0', scaler),
    ]
)

In [44]:
# Display the pipeline to check its steps: each one is a (name, estimator)
# pair, shown in the order the steps were passed to the constructor.
pipe

Pipeline(steps=[('impute0', Imputer(axis=0, copy=True, missing_values='NaN', strategy='mean', verbose=0)), ('scaler0', StandardScaler(copy=True, with_mean=True, with_std=True))])

In [45]:
# Number of (name, estimator) steps held by the pipeline.
len(pipe.steps)

2

In [46]:
# First step of the pipeline, as a (name, estimator) tuple.
pipe.steps[0]

('impute0',
 Imputer(axis=0, copy=True, missing_values='NaN', strategy='mean', verbose=0))

In [49]:
# Calling fit_transform on the pipe object runs the separate steps
# (impute, then scale) in a single call.
new_mat = pipe.fit_transform(mat)
# NOTE:
# Both impute & scaler have the .fit_transform() function!
# Sanity check: the pipeline must reproduce the manual two-step result
# computed earlier, rather than relying on comparing printed output by eye.
np.testing.assert_allclose(new_mat, mat_imp_and_scaled)
print(new_mat)

[[ 1.06985795 -0.21846477 -1.22474487]
 [ 0.26604105  1.31927571  0.        ]
 [-1.335899   -1.10081094  1.22474487]]
