# Шаг 1. Подготовка данных #

In [6]:
import numpy as np

# Fix the RNG seed so that "Restart & Run All" regenerates the same sample
# (and therefore the same covariance matrix, eigenvectors and projections).
SEED = 42
np.random.seed(SEED)

# Synthetic sample: 10 points scattered around the line y = 2x,
# perturbed by Gaussian noise with standard deviation 2.
x = np.arange(1,11)
y = 2 * x + np.random.randn(10)*2
# Stack into a 2 x 10 array: row 0 is x, row 1 is y.
X = np.vstack((x,y))


In [10]:
# Inspect the stacked sample: row 0 holds x, row 1 holds the noisy y.
X

array([[  1.        ,   2.        ,   3.        ,   4.        ,
          5.        ,   6.        ,   7.        ,   8.        ,
          9.        ,  10.        ],
       [ -0.29805203,   7.03610882,   5.28680483,   4.55435147,
         13.37885384,   8.70291693,  16.2341572 ,  14.74502939,
         16.83718947,  18.00277352]])

In [32]:
# NOTE(review): `plotly.plotly` is the online plotting entry point; newer plotly
# releases are reported to have moved it to a separate package — confirm against
# the installed plotly version before re-running.
import plotly.plotly as py
# Import only the name that is actually used instead of `import *`, so it is
# clear where `Scatter` comes from and the notebook namespace stays clean.
from plotly.graph_objs import Scatter

# Plot the raw sample: x on the horizontal axis, noisy y on the vertical axis.
trace = Scatter(
    x=X[0],
    y=X[1]
)

data = [trace]
layout = dict(title = 'Тестовая выборка')
fig = dict(data=data, layout=layout)
py.iplot(fig)

In [13]:
# Centre the sample: subtract each coordinate's mean from its row.
# The means are kept in `m` so a reconstructed point can be shifted back later.
m = (x.mean(), y.mean())
Xcentered = tuple(row - mean for row, mean in zip(X, m))
Xcentered


(array([-4.5, -3.5, -2.5, -1.5, -0.5,  0.5,  1.5,  2.5,  3.5,  4.5]),
 array([-10.74606538,  -3.41190452,  -5.16120851,  -5.89366188,
          2.9308405 ,  -1.74509641,   5.78614386,   4.29701605,
          6.38917612,   7.55476018]))

In [14]:
# Per-coordinate means (mean of x, mean of y) that were subtracted when centring.
m

(5.5, 10.448013344416282)

In [33]:
# Draw the mean-centred sample: centred x against centred y.
layout = {'title': 'Отцентрованная выборка'}
trace = Scatter(x=Xcentered[0], y=Xcentered[1])
data = [trace]
fig = {'data': data, 'layout': layout}
py.iplot(fig)

# Шаг 2. Ковариационная матрица #

In [43]:
# Covariance matrix of the centred sample. np.cov treats each element of the
# input (here: the centred x row and the centred y row) as one variable, so the
# result is 2 x 2.
covmat = np.cov(Xcentered)

In [44]:
# Show the 2 x 2 covariance matrix: variances on the diagonal,
# the x-y covariance in the off-diagonal entries.
covmat

array([[  9.16666667,  17.27608875],
       [ 17.27608875,  38.88525612]])

In [47]:
# Variance of x: top-left entry of the covariance matrix already stored in
# `covmat` (reused here instead of recomputing np.cov). np.cov normalizes by
# N - 1 by default, so this is the unbiased sample variance.
covmat[0,0]

9.1666666666666661

In [48]:
# Variance of y: bottom-right entry of the covariance matrix already stored in
# `covmat` (reused here instead of recomputing np.cov).
covmat[1,1]

38.885256124633202

In [49]:
# Covariance between x and y: off-diagonal entry of the covariance matrix
# already stored in `covmat` (reused here instead of recomputing np.cov).
covmat[0,1]

17.276088752637676

# Шаг 3. Собственные векторы и значения (айгенпары) #

In [60]:
# Eigen-decomposition of the covariance matrix.
# np.linalg.eig does not guarantee any ordering of the eigenvalues, so the
# principal axis is selected explicitly as the eigenvector that belongs to the
# largest eigenvalue rather than assuming it sits in column 1.
# The eigenvalues are bound to a real name because `_` is IPython's
# "last output" variable and should not be overwritten in a notebook.
vals, vecs = np.linalg.eig(covmat)
# The sign of an eigenvector is arbitrary; the negation is kept from the
# original cell so the direction of the axis does not change.
v = -vecs[:, np.argmax(vals)]
# Project the centred sample onto the principal axis: one coordinate per point.
Xnew = np.dot(v,Xcentered)
Xnew

array([-11.64363343,  -4.56075968,  -5.73356662,  -5.98218943,
         2.4552097 ,  -1.37752352,   5.88446962,   4.94812878,
         7.2667099 ,   8.74315469])

# Шаг 5. Восстановление данных


In [63]:
# Reconstruct a single sample from its 1-D projection: scale the principal
# axis by the projected coordinate, then add the means back.
n = 9  # index of the sample to reconstruct
Xrestored = Xnew[n] * v + m

In [65]:
Xrestored  # sample n rebuilt from its 1-D projection (approximate, not exact)

array([  9.14660753,  18.39439664])

In [66]:
X[:,n]  # original (x, y) of sample n, for comparison with Xrestored

array([ 10.        ,  18.00277352])

# Вместо заключения – проверка алгоритма


In [68]:
from sklearn.decomposition import PCA

# Cross-check against scikit-learn: reduce the same sample to one component.
# X is stored as 2 x 10 (one row per coordinate), so it is transposed to put
# one sample per row before fitting.
pca = PCA(n_components=1)
XPCAreduced = pca.fit_transform(X.T)

In [69]:
Xnew  # our hand-computed 1-D projection of the sample (one value per point)

array([-11.64363343,  -4.56075968,  -5.73356662,  -5.98218943,
         2.4552097 ,  -1.37752352,   5.88446962,   4.94812878,
         7.2667099 ,   8.74315469])

In [70]:
# scikit-learn's 1-D projection, returned as a column (one row per sample).
# In the recorded output the values equal Xnew with the opposite sign; the
# two results describe the same axis pointing in opposite directions.
XPCAreduced

array([[ 11.64363343],
       [  4.56075968],
       [  5.73356662],
       [  5.98218943],
       [ -2.4552097 ],
       [  1.37752352],
       [ -5.88446962],
       [ -4.94812878],
       [ -7.2667099 ],
       [ -8.74315469]])