# Simple PCA example:

In [1]:
import numpy as np
import pandas as pd

In [3]:
a = [[0, 0, 0], [1, 2, 3], [2, 4, 5], [3, 6, 2], [4, 8, 3], [5, 10, 1]]
data = pd.DataFrame(a, columns=['x', 'y', 'z'])
data

Unnamed: 0,x,y,z
0,0,0,0
1,1,2,3
2,2,4,5
3,3,6,2
4,4,8,3
5,5,10,1


In [4]:
from sklearn.decomposition import PCA
pca = PCA(svd_solver='randomized', random_state=100)
pca.fit(data)

PCA(copy=True, iterated_power='auto', n_components=None, random_state=100,
  svd_solver='randomized', tol=0.0, whiten=False)

In [5]:
pca.components_

array([[-4.46999485e-01, -8.93998971e-01, -3.09402653e-02],
       [-1.38369073e-02, -2.76738146e-02,  9.99521235e-01],
       [ 8.94427191e-01, -4.47213595e-01, -7.16522981e-19]])

### principle components are the weigheted linear combinations of original attributes:

In [7]:
lin_component = pca.components_[0]
setvec = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
x, y, z = np.linalg.solve(setvec, lin_component)
x, y, z

(-0.4469994854542544, -0.8939989709085091, -0.03094026532139258)

### Principle components are perpendicular to each other:

In [11]:
print(round(np.dot(pca.components_[0], pca.components_[1])))
print(round(np.dot(pca.components_[1], pca.components_[0])))

0.0
0.0


### Principle components variance raise in the order:

In [13]:
pca.explained_variance_ratio_

array([8.51564515e-01, 1.48435485e-01, 9.50131786e-65])

In [26]:
pca.explained_variance_.round(2)

array([17.51,  3.05,  0.  ])

### Fit principle component into the data frame:

In [28]:
new_data = pca.fit_transform(data)
pd.DataFrame(new_data, columns=['x', 'y', 'z']).round(2)

Unnamed: 0,x,y,z
0,5.66,-2.16,0.0
1,3.33,0.77,-0.0
2,1.03,2.7,0.0
3,-1.11,-0.37,0.0
4,-3.37,0.56,-0.0
5,-5.55,-1.51,0.0
