<p style='text-align:center; font-size:250%; font-weight:bold'>PRINCIPLE COMPONENT ANALYSIS</p>
<p style='text-align:center; font-size:150%'>Author: Andrean Yonathan</p>

# Import Library

In [1]:
import numpy as np
from sklearn.decomposition import PCA

# Create Data

In [2]:
# create data
X = np.array([[-1,1],
             [-2,-1],
             [-3,-2],
             [1,1],
             [2,1],
             [3,2]])
X

array([[-1,  1],
       [-2, -1],
       [-3, -2],
       [ 1,  1],
       [ 2,  1],
       [ 3,  2]])

In [3]:
# fit PCA
pca = PCA(n_components = 2)
pca.fit(X)

PCA(n_components=2)

In [4]:
print(pca.fit_transform(X))

[[ 0.50917706  1.08866118]
 [ 2.40151069 -0.10258752]
 [ 3.7751606  -0.43887001]
 [-1.20075534  0.05129376]
 [-2.05572155 -0.46738995]
 [-3.42937146 -0.13110746]]


In [5]:
print(pca.explained_variance_ratio_)

[0.95864607 0.04135393]


PCA 1 mengandun 95.86% data sehingga yang digunakan hanya PCA 1

In [6]:
pca = PCA(n_components = 1)
pca.fit_transform(X)

array([[ 0.50917706],
       [ 2.40151069],
       [ 3.7751606 ],
       [-1.20075534],
       [-2.05572155],
       [-3.42937146]])

# Finding PCA Manual

In [7]:
import pandas as pd
import scipy.linalg as la

In [8]:
# Data
data = np.matrix([[1,2,4],
                 [4,1,2],
                 [5,4,8]])

df = pd.DataFrame(data)
df

Unnamed: 0,0,1,2
0,1,2,4
1,4,1,2
2,5,4,8


In [9]:
# standarize data
standarize_data = (df - df.mean())/df.std()
print('Standarize data:')
print(standarize_data)

Standarize data:
          0         1         2
0 -1.120897 -0.218218 -0.218218
1  0.320256 -0.872872 -0.872872
2  0.800641  1.091089  1.091089


In [10]:
# finding covariance matrix
covariance = np.cov(standarize_data.T, bias = 1)
print('Covariance Matrix:\n', covariance)

Covariance Matrix:
 [[0.66666667 0.27954262 0.27954262]
 [0.27954262 0.66666667 0.66666667]
 [0.27954262 0.66666667 0.66666667]]


In [11]:
# finding eigenvalue and eigenvector
eigenvalues, eigenvector = np.linalg.eig(covariance)
print('Eigen values:\n', eigenvalues.round(2))
print('\n')
print('Eigen vector:\n', eigenvector.round(2))

Eigen values:
 [ 1.52  0.48 -0.  ]


Eigen vector:
 [[-0.42 -0.91  0.  ]
 [-0.64  0.3  -0.71]
 [-0.64  0.3   0.71]]


In [12]:
# finding PCA
n_components = 3
pca_manual = np.matmul(np.array(standarize_data), eigenvector)
pca_manual = pca_manual[:,:n_components]

print(pca_manual.round(2))

[[ 0.75  0.89 -0.  ]
 [ 0.98 -0.81  0.  ]
 [-1.74 -0.08 -0.  ]]
