In [10]:
# -*- coding: utf-8 -*-
"""
Created on Tue May 10 15:08:27 2022

@author: ess
"""

import numpy as np
import matplotlib.pyplot as plt
import numpy.random as rnd

### Create random 4d data

In [11]:
mu = np.array([10,13,7,6])
sigma = np.array([[12.61, 11.51, 11.52, 11.32], [11.51, 12.44, 11.36, 11.33], [11.52, 11.36, 12.83, 11.34],
                  [11.32, 11.33, 11.34, 12.27]])
y = np.linspace(1, 100, num = 1000)
print("Mu ", mu.shape)
print("Sigma ", sigma.shape)

Mu  (4,)
Sigma  (4, 4)


### Create 1000 samples using mean and sigma

In [12]:
org_data = rnd.multivariate_normal(mu, sigma, size=(1000))
print("Data shape ", org_data.shape)

Data shape  (1000, 4)


### Subtract mean from data

In [13]:
mean = np.mean(org_data, axis= 0)
print("Mean ", mean.shape)
mean_data = org_data - mean
print("Data after subtracting mean ", org_data.shape, "\n")

Mean  (4,)
Data after subtracting mean  (1000, 4) 



### Compute covariance matrix

In [14]:
cov = np.cov(mean_data.T)
cov = np.round(cov, 2)
print("Covariance matrix ", cov.shape, "\n")

Covariance matrix  (4, 4) 



### Perform eigen decomposition of covariance matrix

In [15]:
eig_val, eig_vec = np.linalg.eig(cov)
print("Eigen vectors ", eig_vec)
print("Eigen values ", eig_val, "\n")

Eigen vectors  [[ 0.50980368 -0.63410773  0.53659252  0.22377682]
 [ 0.50115262  0.73256665  0.22101574  0.40415863]
 [ 0.49483466  0.12477219  0.03564957 -0.85924366]
 [ 0.49404951 -0.21374142 -0.81360287  0.21972725]]
Eigen values  [44.8944829   0.93937197  1.11509142  1.26105371] 



### Sort eigen values and corresponding eigen vectors in descending order

In [16]:
indices = np.arange(0,len(eig_val), 1)
indices = ([x for _,x in sorted(zip(eig_val, indices))])[::-1]
eig_val = eig_val[indices]
eig_vec = eig_vec[:,indices]
print("Sorted Eigen vectors ", eig_vec)
print("Sorted Eigen values ", eig_val, "\n")

Sorted Eigen vectors  [[ 0.50980368  0.22377682  0.53659252 -0.63410773]
 [ 0.50115262  0.40415863  0.22101574  0.73256665]
 [ 0.49483466 -0.85924366  0.03564957  0.12477219]
 [ 0.49404951  0.21972725 -0.81360287 -0.21374142]]
Sorted Eigen values  [44.8944829   1.26105371  1.11509142  0.93937197] 



### Get explained variance

In [17]:
sum_eig_val = np.sum(eig_val)
explained_variance = eig_val/ sum_eig_val
print(explained_variance)
cumulative_variance = np.cumsum(explained_variance)
print(cumulative_variance)

[0.93122761 0.02615751 0.02312988 0.019485  ]
[0.93122761 0.95738512 0.980515   1.        ]


### Take transpose of eigen vectors with data

In [18]:
pca_data = np.dot(mean_data, eig_vec)
print("Transformed data ", pca_data.shape)

Transformed data  (1000, 4)


reference : https://towardsdatascience.com/a-step-by-step-implementation-of-principal-component-analysis-5520cc6cd598