PRINCIPAL COMPONENT ANALYSIS

In [10]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
# Load the Iris dataset once: X is the feature matrix, Y the class labels.
# return_X_y=True yields the (data, target) pair directly, so the dataset
# bundle is not built twice.
X, Y = load_iris(return_X_y=True)

In [11]:
class PCA:
    """Principal component analysis on standardized (z-scored) features.

    ``fit`` standardizes the data column-wise, eigendecomposes the covariance
    matrix of the standardized data, and keeps the eigenvectors belonging to
    the ``num_components`` largest eigenvalues as the projection basis.

    Notes:
        * Standardization uses ``np.std`` (population std, ``ddof=0``) while
          ``np.cov`` uses the sample covariance (``ddof=1``), so each
          non-constant feature contributes ``n / (n - 1)`` (not exactly 1)
          to the sum of the eigenvalues.
        * The sign of every eigenvector is arbitrary; projections are
          therefore only defined up to a sign flip per component.

    Attributes set by ``fit``:
        mean: per-feature mean of the training data.
        std: per-feature standard deviation of the training data.
        eigenvalues: all eigenvalues, sorted in descending order.
        eigenvectors: all eigenvectors as columns, in the same order.
        components: the first ``num_components`` columns of ``eigenvectors``.
    """

    def __init__(self, num_components):
        """Store the number of components to keep; nothing is fitted yet."""
        self.num_components = num_components
        self.mean = None
        self.std = None
        self.eigenvalues = None
        self.eigenvectors = None
        self.components = None

    def _standardize(self, X):
        """Return X z-scored with the mean/std learned in ``fit``."""
        # A constant feature has std 0 and would produce 0/0 = NaN, which in
        # turn poisons the covariance matrix. Dividing by 1 instead maps such
        # a feature to an all-zero column.
        tiny = 10 * np.finfo(float).eps
        scale = np.where(self.std < tiny, 1.0, self.std)
        return (X - self.mean) / scale

    def fit(self, X):
        """Learn mean, std and the principal axes from X.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
        """
        self.mean = np.mean(X, axis=0)
        self.std = np.std(X, axis=0)
        X_std = self._standardize(X)

        # np.cov squeezes its result, so a single feature yields a 0-d array;
        # atleast_2d restores the (1, 1) matrix the decomposition needs.
        cov_matrix = np.atleast_2d(np.cov(X_std.T))

        # The covariance matrix is symmetric: eigh is the routine for that
        # case and always returns real eigenvalues and orthonormal
        # eigenvectors, whereas the general eig solver gives no such guarantee.
        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
        order = np.argsort(eigenvalues)[::-1]  # largest variance first
        self.eigenvalues = eigenvalues[order]
        self.eigenvectors = eigenvectors[:, order]
        self.components = self.eigenvectors[:, :self.num_components]

    def transform(self, X):
        """Project X onto the retained components.

        X is standardized with the statistics learned in ``fit`` and then
        multiplied by the ``components`` matrix.

        Returns:
            Array of shape (n_samples, num_components).
        """
        X_std = self._standardize(X)
        return X_std.dot(self.components)

    def fit_transform(self, X):
        """Fit on X, then return the projection of that same X."""
        self.fit(X)
        return self.transform(X)

    def explained_variance(self):
        """Return all eigenvalues, sorted in descending order."""
        return self.eigenvalues

    def components_(self):
        """Return all eigenvectors (as columns), not only the retained ones."""
        return self.eigenvectors


In [12]:
# Keep only the two leading principal components.
pca = PCA(num_components=2)

In [13]:
# Fit the model on X and project X onto the first num_components (here 2)
# eigenvectors; each output row corresponds to one row of X.
pca.fit_transform(X)

array([[-2.26470281, -0.4800266 ],
       [-2.08096115,  0.67413356],
       [-2.36422905,  0.34190802],
       [-2.29938422,  0.59739451],
       [-2.38984217, -0.64683538],
       [-2.07563095, -1.48917752],
       [-2.44402884, -0.0476442 ],
       [-2.23284716, -0.22314807],
       [-2.33464048,  1.11532768],
       [-2.18432817,  0.46901356],
       [-2.1663101 , -1.04369065],
       [-2.32613087, -0.13307834],
       [-2.2184509 ,  0.72867617],
       [-2.6331007 ,  0.96150673],
       [-2.1987406 , -1.86005711],
       [-2.26221453, -2.68628449],
       [-2.2075877 , -1.48360936],
       [-2.19034951, -0.48883832],
       [-1.898572  , -1.40501879],
       [-2.34336905, -1.12784938],
       [-1.914323  , -0.40885571],
       [-2.20701284, -0.92412143],
       [-2.7743447 , -0.45834367],
       [-1.81866953, -0.08555853],
       [-2.22716331, -0.13725446],
       [-1.95184633,  0.62561859],
       [-2.05115137, -0.24216355],
       [-2.16857717, -0.52714953],
       [-2.13956345,

In [16]:
pca.explained_variance()  # all eigenvalues of the covariance matrix, largest first

array([2.93808505, 0.9201649 , 0.14774182, 0.02085386])

In [17]:
pca.components_()  # all eigenvectors, one per column, ordered to match the eigenvalues

array([[ 0.52106591, -0.37741762, -0.71956635,  0.26128628],
       [-0.26934744, -0.92329566,  0.24438178, -0.12350962],
       [ 0.5804131 , -0.02449161,  0.14212637, -0.80144925],
       [ 0.56485654, -0.06694199,  0.63427274,  0.52359713]])