In [1]:
import pandas as pd
import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt

from ucimlrepo import fetch_ucirepo 

In [2]:
class PCA:
    """Principal component analysis via eigendecomposition of the covariance matrix.

    Parameters
    ----------
    n_components : int
        Number of principal components to keep.

    Attributes (set by ``fit``)
    ---------------------------
    components : ndarray of shape (n_features, n_components)
        Columns are the principal axes, ordered by decreasing variance.
    mean : ndarray of shape (n_features,)
        Per-feature mean of the data passed to ``fit``; used for centering.
    """

    def __init__(self, n_components):
        self.n_components = n_components
        self.components = None
        self.mean = None

    def fit(self, X):
        """Learn the principal axes of ``X`` (rows are samples, columns are features)."""
        X = np.asarray(X, dtype=float)

        # center dataset
        self.mean = np.mean(X, axis=0)
        X = X - self.mean

        # covariance between features; atleast_2d keeps the single-feature case 2-D
        cov = np.atleast_2d(np.cov(X, rowvar=False))

        # eigh returns (eigenvalues, eigenvectors) -- in that order -- and is the
        # right solver for a symmetric matrix: results are always real.
        eigvals, eigvecs = la.eigh(cov)

        # indices of the n largest eigenvalues, largest first
        idx = np.argsort(eigvals)[::-1][:self.n_components]

        # eigenvectors are the COLUMNS of eigvecs
        self.components = eigvecs[:, idx]

    def transform(self, X):
        """Center ``X`` with the fitted mean and project it onto the principal axes.

        Returns an ndarray of shape (n_samples, n_components).
        Raises RuntimeError if ``fit`` has not been called yet.
        """
        if self.components is None:
            raise RuntimeError("PCA.transform() called before fit()")

        # center data
        X = np.asarray(X, dtype=float) - self.mean

        # project onto principal components
        projection = np.dot(X, self.components)
        return projection

In [3]:
# download the iris dataset (repository id 53)
iris = fetch_ucirepo(id=53)

# features and targets, each as a pandas dataframe
X, y = iris.data.features, iris.data.targets

# features and targets side by side, for display only
dataset = pd.concat([X, y], axis=1)

dataset

In [4]:
# fit a two-component PCA on the features, then project the same features
pca = PCA(n_components=2)
pca.fit(X)
X_projections = pca.transform(X)

In [5]:
print(f"Dataset shape before PCA: {X.shape}")
print(f"Dataset shape after PCA: {X_projections.shape}\n")

pc1 = X_projections[:, 0]
pc2 = X_projections[:, 1]

# integer code per sample for colouring; unique_labels[i] is the class name of code i
labels, unique_labels = pd.factorize(np.array(y).reshape(-1))

fig, ax = plt.subplots()
points = ax.scatter(pc1, pc2, c=labels, alpha=0.8, cmap='viridis')
ax.set_title("Principal Component Analysis")
ax.set_xlabel("Principal Component 1")
ax.set_ylabel("Principal Component 2")

# the codes are categorical, so show a legend with class names rather than a
# continuous colorbar; legend_elements yields one handle per code in ascending order
handles, _ = points.legend_elements()
ax.legend(handles, unique_labels, title="Class")
plt.show()