In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import RidgeClassifier
from sklearn.model_selection import cross_val_score

In [2]:
# Load the iris dataset and split it into a feature frame (X) and a label series (y).
data = load_iris()
n_components = 3  # number of principal components kept by both PCA variants below

# Build the full frame in one expression: measurements plus the class label column.
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)
y = df['target']
X = df.drop(columns='target')

In [3]:
def Handmade_PCA(X,n_components=2):
    """Project X onto its leading principal components.

    The features are standardized with StandardScaler, the covariance matrix
    of the standardized data is eigen-decomposed, and the standardized data
    are projected onto the eigenvectors with the largest eigenvalues.

    Parameters
    ----------
    X : 2-D array-like, one row per sample and one column per feature
        Data to reduce.
    n_components : int, default 2
        Number of principal components to keep. Must lie between 0 and the
        number of columns of X.

    Returns
    -------
    pandas.DataFrame
        One row per row of X, with columns named 'PC 1', 'PC 2', ... and a
        default integer index.

    Raises
    ------
    ValueError
        If n_components is negative or larger than the number of features.

    Notes
    -----
    The sign of each eigenvector is arbitrary, so individual components may
    be mirrored relative to another PCA implementation.
    """
    # Validate up front: otherwise an out-of-range value only fails at the very
    # end with an unrelated "shape of passed values" error from pandas.
    # Non-2-D input is left for StandardScaler to reject with its own message.
    shape = np.shape(X)
    if len(shape) == 2 and not 0 <= n_components <= shape[1]:
        raise ValueError(
            f'n_components must be between 0 and {shape[1]} '
            f'(the number of features), got {n_components}'
        )

    # Scale our data to be normalized
    sc = StandardScaler()
    A = sc.fit_transform(X)

    # Get the covariance matrix (rowvar=False: each column is a variable)
    V = np.cov(A,rowvar = False)

    # Get eigenvalues and eigenvectors; eigh is used because V is symmetric
    eig_vals, eig_vecs = np.linalg.eigh(V)

    # Order the eigenvectors (columns) by decreasing eigenvalue
    sorted_index = np.argsort(eig_vals)[::-1]
    sorted_eig_vecs = eig_vecs[:,sorted_index]

    # Subset our eigenvectors to get only the required principal components
    eigvecs_subset = sorted_eig_vecs[:,:n_components]

    # Project the standardized data onto the kept eigenvectors
    A_reduced = A @ eigvecs_subset

    # Create names for the columns
    columns = [f'PC {i+1}' for i in range(n_components)]

    # Convert it to a pandas dataframe with the column names we created
    return pd.DataFrame(A_reduced,columns=columns)

In [4]:
# Run the hand-written PCA on the raw feature frame (the function standardizes internally).
my_output = Handmade_PCA(X, n_components=n_components)

In [5]:
# Standardize the features for the scikit-learn pipeline.
# NOTE: X is rebound here to the scaled array returned by the scaler;
# every cell below this one sees the scaled data, not the original frame.
sc = StandardScaler().fit(X)
X = sc.transform(X)

# PCA estimator keeping the same number of components as the hand-made version.
pca = PCA(n_components=n_components)

In [6]:
# Fit scikit-learn's PCA on the scaled features and label the resulting
# columns the same way as the hand-made implementation.
pc_labels = [f'PC {i+1}' for i in range(n_components)]
sklearn_projection = pca.fit_transform(X)
sklearn_output = pd.DataFrame(sklearn_projection, columns=pc_labels)

In [7]:
# Score one ridge classifier on each feature set with the default
# cross-validation settings; rows are printed in this order:
# scaled original features, hand-made PCA, scikit-learn PCA.
rc = RidgeClassifier()
for features in (X, my_output, sklearn_output):
    print(cross_val_score(rc, features, y))

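# The hand-written PCA and scikit-learn's PCA give identical cross-validation
# scores (the last two rows of the output below match).
# Compared with the scaled original features (first row), the 3-component PCA
# projection raises the best fold accuracy from 0.867 to 0.9, although the
# worst fold drops from 0.767 to 0.733.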

[0.8        0.83333333 0.83333333 0.76666667 0.86666667]
[0.8        0.86666667 0.9        0.73333333 0.83333333]
[0.8        0.86666667 0.9        0.73333333 0.83333333]
