# Principal Components Analysis (PCA)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from dimensionality_red.styler import style_dataframe

In [None]:
# Seed NumPy's legacy global generator so the demo data is reproducible.
np.random.seed(42)
# Draw a 10x4 array of uniform samples and stretch them from [0, 1) to [0, 10).
data = 10 * np.random.rand(10, 4)

In [None]:
# Labels 'Feature 1' .. 'Feature 4', one per column of the sample array.
col_names = [f'Feature {idx}' for idx in range(1, 5)]
# Wrap the raw array in a labelled DataFrame so it can be displayed as a table.
df = pd.DataFrame(data=data, columns=col_names)

In [None]:
# Show the raw feature table. `style_dataframe` is a project-local helper
# (dimensionality_red.styler); presumably it returns a styled view that the
# notebook renders as the cell output -- TODO confirm.
style_dataframe(df)

## Step 1 Standardize the data

In [None]:
# Learn the per-feature scaling parameters from the raw data, then apply them.
# The fitted scaler is kept around in case the transform must be reused or inverted.
scaler = StandardScaler().fit(data)
data_scaled = scaler.transform(data)
# Keep a labelled copy of the standardized values for display and plotting.
df_scaled = pd.DataFrame(data=data_scaled, columns=col_names)
print(df_scaled)

## Step 2 Compute the covariance matrix

In [None]:
# rowvar=False tells NumPy that each column is a variable and each row an
# observation, which yields a (features x features) covariance matrix.
# NOTE: np.cov normalizes by N-1 by default, whereas StandardScaler scales
# with the population standard deviation (ddof=0), so the diagonal comes out
# as N/(N-1) rather than exactly 1. The eigenvectors are unaffected.
cov_matrix = np.cov(data_scaled, rowvar=False)
# Label both axes with the feature names so the table reads as a symmetric grid.
df_cov_mat = pd.DataFrame(data=cov_matrix, index=col_names, columns=col_names)
style_dataframe(df_cov_mat)

## Step 3 Compute Eigenvalues and Eigenvectors

In [None]:
# A covariance matrix is symmetric, so use the symmetric solver `eigh` rather
# than the general-purpose `eig`: it always returns real eigenvalues and
# orthonormal eigenvectors, whereas `eig` can leak complex dtypes or slightly
# non-orthogonal vectors through floating-point round-off.
# `eigh` returns eigenvalues in ascending order; column i of `eigenvecs` is the
# eigenvector for `eigenvals[i]`. Eigenvector signs are arbitrary, as always
# in PCA, and may differ from what `eig` would report.
eigenvals, eigenvecs = np.linalg.eigh(cov_matrix)
print("Eigenvalues:\n", eigenvals)
print("Eigenvectors:\n", eigenvecs)

## Step 4 Sort the Eigenvalues and Eigenvectors

In [None]:
# argsort ranks eigenvalues from smallest to largest; flipping that ranking
# puts the direction with the largest variance first.
sort_indices = np.flip(np.argsort(eigenvals))
# Reorder eigenvalues and the matching eigenvector columns together so that
# column i of `eigenvecs` still belongs to `eigenvals[i]`.
eigenvals, eigenvecs = eigenvals[sort_indices], eigenvecs[:, sort_indices]

In [None]:
# Report the reordered eigenpairs, heading first and values on the next line.
for _heading, _values in (
    ("Sorted Eigenvalues:\n", eigenvals),
    ("Sorted Eigenvectors:\n", eigenvecs),
):
    print(_heading, _values)

## Step 5 Choose Principal Components

In [None]:
# Number of principal components to keep. It is defined once so the slice and
# the printed heading cannot drift apart when the value is changed.
n_components = 3
# The eigenvector columns are already sorted by descending eigenvalue, so the
# leading columns are the directions of greatest variance.
top_eigenvecs = eigenvecs[:, :n_components]
print(f"Top {n_components} Principal Components:\n", top_eigenvecs)

## Step 6 Project data onto the new axes

In [None]:
# Project the standardized samples onto the selected eigenvectors:
# (n_samples x n_features) @ (n_features x n_components).
data_pca = data_scaled @ top_eigenvecs
# Build the 'PC1', 'PC2', ... labels from the projected width so the DataFrame
# construction keeps working if a different number of components is selected.
pc_labels = [f'PC{i}' for i in range(1, data_pca.shape[1] + 1)]
df_pca = pd.DataFrame(data_pca, columns=pc_labels)
# df_pca is already a DataFrame, so it is passed directly without re-wrapping.
style_dataframe(df_pca)

## Step 7 (Optional) Visualize Projections

In [None]:
# Plot the standardized data together with its PCA projection.
# `plot_pca_projection` is a project-local helper (modelviz.pca); its handling
# of the 4-column original versus the 3-column projection, and the meaning of
# `labelpad` / `projected_color`, are not visible here -- presumably axis-label
# padding and the marker colour of the projected points; TODO confirm.
from modelviz.pca import plot_pca_projection
plot_pca_projection(df_original=df_scaled, df_projected=df_pca,
                    labelpad=2, projected_color='black')