<a href="https://colab.research.google.com/github/Yonah18/ML_Learning/blob/main/PCA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd

# PCA from scratch on two internal-exam score columns: mean-center the data,
# eigendecompose the covariance matrix, project onto the top-k eigenvectors,
# and report the share of variance captured by each component.

# Step 1: Create the dataset
data = {
    "Student": ["Aarav", "Diya", "Karthik", "Meera", "Rahul",
                "Sneha", "Arjun", "Pooja", "Nithya", "Varun"],
    "Internal1": [48, 55, 42, 50, 38, 46, 59, 44, 51, 36],
    "Internal2": [52, 57, 45, 53, 41, 49, 58, 47, 54, 40]
}

df = pd.DataFrame(data)

# Step 2: Select numerical features
X = df[["Internal1", "Internal2"]].values

# Step 3: Mean centering
mean = np.mean(X, axis=0)
X_centered = X - mean

print("Mean:\n", mean)
print("\nMean Centered Data:\n", X_centered)

# Step 4: Covariance matrix
# np.cov treats rows as variables by default, hence the transpose
# (samples are rows in X_centered).
cov_matrix = np.cov(X_centered.T)
print("\nCovariance Matrix:\n", cov_matrix)

# Step 5: Eigenvalues and Eigenvectors
# A covariance matrix is symmetric, so use eigh: it always returns real
# eigenvalues and orthonormal eigenvectors, whereas the general-purpose eig
# makes neither guarantee.
eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

# Step 6: Sort eigenvalues & eigenvectors
# Largest variance first. Sorting happens before anything is printed so the
# reported values line up with the component order actually used below.
idx = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[idx]
eigenvectors = eigenvectors[:, idx]

# An eigenvector is only defined up to sign. Flip each column so that its
# largest-magnitude entry is positive, which makes the projection reproducible
# across NumPy/LAPACK builds.
pivot_rows = np.argmax(np.abs(eigenvectors), axis=0)
signs = np.sign(eigenvectors[pivot_rows, np.arange(eigenvectors.shape[1])])
signs[signs == 0] = 1.0
eigenvectors = eigenvectors * signs

print("\nEigenvalues:\n", eigenvalues)
print("\nEigenvectors:\n", eigenvectors)

# Step 7: Select principal component (k=1)
k = 1
principal_components = eigenvectors[:, :k]

# Step 8: Transform the data
# Project every centered sample onto the top-k eigenvector(s).
X_pca = X_centered @ principal_components

print("\nPrincipal Component:\n", principal_components)
print("\nTransformed Data (PCA Output):\n", X_pca)

# Step 9: Explained variance
# Each eigenvalue divided by the total variance, i.e. a ratio summing to 1.
explained_variance = eigenvalues / np.sum(eigenvalues)
print("\nExplained Variance Ratio:\n", explained_variance)


Mean:
 [46.9 49.6]

Mean Centered Data:
 [[  1.1   2.4]
 [  8.1   7.4]
 [ -4.9  -4.6]
 [  3.1   3.4]
 [ -8.9  -8.6]
 [ -0.9  -0.6]
 [ 12.1   8.4]
 [ -2.9  -2.6]
 [  4.1   4.4]
 [-10.9  -9.6]]

Covariance Matrix:
 [[52.32222222 44.95555556]
 [44.95555556 39.6       ]]

Eigenvalues:
 [91.36447784  0.55774439]

Eigenvectors:
 [[ 0.75501728 -0.65570489]
 [ 0.65570489  0.75501728]]

Principal Component:
 [[0.75501728]
 [0.65570489]]

Transformed Data (PCA Output):
 [[  2.40421075]
 [ 10.96785618]
 [ -6.71582718]
 [  4.5699502 ]
 [-12.35871587]
 [ -1.07293849]
 [ 14.64363019]
 [ -3.89438283]
 [  5.98067238]
 [-14.52445533]]

Explained Variance Ratio:
 [0.99393243 0.00606757]
