<a href="https://colab.research.google.com/github/abhiram274/MachineLearning_concepts/blob/main/PCA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [5]:

# Step 0: Create Sample Data
# Ten paired observations of two strongly correlated features; the small size
# keeps every intermediate PCA result easy to inspect by eye.
feature1_values = [2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2, 1, 1.5, 1.1]
feature2_values = [2.4, 0.7, 2.9, 2.2, 3, 2.7, 1.6, 1.1, 1.6, 0.9]

data = pd.DataFrame(
    {
        'Feature1': feature1_values,
        'Feature2': feature2_values,
    }
)
print("Original Data:\n", data)

Original Data:
    Feature1  Feature2
0       2.5       2.4
1       0.5       0.7
2       2.2       2.9
3       1.9       2.2
4       3.1       3.0
5       2.3       2.7
6       2.0       1.6
7       1.0       1.1
8       1.5       1.6
9       1.1       0.9


In [6]:
# Step 1: Standardize first
# PCA is driven by variance, so each feature is rescaled to zero mean and unit
# variance; otherwise the feature with the largest scale would dominate.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(data)
print("\nStep 1 - Scaled Data:\n", scaled_data)

# Step 2: Covariance matrix
# np.cov treats each ROW as one variable, hence the transpose. It normalises
# by N-1 by default while StandardScaler divides by N, so the diagonal comes
# out as N/(N-1) (1.111 for these 10 samples) rather than exactly 1.
cov_matrix = np.cov(scaled_data.T)
print("\nStep 2 - Covariance Matrix:\n", cov_matrix)


# Step 3: Eigenvectors and Eigenvalues
# A covariance matrix is symmetric, so eigh is the right solver: it always
# returns real values, whereas the general-purpose eig makes no ordering
# promise and may yield a complex dtype. eigh sorts ascending, so reorder to
# descending to make column 0 the direction of greatest variance (PC1).
# The sign of each eigenvector is arbitrary.
eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
descending = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[descending]
eigenvectors = eigenvectors[:, descending]
print("\nStep 3 - Eigenvalues:\n", eigenvalues)
print("\nStep 3 - Eigenvectors:\n", eigenvectors)

# Step 4: Select top k components
# scikit-learn performs the decomposition and the selection internally; the
# manual Steps 2-3 above only illustrate what happens under the hood.
pca = PCA(n_components=1)  # Selecting 1 principal component
print("\nStep 4 - PCA object created with 1 component:\n", pca)

# Step 5: Transform the data
# Project the standardized samples onto the retained component.
principal_components = pca.fit_transform(scaled_data)  # Fit and transform the scaled data
pca_df = pd.DataFrame(data = principal_components, columns = ['PC1'])
print("\nStep 5 - Transformed Data (Lower Dimension):\n", pca_df)

# Step 6: Check Explained Variance
print("\nStep 6 - Explained Variance Ratio:", pca.explained_variance_ratio_)

# Sanity check tying the manual derivation to scikit-learn: the share of total
# variance carried by the leading eigenvalue from Step 3 must equal the ratio
# reported by the fitted PCA object. Silent on success, raises on mismatch.
np.testing.assert_allclose(
    eigenvalues[:1] / eigenvalues.sum(),
    pca.explained_variance_ratio_,
)


Step 1 - Scaled Data:
 [[ 0.92627881  0.61016865]
 [-1.7585873  -1.506743  ]
 [ 0.52354889  1.23278973]
 [ 0.12081898  0.36112022]
 [ 1.73173864  1.35731394]
 [ 0.6577922   0.9837413 ]
 [ 0.25506228 -0.38602507]
 [-1.08737078 -1.00864614]
 [-0.41615425 -0.38602507]
 [-0.95312747 -1.25769457]]

Step 2 - Covariance Matrix:
 [[1.11111111 1.0288103 ]
 [1.0288103  1.11111111]]

Step 3 - Eigenvalues:
 [2.13992141 0.08230081]

Step 3 - Eigenvectors:
 [[ 0.70710678 -0.70710678]
 [ 0.70710678  0.70710678]]

Step 4 - PCA object created with 1 component:
 PCA(n_components=1)

Step 5 - Transformed Data (Lower Dimension):
         PC1
0  1.086432
1 -2.308937
2  1.241919
3  0.340782
4  2.184290
5  1.160739
6 -0.092605
7 -1.482108
8 -0.567226
9 -1.563287

Step 6 - Explained Variance Ratio: [0.96296464]
