<a href="https://colab.research.google.com/github/gayakwadmilind/CoPo/blob/main/PCA_Basic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd

# Step 1: Create the dataset
data = np.array([[2, 3],
                 [3, 5],
                 [5, 7],
                 [6, 8],
                 [8, 9]])

# Convert the array to a pandas DataFrame for convenience
df = pd.DataFrame(data, columns=['X1', 'X2'])

# Step 2: Standardize the data (zero mean and unit variance)
mean = np.mean(df, axis=0)
std_dev = np.std(df, axis=0)
df_standardized = (df - mean) / std_dev

# Step 3: Calculate the covariance matrix
cov_matrix = np.cov(df_standardized.T)

# Step 4: Compute eigenvalues and eigenvectors
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

# Step 5: Sort the eigenvalues in descending order
sorted_indices = np.argsort(eigenvalues)[::-1]
sorted_eigenvalues = eigenvalues[sorted_indices]
sorted_eigenvectors = eigenvectors[:, sorted_indices]

# Step 6: Choose the number of principal components (k)
# Here, let's reduce the data to 1 dimension (PC1)
k = 1
eigenvectors_subset = sorted_eigenvectors[:, :k]

# Step 7: Transform the data
df_transformed = np.dot(df_standardized, eigenvectors_subset)

# Results
print("Original Data:\n", df)
print("\nStandardized Data:\n", df_standardized)
print("\nCovariance Matrix:\n", cov_matrix)
print("\nEigenvalues:\n", eigenvalues)
print("\nEigenvectors:\n", eigenvectors)
print("\nTransformed Data (PCA, k=1):\n", df_transformed)


Original Data:
    X1  X2
0   2   3
1   3   5
2   5   7
3   6   8
4   8   9

Standardized Data:
          X1        X2
0 -1.311220 -1.578410
1 -0.842927 -0.649934
2  0.093659  0.278543
3  0.561951  0.742781
4  1.498537  1.207020

Covariance Matrix:
 [[1.25       1.21743733]
 [1.21743733 1.25      ]]

Eigenvalues:
 [0.03256267 2.46743733]

Eigenvectors:
 [[-0.70710678 -0.70710678]
 [ 0.70710678 -0.70710678]]

Transformed Data (PCA, k=1):
 [[ 2.04327733]
 [ 1.05561208]
 [-0.26318627]
 [-0.92258544]
 [-1.9131177 ]]
