<a href="https://colab.research.google.com/github/abhirampitla-13/2311cs020528/blob/main/principal_component.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import numpy as np

# Step 1: Data standardization
def standardize(X):
    return (X - np.mean(X, axis=0)) / np.std(X, axis=0)

# Step 2: Covariance matrix calculation
def compute_covariance_matrix(X):
    return np.cov(X.T)

# Step 3: Eigenvalue and eigenvector calculation
def find_eigenvectors_and_eigenvalues(X):
    cov_matrix = compute_covariance_matrix(X)
    eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)
    return eigenvalues, eigenvectors

# Step 4: Principal component calculation
def project_data(X, eigenvectors, k):
    sorted_index = np.argsort(eigenvalues)[::-1]  # Sort eigenvalues in descending order
    sorted_eigenvectors = eigenvectors[:, sorted_index[:k]]  # Select top k eigenvectors
    return np.dot(X, sorted_eigenvectors)

# Step 5: Dimensionality reduction
def get_variance_explained(eigenvalues, k):
    return sum(eigenvalues[:k]) / sum(eigenvalues)

# Example usage
X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
X_std = standardize(X)
eigenvalues, eigenvectors = find_eigenvectors_and_eigenvalues(X_std)
projected_data = project_data(X_std, eigenvectors, 2)
variance_explained = get_variance_explained(eigenvalues, 2)

print("Standardized data:")
print(X_std)
print("Covariance matrix:")
print(compute_covariance_matrix(X_std))
print("Eigenvalues:")
print(eigenvalues)
print("Eigenvectors:")
print(eigenvectors)
print("Projected data:")
print(projected_data)
print("Variance explained:")
print(variance_explained)

Standardized data:
[[-1.22474487 -1.22474487 -1.22474487]
 [ 0.          0.          0.        ]
 [ 1.22474487  1.22474487  1.22474487]]
Covariance matrix:
[[1.5 1.5 1.5]
 [1.5 1.5 1.5]
 [1.5 1.5 1.5]]
Eigenvalues:
[0.  4.5 0. ]
Eigenvectors:
[[-0.81649658  0.57735027  0.        ]
 [ 0.40824829  0.57735027 -0.70710678]
 [ 0.40824829  0.57735027  0.70710678]]
Projected data:
[[-2.12132034e+00  1.68306266e-16]
 [ 0.00000000e+00  0.00000000e+00]
 [ 2.12132034e+00 -1.68306266e-16]]
Variance explained:
1.0
