In [1]:
import numpy as np

In [2]:
# Toy dataset: four observations (rows), each with two features (columns).
data = np.array(
    [
        [1, 2],
        [2, 3],
        [3, 4],
        [4, 5],
    ]
)

In [3]:
# Step 1: Per-feature mean (one value per column of `data`).
# This is the offset subtracted in the centering step that follows.
mean = np.asarray(data).mean(axis=0)

In [4]:
# Step 2: Shift every feature to zero mean by removing the per-column mean
# (broadcast across all rows).
centered_data = np.subtract(data, mean)

In [5]:
# Step 3: Feature-by-feature covariance matrix.
# rowvar=False tells NumPy that columns are the variables (features)
# and rows are the observations.
covariance_matrix = np.cov(
    centered_data,
    rowvar=False,
)

In [6]:
# Step 4: Eigen-decompose the covariance matrix.
# A covariance matrix is symmetric, so use the symmetric solver `eigh`
# instead of the general-purpose `eig`: it returns real eigenvalues and
# orthonormal eigenvectors (stored as columns of `eigenvectors`).
# `eigh` returns eigenvalues in ascending order; Step 5 re-sorts them in
# descending order, so downstream cells are unaffected by that ordering.
# NOTE: the sign of each eigenvector is arbitrary, so projected values may
# come out negated relative to another solver or library.
eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

In [7]:
# Step 5: Rank the eigenvectors by eigenvalue magnitude, largest first.
# Each entry is (|eigenvalue|, eigenvector). Eigenvectors are the columns of
# `eigenvectors`, so iterating over its transpose yields one per eigenvalue.
eigen_pairs = sorted(
    zip(np.abs(eigenvalues), eigenvectors.T),
    key=lambda pair: pair[0],
    reverse=True,
)

In [8]:
# Step 6: Pick the leading principal component.
# `k` records the target dimensionality; in the cells shown here it is not
# used to select components -- only the first (largest-eigenvalue) pair is taken.
k = 1  # Reducing dimensionality to 1
# eigen_pairs is ordered largest-first, so entry 0 is the dominant direction.
top_eigenvalue, top_eigenvector = eigen_pairs[0]

In [9]:
# Step 7: Project each centered observation onto the chosen eigenvector,
# producing one coordinate per observation.
reduced_data = np.dot(centered_data, top_eigenvector)

In [10]:
# Show the projected (1-D) coordinates under a heading line.
print("Reduced data (dimensionality reduced to 1):", reduced_data, sep="\n")

Reduced data (dimensionality reduced to 1):
[-2.12132034 -0.70710678  0.70710678  2.12132034]


In [12]:
# PCA using scikit-learn's built-in implementation.
# NOTE: this cell rebinds `data` (now a plain list) and `reduced_data`,
# replacing the values produced by the manual steps above.

from sklearn.decomposition import PCA

# Same toy dataset: four observations, two features each.
data = [[first, first + 1] for first in range(1, 5)]

# Keep a single principal component.
pca = PCA(n_components=1)

# Fit on the data and project it in one call; the result has one column.
reduced_data = pca.fit_transform(data)

# Show the projected coordinates under a heading line.
print("Reduced data (dimensionality reduced to 1):", reduced_data, sep="\n")


Reduced data (dimensionality reduced to 1):
[[ 2.12132034]
 [ 0.70710678]
 [-0.70710678]
 [-2.12132034]]
