In [1]:
import numpy as np
from sklearn.decomposition import PCA

In [2]:
# Toy dataset: one row per observation (4 rows), one column per feature (2 columns)
data = np.array(
    [
        [4, 11],
        [8, 4],
        [13, 5],
        [7, 14],
    ]
)

In [14]:
# Step 1: Per-feature mean (averaging down the rows, one value per column).
# It is used in the next step to center the data around the origin.
mean = data.mean(axis=0)
mean

array([8. , 8.5])

In [5]:
# Step 2: Shift every observation so each feature has zero mean
# (`mean` broadcasts across the rows of `data`)
centered_data = np.subtract(data, mean)

In [6]:
# Step 3: Feature-by-feature covariance matrix of the centered data
covariance_matrix = np.cov(
    m=centered_data,
    rowvar=False,  # columns are the variables (features); rows are observations
)

In [7]:
# Step 4: Eigen-decompose the covariance matrix.
# A covariance matrix is symmetric, so use `eigh` (the symmetric/Hermitian
# solver): it guarantees real eigenvalues, returned in ascending order, and
# orthonormal eigenvectors, which the general-purpose `eig` does not promise.
# Column i of `eigenvectors` pairs with `eigenvalues[i]`. The sign of each
# eigenvector is arbitrary and may differ between solvers, so projections
# computed from it are only defined up to a sign flip.
eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

In [18]:
# Inspect the eigenvalues of the covariance matrix (one per principal axis)
eigenvalues

array([ 6.61513568, 30.38486432])

In [16]:
# Inspect the eigenvectors; column i is the eigenvector paired with eigenvalues[i]
eigenvectors

array([[-0.83025082,  0.55738997],
       [-0.55738997, -0.83025082]])

In [8]:
# Step 5: Pair each eigenvalue magnitude with its eigenvector (a column of
# `eigenvectors`) and order the pairs from largest to smallest eigenvalue
eigen_pairs = sorted(
    ((np.abs(value), eigenvectors[:, idx]) for idx, value in enumerate(eigenvalues)),
    key=lambda pair: pair[0],
    reverse=True,
)

In [17]:
# Inspect the sorted (eigenvalue, eigenvector) pairs, largest eigenvalue first
eigen_pairs

[(30.384864324004713, array([ 0.55738997, -0.83025082])),
 (6.615135675995287, array([-0.83025082, -0.55738997]))]

In [15]:
# Step 6: Pick the principal axis to keep.
# `k` records the target dimensionality (1); this cell takes exactly one
# eigenvector: the one belonging to the first, i.e. largest, sorted pair.
k = 1
top_eigenvector = eigen_pairs[0][-1]  # last item of the (eigenvalue, eigenvector) pair
top_eigenvector

array([ 0.55738997, -0.83025082])

In [20]:
# Inspect the mean-centered data that gets projected in the next step
centered_data

array([[-4. ,  2.5],
       [ 0. , -4.5],
       [ 5. , -3.5],
       [-1. ,  5.5]])

In [10]:
# Step 7: Project each centered observation onto the chosen principal axis,
# giving one coordinate per observation
reduced_data = centered_data @ top_eigenvector

In [11]:
# Show the 1-D projection under a heading (heading and values on separate lines)
print("Reduced data (dimensionality reduced to 1):", reduced_data, sep="\n")

Reduced data (dimensionality reduced to 1):
[-4.30518692  3.73612869  5.69282771 -5.12376947]


In [13]:
## INBUILT FUNCTION FOR PCA [CROSS CHECKING]

# Fit scikit-learn's PCA with a single component on the same `data` used in the
# manual steps above, rather than re-declaring the literal, so the two results
# are guaranteed to come from identical input. PCA centers the data itself.
pca = PCA(n_components=1)

# Fit the PCA model and transform the data; the result has shape (n_samples, 1)
reduced_data = pca.fit_transform(data)

# Perform the actual cross-check: recompute the hand-rolled projection and
# compare it with scikit-learn's. The sign of a principal axis is arbitrary,
# so the two projections may legitimately differ by a factor of -1.
manual_projection = centered_data.dot(top_eigenvector)
sklearn_projection = reduced_data.ravel()
assert (
    np.allclose(sklearn_projection, manual_projection)
    or np.allclose(sklearn_projection, -manual_projection)
), "manual PCA projection disagrees with sklearn.decomposition.PCA"

# Print the reduced data
print("Reduced data (dimensionality reduced to 1):")
print(reduced_data)


Reduced data (dimensionality reduced to 1):
[[-4.30518692]
 [ 3.73612869]
 [ 5.69282771]
 [-5.12376947]]
