In [20]:
import numpy as np
import pandas as pd

In [21]:
# Generating a dummy dataset
# randint(start, stop, numberLimit)
X = np.random.randint(10, 50, 100).reshape(20,5)
# X = [[4, 11], [8, 4], [13, 5], [7, 14]]
# print(X)

# mean centering the data
# axis = 0 implies that the mean is calculated on each column, i.e., vertically
X_meaned = X - np.mean(X, axis = 0)
# print(X_meaned)

In [22]:
# if rowvar = True, then rows are variables and columns are observations
covariance_matrix = np.cov(X_meaned, rowvar = False)
# print(covariance_matrix)

In [23]:
# Calculate the Eigen values and it's corresponding Eigen Vectors from the Covariance Matrix
# it returns eigen_values -> 1D and eigen_vector -> of N Dimension, where N = number of variables
# linalg -> linearAlgebra
eigen_values, eigen_vectors = np.linalg.eigh(covariance_matrix)
print(eigen_values)
print(eigen_vectors)

[ 39.67757079 112.89117052 132.9582837  152.87183065 257.89851276]
[[ 0.52954264  0.45434797  0.23892529 -0.5457519   0.39777139]
 [-0.57635048 -0.3753433   0.32004676 -0.63904897  0.12698039]
 [ 0.041607   -0.35904807 -0.55366462  0.0448766   0.74886242]
 [ 0.3677713  -0.52729326  0.65656754  0.34488947  0.19151083]
 [-0.50041766  0.49571326  0.3207169   0.41570256  0.4776846 ]]


In [24]:
# Sorting the eigen values in descending order, by doing this the columns in the eigen_vector matrix will also get reshuffled as per descending order of the eigen value
sorted_index = np.argsort(eigen_values)[::-1]
# print(sorted_index)
sorted_eigen_values = eigen_values[sorted_index]
print(sorted_eigen_values)
sorted_eigen_vectors = eigen_vectors[:, sorted_index]
print(sorted_eigen_vectors)

[257.89851276 152.87183065 132.9582837  112.89117052  39.67757079]
[[ 0.39777139 -0.5457519   0.23892529  0.45434797  0.52954264]
 [ 0.12698039 -0.63904897  0.32004676 -0.3753433  -0.57635048]
 [ 0.74886242  0.0448766  -0.55366462 -0.35904807  0.041607  ]
 [ 0.19151083  0.34488947  0.65656754 -0.52729326  0.3677713 ]
 [ 0.4776846   0.41570256  0.3207169   0.49571326 -0.50041766]]


In [25]:
# reducing the number of dimensions to n
# the first 'n' eigen values in the sorted_eigen_values will be selected
n_components = 2
selected_eigen_values = sorted_eigen_values[: n_components]
print(selected_eigen_values)
selected_eigen_vectors = sorted_eigen_vectors[:, 0: n_components]
print(selected_eigen_vectors)

[257.89851276 152.87183065]
[[ 0.39777139 -0.5457519 ]
 [ 0.12698039 -0.63904897]
 [ 0.74886242  0.0448766 ]
 [ 0.19151083  0.34488947]
 [ 0.4776846   0.41570256]]


In [26]:
# Obtaining the new dataset
# X_reduced = np.dot(selected_eigen_vectors.transpose(),X.transpose()).transpose()

transformed_dataset = np.dot(X, selected_eigen_vectors)
print(transformed_dataset)
# print(X_reduced)

[[ 42.31722431   5.98772019]
 [ 59.5804389   -2.69044621]
 [ 71.46317836 -16.14942806]
 [ 81.6054247  -24.45071197]
 [ 41.05373742  -9.06748934]
 [ 76.2369001   13.12297865]
 [ 76.48337567 -16.24018415]
 [ 60.09389586  -8.36756348]
 [ 57.59174488 -19.94117322]
 [ 29.41893915 -21.90073487]
 [ 73.67090275  14.81555988]
 [ 49.19525888 -10.2984548 ]
 [ 43.45550509 -19.14234534]
 [ 59.08203284  -3.69584995]
 [ 67.64646947 -25.10853547]
 [ 79.26031267 -19.28794592]
 [ 43.99362352 -24.67468609]
 [ 34.47768116   0.84313601]
 [ 75.05163268 -13.52046421]
 [ 53.45348083   1.50505796]]
