In [113]:
#data generation and visualization
import numpy as np
import matplotlib.pyplot as plt
linalg = np.linalg

# Fixed seed so that "Restart Kernel -> Run All" draws the same samples
# (and therefore yields the same eigenvalues downstream) on every run.
SEED = 0
N = 1000  # number of samples to draw

# Per-feature means of the 4-dimensional Gaussian.
mean = [1, 1, 2, 1]
# Covariance matrix: every feature has unit variance; feature 0 is correlated
# with features 1, 2 and 3 (0.40, 0.35, 0.20), which are mutually uncorrelated.
cov = [[1.00, 0.4, 0.35, 0.20],
       [0.40, 1.0, 0, 0],
       [0.35, 0, 1.0, 0],
       [0.20, 0,   0, 1.0]]

np.random.seed(SEED)
# One row per sample, one column per feature.
data = np.random.multivariate_normal(mean, cov, N)

In [114]:
# Display the sampled array (one row per sample, one column per feature).
data

array([[ 0.81656319,  3.18856595,  1.44813922,  1.00047703],
       [ 1.56439989, -0.89911994,  2.49666464,  1.25320522],
       [-0.01432161,  1.31814505,  2.13205664,  2.1543507 ],
       ..., 
       [ 0.20342303,  0.65665006,  2.77868416,  1.78622714],
       [ 3.10490998,  1.76990431,  2.30730261,  0.46955475],
       [ 1.27199493, -0.28681438,  2.05182988,  0.04007219]])

In [115]:

def principal_component_analysis(data, n_components=2):
    """Project ``data`` onto its leading principal components.

    The sample covariance matrix of ``data`` is eigen-decomposed, the
    (eigenvalue, eigenvector) pairs are sorted by decreasing eigenvalue
    magnitude, and the samples are projected onto the ``n_components``
    leading eigenvectors. The sorted eigenvalues and the projection matrix
    are printed as a side effect.

    Parameters
    ----------
    data : array-like, shape (n_samples, n_features)
        One row per sample, one column per feature. At least two samples are
        required because the covariance is normalised by ``n_samples - 1``.
    n_components : int, optional
        Number of leading eigenvectors to keep (default 2). Must satisfy
        ``1 <= n_components <= n_features``.

    Returns
    -------
    tuple
        ``(T, eig_pairs)`` where ``T`` has shape ``(n_samples, n_components)``
        and holds the projected samples, and ``eig_pairs`` is a list of
        ``(eigenvalue, eigenvector)`` tuples for *all* features, sorted by
        decreasing eigenvalue.

    Raises
    ------
    ValueError
        If ``data`` is not 2-dimensional, has fewer than two rows, or
        ``n_components`` is out of range.

    Notes
    -----
    * The covariance is computed from mean-centred data, but the projection
      is applied to the samples as given (not centred), so ``T`` is offset by
      ``mean.dot(W)`` relative to centred PCA scores.
    * Eigenvectors are only defined up to sign; the sign of each column of
      ``W`` (and hence of ``T``) depends on the LAPACK routine used.
    """
    data = np.asarray(data, dtype=float)
    if data.ndim != 2:
        raise ValueError('data must be 2-dimensional (n_samples, n_features)')
    nrow, ncol = data.shape
    if nrow < 2:
        raise ValueError('at least two samples are needed to estimate a covariance')
    if not 1 <= n_components <= ncol:
        raise ValueError('n_components must be between 1 and the number of features')

    # Sample covariance matrix (unbiased, normalised by n_samples - 1).
    centered = data - np.mean(data, axis=0)
    covariance_matrix = centered.T.dot(centered) / (nrow - 1)

    # The covariance matrix is symmetric, so use eigh: it guarantees real
    # eigenvalues and orthonormal eigenvectors, which eig does not.
    eigen_val, eigen_vec = np.linalg.eigh(covariance_matrix)

    # Sort by decreasing eigenvalue magnitude. Sorting indices (instead of the
    # tuples themselves) avoids comparing eigenvector arrays when two
    # eigenvalues tie, which would raise ValueError. Mergesort is stable, and
    # reversing reproduces the ordering of an ascending sort followed by
    # reverse().
    magnitudes = np.abs(eigen_val)
    order = np.argsort(magnitudes, kind='mergesort')[::-1]
    eig_pairs = [(magnitudes[i], eigen_vec[:, i]) for i in order]

    # verification of correctly sorted eigenvalues by decreasing order
    print('Eigenvalues in descending order:')
    for i in eig_pairs:
        print(i[0])

    # Reduce the d-dimensional feature space to a k-dimensional subspace by
    # stacking the k eigenvectors with the largest eigenvalues as the columns
    # of the d x k projection matrix W.
    matrix_w = np.column_stack([vec for _, vec in eig_pairs[:n_components]])
    print('Matrix W: \n', matrix_w)

    # Projection onto the new feature space: T is the (n_samples x k) result.
    T = data.dot(matrix_w)
    result = (T, eig_pairs)
    return result

In [116]:
# Run PCA on the generated samples. The function prints the sorted eigenvalues
# and the projection matrix W; the cell then displays its return value: the
# projected samples and the sorted (eigenvalue, eigenvector) pairs.
principal_component_analysis(data)

Eigenvalues in descending order:
1.65151307918
1.00306395696
0.938638142983
0.466588904439
('Matrix W: \n', array([[-0.72401025, -0.08319829],
       [-0.53537919,  0.0635493 ],
       [-0.30969352, -0.65409746],
       [-0.30539843,  0.74913019]]))


(array([[-3.05231543, -0.06304216],
        [-1.80719922, -0.88154197],
        [-2.0135579 ,  0.30427506],
        ..., 
        [-1.90488859, -0.45460836],
        [-4.05351454, -1.30329016],
        [-1.41505932, -1.43613208]]),
 [(1.6515130791820885,
   array([-0.72401025, -0.53537919, -0.30969352, -0.30539843])),
  (1.0030639569550748,
   array([-0.08319829,  0.0635493 , -0.65409746,  0.74913019])),
  (0.93863814298268089,
   array([ 0.04741339, -0.66768295,  0.52785617,  0.52279949])),
  (0.4665889044394339,
   array([-0.68310993,  0.51335181,  0.4445383 ,  0.26873118]))])