In [61]:
# Libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import statsmodels.api as sm
from math import sqrt
from scipy import stats
from sklearn.decomposition import PCA
import scipy.linalg as la
from scipy.spatial import distance

In [62]:
M = np.array([[1, -2, 1, 2], [-1, 2, 3, 3], [1, 2, 0, 1], [-1, -1, 1, 1], [-1, -2, 3, -2], [1, 2, 1, 0]])
M

array([[ 1, -2,  1,  2],
       [-1,  2,  3,  3],
       [ 1,  2,  0,  1],
       [-1, -1,  1,  1],
       [-1, -2,  3, -2],
       [ 1,  2,  1,  0]])

In [63]:
# compute the row means
row_means = np.mean(M, axis=1)

In [64]:
# subtract the row means
M = M.astype(np.float)
for i in range(M.shape[1]): 
    M[:,i] -= row_means  

In [95]:
# compute the C matrix
A = M
A_transpose = np.transpose(A)
C = (np.matmul(A, A_transpose))

In [96]:
# compute the eigenvalues/eigenvectors (use svd)
D,U,V = la.svd(C, full_matrices=True, compute_uv=True)
print(D)

[[-0.34090297  0.5407334   0.55198553  0.10381803  0.08650431  0.5181152 ]
 [-0.37056232  0.3304082  -0.786594   -0.01593028  0.30655904  0.2013744 ]
 [ 0.26854707  0.01875492 -0.16367471  0.91315957 -0.18608734  0.17958956]
 [-0.34848415  0.30395455 -0.1051995  -0.03519041 -0.83099302 -0.28864465]
 [-0.73328532 -0.61575309  0.10271332  0.26181537  0.06241564 -0.01215468]
 [ 0.12329898 -0.3557328  -0.16785553 -0.29209927 -0.41166122  0.75847816]]


In [97]:
# principal components
u1 = (np.array([[-0.341, -0.371,  0.269, -0.348, -0.733,
          0.123]]))
u2 = (np.array([[ 0.541 ,  0.330 ,  0.019,  0.304, -0.616,
         -0.356]]))
u3 = np.array([[0.552,-0.787, -0.164,-0.105, 0.103, -0.168  ]])

In [98]:
X1 = A[:,0]
X1 = X1.reshape((6, 1))
X2 = A[:, 1]
X2 = X2.reshape((6, 1))
X3 = A[:,2]
X3 = X3.reshape((6, 1))
X4 = A[:, 3]
X4 = X4.reshape((6, 1))

In [100]:
print(np.dot(u1, X1))
print(np.dot(u2, X1))
print(np.dot(u3, X1))

print(np.dot(u1, X2))
print(np.dot(u2, X2))

print(np.dot(u1, X3))
print(np.dot(u2, X3))

print(np.dot(u1, X4))
print(np.dot(u2, X4))

[[1.56425]]
[[-0.633]]
[[2.49375]]
[[2.59925]]
[[-0.987]]
[[-3.81675]]
[[-1.188]]
[[-0.34675]]
[[2.808]]


In [101]:
score_matrix = np.array([[1.56372171,
-0.63433385,
2.59987503,
-0.98653425,
-3.81718422,
-1.18655924,
-0.3464125,
2.80742733]])
score_matrix = score_matrix.reshape(2,4)

In [102]:
score_matrix

array([[ 1.56372171, -0.63433385,  2.59987503, -0.98653425],
       [-3.81718422, -1.18655924, -0.3464125 ,  2.80742733]])

In [103]:
# part B
B = np.array([-1, -2, -1, 0, 2, 1, 3, 2, 1, 2, 0, 3, 2, 3, 1, 1, -1, 2, 3, 1, 0, 1, -1, -2])
B = B.reshape(6,4)

In [104]:
B

array([[-1, -2, -1,  0],
       [ 2,  1,  3,  2],
       [ 1,  2,  0,  3],
       [ 2,  3,  1,  1],
       [-1,  2,  3,  1],
       [ 0,  1, -1, -2]])

In [105]:
# subtract the row means
B = B.astype(np.float)
for i in range(B.shape[1]): 
    B[:,i] -= row_means  

In [106]:
a = (np.dot(u1, B[:,0]))
b = (np.dot(u2, B[:,0]))
d = (np.dot(u1, B[:,1]))
e = (np.dot(u2, B[:,1]))
g = (np.dot(u1, B[:,2]))
h = (np.dot(u2, B[:,2]))
j = (np.dot(u1, B[:,3]))
k = (np.dot(u2, B[:,3]))

In [76]:
sum_sq = np.sum(np.square(a - score_matrix[:,0])) 
sum_sq

16.861218365867906

In [111]:
dst = distance.euclidean(score_matrix[:,0], a)
dst2 = distance.euclidean(score_matrix[:,0], b)
dst


4.106858941005563

In [81]:
W = score_matrix[:,1]
W.reshape(2,1)

array([[-0.63433385],
       [-1.18655924]])

In [82]:
dst = distance.euclidean(b, W)
dst

2.092273579632036

In [83]:
sum_sq = np.sum(np.square(b - score_matrix[:,0])) 
sum_sq

20.053215459110035

In [84]:
sum_sq = np.sum(np.square(d - score_matrix[:,1])) 
sum_sq

0.8011209264391677

In [85]:
sum_sq = np.sum(np.square(e - score_matrix[:,1])) 
sum_sq

3.5218625382537416

In [86]:
sum_sq = np.sum(np.square(g - score_matrix[:,2])) 
sum_sq

44.99719192617756

In [87]:
sum_sq = np.sum(np.square(h - score_matrix[:,2])) 
sum_sq

18.740413799294533

In [88]:
sum_sq = np.sum(np.square(k - score_matrix[:,3])) 
sum_sq

7.949046840470971

In [89]:
sum_sq = np.sum(np.square(l - score_matrix[:,3])) 
sum_sq

NameError: name 'l' is not defined

In [96]:
# part 3
Y1 = np.array([[1, 5, 1, 5, 5, 1], [-2, 3, 2, 3, 0, 2], [2, -3, 2, 3, 0, 0], [2, -2, 2, 2, -1, 1]])
Y1.reshape(6,4)

array([[ 1,  5,  1,  5],
       [ 5,  1, -2,  3],
       [ 2,  3,  0,  2],
       [ 2, -3,  2,  3],
       [ 0,  0,  2, -2],
       [ 2,  2, -1,  1]])

In [98]:
Y1 = Y1.astype(np.float)
for i in range(Y1.shape[1]): 
    Y1[:,i] -= row_means  
#print(Y1)

ValueError: operands could not be broadcast together with shapes (4,) (6,) (4,) 