In [180]:
import numpy as np

#load in data
X = np.array([[-2, 1, 4, 6, 5, 3, 6, 2], [9, 3, 2, -1, -4, -2, -4, 5], [0, 7, -5, 3, 2, -3, 4, 6]])

#compute mean of data
mu = mean(X)
print(f'mean = \n{mu}\n')

#center data matrix
X1 = center(X)
print(f'centered data matrix = \n{X1}\n')

#compute covariance matrix
C = covariance(X1)
print(f'covariance matrix = \n{C}\n')

#get the first principal value and the first principal component
fpv, fpc = first_principals(C)
print(f'first principal value = \n{fpv}')
print(f'first principal component = \n{fpc}\n')

#1D representation of the data matrix
X_1d = oneD(X, fpc)
print(f'1D representation of the data matrix: \n{X_1d}\n')

#k-dimensional representation of the data matrix
X_2d, W = mypca(X,2)
print(f'2D representation of the data matrix: \n{X_2d}\n')

#find the optimal representation of the data matrix
X_kd = optimal(X)

mean = 
[[3.125]
 [1.   ]
 [1.75 ]]

centered data matrix = 
[[-5.125 -2.125  0.875  2.875  1.875 -0.125  2.875 -1.125]
 [ 8.     2.     1.    -2.    -5.    -3.    -5.     4.   ]
 [-1.75   5.25  -6.75   1.25   0.25  -4.75   2.25   4.25 ]]

covariance matrix = 
[[ 52.875 -78.     -1.75 ]
 [-78.    148.      6.   ]
 [ -1.75    6.    123.5  ]]

first principal value = 
192.3336221694256
first principal component = 
[-0.48990322  0.86889736 -0.07079702]

1D representation of the data matrix: 
[ 8.79988264  1.62120968  0.13216694 -4.02070776 -6.06669958 -2.99511331
 -6.69819685  2.93989819]

2D representation of the data matrix: 
[ 0.76838991  7.24895408 -4.75449724  2.97411133  1.70135538 -3.12727651
  3.70577719  6.4415216 ]

the optimal representation of the data is of dimension d = 1
1-dimensional representation: 
[ 8.79988264  1.62120968  0.13216694 -4.02070776 -6.06669958 -2.99511331
 -6.69819685  2.93989819]


In [33]:
def mean(X):
    n = np.size(X,1)
    ones = np.ones((n,1))
    return (1/n)*(X.dot(ones))

In [41]:
def center(X):
    I = np.identity(n)
    return X.dot(I-(1/n)*(ones.dot(np.transpose(ones))))    

In [49]:
def covariance(X1):
    return X1.dot(np.transpose(X1))

In [90]:
def first_principals(C):
    u, v = np.linalg.eig(C)
    i = np.argmax(u)
    return u[i], v[i]

In [131]:
def oneD(X, fpc):
    return np.transpose(fpc).dot(X)

In [135]:
def mypca(X, k):
    d = np.size(X,0)
    mu = mean(X)
    X1 = center(X)
    u, v = np.linalg.eig(C)
    for j in range(0,k):
        i = np.argmax(u)
        W = v[i]
        u = np.delete(u,i)
        v = np.delete(v,i,0)
        j += 1
    return np.transpose(W).dot(X), W
    

In [179]:
def optimal(X):
    d = np.size(X,0)
    Z, W = mypca(X,1)
    W.shape = (d,1)
    err = np.linalg.norm(X-W.dot(np.transpose(W).dot(X)))
    ERR = np.array(err)
    for k in range(1,d):
        Z, W = mypca(X,k)
        W.shape = (d,1)
        err = np.linalg.norm(X-W.dot(np.transpose(W).dot(X)))
        ERR = np.append(ERR, err)
        k += 1
    min = np.argmin(ERR)
    print(f'the optimal representation of the data is of dimension d = {min+1}')
    Z, W = mypca(X,min+1)
    print(f'{min+1}-dimensional representation: \n{Z}')
    return Z
    