Import libraries and define the dataset

In [19]:
import pandas as pd
import numpy as np
from matplotlib import pyplot
from sklearn.preprocessing import StandardScaler
%matplotlib inline

# Toy dataset: ten samples (rows) described by two features (columns).
# Each list below is one feature; column_stack places them side by side.
data = np.column_stack((
    [0.5, 1.0, 1.5, 1.1, 1.9, 2.2, 2.6, 2.0, 2.5, 3.1],  # first feature
    [0.7, 1.1, 1.6, 0.9, 2.2, 2.9, 2.7, 1.6, 2.4, 3.0],  # second feature
))


Standardize the features

In [20]:
# Learn the per-feature scaling parameters from the dataset, then apply them
# to the same samples to obtain the standardized features.
sc = StandardScaler().fit(data)
std_data = sc.transform(data)
print("Standardized dataset", std_data, sep="\n")

Standardized dataset
[[-1.740994   -1.506743  ]
 [-1.09136937 -1.00864614]
 [-0.44174475 -0.38602507]
 [-0.96144445 -1.25769457]
 [ 0.07795496  0.36112022]
 [ 0.46772973  1.23278973]
 [ 0.98742943  0.9837413 ]
 [ 0.20787988 -0.38602507]
 [ 0.85750451  0.61016865]
 [ 1.63705406  1.35731394]]


Compute Covariance Matrix



In [21]:
# Covariance between the standardized features. rowvar=False tells NumPy that
# every column is a variable and every row an observation, so no manual
# transpose is needed.
# np.cov normalizes by N - 1 by default, which is why the diagonal prints as
# 10/9 (about 1.111) instead of exactly 1 for this 10-sample dataset.
Sigma = np.cov(std_data, rowvar=False)
print("Covariance Matrix", Sigma, sep="\n")

Covariance Matrix
[[1.11111111 1.03831938]
 [1.03831938 1.11111111]]


Obtain eigenvectors and eigenvalues

In [22]:
# Eigen-decomposition of the covariance matrix. Column i of eigenvec is the
# eigenvector that belongs to eigenval[i].
eigenval, eigenvec = np.linalg.eig(Sigma)
# A single print with a newline separator (and an empty string as the blank
# line) emits the same report as printing each piece on its own.
print("Eigenvalues", eigenval, "", "Eigenvector", eigenvec, sep="\n")

Eigenvalues
[2.1494305  0.07279173]

Eigenvector
[[ 0.70710678 -0.70710678]
 [ 0.70710678  0.70710678]]


Obtain the feature vector by projecting the standardized data onto the eigenvectors

In [23]:
# Project every standardized sample onto the eigenvector basis: column j of
# the result holds the samples' coordinates along eigenvector j.
featurevec = std_data.dot(eigenvec)
print("Feature Vector", featurevec, sep="\n")

Feature Vector
[[-2.29649685  0.16564047]
 [-1.48493521  0.05849416]
 [-0.58532165  0.03939976]
 [-1.56916825 -0.20948047]
 [ 0.31047304  0.20022808]
 [ 1.20244884  0.54097911]
 [ 1.39382819 -0.0026079 ]
 [-0.12596767 -0.41995421]
 [ 1.03780164 -0.17489286]
 [ 2.11733792 -0.19780613]]


List the (eigenvalue, eigenvector) pairs in descending order of eigenvalue

In [24]:
# Pair each eigenvalue magnitude with its eigenvector. The eigenvectors are
# the columns of eigenvec, i.e. the rows of eigenvec.T. Order the pairs from
# the largest magnitude to the smallest.
eigenlist = sorted(
    zip(np.abs(eigenval), eigenvec.T),
    key=lambda pair: pair[0],
    reverse=True,
)
print(eigenlist)

[(2.149430495007601, array([0.70710678, 0.70710678])), (0.07279172721462168, array([-0.70710678,  0.70710678]))]


Choose the eigenvector with the highest eigenvalue

In [25]:
# eigenlist is sorted by descending eigenvalue, so its first entry holds the
# principal direction. w stays a 1-D weight vector (one weight per feature).
# It is copied explicitly because the stored eigenvector is a view into
# eigenvec; the copy keeps w independent of that matrix.
# (A newaxis + hstack round trip yields this same 1-D array: hstack given a
# bare column array rather than a tuple flattens it again.)
w = eigenlist[0][1].copy()
print('Eigenvector with the highest value:\n', w)

Eigenvector with the highest value:
 [0.70710678 0.70710678]


Obtain 1D Dataset

In [26]:
# Project each standardized sample onto the principal direction w, then shape
# the result as a single-column matrix (one row per sample). Using -1 lets
# NumPy infer the number of rows, so the cell works for any sample count
# instead of only for a 10-row dataset.
pca_data = std_data.dot(w).reshape(-1, 1)
print("1D Dataset")
print(pca_data)

1D Dataset
[[-2.29649685]
 [-1.48493521]
 [-0.58532165]
 [-1.56916825]
 [ 0.31047304]
 [ 1.20244884]
 [ 1.39382819]
 [-0.12596767]
 [ 1.03780164]
 [ 2.11733792]]
