In [1]:
import pandas as pd
import numpy.linalg as la
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

%matplotlib notebook

In [2]:
pca = pd.read_csv('EvandroEdit.csv', encoding='latin1', sep=';')
pca[0:3]

Unnamed: 0,ID,D,M,A,S,X,Y
0,22210384,30,11,2017,35773,369,402
1,22210384,30,11,2017,35773,639,447
2,22210384,30,11,2017,35773,702,460


In [3]:
# convert matrix to float
pca = pca.astype('float64')

# drops the empty line at file-end
pca.dropna(how="all", inplace=True)

pca.tail()

Unnamed: 0,ID,D,M,A,S,X,Y
432895,239747205.0,2.0,1.0,2018.0,41419.0,969.0,881.0
432896,239747205.0,2.0,1.0,2018.0,41419.0,1027.0,482.0
432897,674810745.0,2.0,1.0,2018.0,41419.0,1031.0,468.0
432898,925636237.0,2.0,1.0,2018.0,41419.0,1030.0,471.0
432899,925636237.0,2.0,1.0,2018.0,41419.0,1031.0,468.0


In [4]:
# split data table into data X and class labels y

X = pca.iloc[:,0:5].values
y = pca.iloc[:,5].values

In [5]:
# covariance matrix
cov_pca = np.cov(np.transpose(pca))

print('Covariance matrix \n%s' %cov_pca)

Covariance matrix 
[[ 8.34900963e+16 -1.10591835e+07 -2.72468955e+06  2.47168650e+05
   1.08568581e+10 -4.60293720e+07  1.73782005e+08]
 [-1.10591835e+07  5.49760930e+01  4.94143189e+00 -4.49425121e-01
  -5.81125486e+03  3.46280402e+01  6.50949059e+01]
 [-2.72468955e+06  4.94143189e+00  3.70580434e+00 -3.36882131e-01
  -2.84288797e+03 -2.28127187e+01  1.92536319e+01]
 [ 2.47168650e+05 -4.49425121e-01 -3.36882131e-01  3.06260932e-02
   2.58630199e+02  2.07418364e+00 -1.75396418e+00]
 [ 1.08568581e+10 -5.81125486e+03 -2.84288797e+03  2.58630199e+02
   2.02902932e+08 -1.11259478e+05  1.82967635e+04]
 [-4.60293720e+07  3.46280402e+01 -2.28127187e+01  2.07418364e+00
  -1.11259478e+05  3.16900411e+05  8.98658424e+02]
 [ 1.73782005e+08  6.50949059e+01  1.92536319e+01 -1.75396418e+00
   1.82967635e+04  8.98658424e+02  7.94507128e+04]]


In [6]:
# eigenvectors and eigenvalues 

eig_vals, eig_vecs = la.eig(cov_pca)

print('Eigenvectors \n%s' %eig_vecs)
print('\nEigenvalues \n%s' %eig_vals)

Eigenvectors 
[[-1.00000000e+00 -1.30038151e-07 -4.71986081e-10  2.07145979e-09
   1.32783127e-10  1.90888857e-11 -6.63174604e-15]
 [ 1.32461022e-10 -2.86337373e-05 -1.00010247e-04 -8.25309963e-04
   9.95569308e-01 -9.40269198e-02 -4.37785318e-06]
 [ 3.26348833e-11 -1.40093493e-05  7.66946995e-05 -2.46880521e-04
   9.36402879e-02  9.91481903e-01 -9.05266012e-02]
 [-2.96045472e-12  1.27449345e-06 -6.97346977e-06  2.24896440e-05
  -8.51626287e-03 -9.01251256e-02 -9.95894038e-01]
 [-1.30037677e-07  9.99999845e-01 -5.48822674e-04  9.21715361e-05
   2.98567928e-05  1.13753756e-05  9.21689802e-10]
 [ 5.51315354e-10 -5.49169827e-04 -9.99992514e-01  3.82819576e-03
  -8.91070913e-05  8.67144238e-05  2.53390261e-09]
 [-2.08146850e-09  9.00970052e-05 -3.82818373e-03 -9.99992297e-01
  -8.45307638e-04 -1.68871390e-04 -4.44253501e-08]]

Eigenvalues 
[8.34900963e+16 2.02901583e+08 3.16842772e+05 7.94452846e+04
 5.52105918e+01 3.22986365e+00 1.26710650e-06]


In [7]:
# Make a list of (eigenvalue, eigenvector) tuples
eig_pairs = [(np.abs(eig_vals[i]), eig_vecs[:,i]) for i in range(len(eig_vals))]

# Sort the (eigenvalue, eigenvector) tuples from high to low
eig_pairs.sort()
eig_pairs.reverse()

# Visually confirm that the list is correctly sorted by decreasing eigenvalues
print('Eigenvalues in descending order:')
for i in eig_pairs:
    print(i[0])

Eigenvalues in descending order:
8.349009629072845e+16
202901583.33402073
316842.77249009715
79445.28458337946
55.21059180364557
3.2298636461151062
1.2671065049019067e-06


In [8]:
# principal components
matrix_w = np.hstack((eig_pairs[2][1].reshape(7,1), 
                      eig_pairs[3][1].reshape(7,1)))

print('Matrix W:\n', matrix_w)

Matrix W:
 [[-4.71986081e-10  2.07145979e-09]
 [-1.00010247e-04 -8.25309963e-04]
 [ 7.66946995e-05 -2.46880521e-04]
 [-6.97346977e-06  2.24896440e-05]
 [-5.48822674e-04  9.21715361e-05]
 [-9.99992514e-01  3.82819576e-03]
 [-3.82818373e-03 -9.99992297e-01]]


In [9]:
Y = pca.dot(matrix_w)

In [10]:
Y[0:10]

Unnamed: 0,0,1
0,-390.195906,-397.223152
1,-660.366153,-441.189193
2,-723.415448,-453.947916
3,-728.786252,-453.318014
4,-1077.673171,-422.982105
5,-1126.707258,-431.794454
6,-1104.136529,-566.249594
7,-1102.178654,-577.257166
8,-999.398147,-708.877008
9,-752.835573,-1084.823512


In [11]:
'''# Fixing random state for reproducibility
matplotlib.rcParams['axes.unicode_minus'] = False
fig, ax = plt.subplots()
ax.plot(Y, 'o')
ax.set_title('Using hyphen instead of Unicode minus')
plt.show()'''

"# Fixing random state for reproducibility\nmatplotlib.rcParams['axes.unicode_minus'] = False\nfig, ax = plt.subplots()\nax.plot(Y, 'o')\nax.set_title('Using hyphen instead of Unicode minus')\nplt.show()"