In [1]:
#importing required library
import os
import cv2
import numpy as np
import pandas as pd
import plotly
import plotly.plotly as py
from plotly.graph_objs import *
import plotly.tools as tls
import math

In [2]:
#initializing global parameters
folder_tr = "SEASFR/train"
folder_te = "SEASFR/test"
total_tr = len(next(os.walk(os.getcwd()+"/"+folder_tr))[2])
total_te = len(next(os.walk(os.getcwd()+"/"+folder_te))[2])
dim = 72
threshold = 90
print(total_tr)

1056


In [3]:
#PCA algorithm
#preparing lookup array
lookup = []
def fn(text):
    ans=[]
    for x in text:
        if x == '_':
            return (ans)
        elif x== 'O':
            return (ans)
        elif x == 'o':
            return (ans)
        else:
            ans+=x

for filename in os.listdir(os.getcwd()+"/"+folder_tr):
    ans = fn(filename)
    ans = ''.join(str(e) for e in ans)
    lookup.append(ans)

#L is set of all training images
#Currently L is zero matrix

L = np.empty(shape=(dim*dim,total_tr), dtype='float64')
i = 0
for filename in os.listdir(os.getcwd()+"/"+folder_tr):
    #Read a image 
    image = cv2.imread(folder_tr+"/"+filename)
    resized = cv2.resize(image, (dim,dim), interpolation = cv2.INTER_AREA)
    #Convert it into grayscale image
    gray_image = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    #flattening an image
    gray_vector = np.array(gray_image, dtype='float64').flatten()
    L[:, i] = gray_vector[:] 
    i = i + 1

#Mean face
mean_face = np.sum(L, axis=1) /total_tr

#subtract mean face
for i in range(total_tr):
    L[:,i] -= mean_face[:]

LT = L.transpose()

#calculate LTL: It is covariance matrix
C = np.matmul(LT,L)  
C = C/total_tr
print("Covariance (LT)(L)",C.shape)

#calculate eigenvector and eigenvalue of covariance matrix
evalues,evectors = np.linalg.eig(C)

#getting correct ordering
sort_indices = evalues.real.argsort()[::-1]
evalues = evalues.real[sort_indices]
evectors = evectors[sort_indices]

#Make a list of (eigenvalue, eigenvector) tuples
eig_pairs = [(evalues[i], evectors[:,i]) for i in range(len(evalues))]

#decide value of K 
tot = sum(evalues)
var_exp = [(i / tot)*100 for i in sorted(evalues, reverse=True)]
cum_var_exp = np.cumsum(var_exp)

K = 0
for i in range(len(cum_var_exp)):
    K = i
    if(int(cum_var_exp[i])>=threshold):
        break


trace1 = Bar(
        x=['PC %s' %i for i in range(1,total_tr)],
        y=var_exp,
        showlegend=False)

trace2 = Scatter(
        x=['PC %s' %i for i in range(1,total_tr)], 
        y=cum_var_exp,
        name='cumulative explained variance')

data = Data([trace1, trace2])

layout=Layout(
        yaxis=YAxis(title='Explained variance in percent'),
        title='Explained variance by different principal components')

fig = Figure(data=data, layout=layout)
plotly.offline.plot(fig)

#taking K eigenvector and putting into cols of P
P = np.reshape(eig_pairs[0][1],(C.shape[0],1))
for j in range(K-1):
    b = np.reshape(eig_pairs[j+1][1],(C.shape[0],1))
    P = np.hstack((P,b))
print(P.shape)
    
#projecting
evectors = np.matmul(L,P)

#normalize eigenvectors
norms = np.linalg.norm(evectors, axis=0)
evectors = evectors / norms 
#computing weights
W = np.matmul(evectors.transpose(),L)

Covariance (LT)(L) (1056, 1056)
(1056, 22)


In [4]:
W.shape

(22, 1056)

In [5]:
#LDA starts..
LDA_data = pd.DataFrame(W.transpose())
LDA_data['label'] = lookup

## This is our labeled data for LDA
X = np.array(LDA_data.iloc[:,0:K])
y = np.array(LDA_data.iloc[:,[K]])
ltt = np.unique(y)
y = np.reshape(y,(y.shape[0],))

#### number of components taken in LDA
l = len(ltt)-2

#Apply LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
sklearn_lda = LinearDiscriminantAnalysis(n_components = l)
X_lda_sklearn = sklearn_lda.fit_transform(X, y)


The priors do not sum to 1. Renormalizing



In [6]:
######### PCA_LDA classification
#classification
test_no = []
i = 0
wrong = 0
for filename in os.listdir(os.getcwd()+"/"+folder_te):
    ans = fn(filename)
    #print(filename)
    ans = ''.join(str(e) for e in ans)
    test_no.append(ans)
    test = cv2.imread(folder_te+"/"+filename)    # read as a grayscale image
    test = cv2.resize(test, (dim,dim), interpolation = cv2.INTER_AREA)
    test= cv2.cvtColor(test, cv2.COLOR_BGR2GRAY)
    img_col = np.array(test, dtype='float64').flatten()  
    img_col -= mean_face                                           # subract the mean column
    img_col = np.reshape(img_col, (dim*dim, 1))                             # from row vector to col vector

    S = np.matmul(evectors.transpose(),img_col)                                # projecting the normalized probe onto the
                                                                            # Eigenspace, to find out the weight
    S = np.reshape(S, (1,S.shape[0]))
    Xn = S 
    X_lda_res = sklearn_lda.transform(Xn)
#     diff = means - X_lda_res                                    # finding the min ||W_j - S||
    
    if l>K:
        X_lda_res = np.reshape(X_lda_res, (K,1))
        X_lda_res = X_lda_res.T
    else:
        X_lda_res = np.reshape(X_lda_res, (l,1))
        X_lda_res = X_lda_res.T
    

    norm = np.linalg.norm( X_lda_sklearn - X_lda_res,axis = 1)

    point = []
    point.append(np.array(norm).argsort())
    #print(len(point))
    #print(point)
    #print(dic)    
    recg = [(point[0][i],lookup[point[0][i]]) for i in range(len(point[0]))]
    #print(recg)
    
    knn = []
    for i in range(int(math.sqrt(len(point[0])))):
        knn.append(recg[i][1])
    
    knn_unique = np.unique(knn)
    knn_classify = []
    for i in range(len(knn_unique)):
        knn_classify.append(knn.count(knn_unique[i]))
    classified = knn_unique[np.argmax(knn_classify)]
    
    if ans!= classified:
        wrong = wrong + 1
    #print(wrong)

#Accuracy
1 - wrong/total_te

1.0

In [7]:
K

22