In [None]:
#Imports
import numpy as np
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
from scipy import ndimage, misc

# SK-Learn
from skimage.feature import hog
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

# Image directories, given relative to the notebook's working directory.
# Each one is scanned by load_images_from_folder, which treats the
# sub-folders inside it as the classes and reads the image files they contain.
train_dir = 'Data/train/'
test_dir = 'Data/test/'

In [None]:
#Import pictures
def load_images_from_folder(folder):
    """Load every readable image below ``folder`` with an integer class label.

    ``folder`` must contain one sub-directory per class; each sub-directory
    holds the image files of that class.

    Class ids are assigned from the alphabetically sorted sub-directory names,
    so two folders with the same class names (e.g. train and test) always get
    the same name -> id mapping, whatever order os.listdir happens to return.

    Parameters
    ----------
    folder : str
        Path of the dataset split, with or without a trailing separator.

    Returns
    -------
    images : list
        Images as returned by cv2.imread; files cv2 cannot decode are skipped.
    labels : list of int
        Class id of the image at the same position in ``images``.
    """
    # Only directories are classes; stray files (.DS_Store, README, ...) are ignored.
    class_names = sorted(
        entry for entry in os.listdir(folder)
        if os.path.isdir(os.path.join(folder, entry))
    )

    images, labels = [], []
    for label_id, expression in enumerate(class_names):
        class_dir = os.path.join(folder, expression)

        # Sorted so the sample order is reproducible between runs.
        for filename in sorted(os.listdir(class_dir)):
            img = cv2.imread(os.path.join(class_dir, filename))

            # cv2.imread signals an unreadable file by returning None.
            if img is not None:
                images.append(img)
                labels.append(label_id)
    return images, labels

In [None]:
# Class folder names in alphabetical order, so the printed list is stable
# between runs and machines (os.listdir gives no ordering guarantee).
# NOTE(review): load_images_from_folder assigns the integer labels itself;
# keep this ordering in sync with the one used there.
class_names = sorted(
    name for name in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, name))
)
class_name2id = { label: index for index, label in enumerate(class_names) } # Map class names to integer labels

print(class_names)

training, labels_training = load_images_from_folder(train_dir)
testing, labels_testing = load_images_from_folder(test_dir)

#Display first image in training data
# cv2.imread yields BGR channel order while matplotlib expects RGB, so the
# channels are swapped for display only; `training` itself is left untouched.
img = cv2.cvtColor(training[0], cv2.COLOR_BGR2RGB)
imgplot = plt.imshow(img)
plt.show()

## Preprocessing

In [None]:
#Histogram-equalize a list of images
def equalizing(images):
    """Return a grayscale, histogram-equalized copy of every image in ``images``.

    Each input is converted with cv2.COLOR_BGR2GRAY and then passed through
    cv2.equalizeHist. The results come back as a new list in input order.
    """
    # Lazy generator: each image is converted and equalized one after the other.
    grayscale = (cv2.cvtColor(picture, cv2.COLOR_BGR2GRAY) for picture in images)
    return [cv2.equalizeHist(gray) for gray in grayscale]
#takes a list of images and the median filter size (often 3)
#returns a new list holding the median-filtered images, in input order
def apply_median_filter(images, filter_value):
    """Run scipy.ndimage.median_filter with size ``filter_value`` on each image."""
    return [ndimage.median_filter(picture, filter_value) for picture in images]

# Preprocess both splits the same way: grayscale + histogram equalization
# first, then a median filter of size 3 on the equalized images.
training_equalized, testing_equalized = equalizing(training), equalizing(testing)

training_equ_med, testing_equ_med = (
    apply_median_filter(equalized, 3)
    for equalized in (training_equalized, testing_equalized)
)

## Feature Extraction - HoG

In [None]:
#Calculate HoG for every image in data
# Features are taken from the preprocessed images (grayscale, equalized,
# median-filtered) produced above. The raw cv2 images are 3-channel BGR, and
# hog() only accepts an image with two spatial dimensions unless a channel
# axis is specified explicitly.

hog_features_training = np.asarray([hog(image_training, orientations=9, pixels_per_cell=(8,8), cells_per_block=(2,2)) for image_training in training_equ_med])
hog_features_test = np.asarray([hog(image_test, orientations=9, pixels_per_cell=(8,8), cells_per_block=(2,2)) for image_test in testing_equ_med])

#print shape (number of images, length of one HoG descriptor)
print(hog_features_training.shape)
print(hog_features_test.shape)

In [None]:
# Short aliases used by the classifier cells: X_* are HoG feature matrices,
# Y_* the integer class labels of the matching split.
X_trn_hog, Y_trn = hog_features_training, labels_training
X_test_hog, Y_test = hog_features_test, labels_testing

## Linear SVM

In [None]:
# Linear-kernel SVM on the HoG features: build the model and fit it on the
# training split.
svm_hog = svm.SVC(kernel='linear', gamma='scale')
svm_hog.fit(X_trn_hog, Y_trn)

# Score on the test split: overall accuracy, then the confusion matrix as an image.
hog_predictions = svm_hog.predict(X_test_hog)
print('HoG Accuracy: {}'.format(accuracy_score(Y_test, hog_predictions)))

plt.subplot(111)
plt.imshow(confusion_matrix(Y_test, hog_predictions))
plt.title('HoG Confusion Matrix')
plt.show()

## Random Forest

In [None]:
def randomForest(nbr_estimators_list, random_state=None):
    """Train and evaluate one random forest per entry of ``nbr_estimators_list``.

    Every forest is fit on the notebook-level X_trn_hog / Y_trn and scored on
    X_test_hog / Y_test. For each forest the test accuracy is printed and the
    confusion matrix is shown; both are labelled with the number of trees so
    the individual runs of the sweep can be told apart.

    Parameters
    ----------
    nbr_estimators_list : iterable of int
        Number of trees (``n_estimators``) for each forest to try.
    random_state : int or None, optional
        Seed forwarded to RandomForestClassifier. The default (None) keeps the
        previous unseeded behaviour; pass an int for reproducible results.
    """
    for nbr_estimator in nbr_estimators_list:
        # create random forest classifier
        rf = RandomForestClassifier(
            n_estimators=nbr_estimator,
            criterion='entropy',
            random_state=random_state,
        )

        # train classifier
        rf.fit(X_trn_hog, Y_trn)

        # evaluate classifier performance on the test split
        rf_predictions = rf.predict(X_test_hog)
        print('Random Forest ({} trees) Accuracy: {}'.format(nbr_estimator, accuracy_score(Y_test, rf_predictions)))
        plt.imshow(confusion_matrix(Y_test, rf_predictions))
        plt.title('Random Forest Confusion Matrix ({} trees)'.format(nbr_estimator))
        plt.show()

In [None]:
# Forest sizes (number of trees) to compare; randomForest trains and
# evaluates one classifier per entry.
nbr_estimators_list = [100, 200, 300, 400, 500, 1000]
randomForest(nbr_estimators_list)

## Comparison of Classifiers

In [None]:
#Performance Analysis
# Compares how well the linear SVM and a 500-tree random forest reproduce the
# labels of the data they were trained on.

# predictions on training set for both classifiers
svm_linear = svm_hog.predict(X_trn_hog)

# create random forest classifier
rf = RandomForestClassifier(n_estimators=500, criterion='entropy')

# train classifier
rf.fit(X_trn_hog, Y_trn)

# Predict on the TRAINING features so the predictions line up with Y_trn
# below (predicting on the test features gives an array of a different length).
rf_predictions = rf.predict(X_trn_hog)

# Compute each confusion matrix once and reuse it for plotting and comparison.
svm_trn_confusion = confusion_matrix(Y_trn, svm_linear)
rf_trn_confusion = confusion_matrix(Y_trn, rf_predictions)

print('SVM Linear Training Set Accuracy: {}'.format(accuracy_score(Y_trn, svm_linear)))
print('Random Forest 500-tree Training Set Accuracy: {}'.format(accuracy_score(Y_trn, rf_predictions)))
plt.subplot(121)
plt.imshow(svm_trn_confusion)
plt.title('SVM Linear Training Set')
plt.subplot(122)
plt.imshow(rf_trn_confusion)
plt.title('RF 500-tree Training Set')
plt.show()

# compare confusion matrices (SVM minus random forest, cell by cell)
print(np.subtract(svm_trn_confusion, rf_trn_confusion))

In [None]:
#Exploring different parameters of HoG
# Every combination of cell size, block size and number of orientation bins is
# used to extract HoG features from the preprocessed images; a linear SVM and
# a 500-tree random forest are then refit and scored on the test split.
svm_linear_hogtest = svm.SVC(kernel='linear', gamma='scale')
rf500_hogtest = RandomForestClassifier(n_estimators=500, criterion='entropy')

cellsizes = [6, 8, 10]
blocksizes = [1, 2, 3]
orientation_options = [8, 9, 10]

# One slot per parameter combination, two classifiers per slot, one integer
# class id per test sample. Sized from the data instead of hard-coded.
n_combinations = len(cellsizes) * len(blocksizes) * len(orientation_options)
variedHoGpredictions = np.empty((n_combinations, 2, len(Y_test)), dtype=int)   # saved in order of for loop

index = 0
for cellsize in cellsizes:
    for blocksize in blocksizes:
        for orientationbins in orientation_options:
            # hog() needs 2-D input unless a channel axis is given, so the
            # grayscale preprocessed images are used rather than the raw BGR ones.
            hogIterFeatures = np.asarray([hog(image, orientations=orientationbins, pixels_per_cell=(cellsize,cellsize), cells_per_block=(blocksize,blocksize)) for image in training_equ_med])
            testHogIterFeatures = np.asarray([hog(testImage, orientations=orientationbins, pixels_per_cell=(cellsize,cellsize), cells_per_block=(blocksize,blocksize)) for testImage in testing_equ_med])

            # Both estimators are refit on the features of this combination.
            svm_linear_hogtest.fit(hogIterFeatures, Y_trn)
            rf500_hogtest.fit(hogIterFeatures, Y_trn)

            svm_linear_hogtest_predictions = svm_linear_hogtest.predict(testHogIterFeatures)
            rf500_hogtest_predictions = rf500_hogtest.predict(testHogIterFeatures)
            variedHoGpredictions[index][0] = svm_linear_hogtest_predictions
            variedHoGpredictions[index][1] = rf500_hogtest_predictions
            print('SVM-Linear Accuracy with cellsize={}, blocksize={}, orientationbins={}: {}'.format(cellsize, blocksize, orientationbins, accuracy_score(Y_test, svm_linear_hogtest_predictions)))
            print('RF 500-tree Accuracy with cellsize={}, blocksize={}, orientationbins={}: {}'.format(cellsize, blocksize, orientationbins, accuracy_score(Y_test, rf500_hogtest_predictions)))
            plt.figure(figsize=(8, 4))
            plt.subplot(121)
            plt.imshow(confusion_matrix(Y_test, svm_linear_hogtest_predictions))
            plt.title('SVM-Linear ({}, {}, {})'.format(cellsize, blocksize, orientationbins))
            plt.subplot(122)
            plt.imshow(confusion_matrix(Y_test, rf500_hogtest_predictions))
            plt.title('RF 500-tree ({}, {}, {})'.format(cellsize, blocksize, orientationbins))
            plt.show()

            # Advance to the next storage slot; without this every combination
            # would overwrite slot 0.
            index += 1