In [1]:
import os
import cv2
import numpy as np
from sklearn import preprocessing

In [2]:
D = 48*48

categories = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']

train_dir = 'FER2013/train'
test_dir = 'FER2013/test'

In [3]:
def build_list(dir, categories):
    im_list = []
    lb_list = []
    for category in categories:
        path = os.path.join(dir, category)

        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            im_list.append(img_path)
            lb_list.append(category)

    return im_list, lb_list

In [4]:
def vectorize_img(imgPath):
    rgb = cv2.imread(imgPath)
    img = cv2.resize(rgb, (48, 48))
    imGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    imVec = imGray.reshape(1, D)
    return imVec

In [5]:
def build_data_matrix(h, w, im_list, lb_list):
    X_full = np.zeros((h, w))
    lb = preprocessing.LabelEncoder()
    y = lb.fit_transform(lb_list)
    y_full = y.reshape(-1, 1)

    for i in range(len(im_list)):
        X_full[i, :] = vectorize_img(im_list[i])

    return X_full, y_full

In [6]:
im_train_list, lb_train_list = build_list(train_dir, categories)
im_test_list, lb_test_list = build_list(test_dir, categories)

In [7]:
h_train = len(im_train_list)
h_test = len(im_test_list)
w = D

X_train, y_train = build_data_matrix(h_train, w, im_train_list, lb_train_list)
X_test, y_test = build_data_matrix(h_test, w, im_test_list, lb_test_list)

In [8]:
y_train = y_train.reshape(-1,1)
y_test = y_test.reshape(-1,1)

In [9]:
# export file csv
# train_data = np.concatenate((X_train, y_train), axis=1)
# train_data.shape
# import pandas as pd
# df = pd.DataFrame(data=train_data)
# df.to_csv('train_data.csv')

In [10]:
#Shuffle the dataset
from sklearn.utils import shuffle

X_train, y_train = shuffle(X_train, y_train, random_state=42)
X_test, y_test = shuffle(X_test, y_test, random_state=42)

In [11]:
from time import time
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA
from sklearn.svm import SVC

In [12]:
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [13]:
t0 = time()
pca = PCA(n_components=150, svd_solver='randomized', whiten=True).fit(X_train)
print("done in %0.3fs" % (time() - t0))

done in 9.566s


In [14]:
t0 = time()
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
print("done in %0.3fs" % (time() - t0))

done in 0.907s


In [15]:
# Train a SVM classification model

t0 = time()
clf = SVC(kernel='rbf', C=1000, gamma='scale')
clf = clf.fit(X_train_pca, y_train)
print("done in %0.3fs" % (time() - t0))

  return f(*args, **kwargs)


done in 488.319s


In [16]:
clf.score(X_test_pca, y_test)

0.48969384662018794

In [None]:
import joblib
joblib.dump(pca, 'pca1.joblib')
joblib.dump(clf, 'svm1.joblib')