In [300]:
import cv2
import glob
import matplotlib.pyplot as plt
import numpy as np
import pickle

from sklearn.linear_model import SGDClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_predict, cross_val_score
from sklearn.model_selection import train_test_split

In [301]:
# Haar cascade definition used for frontal-face detection; the path is
# resolved relative to the notebook's working directory.
cascade_path = 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_path)
# CascadeClassifier does not raise when the XML file is missing or invalid;
# it yields an empty classifier that only fails later, inside
# detectMultiScale. Fail fast here with an actionable message instead.
if face_cascade.empty():
    raise IOError('Could not load Haar cascade from {!r}'.format(cascade_path))

# Side length, in pixels, of the square face crop produced by extract_face.
image_size = 100
# Emotion names indexed by the numeric label read from a CK+ emotion file.
# Index 0 has no name (None) and is skipped by images_emotions_data.
emotions_dictionary = [None, 'anger', 'contempt', 'disgust', 'fear', 'happy', 'sadness', 'surprise']

In [302]:
def extract_face(image):
    """Crop the first detected face out of ``image`` and resize it.

    Parameters
    ----------
    image
        Image passed straight to ``face_cascade.detectMultiScale`` (the
        caller in this notebook supplies a grayscale image).

    Returns
    -------
    The face region resized to ``image_size`` x ``image_size`` pixels.

    Raises
    ------
    IndexError
        If the cascade finds no face in ``image``. The exception type is the
        one the original ``[0]`` indexing produced, now with a clear message.
    """
    detections = face_cascade.detectMultiScale(
        image,
        scaleFactor=1.1,
        minNeighbors=6
    )
    if len(detections) == 0:
        raise IndexError('No face detected in image')

    # Only the first detection is used, even when several faces are found.
    x, y, w, h = detections[0]

    face = image[y:y + h, x:x + w]
    return cv2.resize(face, (image_size, image_size))

In [303]:
def load_and_process_image(filename):
    """Load an image file and turn it into a flat face feature vector.

    The file is read as grayscale, the first detected face is cropped and
    resized by ``extract_face``, and the result is flattened to 1-D.

    Parameters
    ----------
    filename : str
        Path of the image file to read.

    Returns
    -------
    The flattened face crop returned by ``extract_face``.

    Raises
    ------
    IOError
        If the file is missing or cannot be decoded.
    IndexError
        Propagated from ``extract_face`` when no face is detected.
    """
    image = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None rather than raising, which
    # previously surfaced as an unrelated error inside the face detector.
    if image is None:
        raise IOError('Could not read image file {!r}'.format(filename))
    return extract_face(image).flatten()

In [304]:
def images_emotions_data(participant_id, session_id):
    """Build the labelled samples contributed by one recording session.

    Reads the session's emotion label from ``CK+/Emotion`` and, unless the
    label is missing, unnamed or 'contempt', processes the last quarter of
    the session's frames (in reverse filename order) into feature vectors.

    Parameters
    ----------
    participant_id : str
        Participant directory name under ``CK+/Emotion``.
    session_id : str
        Session directory name under the participant directory.

    Returns
    -------
    list
        ``[features, labels]`` where both entries are lists of equal length;
        ``[[], []]`` when the session has no usable label.

    Raises
    ------
    Whatever ``load_and_process_image`` raises for an unusable frame.
    """
    emotion_files = glob.glob('CK+/Emotion/{}/{}/*'.format(participant_id, session_id))
    if not emotion_files:
        return [[], []]

    with open(emotion_files[0], 'r') as emotion_file:
        # The label is stored as a float-formatted number, hence float -> int.
        emotion = emotions_dictionary[int(float(emotion_file.read()))]

    if emotion is None or emotion == 'contempt':  # remove contempt
        return [[], []]

    session_images = sorted(glob.glob('CK+/cohn-kanade-images/{}/{}/*'.format(participant_id, session_id)))

    # Keep only the final quarter of the sequence (integer division).
    length = len(session_images) // 4
    # Last frame first, then second-to-last, and so on.
    selected = session_images[::-1][:length]

    # Materialise the features eagerly. A lazy generator here made the return
    # type inconsistent with the empty case and let a failure on a later frame
    # leave the caller's image list partially extended and out of step with
    # its label list.
    data = [load_and_process_image(image_file) for image_file in selected]

    return [data, [emotion] * length]

In [306]:
def save_classifier(classifier, filename):
    """Pickle ``classifier`` to ``filename``, replacing any existing file.

    Parameters
    ----------
    classifier
        Any picklable object (in this notebook, a fitted classifier).
    filename : str
        Destination path.
    """
    # The previous mode string 'sr+' is not a valid open() mode and raised
    # ValueError on every call; pickle data is bytes, so write in binary mode.
    with open(filename, 'wb') as f:
        pickle.dump(classifier, f)

def load_classifier(filename):
    """Return the object unpickled from the file at ``filename``.

    NOTE: unpickling can execute arbitrary code, so only use this on files
    that come from a trusted source.
    """
    with open(filename, 'rb') as pickled_file:
        restored = pickle.load(pickled_file)
    return restored

In [307]:
# Preprocessing
#
# Walk every participant/session directory under CK+/Emotion and collect the
# feature vectors and labels returned by images_emotions_data.

# glob returns paths in an unspecified, platform-dependent order; sorting
# keeps the sample order (and so the seeded train/test split) reproducible.
participants = sorted(glob.glob('CK+/Emotion/*'))

images = []
emotions = []

for participant in participants:
    # Get the participant ID from the last 4 chars
    participant_id = participant[-4:]

    sessions = sorted(glob.glob('{}/*'.format(participant)))
    for session in sessions:
        # Get the session ID from the last 3 chars
        session_id = session[-3:]

        current_image, current_emotion = images_emotions_data(participant_id, session_id)
        images.extend(current_image)
        emotions.extend(current_emotion)

SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', max_iter=5, n_iter=None,
       n_jobs=1, penalty='l2', power_t=0.5, random_state=2017,
       shuffle=True, tol=None, verbose=0, warm_start=False)

In [325]:
# Hold out 10% of the samples for testing, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(np.array(images), np.array(emotions),
                                                    test_size=0.1, random_state=2017)
# Re-shuffle the training rows. The permutation previously came from the
# unseeded global np.random state, so the row order changed on every run
# despite the seeded split; a dedicated seeded generator keeps it repeatable.
shuffle_index = np.random.RandomState(2017).permutation(len(X_train))
X_train, y_train = X_train[shuffle_index], y_train[shuffle_index]

In [329]:
# Fit an SGD-trained linear classifier on the training split; the seed and
# iteration count are the only settings overridden from the defaults.
sgd_options = {'random_state': 2017, 'max_iter': 800}
clf = SGDClassifier(**sgd_options)
clf.fit(X_train, y_train)

SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', max_iter=800, n_iter=None,
       n_jobs=1, penalty='l2', power_t=0.5, random_state=2017,
       shuffle=True, tol=None, verbose=0, warm_start=False)

In [332]:
# Classify a single image file; predict expects a 2-D batch, so the one
# feature vector is wrapped in a list.
angry_sample = load_and_process_image('angry.jpeg')
clf.predict([angry_sample])

array(['disgust'],
      dtype='<U8')