In [7]:
from scipy.io import loadmat
import pandas as pd
import numpy as np
from random import shuffle
import os
import cv2
from sklearn.svm import SVC
from sklearn.preprocessing import OneHotEncoder,normalize
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.model_selection import cross_val_score, GridSearchCV
from joblib import dump, load

In [8]:
fer_path = '../Data/challenges-in-representation-learning-facial-expression-recognition-challenge/fer2013/fer2013.csv'
data = pd.read_csv(fer_path)

In [9]:
training_pixels = data[data['Usage'] == 'Training']['pixels'].tolist()
width, height = 48, 48
image_size = (width, height)
X_train = []
for pixel_sequence in training_pixels:
    face = [int(pixel) for pixel in pixel_sequence.split(' ')]
    face = np.asarray(face).reshape(width, height)
    #     face = cv2.resize(face.astype('uint8'), image_size)
    X_train.append(face.astype('float32'))
    
X_train = np.asarray(X_train)
X_train = X_train.reshape(X_train.shape[0], -1)
y_train = data[data['Usage'] == 'Training']['emotion'].values

In [10]:
training_pixels = data[data['Usage'] == 'PublicTest']['pixels'].tolist()
width, height = 48, 48
image_size = (width, height)
X_test = []
for pixel_sequence in training_pixels:
    face = [int(pixel) for pixel in pixel_sequence.split(' ')]
    face = np.asarray(face).reshape(width, height)
    #     face = cv2.resize(face.astype('uint8'), image_size)
    X_test.append(face.astype('float32'))
X_test = np.asarray(X_test)

X_test = X_test.reshape(X_test.shape[0], -1)
y_test = data[data['Usage'] == 'PublicTest']['emotion'].values

In [11]:
y_train.shape

(28709,)

In [12]:
print(X_train.shape)
# y_train = np.reshape(y_train, (y_train.shape[0], 1))
print(y_train.shape)
print(X_test.shape)
# y_test = np.reshape(y_test, (y_test.shape[0], 1))
print(y_test.shape)

(28709, 2304)
(28709,)
(3589, 2304)
(3589,)


In [7]:
svm_model = load('svm_model.joblib')

In [8]:
svm_model

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [9]:
print("Accuracy scores for test data is - ")
print(svm_model.score(X_test, y_test))

Accuracy scores for test data is - 
0.31819448314293675


In [None]:
print("Accuracy scores for training data is - ")
print(svm_model.score(X_train, y_train))

In [None]:
svc = SVC()
svc.fit(X_train, y_train)
svc

In [18]:
svm_model.predict(X_test)

array([3, 3, 3, ..., 3, 3, 3])

In [None]:
dump(svc, 'svm_model.joblib')

In [13]:
print(svm_model.score(X_train, y_train))

0.9984325472848236


In [None]:
print("Accuracy scores for test data is - ")
print(svc.score(X_test, y_test))

In [10]:
def detectFaceOpenCVDnn(net, frame):
    result = []
    frameOpencvDnn = frame.copy()
    frameHeight = frameOpencvDnn.shape[0]
    frameWidth = frameOpencvDnn.shape[1]
    blob = cv2.dnn.blobFromImage(frameOpencvDnn, 1.0, (300, 300), [104, 117, 123], False, False)
    net.setInput(blob)
    detections = net.forward()
    bboxes = []
    conf_threshold = 0.7
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > conf_threshold:
            x1 = int(detections[0, 0, i, 3] * frameWidth)
            y1 = int(detections[0, 0, i, 4] * frameHeight)
            x2 = int(detections[0, 0, i, 5] * frameWidth)
            y2 = int(detections[0, 0, i, 6] * frameHeight)
            bboxes.append([x1, y1, x2, y2])
            if x1 > frameWidth or x1 < 0 or x2 > frameWidth or x2 < 0 or y1 < 0 or y1 > frameHeight or y2 < 0 or y2 > frameHeight:
                continue
            else:
                grayOpenDnn = gray = cv2.cvtColor(frameOpencvDnn, cv2.COLOR_BGR2GRAY)
                croppedOpenDnn = cv2.resize(gray[y1:y2,x1:x2], (48,48)) 
                result.append(croppedOpenDnn)
#                 cv2.rectangle(frameOpencvDnn, (x1, y1), (x2, y2), (0, 255, 0), int(round(frameHeight/150)), 8)
    return result, bboxes

In [11]:
def face_reduction(image):
    # OpenCV DNN supports 2 networks.
    # 1. FP16 version of the original caffe implementation ( 5.4 MB )
    # 2. 8 bit Quantized version using Tensorflow h( 2.7 MB )
#     print("printing image")
#     print(image)
    DNN = "TF"
    if DNN == "CAFFE":
        modelFile = "models/res10_300x300_ssd_iter_140000_fp16.caffemodel"
        configFile = "models/deploy.prototxt"
        net = cv2.dnn.readNetFromCaffe(configFile, modelFile)
    else:
        modelFile = "models/opencv_face_detector_uint8.pb"
        configFile = "models/opencv_face_detector.pbtxt"
        net = cv2.dnn.readNetFromTensorflow(modelFile, configFile)

    conf_threshold = 0.7
    outOpencvDnn, bboxes = detectFaceOpenCVDnn(net,image)
    return outOpencvDnn, bboxes

In [12]:
y_translation = {0:"Angry", 1:"Disgust", 2:"Fear", 3:"Happy", 4:"Neutral", 5:"Surprise", 6:"Sad"}

In [13]:
def new_display_emotions(videoFaces, frame, bboxes):
    outputFrame = frame.copy()
    predictions = svm_model.predict(videoFaces)
    for i in range(len(bboxes)):
        emotion = y_translation[predictions[i]]
        font = cv2.FONT_HERSHEY_SIMPLEX
        cv2.putText(outputFrame, emotion, (bboxes[i][0] -1 ,bboxes[i][1] - 1), font, 1, (0,255,0), 2, cv2.LINE_AA)
        cv2.rectangle(outputFrame, (bboxes[i][0], bboxes[i][1]), (bboxes[i][2], bboxes[i][3]), (0, 255, 0), int(round(outputFrame.shape[0]/150)), 8)
    return outputFrame

In [14]:
def new_get_webcam_feed():
    cap = cv2.VideoCapture(0)
    hasFrame, frame = cap.read()
    #TODO : Handle cases for multiple people
    frame_count = 0
    tt_opencvDnn = 0
    while(1):
        try:
            hasFrame, frame = cap.read()
            if not hasFrame:
                break
            frame_count += 1
            outputFrame = frame
            #Gives a list of gray-scale images in webcam feed
            videoFaces, bboxes = face_reduction(frame)
            videoFaces = np.array(videoFaces)
            if videoFaces.shape[0] != 0:
#                 print(videoFaces.shape)
                videoFaces = videoFaces.reshape((videoFaces.shape[0],videoFaces.shape[1],videoFaces.shape[2],1))
                videoFaces = np.reshape(videoFaces, (videoFaces.shape[0], (videoFaces.shape[1]*videoFaces.shape[2]*videoFaces.shape[3])))
                outputFrame = new_display_emotions(videoFaces, frame, bboxes)
            cv2.imshow("frame", outputFrame)
            k = cv2.waitKey(10)
            if k == 27:
                break
        except Exception as e:
            print("Exception is ")
            print(e)
            break
    cv2.destroyAllWindows()

In [17]:
new_get_webcam_feed()

Exception is 
index 1 is out of bounds for axis 0 with size 1


In [17]:
params_grid = [{
    'kernel': ['rbf'],
    'gamma': [1e-3, 1e-4],
    'C': [1, 10, 100, 1000]
}, {
    'kernel': ['linear'],
    'C': [1, 10, 100, 1000]
}]


In [None]:
svm_model = GridSearchCV(SVC(), params_grid, cv=5)
svm_model.fit(X_train, y_train)

In [None]:
# View the accuracy score
print('Best score for training data:', svm_model.best_score_,"\n") 

# View the best parameters for the model found using grid search
print('Best C:',svm_model.best_estimator_.C,"\n") 
print('Best Kernel:',svm_model.best_estimator_.kernel,"\n")
print('Best Gamma:',svm_model.best_estimator_.gamma,"\n")

In [None]:
final_model = svm_model.best_estimator_
Y_pred = final_model.predict(X_test)

In [None]:
# Making the Confusion Matrix
#print(pd.crosstab(Y_test_label, Y_pred_label, rownames=['Actual Activity'], colnames=['Predicted Activity']))
print(confusion_matrix(Y_test,Y_pred))
print("\n")
print(classification_report(Y_test,Y_pred))

print("Training set score for SVM: %f" % final_model.score(X_train, y_train))
print("Testing  set score for SVM: %f" % final_model.score(X_test, y_test ))

svm_model.score