In [1]:
%matplotlib inline

In [2]:
from keras.preprocessing import image
import sys, os
import pandas as pd
import numpy as np
from keras.losses import categorical_crossentropy
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization,AveragePooling2D
from keras.regularizers import l2
from keras.utils import np_utils
import cv2
from matplotlib import pyplot as plt

from keras.models import model_from_json

### Data Collection

The face data we used for training is taken from the 2013 Kaggle competition "Challenges in Representation Learning: Facial Expression Recognition Challenge". The data consists of 48x48-pixel grayscale images of faces, each stored as a single row of pixel values, and seven categories/labels (0=Angry, 1=Disgust, 2=Fear, 3=Happy, 4=Sad, 5=Surprise, 6=Neutral) of emotions.


In [12]:
#functions
#compile and fit the CNN model
def comFit(model,trainX, trainY, testX=None, testY=None, batchSize=64, epochs=32):
    """Compile ``model`` and train it on ``trainX`` / ``trainY``.

    The model is compiled with categorical cross-entropy loss, the Adam
    optimizer (default settings) and accuracy as the reported metric, then
    fitted with shuffling enabled and a progress bar (``verbose=1``).

    Parameters
    ----------
    model : model object exposing ``compile`` and ``fit``.
    trainX, trainY : training inputs and their targets, passed to ``fit`` as is.
    testX, testY : optional validation inputs and targets. They must be given
        together; when both are omitted no validation is run.
    batchSize : samples per gradient update (default 64).
    epochs : number of passes over the training data (default 32).

    Returns
    -------
    The same ``model`` object, after training.

    Raises
    ------
    ValueError
        If only one of ``testX`` / ``testY`` is supplied.
    """
    if (testX is None) != (testY is None):
        raise ValueError("testX and testY must be provided together")

    #Compiling the model
    model.compile(loss=categorical_crossentropy,optimizer=Adam(),metrics=['accuracy'])

    # Only hand fit() a validation set when one was actually supplied. With the
    # defaults, the tuple (None, None) is truthy and would be treated as real
    # validation data instead of "no validation".
    validationData = (testX, testY) if testX is not None else None

    #Training the model
    model.fit(trainX, trainY,
              batch_size=batchSize,
              epochs=epochs,
              verbose=1,
              shuffle=True,
              validation_data=validationData)

    return model

#real time emotion prediction from specified source
def realPred(source,model,emotions):
    """Predict emotions on the faces of a video stream and show the annotated frames.

    Every frame read from ``source`` is converted to grayscale and scanned for
    frontal faces with OpenCV's Haar cascade. Each detected face is cropped,
    resized to 48x48, divided by 255 and passed to ``model.predict``; the label
    with the highest score is drawn at the face's top-left corner and, when
    ``emot/<label>.png`` can be loaded with an alpha channel, that icon is
    blended onto the frame. The loop stops when the source delivers no more
    frames or when the 'e' key is pressed.

    Parameters
    ----------
    source : anything ``cv2.VideoCapture`` accepts (device index or file path).
    model : object whose ``predict`` returns one score vector per input image.
    emotions : sequence of label strings, indexed by the predicted class id.

    NOTE(review): inputs are scaled by 1/255 here, so the model is expected to
    have been trained on pixels preprocessed the same way - confirm against
    the training code.
    """
    liveFrame = cv2.VideoCapture(source)
    faceDetectioin = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    # label -> resized BGRA icon (or None when unusable), so each icon is read
    # from disk once instead of once per face per frame
    emojiCache = {}

    try:
        while True:

            val, img = liveFrame.read()

            # A failed read means the stream is exhausted (end of a video file)
            # or the device went away; retrying forever would hang the notebook.
            if not val:
                break

            testImg = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            detectedFaces = faceDetectioin.detectMultiScale(testImg, 1.3, 5)

            for (x,y,w,h) in detectedFaces:
                x, y, w, h = int(x), int(y), int(w), int(h)
                cv2.rectangle(img,(x,y),(x+w,y+h),(0,128,0),thickness=4)

                # rows span the height and columns the width of the rectangle
                predImg=testImg[y:y+h,x:x+w]
                predImg=cv2.resize(predImg,(48,48))
                predPixels = image.img_to_array(predImg)
                predPixels = np.expand_dims(predPixels, axis = 0)
                predPixels /= 255

                pridectedList = model.predict(predPixels)

                maxVal = np.argmax(pridectedList[0])
                predEmotion = emotions[maxVal]

                if predEmotion not in emojiCache:
                    emojiCache[predEmotion] = _loadEmoji(predEmotion)
                emoji = emojiCache[predEmotion]
                if emoji is not None:
                    _overlayEmoji(img, emoji, x, y)

                cv2.putText(img, predEmotion, (x, y), cv2.FONT_HERSHEY_DUPLEX, 1, (120,0,255), 2)

            cv2.imshow('Analayzing Facial Emotion',img)

            if cv2.waitKey(10) & 0xFF == ord('e'):
                break
    finally:
        # always free the capture and close the window, even after an error
        liveFrame.release()
        cv2.destroyAllWindows()


def _loadEmoji(predEmotion):
    """Load ``emot/<predEmotion>.png`` as a 50x50 BGRA image, or None if it is missing or has no alpha channel."""
    path = os.path.join("emot",predEmotion+".png")
    emoji = cv2.imread(path,-1)
    # imread signals failure by returning None; the blend below needs channel 3
    if emoji is None or emoji.ndim != 3 or emoji.shape[2] < 4:
        return None
    return cv2.resize(emoji,(50,50))


def _overlayEmoji(img, emoji, x, y):
    """Alpha-blend ``emoji`` onto ``img`` in place with its top-left corner at (x, y), clipped to the frame."""
    frameH, frameW = img.shape[:2]
    x1, y1 = max(x, 0), max(y, 0)
    x2 = min(x + emoji.shape[1], frameW)
    y2 = min(y + emoji.shape[0], frameH)
    if x2 <= x1 or y2 <= y1:
        return

    # keep only the part of the icon that actually lies inside the frame
    patch = emoji[y1 - y:y2 - y, x1 - x:x2 - x]
    ealpha = patch[:, :, 3] / 255.0
    ialpha = 1.0 - ealpha

    for c in range(0, 3):
        img[y1:y2, x1:x2, c] = (ealpha * patch[:, :, c] + ialpha * img[y1:y2, x1:x2, c])
#CNN Model from training set
def craeateModel(trainX):
    """Build the (uncompiled) CNN used for emotion classification.

    The network stacks three blocks of two 3x3 ReLU convolutions followed by
    2x2 max pooling with stride 2 (64, 64 and 128 filters; the first two blocks
    end with 50% dropout), then two 1024-unit ReLU dense layers with 20%
    dropout each and a 7-way softmax output.

    Parameters
    ----------
    trainX : array whose ``shape[1:]`` is used as the input shape of the first layer.

    Returns
    -------
    A ``Sequential`` model containing the layers above.
    """
    convArgs = dict(kernel_size=(3, 3), activation='relu')
    poolArgs = dict(pool_size=(2, 2), strides=(2, 2))

    layerStack = [
        # block 1
        Conv2D(64, input_shape=(trainX.shape[1:]), **convArgs),
        Conv2D(64, **convArgs),
        MaxPooling2D(**poolArgs),
        Dropout(0.5),
        # block 2
        Conv2D(64, **convArgs),
        Conv2D(64, **convArgs),
        MaxPooling2D(**poolArgs),
        Dropout(0.5),
        # block 3
        Conv2D(128, **convArgs),
        Conv2D(128, **convArgs),
        MaxPooling2D(**poolArgs),
        # classifier head
        Flatten(),
        Dense(1024, activation='relu'),
        Dropout(0.2),
        Dense(1024, activation='relu'),
        Dropout(0.2),
        Dense(7, activation='softmax'),
    ]

    return Sequential(layerStack)


In [4]:
#possible target emotions
emotions = ('angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral')

#how many samples to use for training and for validation
trainingSize = 5000
testingSize =  2000

#load the face data with pandas. The DataFrame itself is kept (rather than a
#one-shot iterrows() generator) so the two selections below do not depend on
#how far a shared iterator has advanced, and the cell can be re-run safely.
faceData = pd.read_csv('fer2013.csv')

def pixelsToImages(pixelStrings):
    """Convert space-separated pixel strings to a float32 array of shape (n, 48, 48, 1) scaled to [0, 1]."""
    rows = [np.array(pixels.split(), dtype='float32') for pixels in pixelStrings]
    images = np.array(rows, dtype='float32').reshape(len(rows), 48, 48, 1)
    #same scaling realPred applies to live frames (division by 255), so the
    #model is trained and queried on identically preprocessed inputs; it also
    #avoids dividing by a zero standard deviation for constant pixels
    return images / 255

#select the rows by their 'Usage' column; the size constants are left untouched
trainRows = faceData[faceData['Usage'] == 'Training'].head(trainingSize)
testRows = faceData[faceData['Usage'].str.contains('PublicTest', regex=False)].head(testingSize)

trainX = pixelsToImages(trainRows['pixels'])
trainY = np_utils.to_categorical(trainRows['emotion'].values, num_classes=len(emotions))

testX = pixelsToImages(testRows['pixels'])
#the name textY is kept as-is because the training cell passes it to comFit
textY = np_utils.to_categorical(testRows['emotion'].values, num_classes=len(emotions))

#fail early with a readable message if the csv did not contain the expected rows
assert len(trainX) > 0, "no 'Training' rows found in fer2013.csv"
assert len(testX) > 0, "no 'PublicTest' rows found in fer2013.csv"

### Feature filtering
We used a convolutional neural network with three convolutional blocks (two convolution layers each) for extracting features, and we applied max pooling after each block to reduce the dimensions of the data. Finally, we flatten the feature maps and pass them through fully connected layers to classify the images.

Since we only have one input (the image pixels) and one output (the predicted emotion, 0 to 6), we used the Sequential class from Keras for creating the CNN model. We add each convolution layer as a Conv2D instance, which creates a 3x3 convolution kernel that is convolved with the layer input to produce a tensor of outputs. The activation function used is ReLU, which passes positive values through unchanged and outputs zero otherwise, so only some of the neurons are active for a given input. The dropout rate is 0.5 after the first two convolutional blocks and 0.2 after each fully connected layer.

MaxPooling2D is used with a window size and a stride of 2 in each dimension.

In [5]:
#build the CNN (not yet compiled or trained); its input shape is taken from trainX.shape[1:]
model = craeateModel(trainX)

### Compile and train the model

In the model configuration, we use the Adam optimizer and 'accuracy' as the metric to evaluate the model during training and testing. The loss/objective function used is categorical cross-entropy.

In the training of the model, the number of samples per gradient update (the batch size) is set to 64, the number of epochs is set to 32, the verbosity mode used is a progress bar during training, and shuffle is on, so the training data is shuffled before each epoch.



In [9]:
#compile and train the CNN; the held-out split is passed as validation data
model = comFit(model, trainX, trainY, testX=testX, testY=textY)

Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32


### Facial Emotion Prediction

capturing live frames:
We used the PC camera as the source of video together with the OpenCV library. We used the VideoCapture class with '0' as an argument for capturing video from the default video source (the camera); the final cell of this notebook passes the path of a video file instead. We created an infinite loop that reads frames from the video iteratively using the read method of the VideoCapture class and converts each frame to grayscale.

detecting facial region:
We again make use of OpenCV's face detection API, CascadeClassifier, to detect the facial areas of the captured frame. The pre-trained cascade file we use for CascadeClassifier is haarcascade_frontalface_default.xml. detectMultiScale returns the detected faces as a list of rectangles with their locations.

prediction:
We loop through the detected face rectangles and draw them on the screen. We select the area of interest from the grayscale frame based on the returned rectangle and its location, resize it to the 48x48 size of our training data, and convert it into an array of pixel values. We then scale the pixel values to the range [0, 1] by dividing them by 255 and give the image to our model for prediction.


In [4]:
#persist the trained model for later prediction use:
#the architecture goes to model.json, the weights go to model.h5
with open("model.json", "w") as archFile:
    archFile.write(model.to_json())

model.save_weights("model.h5")

NameError: name 'model' is not defined

In [13]:
#load our model for prediction
#the with-block closes model.json as soon as the architecture has been read
with open("model.json", "r") as json_file:
    model = model_from_json(json_file.read())
model.load_weights('model.h5')

#run the prediction loop on the recorded test video (press 'e' to stop early)
realPred("Basic Emotions Test.mp4",model,emotions)