In [1]:
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
from keras.layers import Dense, Activation, Dropout, Flatten

from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator

import numpy as np
import matplotlib.pyplot as plt

#------------------------------
# CPU / GPU configuration for the TensorFlow session that Keras will use.
# `device_count` maps a device type to the maximum number of devices of that
# type TensorFlow may use: here 0 GPUs and up to 56 CPU devices.
device_limits = {'GPU': 0, 'CPU': 56}
config = tf.ConfigProto(device_count=device_limits)

# Create the session with these limits and register it as the Keras backend session.
sess = tf.Session(config=config)
keras.backend.set_session(sess)

class emotionDetection:
    """CNN classifier for seven facial emotions on 48x48 single-channel images.

    The network is built and compiled in ``__init__``; the other methods wrap
    training, inference, evaluation and plotting around ``self.model``.
    """

    # Class labels, in the order of the network's output units.
    EMOTIONS = ('angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral')

    def __init__(self):
        """Build and compile the convolutional network."""
        self.num_classes = len(self.EMOTIONS)

        layers = [
            # 1st convolution block
            Conv2D(64, (5, 5), activation='relu', input_shape=(48, 48, 1)),
            MaxPooling2D(pool_size=(5, 5), strides=(2, 2)),

            # 2nd convolution block
            Conv2D(64, (3, 3), activation='relu'),
            Conv2D(64, (3, 3), activation='relu'),
            AveragePooling2D(pool_size=(3, 3), strides=(2, 2)),

            # 3rd convolution block
            Conv2D(128, (3, 3), activation='relu'),
            Conv2D(128, (3, 3), activation='relu'),
            AveragePooling2D(pool_size=(3, 3), strides=(2, 2)),

            Flatten(),

            # fully connected classifier head
            Dense(1024, activation='relu'),
            Dropout(0.2),
            Dense(1024, activation='relu'),
            Dropout(0.2),
            Dense(self.num_classes, activation='softmax'),
        ]

        self.model = Sequential()
        for layer in layers:
            self.model.add(layer)

        self.model.compile(loss='categorical_crossentropy',
                           optimizer=keras.optimizers.Adam(),
                           metrics=['accuracy'])

    def analysis(self, emotions):
        """Show a bar chart of one score vector.

        Args:
            emotions: sequence with one score per entry of ``EMOTIONS``.
        """
        positions = np.arange(len(self.EMOTIONS))

        plt.bar(positions, emotions, align='center', alpha=0.5)
        plt.xticks(positions, self.EMOTIONS)
        plt.ylabel('percentage')
        plt.title('emotion')

        plt.show()

    def load_weights(self, weights_dir):
        """Load model weights from the file at ``weights_dir``."""
        self.model.load_weights(weights_dir)

    def train(self, x, y, batch_size, epochs):
        """Fit the model on ``(x, y)`` in mini-batches.

        Args:
            x: training images, indexable along the first axis.
            y: training targets aligned with ``x``.
            batch_size: number of samples per batch.
            epochs: number of passes to run.
        """
        gen = ImageDataGenerator()
        train_generator = gen.flow(x, y, batch_size=batch_size)

        # One epoch should cover the data once, so the number of steps is the
        # number of batches needed for len(x) samples -- not the batch size.
        steps = max(1, int(np.ceil(len(x) / float(batch_size))))
        self.model.fit_generator(train_generator, steps_per_epoch=steps, epochs=epochs)

    def predict(self, x):
        """Return the model's predictions for the batch ``x``."""
        return self.model.predict(x)

    def evaluate(self, x, y):
        """Evaluate on ``(x, y)`` and print the loss and the accuracy in percent."""
        score = self.model.evaluate(x, y)
        print('Test loss:', score[0])
        print('Test accuracy:', 100*score[1])

    def see_test_result(self, x, start=20, stop=30):
        """Display a window of samples from ``x`` together with their predictions.

        For every index in ``[start, stop)`` that exists in ``x`` the image is
        shown, the predicted score vector is printed and plotted via
        ``analysis``.

        Args:
            x: batch of images; each sample must reshape to 48x48.
            start: first sample index to show (default 20).
            stop: index one past the last sample to show (default 30).
        """
        predictions = self.model.predict(x)

        first = max(start, 0)
        last = min(stop, len(predictions))
        for index in range(first, last):
            scores = predictions[index]

            # Use the images that were actually passed in, not a global test set.
            testing_img = np.array(x[index], 'float32').reshape([48, 48])

            plt.gray()
            plt.imshow(testing_img)
            plt.show()

            print(scores)

            self.analysis(scores)
            print("----------------------------------------------")
        
num_classes = 7 #angry, disgust, fear, happy, sad, surprise, neutral
batch_size = 256
epochs = 5

#read kaggle facial expression recognition challenge dataset (fer2013.csv)
#https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge

with open("data/fer2013/fer2013.csv") as f:
    content = f.readlines()

# dtype=object stores references to the Python strings. The default
# fixed-width unicode dtype would pad every row to the longest line.
lines = np.array(content, dtype=object)

num_of_instances = lines.size
print("number of instances: ",num_of_instances)
print("instance length: ",len(lines[1].split(",")[1].split(" ")))

#------------------------------
#initialize trainset and test set
x_train, y_train, x_test, y_test = [], [], [], []

#------------------------------
#transfer train and test set data
# Each row is "<emotion>,<space separated pixels>,<usage>"; index 0 is skipped
# (presumably the CSV header -- the loop has always started at 1).
expected_pixels = 48 * 48
skipped_rows = 0
for i in range(1, num_of_instances):
    try:
        emotion, img, usage = lines[i].split(",")
        pixels = np.array(img.split(" "), 'float32')
        if pixels.size != expected_pixels:
            raise ValueError("unexpected pixel count: %d" % pixels.size)
        emotion = keras.utils.to_categorical(emotion, num_classes)
    except (ValueError, IndexError):
        # Malformed row: wrong field count, non-numeric data, wrong image size
        # or a label outside [0, num_classes). Skip it, but keep count instead
        # of hiding every possible error behind a bare except.
        skipped_rows += 1
        continue

    if 'Training' in usage:
        y_train.append(emotion)
        x_train.append(pixels)
    elif 'PublicTest' in usage:
        y_test.append(emotion)
        x_test.append(pixels)

if skipped_rows:
    print("skipped malformed rows: ", skipped_rows)

#------------------------------
#data transformation for train and test sets
x_train = np.array(x_train, 'float32')
y_train = np.array(y_train, 'float32')
x_test = np.array(x_test, 'float32')
y_test = np.array(y_test, 'float32')

x_train /= 255 #normalize inputs between [0, 1]
x_test /= 255

# Arrays are already float32, so only the reshape to (samples, 48, 48, 1) is needed.
x_train = x_train.reshape(x_train.shape[0], 48, 48, 1)
x_test = x_test.reshape(x_test.shape[0], 48, 48, 1)

print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Inputs for this run: the image to classify, a weights file, and the
# training hyper-parameters.
# NOTE(review): img_path is an absolute path on one machine -- adjust it
# before running elsewhere.
img_path = "/home/roboi9/workspace/Keras/Emotion Detection/data/test/ammar.jpg"
weights_dir = 'data/facial_expression_model_weights.h5'
batch_size, epochs = 256, 5

# Build the network and train it on the prepared arrays.
emo = emotionDetection()
emo.train(x_train, y_train, batch_size, epochs)
# Alternative steps, currently disabled:
# emo.load_weights(weights_dir)
# emo.evaluate(x_test, y_test)
# emo.see_test_result(x_test)

# Load the image as 48x48 grayscale, add a leading batch axis and scale the
# pixel values by 1/255 before predicting.
img = image.load_img(img_path, grayscale=True, target_size=(48, 48))
x = np.expand_dims(image.img_to_array(img), axis=0)
x /= 255

# Predict and plot the score vector of the single sample.
custom = emo.predict(x)
emo.analysis(custom[0])



Using TensorFlow backend.


number of instances:  35888
instance length:  2304
28709 train samples
3589 test samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5




<Figure size 640x480 with 1 Axes>

In [2]:
from imutils.video import VideoStream
from imutils.video import FPS
import imutils
import time
import cv2
import os

# Settings for the face detector: model directory, embedding model file and
# the minimum detection confidence.
args = {
    'detector': 'face_detection_model',
    'embedding_model': 'openface_nn4.small2.v1.t7',
    'confidence': 0.5,
}

# Caffe face detector: network definition + trained weights inside the detector directory.
protoPath = os.path.join(args["detector"], "deploy.prototxt")
modelPath = os.path.join(args["detector"], "res10_300x300_ssd_iter_140000.caffemodel")
detector = cv2.dnn.readNetFromCaffe(protoPath, modelPath)

# Torch embedding network; loaded here, although the visible code below never runs it.
embedder = cv2.dnn.readNetFromTorch(args["embedding_model"])

# Start the video stream on source 0, pause two seconds (presumably to let the
# camera warm up), then start the FPS counter.
vs = VideoStream(src=0).start()
time.sleep(2.0)
fps = FPS().start()

def setTextBg(text, font, font_scale, thickness):
    """Measure how large `text` would be when rendered with the given font settings.

    Returns a `(width, height)` tuple taken from `cv2.getTextSize`; the
    baseline value that call also returns is discarded.
    """
    (text_width, text_height), _baseline = cv2.getTextSize(text, font, font_scale, thickness)
    return text_width, text_height

# Overlay styling shared by every label line drawn on the frame.
LABEL_FONT = cv2.FONT_HERSHEY_SIMPLEX
LABEL_SCALE = 0.35
LABEL_THICKNESS = 1
LABEL_SPACING = 10  # vertical distance in pixels between consecutive label lines
BOX_COLOR = (230, 0, 230)
TEXT_COLOR = (255, 255, 255)


def _emotion_label_lines(scores):
    """Format a 7-element score vector into the four overlay text lines."""
    #angry, disgust, fear, happy, sad, surprise, neutral
    return [
        "Angry : {:.2f} Disgust : {:.2f}".format(scores[0], scores[1]),
        "Fear : {:.2f} Happy : {:.2f}".format(scores[2], scores[3]),
        "Sad : {:.2f} Surprise : {:.2f}".format(scores[4], scores[5]),
        "Neutral : {:.2f}".format(scores[6]),
    ]


def _face_to_model_input(face_bgr):
    """Convert a BGR face crop into a (1, 48, 48, 1) float32 batch scaled to [0, 1]."""
    gray = cv2.cvtColor(face_bgr, cv2.COLOR_BGR2GRAY)
    gray = cv2.resize(gray, dsize=(48, 48), interpolation=cv2.INTER_CUBIC)
    # The network is fed pixel values divided by 255 during training and for
    # the still-image prediction, so the live frames get the same scaling.
    scaled = gray.astype('float32') / 255
    # Explicit reshape adds the batch and channel axes; the previous
    # expand_dims(axis=4) was out of range for a 3-D array.
    return scaled.reshape(1, 48, 48, 1)


def _draw_emotion_overlay(frame, box, scores):
    """Draw the face rectangle and the four emotion label lines onto `frame`."""
    (startX, startY, endX, endY) = box

    # Stack the lines upwards from the top of the box when there is room,
    # otherwise downwards from its bottom edge.
    if (startY - LABEL_SPACING) > 10:
        baselines = [startY - LABEL_SPACING * (k + 1) for k in range(4)]
    else:
        baselines = [endY + LABEL_SPACING * (k + 1) for k in range(4)]

    texts = _emotion_label_lines(scores)

    cv2.rectangle(frame, (startX, startY), (endX, endY), BOX_COLOR, 2)

    # First pass: filled backgrounds sized to each text line.
    for text, y in zip(texts, baselines):
        width, height = setTextBg(text, LABEL_FONT, LABEL_SCALE, LABEL_THICKNESS)
        cv2.rectangle(frame, (startX, y), (startX + width, y - height), BOX_COLOR, cv2.FILLED)

    # Second pass: the text itself, so no background can cover it.
    for text, y in zip(texts, baselines):
        cv2.putText(frame, text, (startX, y), LABEL_FONT, LABEL_SCALE, TEXT_COLOR, LABEL_THICKNESS)


while True:
    # grab the frame from the threaded video stream
    frame = vs.read()
    if frame is None:
        # Nothing to resize or display; stop instead of crashing in imutils.resize.
        print("no frame received from the video stream; stopping")
        break

    # resize the frame to have a width of 600 pixels (while
    # maintaining the aspect ratio), and then grab the image
    # dimensions
    frame = imutils.resize(frame, width=600)
    (h, w) = frame.shape[:2]

    # construct a blob from the image
    imageBlob = cv2.dnn.blobFromImage(
        cv2.resize(frame, (300, 300)), 1.0, (300, 300),
        (104.0, 177.0, 123.0), swapRB=False, crop=False)

    # apply OpenCV's deep learning-based face detector to localize
    # faces in the input image
    detector.setInput(imageBlob)
    detections = detector.forward()

    # loop over the detections
    for i in range(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with
        # the prediction and filter out weak detections
        confidence = detections[0, 0, i, 2]
        if confidence <= args["confidence"]:
            continue

        # compute the (x, y)-coordinates of the bounding box for the face
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = (int(v) for v in box.astype("int"))

        # Clip to the frame: a negative start index would otherwise be read as
        # "from the end" by the slice below and yield an empty or wrong crop.
        startX, startY = max(0, startX), max(0, startY)
        endX, endY = min(w, endX), min(h, endY)

        # extract the face ROI
        face = frame[startY:endY, startX:endX]
        (fH, fW) = face.shape[:2]

        # ensure the face width and height are sufficiently large
        if fW < 20 or fH < 20:
            continue

        custom = emo.predict(_face_to_model_input(face))
        _draw_emotion_overlay(frame, (startX, startY, endX, endY), custom[0])

    # update the FPS counter
    fps.update()

    # show the output frame
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF

    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break

