In [5]:
import os
import matplotlib
import numpy as np
np.random.seed(1234)
import matplotlib.pyplot as plt
import csv
import lasagne
import theano
import theano.tensor as T
from numpy import genfromtxt

# Short aliases for the Lasagne convolution / max-pooling layer classes.
conv = lasagne.layers.Conv2DLayer
pool = lasagne.layers.MaxPool2DLayer

# Configuration constants.
# NOTE(review): NUM_EPOCHS, BATCH_SIZE and LEARNING_RATE look like training
# hyper-parameters; they are not referenced by the cells visible here.
NUM_EPOCHS = 500
BATCH_SIZE = 256
LEARNING_RATE = 0.001
DIM = 48            # side length (pixels) of the square face crops fed to the network
DATA_SIZE = 35887   # presumably the number of samples in FILE_NAME -- TODO confirm
# Was 10, which disagrees with the 7-unit softmax built by build_cnn and with
# the 7 emotion labels (0=Angry ... 6=Neutral) used by the prediction cell.
NUM_CLASSES = 7
FILE_NAME = "fer2013/fer2013.csv"

from lasagne.layers import Conv2DLayer as ConvLayer
#from lasagne.layers.dnn import Conv2DDNNLayer as ConvLayer
from lasagne.layers import ElemwiseSumLayer
from lasagne.layers import InputLayer
from lasagne.layers import DenseLayer
from lasagne.layers import GlobalPoolLayer
from lasagne.layers import PadLayer
from lasagne.layers import ExpressionLayer
from lasagne.layers import NonlinearityLayer
from lasagne.nonlinearities import softmax, rectify
from lasagne.layers import batch_norm

In [6]:
def build_cnn(input_var=None, n=5, input_shape=(1, 48, 48), num_classes=7):
    """Build a residual CNN classifier (6n+2 weighted layers) ending in a softmax.

    The network is: one 3x3 convolution with 16 filters, three stacks of ``n``
    residual blocks (16, 32 and 64 filters; the 2nd and 3rd stacks halve the
    spatial size), global pooling, and a dense softmax layer.

    Parameters
    ----------
    input_var : theano tensor or None
        Symbolic input bound to the InputLayer (passed through unchanged).
    n : int
        Number of residual blocks per stack.
    input_shape : tuple
        Per-sample input shape ``(channels, height, width)``. The default
        ``(1, 48, 48)`` reproduces the previously hard-coded input layer.
    num_classes : int
        Number of softmax output units. The default ``7`` reproduces the
        previously hard-coded output layer.

    Returns
    -------
    lasagne.layers.DenseLayer
        The output layer of the network.
    """

    def residual_block(l, increase_dim=False, projection=False):
        """Residual block of two stacked 3x3 conv layers plus a shortcut.

        With ``increase_dim`` the first convolution uses stride 2 and the
        number of filters is doubled; the shortcut is then either a strided
        1x1 projection (``projection=True``, option B in the paper) or a
        strided identity with zero-padded channels (option A).
        """
        input_num_filters = l.output_shape[1]
        if increase_dim:
            first_stride = (2,2)
            out_num_filters = input_num_filters*2
        else:
            first_stride = (1,1)
            out_num_filters = input_num_filters

        stack_1 = batch_norm(ConvLayer(l, num_filters=out_num_filters, filter_size=(3,3), stride=first_stride, nonlinearity=rectify, pad='same', W=lasagne.init.HeNormal(gain='relu'), flip_filters=False))
        stack_2 = batch_norm(ConvLayer(stack_1, num_filters=out_num_filters, filter_size=(3,3), stride=(1,1), nonlinearity=None, pad='same', W=lasagne.init.HeNormal(gain='relu'), flip_filters=False))

        # Pick the shortcut branch. A separate local name is used so the
        # boolean `projection` argument is not overwritten by a layer object.
        if increase_dim:
            if projection:
                # projection shortcut, as option B in paper
                shortcut = batch_norm(ConvLayer(l, num_filters=out_num_filters, filter_size=(1,1), stride=(2,2), nonlinearity=None, pad='same', b=None, flip_filters=False))
            else:
                # identity shortcut, as option A in paper: subsample every
                # second pixel, then zero-pad the channel axis on both sides
                # so the filter count doubles. `::2` keeps ceil(size/2)
                # elements, so the declared output shape rounds up as well
                # (identical to size//2 for even sizes).
                identity = ExpressionLayer(l, lambda X: X[:, :, ::2, ::2], lambda s: (s[0], s[1], (s[2] + 1)//2, (s[3] + 1)//2))
                shortcut = PadLayer(identity, [out_num_filters//4,0,0], batch_ndim=1)
        else:
            shortcut = l

        return NonlinearityLayer(ElemwiseSumLayer([stack_2, shortcut]), nonlinearity=rectify)

    # Building the network
    l_in = InputLayer(shape=(None,) + tuple(input_shape), input_var=input_var)

    # first layer, output is 16 x H x W (16 x 48 x 48 for the default input)
    l = batch_norm(ConvLayer(l_in, num_filters=16, filter_size=(3,3), stride=(1,1), nonlinearity=rectify, pad='same', W=lasagne.init.HeNormal(gain='relu'), flip_filters=False))

    # first stack of residual blocks, output is 16 x H x W (16 x 48 x 48)
    for _ in range(n):
        l = residual_block(l)

    # second stack of residual blocks, output is 32 x H/2 x W/2 (32 x 24 x 24)
    l = residual_block(l, increase_dim=True)
    for _ in range(1,n):
        l = residual_block(l)

    # third stack of residual blocks, output is 64 x H/4 x W/4 (64 x 12 x 12)
    l = residual_block(l, increase_dim=True)
    for _ in range(1,n):
        l = residual_block(l)

    # global pooling over the spatial axes (GlobalPoolLayer's default pooling function)
    l = GlobalPoolLayer(l)

    # fully connected softmax output layer
    network = DenseLayer(
            l, num_units=num_classes,
            W=lasagne.init.HeNormal(),
            nonlinearity=softmax)

    return network


In [12]:
# Symbolic placeholders: a 4-D tensor for image batches, an int vector for labels
input_var = T.tensor4('inputs')
target_var = T.ivector('targets')

# Instantiate the residual network on top of the symbolic input
print("Building model and compiling functions...")
network = build_cnn(input_var=input_var, n=5)
print("number of parameters in model: %d"
      % lasagne.layers.count_params(network, trainable=True))

# Evaluation-time forward pass (deterministic=True)
test_prediction = lasagne.layers.get_output(network, deterministic=True)

# Mean categorical cross-entropy between predictions and targets
test_loss = lasagne.objectives.categorical_crossentropy(
    test_prediction, target_var).mean()

# Fraction of samples whose arg-max prediction equals the target label
predicted_labels = T.argmax(test_prediction, axis=1)
test_acc = T.mean(T.eq(predicted_labels, target_var),
                  dtype=theano.config.floatX)

# Compile the inference function: it maps an input batch to the network's
# output (test_prediction); loss and accuracy are not part of its outputs.
val_fn = theano.function([input_var], test_prediction)

Building model and compiling functions...
number of parameters in model: 463671


In [13]:
# Restore the network weights from the arrays stored as arr_0, arr_1, ...
# in the .npz archive, in index order.
param_values = []
with np.load('66.65.npz') as f:
    for idx in range(len(f.files)):
        param_values.append(f['arr_%d' % idx])
lasagne.layers.set_all_param_values(network, param_values)

In [15]:
import cv2
import dlib
import matplotlib.pyplot as plt

# Detect faces in one image, classify each face with the network and paste
# the matching emoji over it, warped to the face's jaw-line landmarks.

predictor_path = 'shape_predictor_68_face_landmarks.dat'
image_path = 'testImg/test5.jpg'
output_path = 'result5.jpg'

# Emoji image per predicted label
# (0=Angry, 1=Disgust, 2=Fear, 3=Happy, 4=Sad, 5=Surprise, 6=Neutral)
# NOTE(review): label 0 (Angry) points at 'emojis/happy.png', the same file as
# label 3 (Happy). This looks unintended -- confirm and swap in an angry emoji
# if such an asset exists.
modes = np.array(['emojis/happy.png','emojis/disgust.png','emojis/fear.png','emojis/happy.png',
                  'emojis/sad.png','emojis/surprised.png','emojis/neutral.png'])

# Haar cascade kept for compatibility with the original cell (it was created
# twice there); the detection below only uses dlib.
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(predictor_path)

frame = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
if frame is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise IOError("could not read image: %s" % image_path)

# cv2.imread yields BGR channel order, so both conversions must start from BGR
# (the grayscale conversion previously used COLOR_RGB2GRAY, which swaps the
# red/blue weights).
img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
img_h, img_w = gray.shape[:2]

# Second argument = number of upsampling passes before detection
facelocs = detector(img, 3)

# Crop every detected face (clipped to the image, since detector boxes can
# extend past the borders) and resize it to the DIM x DIM network input.
face_crops = []
kept_boxes = []
for box in facelocs:
    top, bottom = max(box.top(), 0), min(box.bottom(), img_h)
    left, right = max(box.left(), 0), min(box.right(), img_w)
    if bottom <= top or right <= left:
        continue  # nothing of this box lies inside the image
    face = cv2.resize(gray[top:bottom, left:right], (DIM, DIM), interpolation=cv2.INTER_CUBIC)
    face_crops.append(face)
    kept_boxes.append(box)

if face_crops:
    faces = np.array(face_crops, dtype=np.uint8).reshape((len(face_crops), 1, DIM, DIM))
    net_out = val_fn(faces)
    preds = np.argmax(net_out, axis=-1)
else:
    faces = np.empty((0, 1, DIM, DIM), dtype=np.uint8)
    preds = np.empty((0,), dtype=np.int64)

for box, label in zip(kept_boxes, preds):
    shapes = predictor(img, box)
    box_w = box.right() - box.left()
    box_h = box.bottom() - box.top()

    emoji = cv2.imread(modes[label], -1)
    if emoji is None:
        raise IOError("could not read emoji image: %s" % modes[label])
    emoji = cv2.resize(cv2.cvtColor(emoji, cv2.COLOR_BGRA2RGBA), (box_w, box_h), interpolation=cv2.INTER_CUBIC)
    rows, cols, d = emoji.shape

    # Pad the emoji so that the canvas side lengths reach the box diagonal,
    # leaving room for the warped emoji.
    diag = np.sqrt(box_h**2 + box_w**2)
    wexp = int((diag - box_w)/2)
    hexp = int((diag - box_h)/2)
    emoji = cv2.copyMakeBorder(emoji, hexp, hexp, wexp, wexp, cv2.BORDER_REPLICATE)

    # Three anchor points on the emoji, given as fractions of a 512x512
    # template and shifted by the padding. They are truncated to whole pixels
    # with floor(); the previous np.uint8 cast wrapped around above 255 and
    # corrupted the transform for large faces.
    src = np.array([(cols*(20.0/512.0) + wexp, rows*(200.0/512.0) + hexp), (cols*(256.0/512.0)+ wexp, rows*(495.0/512.0)+ hexp), (cols*(492.0/512.0)+ wexp, rows*(200.0/512.0)+ hexp)])
    src = np.float32(np.floor(src))
    # Matching targets: landmarks 0, 8 and 16 of the shape predictor,
    # expressed in padded-emoji coordinates.
    dest = np.array([(shapes.part(0).x - box.left()+ wexp, shapes.part(0).y - box.top()+ hexp),(shapes.part(8).x-box.left()+ wexp, shapes.part(8).y - box.top()+ hexp),(shapes.part(16).x - box.left()+ wexp, shapes.part(16).y - box.top()+ hexp)])
    dest = np.float32(dest)
    rows, cols, d = emoji.shape
    trans = cv2.getAffineTransform(src, dest)
    emoji = cv2.warpAffine(emoji, trans, (cols, rows))

    # Target rectangle of the padded emoji in image coordinates, clipped to
    # the image so the slices below always have matching shapes.
    y0, y1 = box.top() - hexp, box.bottom() + hexp
    x0, x1 = box.left() - wexp, box.right() + wexp
    cy0, cy1 = max(y0, 0), min(y1, img_h)
    cx0, cx1 = max(x0, 0), min(x1, img_w)
    if cy1 <= cy0 or cx1 <= cx0:
        continue
    patch = emoji[cy0 - y0: cy1 - y0, cx0 - x0: cx1 - x0]

    # Alpha-blend the RGB channels of the emoji over the image region
    alpha = patch[:, :, 3:4] / 255.0
    region = img[cy0:cy1, cx0:cx1, :3]
    img[cy0:cy1, cx0:cx1, :3] = patch[:, :, :3] * alpha + region * (1.0 - alpha)

# Write the result once, after all faces were processed (it used to be
# rewritten on every loop iteration). As before, nothing is written when no
# face was found.
if len(kept_boxes) > 0:
    plt.imsave(output_path, img)

# The original cell also carried a commented-out webcam loop
# (cv2.VideoCapture(0) / cap.read() / cv2.imshow); it was dead code and has
# been removed.