In [4]:
import os
import matplotlib
import numpy as np
np.random.seed(1234)
import matplotlib.pyplot as plt
import csv
import gzip
import lasagne
import theano
import theano.tensor as T
from numpy import genfromtxt
from lasagne.layers import batch_norm ,dropout,DenseLayer

# Short aliases for the Lasagne layer classes used when building the network.
conv = lasagne.layers.Conv2DLayer
pool = lasagne.layers.MaxPool2DLayer

# Generic training hyper-parameters.
NUM_EPOCHS = 500
BATCH_SIZE = 256
LEARNING_RATE = 0.001      # initial step size handed to the optimiser

# Dataset description.
DIM = 48                   # images are DIM x DIM pixels, single channel
DATA_SIZE = 35887          # number of samples in the CSV
# Number of emotion classes. This was 10, which disagreed with the
# `output_dim=7` reported by load_data() and with the 7-class confusion
# matrices used during training, so the softmax was wider than the label set.
NUM_CLASSES = 7
FILE_NAME = "fer2013/fer2013.csv"

In [5]:
def load_data():
    """Load the CSV at FILE_NAME and split it into train/validation/test sets.

    Every row must provide a ``pixels`` column (DIM*DIM space-separated
    integers) and an ``emotion`` column (integer class label).

    The samples are shuffled with a random *permutation* drawn from NumPy's
    global RNG and then cut 80% / 10% / 10%.  A permutation (rather than
    sampling with replacement) guarantees that each sample appears exactly
    once, so the three splits are disjoint and validation/test scores are not
    inflated by examples the network has already trained on.

    Returns:
        dict with
          * ``X_train`` / ``X_valid`` / ``X_test``: floatX arrays shaped
            (N, 1, DIM, DIM),
          * ``y_train`` / ``y_valid`` / ``y_test``: int32 label vectors,
          * ``num_examples_train`` / ``_valid`` / ``_test``: split sizes,
          * ``input_height`` / ``input_width``: spatial size of one image,
          * ``output_dim``: number of classes (fixed at 7).

    Raises:
        ValueError: if the file holds no samples or a row does not contain
            exactly DIM*DIM pixel values.
    """
    pixels_per_image = DIM * DIM
    pixel_rows = []
    label_rows = []
    with open(FILE_NAME, 'rt') as csvfile:
        # Collect rows in lists so the array size follows the file contents
        # instead of a hard-coded row count.
        for row_number, row in enumerate(csv.DictReader(csvfile)):
            values = np.fromstring(row['pixels'], dtype=int, sep=' ')
            if values.size != pixels_per_image:
                raise ValueError(
                    "row %d of %s has %d pixel values, expected %d"
                    % (row_number, FILE_NAME, values.size, pixels_per_image))
            pixel_rows.append(values)
            label_rows.append(int(row['emotion']))

    if not pixel_rows:
        raise ValueError("no samples found in %s" % FILE_NAME)

    X_data = np.asarray(pixel_rows, dtype=np.float64)
    Y_data = np.asarray(label_rows, dtype='int32')

    # Shuffle without replacement, then cut at 80% and 90%.
    num_all = X_data.shape[0]
    order = np.random.permutation(num_all)
    split_points = [int(.8 * num_all), int(.9 * num_all)]
    X_train, X_valid, X_test = np.split(X_data[order], split_points)
    y_train, y_valid, y_test = np.split(Y_data[order], split_points)

    # reshape for convolutions: (samples, channels, rows, cols)
    X_train = X_train.reshape((X_train.shape[0], 1, DIM, DIM))
    X_valid = X_valid.reshape((X_valid.shape[0], 1, DIM, DIM))
    X_test = X_test.reshape((X_test.shape[0], 1, DIM, DIM))

    print ("Train samples:", X_train.shape)
    print ("Validation samples:", X_valid.shape)
    print ("Test samples:", X_test.shape)

    return dict(
        X_train=lasagne.utils.floatX(X_train),
        y_train=y_train,
        X_valid=lasagne.utils.floatX(X_valid),
        y_valid=y_valid,
        X_test=lasagne.utils.floatX(X_test),
        y_test=y_test,
        num_examples_train=X_train.shape[0],
        num_examples_valid=X_valid.shape[0],
        num_examples_test=X_test.shape[0],
        input_height=X_train.shape[2],
        input_width=X_train.shape[3],
        output_dim=7,)
# Read the CSV once; the resulting dictionary of splits is used by the cells below.
data = load_data()

Train samples: (28709, 1, 48, 48)
Validation samples: (3589, 1, 48, 48)
Test samples: (3589, 1, 48, 48)


In [6]:
def _conv_bn_dropout(incoming, W, p):
    """3x3 'same'-padded conv (32 filters, ReLU) wrapped in batch_norm, then dropout(p)."""
    layer = batch_norm(conv(
        incoming,
        num_filters=32,
        filter_size=(3, 3),
        pad='same',
        nonlinearity=lasagne.nonlinearities.rectify,
        W=W,
    ))
    return dropout(layer, p=p)


def _dense_bn_dropout(incoming, W, p):
    """32-unit ReLU dense layer wrapped in batch_norm, then dropout(p)."""
    layer = batch_norm(DenseLayer(
        incoming,
        num_units=32,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=W,
    ))
    return dropout(layer, p=p)


def build_model(input_width, input_height, output_dim):
    """Build the classification network and return its output layer.

    Architecture (layer creation order is unchanged, so parameter lists
    saved from the previous definition still line up):

        input (None, 1, input_height, input_width)
        -> 4 x [conv 3x3, 32 filters, batch norm, ReLU, dropout]
        -> 2 x [dense 32, batch norm, ReLU, dropout]
        -> dense softmax with `output_dim` units

    Args:
        input_width: number of columns of one input image.
        input_height: number of rows of one input image.
        output_dim: number of softmax units (classes).

    Returns:
        The final softmax DenseLayer; the whole network is reachable from it.
    """
    ini = lasagne.init.HeUniform(gain='relu')

    # Lasagne's 2D layers expect (batch, channels, rows, cols), i.e. height
    # comes before width.  The two were previously swapped, which only
    # happened to work because the images are square.
    network = lasagne.layers.InputLayer(
        shape=(None, 1, input_height, input_width))

    # Convolutional feature extractor; one dropout probability per block.
    for drop_p in (0.3, 0.3, 0.35, 0.2):
        network = _conv_bn_dropout(network, ini, drop_p)

    # Fully connected classifier head.
    for drop_p in (0.3, 0.2):
        network = _dense_bn_dropout(network, ini, drop_p)

    l_out = DenseLayer(
        network,
        num_units=output_dim,
        nonlinearity=lasagne.nonlinearities.softmax,
        W=ini,
    )
    return l_out

# Size the network from what load_data() reported instead of NUM_CLASSES:
# the labels have data['output_dim'] classes, and a wider softmax could emit
# class ids that the confusion matrices used during training cannot hold.
model = build_model(data['input_width'], data['input_height'], data['output_dim'])

In [7]:
# Setting up the graph in Theano
sym_x = T.tensor4('sym_x')  # symbolic 4-D input batch: (batch, channels, rows, cols)
sym_t = T.ivector('sym_t')  # symbolic int32 vector holding the target class of each sample

# Network output: stochastic (dropout active) for training,
# deterministic for evaluation.
train_out = lasagne.layers.get_output(model, sym_x, deterministic=False)
eval_out = lasagne.layers.get_output(model, sym_x, deterministic=True)

# Retrieve list of all trainable parameters in the network.
all_params = lasagne.layers.get_all_params(model, trainable=True)

# Weight decay: L2 penalty over the regularizable parameters of every layer.
WEIGHT_DECAY = 0.001
all_layers = lasagne.layers.get_all_layers(model)
l2_penalty = lasagne.regularization.regularize_layer_params(all_layers, lasagne.regularization.l2) * WEIGHT_DECAY

# Data term: mean categorical cross-entropy. The small constant keeps the
# log away from zero probabilities.
cost = lasagne.objectives.categorical_crossentropy(train_out+1e-8, sym_t).mean()

# The penalty above used to be computed but never used, so no weight decay
# was applied.  It is now part of the objective that is differentiated, while
# `cost` (the value f_train reports) remains the plain cross-entropy so that
# logged losses keep their meaning.
total_cost = cost + l2_penalty

# Let Theano derive all the gradients we need for training.
all_grads = T.grad(total_cost, all_params)

# Parameter update rule. The learning rate lives in a shared variable so it
# can be changed between epochs without recompiling. Other schemes such as
# rmsprop or adadelta can be swapped in here.
sh_lr = theano.shared(lasagne.utils.floatX(LEARNING_RATE))

Updates = lasagne.updates.adam(all_grads, all_params, learning_rate=sh_lr)

# Compiled functions: f_eval returns class probabilities; f_train performs one
# optimisation step and returns [cross-entropy of the batch].
f_eval = theano.function([sym_x],eval_out, on_unused_input='warn')

f_train = theano.function([sym_x, sym_t],[cost],updates=Updates, on_unused_input='warn')

In [None]:
# Training loop: configuration and bookkeeping
from confusionmatrix import ConfusionMatrix

NUM_CLASSES = 7
batch_size = 100
num_epochs = 20

# Size of each split and the number of complete mini-batches it contains.
num_samples_train = len(data['X_train'])
num_samples_valid = len(data['X_valid'])
num_samples_test = len(data['X_test'])
num_batches_train = num_samples_train // batch_size
num_batches_valid = num_samples_valid // batch_size
num_batches_test = num_samples_test // batch_size

# Per-epoch history, filled in while training.
train_acc = []
train_loss = []
valid_acc = []
valid_loss = []
test_acc = []
test_loss = []
loss = []
cur_loss = 0

# Optional warm start from previously saved parameters (disabled):
#with np.load('para_d2.npz') as f:
#     param_values = [f['arr_%d' % i] for i in range(len(f.files))]
#lasagne.layers.set_all_param_values( model, param_values)

def _fill_confusion(confusion, inputs, targets, step):
    """Evaluate `inputs` with f_eval in chunks of `step` and add the results to `confusion`.

    Every sample is visited, including a final chunk shorter than `step`.
    Returns `confusion` for convenience.
    """
    for start in range(0, inputs.shape[0], step):
        chunk = slice(start, start + step)
        preds = np.argmax(f_eval(inputs[chunk]), axis=-1)
        confusion.batch_add(targets[chunk], preds)
    return confusion


try:
    for epoch in range(num_epochs):
        # Forward -> backprop -> parameter update on every full training batch.
        cur_loss = 0
        for batch_index in range(num_batches_train):
            batch = slice(batch_index * batch_size, (batch_index + 1) * batch_size)
            batch_loss = f_train(data['X_train'][batch], data['y_train'][batch])
            cur_loss += batch_loss[0]
        # Mean loss per batch.  This used to divide by `batch_size`, which is
        # unrelated to the number of terms that were summed.
        loss += [cur_loss / max(num_batches_train, 1)]

        # Accuracy on all three splits with the deterministic network.
        # Earlier code skipped the last partial batch of each split.
        confusion_train = _fill_confusion(
            ConfusionMatrix(NUM_CLASSES), data['X_train'], data['y_train'], batch_size)
        confusion_valid = _fill_confusion(
            ConfusionMatrix(NUM_CLASSES), data['X_valid'], data['y_valid'], batch_size)
        confusion_test = _fill_confusion(
            ConfusionMatrix(NUM_CLASSES), data['X_test'], data['y_test'], batch_size)

        train_acc_cur = confusion_train.accuracy()
        valid_acc_cur = confusion_valid.accuracy()
        test_acc_cur = confusion_test.accuracy()

        train_acc += [train_acc_cur]
        valid_acc += [valid_acc_cur]
        test_acc += [test_acc_cur]

        # Checkpoint the parameters after every epoch.
        np.savez('para.npz', *lasagne.layers.get_all_param_values(model))

        # Decay the learning rate every 10 epochs.  The condition used to test
        # the leftover index of the last inner loop instead of the epoch
        # counter, so the decay did not follow the epoch schedule.
        if (epoch + 1) % 10 == 0:
            new_lr = sh_lr.get_value() * 0.7
            sh_lr.set_value(lasagne.utils.floatX(new_lr))

        # The last field is the larger of the train-valid and train-test gaps.
        print("Epoch %i : Train Loss %e , Train acc %f,  Valid acc %f ,Test acc %f , acc : %f"\
        % (epoch+1, loss[-1], train_acc_cur*100, valid_acc_cur*100,test_acc_cur*100,
           max(train_acc_cur-valid_acc_cur,train_acc_cur-test_acc_cur)*100))
except KeyboardInterrupt:
    # Allow stopping early from the notebook; results of completed epochs are kept.
    print("Training interrupted; keeping the results of the completed epochs.")
    

# Final test-set score.
# `x_test` / `targets_test` were never defined (NameError); the test split
# lives in data['X_test'] / data['y_test'].  It is evaluated in mini-batches,
# including the last partial one, so the whole split is not pushed through the
# network in a single call.  A leftover loop that only created empty subplots
# has been removed.
confusion_test = ConfusionMatrix(NUM_CLASSES)
for start in range(0, data['X_test'].shape[0], batch_size):
    chunk = slice(start, start + batch_size)
    net_out = f_eval(data['X_test'][chunk])
    preds = np.argmax(net_out, axis=-1)
    confusion_test.batch_add(data['y_test'][chunk], preds)
print("\nTest set Acc:  %f" %(confusion_test.accuracy()))


# Training vs. validation accuracy per epoch.
# A dedicated name is used for the x-axis so the loop variable `epoch` is not
# rebound to an array.
epoch_axis = np.arange(len(train_acc))
fig, ax = plt.subplots()
ax.plot(epoch_axis, train_acc, 'r', label='Train Acc')
ax.plot(epoch_axis, valid_acc, 'b', label='Val Acc')
ax.legend()
ax.set_xlabel('Epochs')
ax.set_ylabel('Acc')
# Accuracies are fractions in [0, 1]; the previous lower limit of 0.75 hid any
# curve below 75%.
ax.set_ylim([0.0, 1.03]);