In [1]:
# Imports
import os
import time

import numpy as np
import tensorflow as tf
from tflearn import DNN
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from tflearn.layers.merge_ops import merge
from tflearn.layers.normalization import batch_normalization
from tflearn.optimizers import Momentum

curses is not supported on this machine (please install/reinstall curses for an optimal experience)


In [2]:
# Constants

# Dataset folders. The trailing slash is required: file names are appended to
# these values by plain string concatenation.
TRAINING_DATA_PATH = 'dataset/Training/'
VALIDATION_DATA_PATH = 'dataset/PublicTest/'

# Side length of the square, single-channel input images (corresponds to the image size).
IMAGE_INPUT_SIZE = 48

# Output locations for TensorBoard logs, training checkpoints and the final model.
LOGS_PATH = 'logs'
CHECKPOINT_PATH = 'checkpoints/chkpnt'
MODEL_PATH = 'model/model.bin'

In [3]:
# Global containers, filled by the loading cells below with the keys
# 'X' (images), 'X2' (landmarks) and 'Y' (labels).
dict_data = {}
dict_validation = {}

In [4]:
# Hyperparams

# Settings handed to the Momentum optimizer in build_model().
learning_rate = 0.015        # passed as `learning_rate`
optimizer_param = 0.97       # passed as `momentum`
learning_rate_decay = 0.865  # passed as `lr_decay`
decay_step = 50              # passed as `decay_step`

# Passed as `keep_prob` to both dropout layers in build_model().
keep_prob = 0.956

In [5]:
# Load the image arrays of both splits and reshape them to
# (samples, IMAGE_INPUT_SIZE, IMAGE_INPUT_SIZE, 1), i.e. one channel per image.
image_shape = [-1, IMAGE_INPUT_SIZE, IMAGE_INPUT_SIZE, 1]
dict_data['X'] = np.load(TRAINING_DATA_PATH + 'images.npy').reshape(image_shape)
dict_validation['X'] = np.load(VALIDATION_DATA_PATH + 'images.npy').reshape(image_shape)

In [6]:
# Sanity check: show the validation image array loaded above.
# NumPy abbreviates the large array with "..." in the printed output.
print(dict_validation['X'])

[[[[254]
   [254]
   [254]
   ...
   [ 16]
   [  0]
   [161]]

  [[254]
   [254]
   [254]
   ...
   [ 17]
   [  0]
   [122]]

  [[254]
   [254]
   [254]
   ...
   [ 26]
   [  0]
   [114]]

  ...

  [[ 66]
   [ 86]
   [ 99]
   ...
   [255]
   [252]
   [254]]

  [[ 84]
   [ 92]
   [ 92]
   ...
   [236]
   [255]
   [255]]

  [[ 93]
   [ 90]
   [ 85]
   ...
   [ 42]
   [129]
   [180]]]


 [[[156]
   [184]
   [198]
   ...
   [157]
   [154]
   [150]]

  [[146]
   [182]
   [199]
   ...
   [161]
   [154]
   [150]]

  [[135]
   [176]
   [195]
   ...
   [165]
   [161]
   [155]]

  ...

  [[ 28]
   [ 25]
   [ 21]
   ...
   [179]
   [175]
   [173]]

  [[ 29]
   [ 18]
   [ 22]
   ...
   [177]
   [172]
   [169]]

  [[ 21]
   [ 14]
   [ 23]
   ...
   [172]
   [167]
   [161]]]


 [[[ 69]
   [118]
   [ 61]
   ...
   [118]
   [124]
   [144]]

  [[ 66]
   [115]
   [ 57]
   ...
   [129]
   [123]
   [131]]

  [[ 64]
   [116]
   [ 61]
   ...
   [136]
   [140]
   [136]]

  ...

  [[114]
   [ 87]
   [136]
   

In [7]:
# Load the landmark arrays: training split first, then the validation split.
for _split, _data_root in ((dict_data, TRAINING_DATA_PATH),
                           (dict_validation, VALIDATION_DATA_PATH)):
    _split['X2'] = np.load(_data_root + 'landmarks.npy')

In [8]:
# Sanity check: show the validation landmark array loaded above
# (the printed output lists integer coordinate pairs per sample).
print(dict_validation['X2'])

[[[ 7 17]
  [ 7 21]
  [ 7 26]
  ...
  [24 40]
  [21 40]
  [19 40]]

 [[ 3 21]
  [ 5 26]
  [ 7 31]
  ...
  [27 38]
  [25 40]
  [23 40]]

 [[ 8 21]
  [ 8 26]
  [ 9 31]
  ...
  [26 39]
  [24 39]
  [22 39]]

 ...

 [[ 6 16]
  [ 6 21]
  [ 6 26]
  ...
  [25 37]
  [23 37]
  [22 37]]

 [[-1 28]
  [ 1 33]
  [ 3 38]
  ...
  [30 37]
  [28 38]
  [26 38]]

 [[ 5 19]
  [ 6 24]
  [ 7 29]
  ...
  [25 37]
  [24 37]
  [22 37]]]


In [9]:
# Load the label arrays for the training and validation splits.
labels_file = 'labels.npy'
dict_data['Y'] = np.load(TRAINING_DATA_PATH + labels_file)
dict_validation['Y'] = np.load(VALIDATION_DATA_PATH + labels_file)

In [10]:
# Sanity check: show the validation labels loaded above
# (the printed rows contain 0/1 entries, one row per sample).
print(dict_validation['Y'])

[[1 0 0 ... 0 0 0]
 [0 1 0 ... 0 0 0]
 [0 0 0 ... 1 0 0]
 ...
 [0 0 0 ... 1 0 0]
 [0 0 0 ... 1 0 0]
 [0 0 0 ... 1 0 0]]


In [11]:
# The model consists of two separate sub-networks that are merged at the end:
#   1. image network:    48x48 single-channel images
#   2. landmark network: 68 landmarks, each with x,y coordinates
def build_model():
    """Build the two-branch TFLearn network used for training and evaluation.

    Reads the module-level settings ``IMAGE_INPUT_SIZE``, ``keep_prob``,
    ``learning_rate``, ``optimizer_param``, ``learning_rate_decay`` and
    ``decay_step``. The cells in this notebook call it inside
    ``tf.Graph().as_default()`` so every call builds into a fresh graph.

    Returns:
        The tensor returned by ``regression``; the notebook wraps it in ``DNN``.
    """
    # ---- image branch ----
    # Created before the landmark input: the notebook feeds data as [images, landmarks].
    network_images = input_data(shape=[None, IMAGE_INPUT_SIZE, IMAGE_INPUT_SIZE, 1], name='input_image')

    # Three identical stages with a growing number of filters:
    #   3x3 convolution (ReLU) -> batch normalization -> max pooling (kernel 3, stride 2).
    # With stride 2 the spatial size is expected to halve per stage
    # (48 -> 24 -> 12 -> 6), assuming TFLearn's default padding — TODO confirm.
    for n_filters in (64, 128, 256):
        network_images = conv_2d(network_images, n_filters, 3, activation='relu')
        network_images = batch_normalization(network_images)
        network_images = max_pool_2d(network_images, 3, strides=2)

    # Dropout for regularization, followed by the dense head of the image branch.
    network_images = dropout(network_images, keep_prob=keep_prob)
    # Fully connected layer with 4096 output nodes.
    network_images = fully_connected(network_images, 4096, activation='relu')
    network_images = dropout(network_images, keep_prob=keep_prob)
    # Fully connected layer with 1024 output nodes.
    network_images = fully_connected(network_images, 1024, activation='relu')
    # Reduce to 128 nodes so both branches contribute equally sized vectors to the merge.
    network_images = fully_connected(network_images, 128, activation='relu')

    # ---- landmark branch ----
    network_landmarks = input_data(shape=[None, 68, 2], name='input_landmarks')
    # Connect the landmark coordinates to 1024 output nodes
    # (presumably fully_connected flattens the 68x2 input itself — TODO confirm).
    network_landmarks = fully_connected(network_landmarks, 1024, activation='relu')
    # Reduce the 1024 nodes to 128 output nodes.
    network_landmarks = fully_connected(network_landmarks, 128, activation='relu')

    # ---- merge and classify ----
    # Concatenate the two 128-node branch outputs along the feature axis.
    network = merge([network_images, network_landmarks], 'concat', axis=1)
    # Map to the 7 emotion classes; softmax yields values between 0 and 1.
    network = fully_connected(network, 7, activation='softmax')

    # Momentum optimizer with learning-rate decay.
    optimizer = Momentum(learning_rate=learning_rate, momentum=optimizer_param,
                         lr_decay=learning_rate_decay, decay_step=decay_step)
    # Training op with categorical cross-entropy loss.
    # NOTE(review): learning_rate is passed here as well as to the optimizer
    # above; confirm which one TFLearn uses when given an Optimizer instance.
    network = regression(network, optimizer=optimizer, loss='categorical_crossentropy',
                         learning_rate=learning_rate, name='output')

    return network

In [12]:
# Start training graph

# Create the checkpoint and model output directories before training starts.
# NOTE(review): the saver is expected to fail when the parent directory of the
# save path does not exist, which would only surface after the full training
# run — confirm against the TensorFlow saver documentation.
for _output_path in (CHECKPOINT_PATH, MODEL_PATH):
    _output_dir = os.path.dirname(_output_path)
    if _output_dir:  # a bare file name has no directory component to create
        os.makedirs(_output_dir, exist_ok=True)

with tf.Graph().as_default():
    network = build_model()
    model = DNN(network, tensorboard_dir=LOGS_PATH,
                tensorboard_verbose=0, checkpoint_path=CHECKPOINT_PATH,
                max_checkpoints=1)

    # Start the training. Inputs are passed as [images, landmarks], matching
    # the order in which build_model() creates its two input layers.
    model.fit([dict_data['X'], dict_data['X2']], dict_data['Y'],
              validation_set=([dict_validation['X'], dict_validation['X2']], dict_validation['Y']),
              snapshot_step=500,
              show_metric=True,
              batch_size=128,
              n_epoch=13)

    # Save the trained model so the evaluation cell can restore it.
    model.save(MODEL_PATH)

Training Step: 2924  | total loss: [1m[32m0.34200[0m[0m | time: 8.096s
| Momentum | epoch: 013 | loss: 0.34200 - acc: 0.9099 -- iter: 28672/28709
Training Step: 2925  | total loss: [1m[32m0.34067[0m[0m | time: 9.136s
| Momentum | epoch: 013 | loss: 0.34067 - acc: 0.9056 | val_loss: 1.32749 - val_acc: 0.6063 -- iter: 28709/28709
--


In [14]:
# Test accuracy on the validation data: rebuild the network in a fresh graph,
# restore the weights saved at MODEL_PATH and print the accuracy in percent.
with tf.Graph().as_default():
    network = build_model()
    model = DNN(
        network,
        tensorboard_dir=LOGS_PATH,
        tensorboard_verbose=0,
        checkpoint_path=CHECKPOINT_PATH,
        max_checkpoints=1,
    )

    model.load(MODEL_PATH)
    accuracy = model.evaluate(
        [dict_validation['X'], dict_validation['X2']],
        dict_validation['Y'],
    )
    # evaluate() returns a sequence; its first entry is printed as the accuracy.
    print("{}%".format(accuracy[0] * 100))

INFO:tensorflow:Restoring parameters from C:\Users\Jay\Documents\test\model\model.bin
60.62970186723046%
