In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.utils import shuffle

# Human-readable emotion names. Presumably position i names integer label i
# from the first CSV column -- TODO confirm against the dataset description.
label_map = ['Anger', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

# Data can be found here: https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge
def get_image_data():
    """Load labels and pixel rows from 'fer2013.csv' in the working directory.

    The first line of the file is treated as a header and skipped. For every
    other non-blank line, the first comma-separated field is parsed as the
    integer label and the second field as a whitespace-separated list of
    integer pixel values.

    Returns:
        (X, Y): X is a float array of pixel values divided by 255.0, one row
        per sample; Y is an integer array of labels.

    Raises:
        IOError/OSError: if 'fer2013.csv' cannot be opened.
        ValueError: if a label or pixel field is not an integer.
    """
    labels, pixel_rows = [], []

    # Use a context manager so the file handle is closed even if parsing fails.
    with open('fer2013.csv') as csv_file:
        next(csv_file, None)  # skip the header line (no-op on an empty file)
        for line in csv_file:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            row = line.split(',')
            labels.append(int(row[0]))
            pixel_rows.append([int(p) for p in row[1].split()])

    X, Y = np.array(pixel_rows) / 255.0, np.array(labels)
    return X, Y

def error_rate(predictions, expected):
    """Return the fraction of positions where `predictions` differs from `expected`."""
    mismatches = predictions != expected
    return np.mean(mismatches)

def one_hot(Y):
    """One-hot encode a 1-D sequence of non-negative integer class labels.

    The number of columns is `max(Y) + 1`, so every label has a valid column
    even when some class ids do not occur in `Y`. (Sizing the matrix by the
    number of distinct labels fails with an IndexError for inputs such as
    [0, 2].) When the labels are exactly 0..K-1 the result has K columns.

    Args:
        Y: 1-D list or array of integer labels.

    Returns:
        Float array of shape (len(Y), max(Y) + 1) with a single 1 per row;
        an array of shape (0, 0) when `Y` is empty.

    Raises:
        ValueError: if any label is negative (a negative index would silently
            wrap around to the last columns).
    """
    labels = np.asarray(Y, dtype=np.int64)
    N = len(labels)
    if N == 0:
        return np.zeros((0, 0))
    if labels.min() < 0:
        raise ValueError("one_hot expects non-negative integer labels")

    K = int(labels.max()) + 1
    index = np.zeros((N, K))
    # Vectorised assignment: row i gets a 1 in column labels[i].
    index[np.arange(N), labels] = 1
    return index

def init_weight_and_bias(M1, M2):
    """Create initial parameters for a layer with M1 inputs and M2 outputs.

    Returns:
        (W, b): W is an (M1, M2) float32 array of standard-normal draws
        divided by sqrt(M1 + M2); b is a float32 zero vector of length M2.
    """
    scale = np.sqrt(M1 + M2)
    weights = np.random.randn(M1, M2) / scale
    biases = np.zeros(M2)
    return weights.astype(np.float32), biases.astype(np.float32)

In [6]:
# Load the scaled pixel matrix X and the integer label vector Y from fer2013.csv.
X, Y = get_image_data()


[[ 0.2745098   0.31372549  0.32156863 ...,  0.41568627  0.42745098
   0.32156863]
 [ 0.59215686  0.58823529  0.57647059 ...,  0.75686275  0.71764706
   0.72156863]
 [ 0.90588235  0.83137255  0.61176471 ...,  0.34509804  0.43137255
   0.59607843]
 ..., 
 [ 0.06666667  0.06666667  0.0627451  ...,  0.60392157  0.52156863
   0.44313725]
 [ 0.11764706  0.10980392  0.10980392 ...,  0.1372549   0.11764706
   0.10980392]
 [ 0.0745098   0.05098039  0.05490196 ...,  0.74117647  0.78039216
   0.78823529]]


In [18]:
class HiddenLayer(object):
    """A fully connected layer with ReLU activation, backed by TensorFlow variables."""

    def __init__(self, M1, M2, an_id):
        """Create the layer's weight and bias variables.

        Args:
            M1: number of input units.
            M2: number of output units.
            an_id: identifier stored on the layer as `self.id`.
        """
        self.id, self.M1, self.M2 = an_id, M1, M2

        initial_W, initial_b = init_weight_and_bias(M1, M2)
        self.W = tf.Variable(initial_W.astype(np.float32))
        self.b = tf.Variable(initial_b.astype(np.float32))

        # Trainable parameters, exposed so the owning model can collect them.
        self.params = [self.W, self.b]

    def forward(self, X):
        """Return relu(X @ W + b) as a TensorFlow tensor."""
        pre_activation = tf.matmul(X, self.W) + self.b
        return tf.nn.relu(pre_activation)
    
    
    
class ANN(object):
    """Feed-forward classifier built from ReLU `HiddenLayer`s and a linear output layer.

    The TensorFlow graph (variables, loss, optimizer) is constructed inside
    `fit`. Note that `fit` opens and closes its own `tf.Session`, so
    `forward` and `predict` only build graph tensors; they do not evaluate them.
    """

    def __init__(self, hidden_layer_sizes):
        """Store the hidden-layer widths; layers themselves are created in `fit`.

        Args:
            hidden_layer_sizes: iterable of ints, one width per hidden layer.
        """
        self.hidden_layer_sizes = hidden_layer_sizes

    def fit(self, X, Y, learning_rate=10e-7, mu=0.99, decay=0.999, reg=10e-3, epochs=400, batch_size=100, show_fig=False):
        """Train the network with mini-batch RMSProp and report validation metrics.

        The data is shuffled, the last 1000 samples are held out for
        validation, and the rest is used for training. Every 20 batches the
        validation cost and error rate are printed and the cost is recorded.

        Args:
            X: 2-D array of input features, one row per sample.
            Y: 1-D array of integer class labels.
            learning_rate: RMSProp learning rate.
            mu: RMSProp momentum.
            decay: RMSProp decay.
            reg: multiplier on the summed L2 loss of all weights and biases.
            epochs: number of passes over the training data.
            batch_size: samples per batch; a final partial batch is dropped.
            show_fig: if True, plot the recorded validation costs at the end.
        """
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = one_hot(Y).astype(np.float32)
        # Take the class count from the encoded targets so the output layer
        # always has the same width as the label matrix fed to the loss.
        K = Y.shape[1]

        # Hold out the last 1000 (shuffled) samples as the validation set.
        X_valid, Y_valid = X[-1000:], Y[-1000:]
        Y_valid_flat = np.argmax(Y_valid, axis=1)
        X, Y = X[:-1000], Y[:-1000]

        N, D = X.shape

        # Build the hidden stack; each layer's input width is the previous
        # layer's output width, starting from the feature dimension D.
        self.hidden_layers = []
        M1 = D
        for layer_id, M2 in enumerate(self.hidden_layer_sizes):
            self.hidden_layers.append(HiddenLayer(M1, M2, layer_id))
            M1 = M2

        # Final linear layer producing one logit per class.
        W, b = init_weight_and_bias(M1, K)
        self.W = tf.Variable(W.astype(np.float32))
        self.b = tf.Variable(b.astype(np.float32))

        self.params = [self.W, self.b]
        for h in self.hidden_layers:
            self.params += h.params

        tfX = tf.placeholder(tf.float32, shape=(None, D), name='X')
        tfT = tf.placeholder(tf.float32, shape=(None, K), name='T')
        act = self.forward(tfX)

        # L2 penalty over every parameter (weights and biases alike).
        rcost = reg * sum([tf.nn.l2_loss(p) for p in self.params])

        # Keyword arguments are required by TensorFlow >= 1.0, which rejects
        # positional logits/labels for this op.
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=act, labels=tfT)
        ) + rcost

        prediction = self.predict(tfX)
        train_op = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=mu).minimize(cost)

        n_batches = N // batch_size
        costs = []
        init = tf.global_variables_initializer()

        with tf.Session() as session:
            session.run(init)
            for i in range(epochs):
                X, Y = shuffle(X, Y)
                for j in range(n_batches):
                    X_batch = X[j * batch_size:(j + 1) * batch_size]
                    Y_batch = Y[j * batch_size:(j + 1) * batch_size]

                    session.run(train_op, feed_dict={tfX: X_batch, tfT: Y_batch})
                    if j % 20 == 0:
                        # Fetch cost and predictions in a single pass over the
                        # validation set instead of two separate runs.
                        c, p = session.run([cost, prediction], feed_dict={tfX: X_valid, tfT: Y_valid})
                        costs.append(c)
                        e = error_rate(Y_valid_flat, p)

                        print("Epoch=%d, Batch=%d/%d, Cost=%.3f, ErrorRate=%.3f" % (i, j, n_batches, c, e))

        if show_fig:
            plt.plot(costs)
            plt.show()

    def forward(self, X):
        """Return the logits tensor for input tensor X (hidden layers, then linear output)."""
        Z = X
        for h in self.hidden_layers:
            Z = h.forward(Z)
        return tf.matmul(Z, self.W) + self.b

    def predict(self, X):
        """Return a tensor of predicted class indices (argmax over the logits)."""
        act = self.forward(X)
        return tf.argmax(act, 1)
    
        
        
    
        
    

In [None]:
# Network with three hidden layers of 2000, 1000 and 500 units.
model = ANN([2000, 1000, 500])
# Train with fit()'s default hyperparameters; show_fig=True plots the recorded
# validation costs once training finishes.
model.fit(X, Y, show_fig=True)

Epoch=0, Batch=0/348, Cost=12.343, ErrorRate=0.885
Epoch=0, Batch=20/348, Cost=12.343, ErrorRate=0.885
Epoch=0, Batch=40/348, Cost=12.341, ErrorRate=0.882
Epoch=0, Batch=60/348, Cost=12.340, ErrorRate=0.877
Epoch=0, Batch=80/348, Cost=12.338, ErrorRate=0.872
Epoch=0, Batch=100/348, Cost=12.335, ErrorRate=0.872
Epoch=0, Batch=120/348, Cost=12.333, ErrorRate=0.863
Epoch=0, Batch=140/348, Cost=12.330, ErrorRate=0.859
Epoch=0, Batch=160/348, Cost=12.327, ErrorRate=0.854
Epoch=0, Batch=180/348, Cost=12.323, ErrorRate=0.857
Epoch=0, Batch=200/348, Cost=12.320, ErrorRate=0.856
Epoch=0, Batch=220/348, Cost=12.317, ErrorRate=0.845
Epoch=0, Batch=240/348, Cost=12.313, ErrorRate=0.831
Epoch=0, Batch=260/348, Cost=12.310, ErrorRate=0.830
Epoch=0, Batch=280/348, Cost=12.307, ErrorRate=0.818
Epoch=0, Batch=300/348, Cost=12.303, ErrorRate=0.805
Epoch=0, Batch=320/348, Cost=12.300, ErrorRate=0.793
Epoch=0, Batch=340/348, Cost=12.296, ErrorRate=0.790
Epoch=1, Batch=0/348, Cost=12.295, ErrorRate=0.784
E