In [None]:
import nolearn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set(context='poster', style='dark')

# file kfkd.py
import os

from pandas.io.parsers import read_csv
from sklearn.utils import shuffle

from lasagne import layers
from lasagne.updates import sgd, nesterov_momentum, rmsprop, adagrad
from nolearn.lasagne import NeuralNet

from __future__ import print_function
print('loaded successfully!')

In [None]:
# CSV files read by load(): FTRAIN when test is False, FTEST when test is True.
FTRAIN = 'training.csv'
FTEST = 'test.csv'

In [None]:
def load(test=False, cols=None):
    """Read the train or test CSV and return ``(X, y)`` as float32 arrays.

    Reads FTEST when *test* is True and FTRAIN otherwise.  *cols* may be a
    list of target column names; when given, only those columns (plus
    'Image') are kept before rows with missing values are dropped.

    Returns:
        X: one row per image, pixel values divided by 255.
        y: targets transformed as ``(value - 48) / 48`` and shuffled
           together with X (fixed ``random_state=42``); ``None`` when
           *test* is True.

    Side effect: prints the per-column non-null counts.
    """
    csv_path = os.path.expanduser(FTEST if test else FTRAIN)
    df = pd.read_csv(csv_path)

    # Each 'Image' cell is one string of space-separated pixel values;
    # parse it into a 1-D numpy array.
    df['Image'] = df['Image'].apply(
        lambda pixels: np.fromstring(pixels, sep=' '))

    if cols:
        df = df[list(cols) + ['Image']]

    print(df.count())  # non-null count per column, before dropping rows
    df = df.dropna()

    X = (np.vstack(df['Image'].values) / 255.).astype(np.float32)

    if test:
        # The test file carries no target columns.
        return X, None

    # Every column except the last one ('Image') is treated as a target.
    y = (df[df.columns[:-1]].values - 48) / 48
    X, y = shuffle(X, y, random_state=42)
    return X, y.astype(np.float32)
    
    
def plot_training_history(net0, yscale='log'):
    """Plot per-epoch training and validation loss of a fitted network.

    Reads ``train_loss`` and ``valid_loss`` from every entry of
    ``net0.train_history_`` and draws both curves on a new figure whose
    y-axis uses *yscale*.

    NOTE(review): the return value is whatever ``plt.ylabel`` returns (the
    last pyplot call), not the figure itself.  That is kept as-is so the
    output of existing cells that end with this call does not change.
    """
    import matplotlib.pyplot as plt

    history = net0.train_history_
    train_loss = np.array([epoch["train_loss"] for epoch in history])
    valid_loss = np.array([epoch["valid_loss"] for epoch in history])

    plt.figure()
    plt.yscale(yscale)

    plt.plot(train_loss, '-r', label="train")
    plt.plot(valid_loss, label="valid")

    plt.grid()
    plt.legend()
    plt.xlabel("epoch")
    return plt.ylabel("loss")


def plot_sample(x, y, axis, dot_color='magenta'):
    """Draw one face on *axis* and overlay its keypoints.

    *x* is reshaped to a 96x96 image and shown in grayscale.  *y* holds
    alternating x/y keypoint coordinates in scaled form; each is mapped
    back with ``* 48 + 48`` before being scattered in *dot_color*.
    """
    face = x.reshape(96, 96)
    axis.imshow(face, cmap='gray')

    # Even positions go on the horizontal axis, odd ones on the vertical axis.
    xs = y[0::2] * 48 + 48
    ys = y[1::2] * 48 + 48
    axis.scatter(xs, ys, marker='o', s=15, color=dot_color)
    
    
def plot_faces(X_test, y_pred, dot_color='magenta'):
    """Show up to 16 faces in a 4x4 grid with their keypoints overlaid.

    Args:
        X_test: indexable collection of images; ``X_test[i]`` is passed to
            plot_sample as the image.
        y_pred: indexable collection of keypoint vectors aligned with
            *X_test*.
        dot_color: colour of the keypoint markers.

    The first 16 samples are drawn.  If fewer than 16 images or keypoint
    vectors are supplied, only the available ones are drawn and the
    remaining grid cells stay empty.
    """
    fig = plt.figure(figsize=(10, 10))
    fig.subplots_adjust(
        left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)

    # A fixed range(16) raised IndexError for inputs with fewer than 16
    # samples; clamp to what is actually there.
    n_faces = min(16, len(X_test), len(y_pred))
    for i in range(n_faces):
        ax = fig.add_subplot(4, 4, i + 1, xticks=[], yticks=[])
        plot_sample(X_test[i], y_pred[i], ax, dot_color=dot_color)

    plt.show()
    

# train dataset

def plot_train_comparison(net):
    """Plot ground-truth and predicted keypoints for training faces.

    Reads the notebook-level ``X`` and ``y``, so both must already be
    defined when this is called.  Ground truth is drawn in blue; the
    predictions of *net* use plot_faces' default colour.
    """
    predictions = net.predict(X)

    print('ground truth:')
    plot_faces(X, y, dot_color='blue')

    print('\nprediction:')
    plot_faces(X, predictions)
    
    
def plot_test_pred(net):
    """Predict keypoints for the notebook-level ``X_test`` and plot them.

    ``X_test`` must already be defined when this is called.
    """
    plot_faces(X_test, net.predict(X_test))

In [None]:
print('this will take a bit')
# Load the training set with all target columns; load() drops every row
# that has a missing value and shuffles the result.
X, y = load()

In [None]:
# Sanity check: the networks below declare input_shape=(None, 9216), so the
# second dimension of X should be 9216.
np.shape(X)

In [None]:
# Sanity check: the networks below declare output_num_units=30, so y should
# have 30 columns.
np.shape(y)

In [None]:
# simple ANN with one hidden layer

from lasagne import layers
from lasagne.updates import nesterov_momentum
from nolearn.lasagne import NeuralNet

net0 = NeuralNet(
    # Architecture: input -> one fully connected hidden layer -> output.
    layers=[
        ('input', layers.InputLayer),
        ('hidden', layers.DenseLayer),
        ('output', layers.DenseLayer),
    ],

    # Per-layer settings; the keyword prefix is the layer name from above.
    input_shape=(None, 9216),  # 9216 = 96 * 96 pixels per image; batch dimension is None
    hidden_num_units=100,
    output_num_units=30,  # one output per target value
    output_nonlinearity=None,  # identity: raw values come out of the last layer

    # Optimizer: plain stochastic gradient descent.
    update=sgd,
    update_learning_rate=0.01,

    regression=True,  # treat this as a regression problem
    max_epochs=50,
    verbose=1,
)

net0.fit(X, y)
plot_training_history(net0)

In [None]:
# change update to nesterov_momentum
# it's named after a Russian guy; must be good

from lasagne import layers
from lasagne.updates import nesterov_momentum
from nolearn.lasagne import NeuralNet

net1 = NeuralNet(
    # Same architecture as net0: input -> one hidden layer -> output.
    layers=[
        ('input', layers.InputLayer),
        ('hidden', layers.DenseLayer),
        ('output', layers.DenseLayer),
    ],

    # Per-layer settings; the keyword prefix is the layer name from above.
    input_shape=(None, 9216),  # 9216 = 96 * 96 pixels per image; batch dimension is None
    hidden_num_units=100,
    output_num_units=30,  # one output per target value
    output_nonlinearity=None,  # identity: raw values come out of the last layer

    # Optimizer: the only change from net0 is the update rule.
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.9,

    regression=True,  # treat this as a regression problem
    max_epochs=50,
    verbose=1,
)

net1.fit(X, y)
plot_training_history(net1)

In [None]:
# Compare net0's predictions with the ground-truth keypoints on training
# faces (uses the X and y loaded in an earlier cell).
plot_train_comparison(net0)

In [None]:
# test dataset

# load(test=True) returns None in place of targets, so the second value is
# discarded.
X_test, _ = load(test=True)

plot_test_pred(net0)

In [None]:
# Relies on X_test having been loaded in an earlier cell.
plot_test_pred(net1)

In [None]:
# add a layer
# a few more layers, and it can beat AlphaGo

from lasagne import layers
from lasagne.updates import nesterov_momentum
from nolearn.lasagne import NeuralNet

net3 = NeuralNet(
    # Architecture: input -> two fully connected hidden layers -> output.
    layers=[
        ('input', layers.InputLayer),
        ('hidden1', layers.DenseLayer),
        ('hidden2', layers.DenseLayer),
        ('output', layers.DenseLayer),
    ],

    # Per-layer settings; the keyword prefix is the layer name from above.
    input_shape=(None, 9216),  # 9216 = 96 * 96 pixels per image; batch dimension is None
    hidden1_num_units=100,
    hidden2_num_units=100,
    output_num_units=30,  # one output per target value
    output_nonlinearity=None,  # identity: raw values come out of the last layer

    # Optimizer: same settings as net1.
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.9,

    regression=True,  # treat this as a regression problem
    max_epochs=50,
    verbose=1,
)

net3.fit(X, y)

In [None]:
# Relies on X_test having been loaded in an earlier cell.
plot_test_pred(net3)

In [None]:
# Loss curves of the one-hidden-layer (net1) and two-hidden-layer (net3)
# networks; each call opens its own figure.
plot_training_history(net1)
plot_training_history(net3)

In [None]:
# add dropout layers

from lasagne import layers
from lasagne.updates import nesterov_momentum
from nolearn.lasagne import NeuralNet

net4 = NeuralNet(
    # Architecture: two hidden layers, with dropout applied to the input
    # and to the output of the first hidden layer.
    layers=[
        ('input', layers.InputLayer),
        ('dropout_0', layers.DropoutLayer),
        ('hidden1', layers.DenseLayer),
        ('dropout_1', layers.DropoutLayer),
        ('hidden2', layers.DenseLayer),
        ('output', layers.DenseLayer),
    ],

    # Per-layer settings; the keyword prefix is the layer name from above.
    input_shape=(None, 9216),  # 9216 = 96 * 96 pixels per image; batch dimension is None
    dropout_0_p=0.5,  # dropout probability for dropout_0
    hidden1_num_units=100,
    dropout_1_p=0.5,  # dropout probability for dropout_1
    hidden2_num_units=100,
    output_num_units=30,  # one output per target value
    output_nonlinearity=None,  # identity: raw values come out of the last layer

    # Optimizer: same settings as net3.
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.9,

    regression=True,  # treat this as a regression problem
    max_epochs=50,
    verbose=1,
)

net4.fit(X, y)

plot_training_history(net4)

In [None]:
# increase learning rate

from lasagne import layers
from lasagne.updates import nesterov_momentum
from nolearn.lasagne import NeuralNet

net5 = NeuralNet(
    # Same architecture as net4: two hidden layers with dropout on the
    # input and after the first hidden layer.
    layers=[
        ('input', layers.InputLayer),
        ('dropout_0', layers.DropoutLayer),
        ('hidden1', layers.DenseLayer),
        ('dropout_1', layers.DropoutLayer),
        ('hidden2', layers.DenseLayer),
        ('output', layers.DenseLayer),
    ],

    # Per-layer settings; the keyword prefix is the layer name from above.
    input_shape=(None, 9216),  # 9216 = 96 * 96 pixels per image; batch dimension is None
    dropout_0_p=0.5,
    hidden1_num_units=100,
    dropout_1_p=0.5,
    hidden2_num_units=100,
    output_num_units=30,  # one output per target value
    output_nonlinearity=None,  # identity: raw values come out of the last layer

    # Optimizer: learning rate raised from 0.01 (net4) to 0.05.
    update=nesterov_momentum,
    update_learning_rate=0.05,
    update_momentum=0.9,

    regression=True,  # treat this as a regression problem
    max_epochs=50,
    verbose=1,
)

net5.fit(X, y)

plot_training_history(net5)

In [None]:
# try the rmsprop update rule, with explicit rectify activations

import theano

from lasagne.nonlinearities import sigmoid, rectify, very_leaky_rectify
# scaled_tanh = ScaledTanH()

def float32(k):
    """Return *k* converted to a NumPy array of dtype float32.

    A scalar comes back as a 0-d array, matching what the former
    ``np.cast['float32'](k)`` produced.
    """
    # np.cast was deprecated in NumPy 1.25 and removed in NumPy 2.0;
    # asarray(...).astype(...) is the equivalent it was implemented with.
    return np.asarray(k).astype(np.float32)


class AdjustVariable(object):
    """Callback that moves a network attribute linearly from *start* to *stop*.

    Intended for use in a NeuralNet's ``on_epoch_finished`` list.  On each
    call it looks up the attribute called *name* on the network and calls
    its ``set_value`` with the scheduled float32 value for the current
    epoch, so that attribute must expose ``set_value`` (e.g. a Theano
    shared variable).

    Args:
        name: attribute name on the network, e.g. 'update_learning_rate'.
        start: value used at the first epoch.
        stop: value used at epoch ``max_epochs`` and afterwards.
    """

    def __init__(self, name, start=0.03, stop=0.001):
        self.name = name
        self.start, self.stop = start, stop
        self.ls = None  # schedule, built lazily once nn.max_epochs is known

    def __call__(self, nn, train_history):
        if self.ls is None:
            self.ls = np.linspace(self.start, self.stop, nn.max_epochs)

        # The schedule is indexed with epoch - 1, i.e. epochs count from 1.
        epoch = train_history[-1]['epoch']

        # Clamp to the schedule: once training runs past max_epochs (for
        # example when fit() is called again and the history keeps
        # growing), hold the final value instead of raising IndexError.
        index = min(max(epoch - 1, 0), len(self.ls) - 1)
        new_value = float32(self.ls[index])
        getattr(nn, self.name).set_value(new_value)

        

net6 = NeuralNet(
    # Architecture: two hidden layers with dropout on the input and after
    # the first hidden layer.
    layers=[
        ('input', layers.InputLayer),
        ('dropout_0', layers.DropoutLayer),
        ('hidden1', layers.DenseLayer),
        ('dropout_1', layers.DropoutLayer),
        ('hidden2', layers.DenseLayer),
        ('output', layers.DenseLayer),
    ],

    # Per-layer settings; the keyword prefix is the layer name from above.
    input_shape=(None, 9216),  # 9216 = 96 * 96 pixels per image; batch dimension is None
    dropout_0_p=0.50,
    hidden1_num_units=100,
    hidden1_nonlinearity=rectify,  # can also try sigmoid
    dropout_1_p=0.50,
    hidden2_num_units=100,
    hidden2_nonlinearity=rectify,
    output_num_units=30,  # one output per target value
    output_nonlinearity=None,  # identity: raw values come out of the last layer

    # Optimizer: rmsprop.  The learning rate is wrapped in a Theano shared
    # variable, which is what AdjustVariable needs in order to call
    # set_value on it.
    update=rmsprop,
    update_learning_rate=theano.shared(float32(0.05)),

    # Optional schedule -- uncomment to adjust values after every epoch.
    # NOTE(review): the update_momentum entry needs an update_momentum
    # shared variable to be passed above as well -- confirm that the chosen
    # update rule accepts a momentum argument before enabling it.
    # on_epoch_finished=[
    #     AdjustVariable('update_learning_rate', start=0.03, stop=0.0001),
    #     AdjustVariable('update_momentum', start=0.9, stop=0.999),
    # ],

    regression=True,  # treat this as a regression problem
    max_epochs=50,
    verbose=1,
)

net6.fit(X, y)

plot_training_history(net6)

In [None]:
# change dropout rates a bit

from lasagne import layers
from lasagne.updates import nesterov_momentum
from nolearn.lasagne import NeuralNet

net7 = NeuralNet(
    # Architecture: two hidden layers with dropout on the input and after
    # the first hidden layer.
    layers=[
        ('input', layers.InputLayer),
        ('dropout_0', layers.DropoutLayer),
        ('hidden1', layers.DenseLayer),
        ('dropout_1', layers.DropoutLayer),
        ('hidden2', layers.DenseLayer),
        ('output', layers.DenseLayer),
    ],

    # Per-layer settings; the keyword prefix is the layer name from above.
    input_shape=(None, 9216),  # 9216 = 96 * 96 pixels per image; batch dimension is None
    dropout_0_p=0.75,
    hidden1_num_units=100,
    dropout_1_p=0.10,
    hidden2_num_units=100,
    output_num_units=30,  # one output per target value
    output_nonlinearity=None,  # identity: raw values come out of the last layer

    # Optimizer: no `update=` is passed, so NeuralNet's default update rule
    # is used (NOTE(review): presumably nesterov_momentum -- confirm for the
    # installed nolearn version).  Both values are Theano shared variables
    # so the callbacks below can call set_value on them.
    update_learning_rate=theano.shared(float32(0.05)),
    update_momentum=theano.shared(float32(0.9)),

    # After every epoch, move the learning rate down and the momentum up
    # along linear schedules.
    on_epoch_finished=[
        AdjustVariable('update_learning_rate', start=0.05, stop=0.0001),
        AdjustVariable('update_momentum', start=0.9, stop=0.999),
    ],

    regression=True,  # treat this as a regression problem
    max_epochs=50,
    verbose=1,
)

net7.fit(X, y)

plot_training_history(net7)

In [None]:
# try your own
# http://lasagne.readthedocs.org/en/latest/index.html