In [1]:
%%writefile dl_utlils.py
import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
from collections import OrderedDict
import numpy as np

rng = np.random.RandomState(1234)

def relu(x):
    return T.maximum(0, x)


def sigmoid(x):
    return T.nnet.sigmoid(x)


def tanh(x):
    return T.tanh(x)


class Metric(object):

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def negative_log_likelihood(self):
        self.prob_of_y_given_x = T.nnet.softmax(self.x)
        return -T.mean(T.log(self.prob_of_y_given_x)[T.arange(self.y.shape[0]), self.y])

    def cross_entropy(self):
        self.prob_of_y_given_x = T.nnet.softmax(self.x)
        return T.mean(T.nnet.categorical_crossentropy(self.prob_of_y_given_x, self.y))

    def mean_squared_error(self):
        return T.mean((self.x - self.y) ** 2)

    def errors(self):
        if self.y.ndim != self.y_pred.ndim:
            raise TypeError('y should have the same shape as self.y_pred',
                            ('y', self.y.type, 'y_pred', self.y_pred.type))

        if self.y.dtype.startswith('int'):
            self.prob_of_y_given_x = T.nnet.softmax(self.x)
            self.y_pred = T.argmax(self.prob_of_y_given_x, axis=1)
            return T.mean(T.neq(self.y_pred, self.y))
        else:
            return NotImplementedError()

    def accuracy(self):
        if self.y.dtype.startswith('int'):
            self.prob_of_y_given_x = T.nnet.softmax(self.x)
            self.y_pred = T.argmax(self.prob_of_y_given_x, axis=1)
            return T.mean(T.eq(self.y_pred, self.y))
        else:
            return NotImplementedError()


def shared_data(x, y):
    shared_x = theano.shared(
        np.asarray(x, dtype=theano.config.floatX), borrow=True)
    if y is None:
        return shared_x

    shared_y = theano.shared(
        np.asarray(y, dtype=theano.config.floatX), borrow=True)

    return shared_x, T.cast(shared_y, 'int32')


def build_shared_zeros(shape, name):
    """ Builds a theano shared variable filled with a zeros numpy array """
    return theano.shared(
        value=np.zeros(shape, dtype=theano.config.floatX),
        name=name,
        borrow=True
    )


def dropout(x, train, p=0.5, rng = np.random.RandomState(1234)):
    masked_x = None
    if p > 0.0 and p < 1.0:
        seed = rng.randint(2 ** 30)
        srng = T.shared_randomstreams.RandomStreams(seed)
        mask = srng.binomial(
            n=1,
            p=1.0 - p,
            size=x.shape,
            dtype=theano.config.floatX
        )
        masked_x = x * mask
    else:
        masked_x = x
    return T.switch(T.neq(train, 0), masked_x, x * (1.0 - p))


class Optimizer(object):

    def __init__(self, params=None):
        if params is None:
            return NotImplementedError()
        self.params = params

    def updates(self, loss=None):
        if loss is None:
            return NotImplementedError()

        self.updates = OrderedDict()
        self.gparams = [T.grad(loss, param) for param in self.params]


def build_shared_zeros(shape, name):
    """ Builds a theano shared variable filled with a zeros numpy array """
    return theano.shared(
        value=np.zeros(shape, dtype=theano.config.floatX),
        name=name,
        borrow=True
    )


class RMSprop(Optimizer):

    def __init__(self, learning_rate=0.001, alpha=0.99, eps=1e-8, params=None):
        super(RMSprop, self).__init__(params=params)

        self.learning_rate = learning_rate
        self.alpha = alpha
        self.eps = eps

        self.mss = [
            build_shared_zeros(t.shape.eval(), 'ms') for t in self.params]

    def updates(self, loss=None):
        super(RMSprop, self).updates(loss=loss)

        for ms, param, gparam in zip(self.mss, self.params, self.gparams):
            _ms = ms * self.alpha
            _ms += (1 - self.alpha) * gparam * gparam
            self.updates[ms] = _ms
            self.updates[param] = param - self.learning_rate * \
                gparam / T.sqrt(_ms + self.eps)

        return self.updates

class AdaDelta(Optimizer):

    def __init__(self, rho=0.95, eps=1e-6, params=None):
        super(AdaDelta, self).__init__(params=params)

        self.rho = rho
        self.eps = eps
        self.accugrads = [
            build_shared_zeros(t.shape.eval(), 'accugrad') for t in self.params]
        self.accudeltas = [
            build_shared_zeros(t.shape.eval(), 'accudelta') for t in self.params]

    def updates(self, loss=None):
        super(AdaDelta, self).updates(loss=loss)

        for accugrad, accudelta, param, gparam\
                in zip(self.accugrads, self.accudeltas, self.params, self.gparams):
            agrad = self.rho * accugrad + (1 - self.rho) * gparam * gparam
            dx = - T.sqrt((accudelta + self.eps) / (agrad + self.eps)) * gparam
            self.updates[accudelta] = (
                self.rho * accudelta + (1 - self.rho) * dx * dx)
            self.updates[param] = param + dx
            self.updates[accugrad] = agrad

        return self.updates

class MomentumSGD(Optimizer):

    def __init__(self, learning_rate=0.01, momentum=0.9, params=None):
        super(MomentumSGD, self).__init__(params=params)
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.vs = [build_shared_zeros(t.shape.eval(), 'v')
                   for t in self.params]

    def updates(self, loss=None):
        super(MomentumSGD, self).updates(loss=loss)

        for v, param, gparam in zip(self.vs, self.params, self.gparams):
            _v = v * self.momentum
            _v = _v - self.learning_rate * gparam
            self.updates[param] = param + _v
            self.updates[v] = _v

        return self.updates    

class Adam(Optimizer):

    def __init__(self, alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-8, gamma=1 - 1e-8, params=None):
        super(Adam, self).__init__(params=params)

        self.alpha = alpha
        self.b1 = beta1
        self.b2 = beta2
        self.gamma = gamma
        self.t = theano.shared(np.float32(1))
        self.eps = eps

        self.ms = [build_shared_zeros(t.shape.eval(), 'm')
                   for t in self.params]
        self.vs = [build_shared_zeros(t.shape.eval(), 'v')
                   for t in self.params]

    def updates(self, loss=None):
        super(Adam, self).updates(loss=loss)
        self.b1_t = self.b1 * self.gamma ** (self.t - 1)

        for m, v, param, gparam \
                in zip(self.ms, self.vs, self.params, self.gparams):
            _m = self.b1_t * m + (1 - self.b1_t) * gparam
            _v = self.b2 * v + (1 - self.b2) * gparam ** 2

            m_hat = _m / (1 - self.b1 ** self.t)
            v_hat = _v / (1 - self.b2 ** self.t)

            self.updates[param] = param - self.alpha * \
                m_hat / (T.sqrt(v_hat) + self.eps)
            self.updates[m] = _m
            self.updates[v] = _v
        self.updates[self.t] = self.t + 1.0

        return self.updates

# Multi Layer Perceptron

class Layer:
    # Constructor
    def __init__(self, in_dim, out_dim):
        rng = np.random.RandomState(1234)
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.W = theano.shared(rng.uniform(low=-0.08, high=0.08,
                                           size=(in_dim, out_dim)
                                           ).astype('float32'), name='W')
        self.b = theano.shared(np.zeros(out_dim).astype('float32'), name='b')
        self.params = [self.W, self.b]
        

    # Forward Propagation
    def f_prop(self, x):
        self.z = T.dot(x, self.W) + self.b
        return self.z

class Activation:
    # Constructor
    def __init__(self, function):
        self.function = function
        self.params = []

    # Forward Propagation
    def f_prop(self, x):
        self.z = self.function(x)
        return self.z
    
class BatchNorm:
    # Constructor
    def __init__(self, shape, epsilon=np.float32(1e-5)):
        self.shape = shape
        self.epsilon = epsilon

        self.gamma = theano.shared(np.ones(self.shape, dtype="float32"),
                                   name="gamma")
        self.beta = theano.shared(np.zeros(self.shape, dtype="float32"),
                                  name="beta")
        self.params = [self.gamma, self.beta]

    # Forward Propagation
    def f_prop(self, x):
        if x.ndim == 2:
            mean = T.mean(x, axis=0, keepdims=True)
            std = T.sqrt(T.var(x, axis=0, keepdims=True) + self.epsilon)
        elif x.ndim == 4:
            mean = T.mean(x, axis=(0, 2, 3), keepdims=True)
            std = T.sqrt(T.var(x, axis=(0, 2, 3), keepdims=True) +
                         self.epsilon)

        normalized_x = (x - mean) / std
        self.z = self.gamma * normalized_x + self.beta
        return self.z



Overwriting dl_utlils.py


In [2]:
from collections import OrderedDict
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from theano.tensor.nnet import conv2d
from theano.tensor.signal import pool
from theano.tensor.shared_randomstreams import RandomStreams

import numpy as np
import theano
import theano.tensor as T

rng = np.random.RandomState(1234)

Using cuDNN version 5110 on context None
Mapped name None to device cuda: GeForce GTX 775M (0000:01:00.0)


In [20]:
mnist = fetch_mldata('MNIST original')
mnist_X, mnist_y = shuffle(mnist.data.astype('float32'),
                           mnist.target.astype('int32'),
                           random_state=42)

mnist_X = mnist_X / 255.0

train_X, test_X, train_y, test_y = train_test_split(mnist_X, mnist_y,
                                                    test_size=0.2,
                                                    random_state=42)

In [26]:
train_y = np.eye(10)[train_y].astype('int32')
train_X, valid_X, train_y, valid_y = train_test_split(train_X, train_y, test_size=0.2, random_state=42)

In [31]:
import theano
import theano.tensor as T
from dl_utlils import *
activation = relu
mlp_layers =  [784, 500, 500, 500, 10]
layers = []
for i_layer in range(len(mlp_layers)-2):
    layers.append(Layer(mlp_layers[i_layer], mlp_layers[i_layer+1]))
    BatchNorm(mlp_layers[i_layer+1],mlp_layers[i_layer+1])
    layers.append(Activation(relu))
    
layers.append(Layer(mlp_layers[-2], mlp_layers[-1]))
# layers.append(Activation(T.nnet.softmax))
    
x = T.fmatrix('x')
t = T.fmatrix('t')

params = []
for i, layer in enumerate(layers):
    params += layer.params
    if i == 0:
        layer_out = layer.f_prop(x)
    else:
        layer_out = layer.f_prop(layer_out)

y = layers[-1].z
# cost = T.mean(T.nnet.categorical_crossentropy(y, t))
cost = T.mean((y - t) ** 2)
optimizer = Adam(params=params)
updates = optimizer.updates(cost)

train = theano.function(inputs=[x, t], outputs=cost, updates=updates,
                        allow_input_downcast=True, name='train')
valid = theano.function(inputs=[x, t], outputs=[cost, T.argmax(y, axis=1)],
                        allow_input_downcast=True, name='valid')
test = theano.function(inputs=[x], outputs=T.argmax(y, axis=1), name='test')

batch_size = 100
n_batches = train_X.shape[0]//batch_size
n_epochs = 50
for epoch in range(n_epochs):
    #train_X, train_y = shuffle(train_X, train_y)
    for i in range(n_batches):
        start = i*batch_size
        end = start + batch_size
        train(train_X[start:end], train_y[start:end])
    valid_cost, pred_y = valid(valid_X, valid_y)
    print('EPOCH:: %i, Validation cost: %.3f, Validation F1: %.3f' %
          (epoch + 1, valid_cost,
           f1_score(np.argmax(valid_y, axis=1).astype('int32'),
                    pred_y, average='macro')))



EPOCH:: 1, Validation cost: 0.008, Validation F1: 0.966
EPOCH:: 2, Validation cost: 0.006, Validation F1: 0.975
EPOCH:: 3, Validation cost: 0.005, Validation F1: 0.976
EPOCH:: 4, Validation cost: 0.004, Validation F1: 0.977
EPOCH:: 5, Validation cost: 0.004, Validation F1: 0.974
EPOCH:: 6, Validation cost: 0.004, Validation F1: 0.975
EPOCH:: 7, Validation cost: 0.004, Validation F1: 0.977
EPOCH:: 8, Validation cost: 0.005, Validation F1: 0.970
EPOCH:: 9, Validation cost: 0.003, Validation F1: 0.978
EPOCH:: 10, Validation cost: 0.004, Validation F1: 0.978
EPOCH:: 11, Validation cost: 0.004, Validation F1: 0.977
EPOCH:: 12, Validation cost: 0.004, Validation F1: 0.976
EPOCH:: 13, Validation cost: 0.004, Validation F1: 0.977
EPOCH:: 14, Validation cost: 0.004, Validation F1: 0.977
EPOCH:: 15, Validation cost: 0.003, Validation F1: 0.980
EPOCH:: 16, Validation cost: 0.003, Validation F1: 0.978
EPOCH:: 17, Validation cost: 0.004, Validation F1: 0.976
EPOCH:: 18, Validation cost: 0.004, Vali

In [34]:
import os 
import pickle
base_folder = os.path.join("..","data")
filename = "Train_N225_forex.pickle"
path = os.path.join(base_folder, filename)
with open(path, mode='rb') as f:
    dataset = pickle.load(f)
train, test = dataset

In [36]:
train_X, train_y = train
valid_X, valid_y = test


In [48]:
train_X.shape
train_y = train_y.reshape(len(train_y),1)
valid_y = valid_y.reshape(len(valid_y),1)


In [55]:
from sklearn.metrics import r2_score
activation = relu
mlp_layers =  [11, 20, 20, 20, 1]
layers = []
for i_layer in range(len(mlp_layers)-2):
    layers.append(Layer(mlp_layers[i_layer], mlp_layers[i_layer+1]))
    BatchNorm(mlp_layers[i_layer+1],mlp_layers[i_layer+1])
    layers.append(Activation(relu))
    
layers.append(Layer(mlp_layers[-2], mlp_layers[-1]))
# layers.append(Activation(T.nnet.softmax))
    
x = T.fmatrix('x')
t = T.fmatrix('t')

params = []
for i, layer in enumerate(layers):
    params += layer.params
    if i == 0:
        layer_out = layer.f_prop(x)
    else:
        layer_out = layer.f_prop(layer_out)

y = layers[-1].z
# cost = T.mean(T.nnet.categorical_crossentropy(y, t))
cost = T.mean((y - t) ** 2)
optimizer = Adam(params=params)
updates = optimizer.updates(cost)

train = theano.function(inputs=[x, t], outputs=cost, updates=updates,
                        allow_input_downcast=True, name='train')
valid = theano.function(inputs=[x, t], outputs=[cost, y],
                        allow_input_downcast=True, name='valid')

batch_size = 100
n_batches = train_X.shape[0]//batch_size
n_epochs = 500
for epoch in range(n_epochs):
    #train_X, train_y = shuffle(train_X, train_y)
    for i in range(n_batches):
        start = i*batch_size
        end = start + batch_size
        train(train_X[start:end], train_y[start:end])
    valid_cost, pred_y = valid(valid_X, valid_y)
    print('EPOCH:: %i, Validation cost: %.3f, Validation R2: %.3f' %
          (epoch + 1, valid_cost,
           r2_score(valid_y.astype('float32'),
                    pred_y)))




EPOCH:: 1, Validation cost: 0.593, Validation R2: -55.769
EPOCH:: 2, Validation cost: 0.533, Validation R2: -50.020
EPOCH:: 3, Validation cost: 0.429, Validation R2: -40.079
EPOCH:: 4, Validation cost: 0.273, Validation R2: -25.128
EPOCH:: 5, Validation cost: 0.113, Validation R2: -9.847
EPOCH:: 6, Validation cost: 0.037, Validation R2: -2.522
EPOCH:: 7, Validation cost: 0.030, Validation R2: -1.884
EPOCH:: 8, Validation cost: 0.037, Validation R2: -2.526
EPOCH:: 9, Validation cost: 0.034, Validation R2: -2.248
EPOCH:: 10, Validation cost: 0.027, Validation R2: -1.596
EPOCH:: 11, Validation cost: 0.023, Validation R2: -1.169
EPOCH:: 12, Validation cost: 0.020, Validation R2: -0.897
EPOCH:: 13, Validation cost: 0.018, Validation R2: -0.699
EPOCH:: 14, Validation cost: 0.014, Validation R2: -0.295
EPOCH:: 15, Validation cost: 0.012, Validation R2: -0.125
EPOCH:: 16, Validation cost: 0.010, Validation R2: 0.031
EPOCH:: 17, Validation cost: 0.008, Validation R2: 0.195
EPOCH:: 18, Validatio

EPOCH:: 147, Validation cost: 0.004, Validation R2: 0.647
EPOCH:: 148, Validation cost: 0.001, Validation R2: 0.926
EPOCH:: 149, Validation cost: 0.004, Validation R2: 0.624
EPOCH:: 150, Validation cost: 0.001, Validation R2: 0.923
EPOCH:: 151, Validation cost: 0.004, Validation R2: 0.615
EPOCH:: 152, Validation cost: 0.001, Validation R2: 0.922
EPOCH:: 153, Validation cost: 0.004, Validation R2: 0.623
EPOCH:: 154, Validation cost: 0.001, Validation R2: 0.923
EPOCH:: 155, Validation cost: 0.004, Validation R2: 0.643
EPOCH:: 156, Validation cost: 0.001, Validation R2: 0.924
EPOCH:: 157, Validation cost: 0.003, Validation R2: 0.669
EPOCH:: 158, Validation cost: 0.001, Validation R2: 0.926
EPOCH:: 159, Validation cost: 0.003, Validation R2: 0.695
EPOCH:: 160, Validation cost: 0.001, Validation R2: 0.929
EPOCH:: 161, Validation cost: 0.003, Validation R2: 0.718
EPOCH:: 162, Validation cost: 0.001, Validation R2: 0.931
EPOCH:: 163, Validation cost: 0.003, Validation R2: 0.737
EPOCH:: 164, V

EPOCH:: 302, Validation cost: 0.001, Validation R2: 0.934
EPOCH:: 303, Validation cost: 0.003, Validation R2: 0.748
EPOCH:: 304, Validation cost: 0.001, Validation R2: 0.934
EPOCH:: 305, Validation cost: 0.003, Validation R2: 0.748
EPOCH:: 306, Validation cost: 0.001, Validation R2: 0.934
EPOCH:: 307, Validation cost: 0.003, Validation R2: 0.748
EPOCH:: 308, Validation cost: 0.001, Validation R2: 0.934
EPOCH:: 309, Validation cost: 0.003, Validation R2: 0.747
EPOCH:: 310, Validation cost: 0.001, Validation R2: 0.934
EPOCH:: 311, Validation cost: 0.003, Validation R2: 0.747
EPOCH:: 312, Validation cost: 0.001, Validation R2: 0.934
EPOCH:: 313, Validation cost: 0.003, Validation R2: 0.746
EPOCH:: 314, Validation cost: 0.001, Validation R2: 0.934
EPOCH:: 315, Validation cost: 0.003, Validation R2: 0.746
EPOCH:: 316, Validation cost: 0.001, Validation R2: 0.934
EPOCH:: 317, Validation cost: 0.003, Validation R2: 0.746
EPOCH:: 318, Validation cost: 0.001, Validation R2: 0.934
EPOCH:: 319, V

EPOCH:: 452, Validation cost: 0.001, Validation R2: 0.935
EPOCH:: 453, Validation cost: 0.002, Validation R2: 0.768
EPOCH:: 454, Validation cost: 0.001, Validation R2: 0.935
EPOCH:: 455, Validation cost: 0.002, Validation R2: 0.768
EPOCH:: 456, Validation cost: 0.001, Validation R2: 0.935
EPOCH:: 457, Validation cost: 0.002, Validation R2: 0.769
EPOCH:: 458, Validation cost: 0.001, Validation R2: 0.935
EPOCH:: 459, Validation cost: 0.002, Validation R2: 0.769
EPOCH:: 460, Validation cost: 0.001, Validation R2: 0.935
EPOCH:: 461, Validation cost: 0.002, Validation R2: 0.770
EPOCH:: 462, Validation cost: 0.001, Validation R2: 0.935
EPOCH:: 463, Validation cost: 0.002, Validation R2: 0.771
EPOCH:: 464, Validation cost: 0.001, Validation R2: 0.935
EPOCH:: 465, Validation cost: 0.002, Validation R2: 0.771
EPOCH:: 466, Validation cost: 0.001, Validation R2: 0.935
EPOCH:: 467, Validation cost: 0.002, Validation R2: 0.772
EPOCH:: 468, Validation cost: 0.001, Validation R2: 0.935
EPOCH:: 469, V