# High-level Theano + Lasagne Example

In [1]:
%%writefile ~/.theanorc
# Theano configuration written to the user's home directory by this cell.
[global]
# Run on the first CUDA GPU (the import cell's output shows it being mapped).
device = cuda0
# NOTE(review): per the Theano config docs this makes Theano fail rather than
# fall back when the requested device cannot be used - confirm.
force_device= True
# Default floating-point type for Theano variables.
floatX = float32
# Emit a warning when a float64 variable is created (see Theano config docs).
warn_float64 = warn

Overwriting /home/iliauk/.theanorc


In [2]:
import numpy as np
import os
import sys
import theano.tensor as T
import theano
import lasagne
import lasagne.layers as L
import lasagne.nonlinearities as nl
import lasagne.objectives as obj
import lasagne.updates as upd
from common.params import *
from common.utils import *

Using cuDNN version 6021 on context None
Mapped name None to device cuda0: Tesla P100-PCIE-16GB (BC4B:00:00.0)


In [3]:
# Performance Improvement
# 1. CuDNN auto-tune
# "time_once" (see the Theano config docs) asks cuDNN to time its convolution
# algorithms once and keep using the fastest one. The flag is set separately
# for the forward pass and for both backward passes (gradient w.r.t. the
# filters and gradient w.r.t. the input data).
theano.config.dnn.conv.algo_fwd = "time_once"
theano.config.dnn.conv.algo_bwd_filter = "time_once"
theano.config.dnn.conv.algo_bwd_data = "time_once"

In [4]:
# Report the platform, library versions and GPU stack used for this run so
# that the timings below can be put into context.
print("OS: ", sys.platform)
print("Python: ", sys.version)
print("Numpy: ", np.__version__)
print("Theano: ", theano.__version__)
print("Lasagne: ", lasagne.__version__)
print("GPU: ", get_gpu_name())
# Printed without a label: judging by the cell output, the helper's return
# value already starts with "CUDA Version".
print(get_cuda_version())
print("CuDNN Version ", get_cudnn_version())

OS:  linux
Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
Numpy:  1.14.1
Theano:  1.0.1
Lasagne:  0.2.dev1
GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']
CUDA Version 8.0.61
CuDNN Version  6.0.21


In [5]:
def create_symbol(n_classes=N_CLASSES, input_layer=None):
    """Build the convolutional network used in this notebook.

    Architecture: two blocks of (conv 3x3 -> conv 3x3 -> 2x2 max-pool ->
    dropout 0.25) with 50 and 100 filters respectively, followed by a
    512-unit dense layer, dropout 0.5 and a softmax output layer.

    Parameters
    ----------
    n_classes : int
        Number of units in the final softmax layer.
    input_layer : lasagne layer, optional
        Layer the first convolution is attached to. When omitted, the
        notebook-level ``X`` input layer is used, which matches the original
        behaviour of calling ``create_symbol()`` with no arguments.

    Returns
    -------
    The final softmax ``DenseLayer`` (named "output"); the rest of the
    network is reachable from it through its incoming layers.
    """
    if input_layer is None:
        # Fall back to the notebook-level placeholder so existing calls keep
        # working; X must have been defined before this function is called.
        input_layer = X

    # Block 1: 50 filters
    conv1 = L.Conv2DLayer(input_layer, num_filters=50, filter_size=(3, 3), pad='same')
    conv2 = L.Conv2DLayer(conv1, num_filters=50, filter_size=(3, 3), pad='same')
    pool1 = L.MaxPool2DLayer(conv2, pool_size=(2, 2), stride=(2, 2))
    drop1 = L.DropoutLayer(pool1, 0.25)

    # Block 2: 100 filters
    conv3 = L.Conv2DLayer(drop1, num_filters=100, filter_size=(3, 3), pad='same')
    conv4 = L.Conv2DLayer(conv3, num_filters=100, filter_size=(3, 3), pad='same')
    pool2 = L.MaxPool2DLayer(conv4, pool_size=(2, 2), stride=(2, 2))
    drop2 = L.DropoutLayer(pool2, 0.25)

    # Classifier head
    flatten = L.FlattenLayer(drop2)
    fc1 = L.DenseLayer(flatten, 512)
    drop3 = L.DropoutLayer(fc1, 0.5)
    pred = L.DenseLayer(drop3, n_classes, name="output", nonlinearity=nl.softmax)

    return pred

In [6]:
def init_model(net, y, lr=LR, momentum=MOMENTUM):
    """Attach a loss and SGD-with-momentum updates to a Lasagne network.

    Parameters
    ----------
    net : lasagne layer
        Output layer of the network (as returned by ``create_symbol``).
    y : theano variable
        Symbolic target labels passed to ``categorical_crossentropy``.
    lr : float
        Learning rate handed to ``lasagne.updates.momentum``.
    momentum : float
        Momentum coefficient handed to ``lasagne.updates.momentum``.

    Returns
    -------
    (pred, loss, updates) : the symbolic network output, the mean
    cross-entropy loss, and the parameter-update dictionary.
    """
    pred = L.get_output(net)
    # Only trainable parameters may receive gradient updates. For this network
    # the result is the same as get_all_params(net), but it stays correct if
    # layers with non-trainable state are added later.
    params = L.get_all_params(net, trainable=True)
    xentropy = obj.categorical_crossentropy(pred, y)
    loss = T.mean(xentropy)
    # The tensorflow LR, MOMENTUM are slightly different
    updates = upd.momentum(loss, params, lr, momentum)
    return pred, loss, updates

In [7]:
%%time
# Load the data set in the layout this library expects (channels first)
x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)
# One line with the four shapes, one line with the four dtypes
print(*(arr.shape for arr in (x_train, x_test, y_train, y_test)))
print(*(arr.dtype for arr in (x_train, x_test, y_train, y_test)))

Preparing train set...
Preparing test set...
(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)
float32 float32 int32 int32
CPU times: user 653 ms, sys: 595 ms, total: 1.25 s
Wall time: 1.26 s


In [8]:
%%time
# Place-holders
# Input layer with an unspecified batch size and 3x32x32 samples
# (matches the channel-first arrays printed by the data-loading cell).
X = L.InputLayer(shape=(None, 3, 32, 32))
# Symbolic int32 vector holding the target labels
y = T.ivector("y")
# Initialise model
# create_symbol reads the notebook-level X defined above.
net = create_symbol()

CPU times: user 50.6 ms, sys: 3.33 ms, total: 54 ms
Wall time: 53.2 ms


In [9]:
%%time
# Symbolic prediction, loss and parameter updates for training
pred, loss, updates = init_model(net, y)
# Accuracy for logging: mean of the per-sample hit indicator, cast to floatX
accuracy = T.mean(
    T.cast(obj.categorical_accuracy(pred, y), theano.config.floatX)
)

CPU times: user 259 ms, sys: 41.5 ms, total: 300 ms
Wall time: 766 ms


In [10]:
%%time
# Compile functions
# Training step: returns the batch loss and accuracy and applies the updates
train_func = theano.function(
    inputs=[X.input_var, y],
    outputs=[loss, accuracy],
    updates=updates,
)
# Rebind pred to the deterministic network output for inference
pred = L.get_output(net, deterministic=True)
# Inference: index of the highest-scoring class for every sample
pred_func = theano.function(
    inputs=[X.input_var],
    outputs=T.argmax(pred, axis=1),
)

CPU times: user 2.93 s, sys: 424 ms, total: 3.36 s
Wall time: 13.5 s


In [11]:
%%time
# Main training loop: 1m5s
for j in range(EPOCHS):
    for data, label in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):
        # Keep the numeric batch loss under its own name: rebinding `loss`
        # would overwrite the symbolic loss expression that train_func was
        # compiled from, so re-running the compile cell would then fail.
        loss_train, acc_train = train_func(data, label)
    # Log (this is the accuracy of the last mini-batch of the epoch only)
    print(j, "Train accuracy:", acc_train)

0 Train accuracy: 0.375
1 Train accuracy: 0.640625
2 Train accuracy: 0.578125
3 Train accuracy: 0.6875
4 Train accuracy: 0.6875
5 Train accuracy: 0.78125
6 Train accuracy: 0.703125
7 Train accuracy: 0.734375
8 Train accuracy: 0.671875
9 Train accuracy: 0.765625
CPU times: user 40 s, sys: 24.3 s, total: 1min 4s
Wall time: 1min 5s


In [12]:
%%time
# Main evaluation loop: 319ms
# n_samples is rounded down to a whole number of mini-batches; presumably
# yield_mb drops the trailing partial batch - confirm in common.utils.
n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE
# Builtin int instead of np.int: np.int was only an alias for it and has been
# removed from newer NumPy releases.
y_guess = np.zeros(n_samples, dtype=int)
y_truth = y_test[:n_samples]
# label is not needed here; predictions are compared with y_truth afterwards
for c, (data, label) in enumerate(yield_mb(x_test, y_test, BATCHSIZE)):
    output = pred_func(data)
    # Write this batch's predicted class indices into its slot
    y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = output

CPU times: user 200 ms, sys: 120 ms, total: 319 ms
Wall time: 319 ms


In [13]:
# Fraction of correct predictions; np.mean on the boolean mask is vectorised,
# unlike the builtin sum which iterates over the array element by element.
print("Accuracy: ", np.mean(y_guess == y_truth))

Accuracy:  0.7606169871794872
