In [1]:
import os

# Pin Theano to GPU device 1. The flag is set here, ahead of the
# `import theano` in the next cell, so it is in place when Theano starts up.
os.environ.update({'THEANO_FLAGS': "device=gpu1"})

In [2]:
import theano
import theano.tensor as T
import lasagne
from lasagne.utils import floatX

import numpy as np
from sklearn.cross_validation import train_test_split
from utils import build_model, iterate_minibatches
from data_prep import b01c_to_bc01, data_aug

Using gpu device 1: GeForce GTX 690 (CNMeM is disabled, CuDNN 4007)


In [3]:
import time, sys
from visualize import plot_loss, plot_conv_weights

In [4]:
# --- Experiment configuration ------------------------------------------------
seed = 0            # random_state used for the train/val/test splits
num_classes = 23
toy = 1             # truthy -> short sanity run on a tiny subset of the data

# Locations of the caffe reference model, the dataset arrays and the mean image.
caffe_ref_path = '../models/theano_caffe_ref/caffe_reference.pkl'
X_path = '../datasets/msrcv2/Xaug_b01c.npy'
Y_path = '../datasets/msrcv2/Y.npy'
MEAN_IMG_PATH = '../models/theano_caffe_ref/ilsvrc_2012_mean.npy'

# Training schedule; a toy run swaps in much smaller values.
batchsize = 10 if toy else 96
num_epochs = 30 if toy else 1000
snapshot = 10 if toy else 200   # a model snapshot is saved every `snapshot` epochs

# Regularisation.
p = 0.5                 # drop out prob.
lambda2 = 0.0005 / 2    # l2-regularizer constant
# When overfitting the toy subset, both can additionally be switched off by
# setting p = 0 and lambda2 = 0 here.

In [5]:
### LOADING DATA
# Read the three .npy files configured above, in order: X (the array later
# indexed per sample and converted with b01c_to_bc01), Y (the per-sample
# targets) and the mean image.
X, Y, MEAN_IMG = (
    np.load(npy_path) for npy_path in (X_path, Y_path, MEAN_IMG_PATH)
)

In [6]:
# Split train/val/test set
# First cut: two thirds of the samples go to train+val, the remainder to test.
# An index array is split alongside Y so the matching rows of X can be
# selected later without copying X here.
indicies = np.arange(len(Y))
Y_train_val, Y_test, idx_train_val, idx_test = train_test_split(
    Y,
    indicies,
    train_size=2.0 / 3,
    random_state=seed,
)

In [7]:
# Second cut: carve the validation set out of train+val. No explicit size is
# passed, so train_test_split's default proportions apply.
Y_train, Y_val, idx_train, idx_val = train_test_split(
    Y_train_val, idx_train_val, random_state=seed
)

In [8]:
# Report how many samples ended up in each split. Written as a single
# formatted string passed to print(...) so it matches the call-style prints
# used in the training cell and emits the same text as before.
print("Train/val/test set size: {} {} {}".format(
    len(idx_train), len(idx_val), len(idx_test)))

Train/val/test set size: 295 99 197


In [9]:
# TODO flips

def _augment_split(sample_idx, sample_labels):
    """Run the augmentation helpers for one split.

    Calls data_aug on the sample indices (isMat='idx'), gathers the matching
    rows of X and converts them with b01c_to_bc01, then calls data_aug on the
    labels (isMat='Y').

    Returns a tuple (augmented indices, converted images, augmented labels).
    NOTE(review): what data_aug does internally is not visible in this
    notebook -- confirm in data_prep.
    """
    idx_aug = data_aug(sample_idx, mode='aug', isMat='idx')
    images_aug = b01c_to_bc01(X[idx_aug])
    labels_aug = data_aug(sample_labels, mode='aug', isMat='Y')
    return idx_aug, images_aug, labels_aug


idx_aug_train, Xaug_train, Yaug_train = _augment_split(idx_train, Y_train)
idx_aug_val, Xaug_val, Yaug_val = _augment_split(idx_val, Y_val)

In [10]:
# Report the augmented set sizes (training images, validation labels).
# Written as a single formatted string passed to print(...) so it matches the
# call-style prints used in the training cell and emits the same text as before.
print("Augmented train/val set size: {} {}".format(
    len(Xaug_train), len(Yaug_val)))

Augmented train/val set size: 1475 495


In [11]:
# Toy mode: try to overfit a tiny subset of the data -- keep only the first
# two mini-batches' worth of training samples and one mini-batch's worth of
# validation samples.
if toy:
    toy_train_size = 2 * batchsize
    Xaug_train, Yaug_train = Xaug_train[:toy_train_size], Yaug_train[:toy_train_size]
    Xaug_val, Yaug_val = Xaug_val[:batchsize], Yaug_val[:batchsize]

In [12]:
# Prepare Theano variables for inputs and targets:
# a 4-D tensor for the image batch and an integer matrix for the targets
# (a matrix rather than a vector because the loss below is an element-wise
# binary cross-entropy).
input_var, target_var = T.tensor4('inputs'), T.imatrix('targets')

In [13]:
# Build the network with the project helper, handing it the caffe reference
# model path, the number of classes, the symbolic input and the dropout
# probability p from the configuration cell.
# NOTE(review): how build_model uses these arguments (and what kind of layer
# object it returns) is not visible in this notebook -- confirm in utils.
network = build_model(caffe_ref_path, num_classes, input_var, p)

In [14]:
# Create a loss expression for training:
# mean element-wise binary cross-entropy plus an L2 weight penalty.
prediction = lasagne.layers.get_output(network)
elementwise_bce = lasagne.objectives.binary_crossentropy(prediction, target_var)

# L2 penalty: sum of squared entries over every regularizable parameter,
# scaled by lambda2.
weights = lasagne.layers.get_all_params(network, regularizable=True)
l2_penalty = T.sum([T.sum(w ** 2) for w in weights])
l2_coeff = theano.shared(floatX(lambda2))

loss = elementwise_bce.mean() + l2_coeff * l2_penalty


In [15]:
# The learning rate is kept in a shared variable so the training loop can
# change it with set_value between epochs; lr_decay is the factor it gets
# multiplied by at each decay step.
lr = theano.shared(np.asarray(0.01, dtype=theano.config.floatX))
lr_decay = np.asarray(0.3, dtype=theano.config.floatX)

In [16]:
# Create update expressions for training:
# Nesterov momentum over every trainable parameter of the network, with the
# shared variable `lr` as step size.
# ! TODO adjust for per-layer training
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.nesterov_momentum(
    loss,
    params,
    learning_rate=lr,
    momentum=0.9,
)

In [17]:
# Create a loss expression for validation/testing:
# the network output is requested with deterministic=True, and the loss is
# the plain mean binary cross-entropy (no L2 term is added here).
test_prediction = lasagne.layers.get_output(network, deterministic=True)
test_loss = lasagne.objectives.binary_crossentropy(
    test_prediction, target_var).mean()

In [18]:
# Training step on one mini-batch: returns the training loss and applies the
# parameter updates from the updates dictionary.
train_fn = theano.function(
    inputs=[input_var, target_var],
    outputs=loss,
    updates=updates,
)

# Validation counterpart: returns the validation loss only, no updates.
val_fn = theano.function(
    inputs=[input_var, target_var],
    outputs=test_loss,
)

In [19]:
# Per-epoch training log: each key maps to its own (initially empty) list.
training_history = {
    'training_loss': [],
    'validation_loss': [],
    'learning_rate': [],
    'epoch_time': [],
}

In [20]:
# Finally, launch the training loop.
#
# Every epoch makes one pass over the training set (shuffle=True) and one pass
# over the validation set (shuffle=False), appends the per-batch averages to
# `training_history`, multiplies the learning rate by `lr_decay` every 10
# epochs and writes a parameter snapshot every `snapshot` epochs.
# Ctrl-C (KeyboardInterrupt) stops training early; the current parameters are
# still saved afterwards. `snapshot_path_string` (path prefix of the most
# recent snapshot) stays defined for the plotting cells further down.

def _save_snapshot(epoch_number):
    """Write the current network parameters to a time-stamped .npz file.

    epoch_number is only used to label the file name.
    Returns the path prefix (file name without the '.npz' extension).
    """
    time_stamp = time.strftime("%y%m%d%H%M%S", time.localtime())
    path_prefix = ('../snapshot_models/' + str(num_classes) + 'alex'
                   + time_stamp + '_' + str(epoch_number))
    # Unpack the list so that every parameter array becomes its own entry
    # (arr_0, arr_1, ...). Passing the list as one argument would store a
    # single object array that cannot be fed back to set_all_param_values
    # with the usual `f['arr_%d' % i]` loading loop.
    np.savez(path_prefix + '.npz', *lasagne.layers.get_all_param_values(network))
    return path_prefix


print("Starting training...")
# We iterate over epochs:
print("\nEpoch\tTrain Loss\tValid Loss\tTime\tLearning rate")
sys.setrecursionlimit(10000)

lr_decay_every = 10         # ! TODO Condition for learning rate decay
epoch = -1                  # defined even if no epoch starts (or Ctrl-C hits at once),
                            # and never a stale value from an earlier run of this cell
last_snapshot_epoch = None  # label of the most recent snapshot written by this run

try:
    for epoch in range(num_epochs):
        # TODO - Early stopping http://deeplearning.net/tutorial/gettingstarted.html#early-stopping

        start_time = time.time()

        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        for inputs, targets in iterate_minibatches(Xaug_train, Yaug_train, batchsize, shuffle=True):
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_batches = 0
        for inputs, targets in iterate_minibatches(Xaug_val, Yaug_val, batchsize, shuffle=False):
            val_err += val_fn(inputs, targets)
            val_batches += 1

        # Record training history
        training_history['epoch_time'].append(time.time() - start_time)
        training_history['training_loss'].append(train_err / train_batches)
        training_history['validation_loss'].append(val_err / val_batches)
        training_history['learning_rate'].append(lr.get_value())

        # Then we print the results for this epoch:
        print("{}\t{:.6f}\t{:.6f}\t{:.3f}s\t{}".format(
                epoch + 1,
                training_history['training_loss'][-1],
                training_history['validation_loss'][-1],
                training_history['epoch_time'][-1],
                training_history['learning_rate'][-1]
            ))

        if (epoch + 1) % lr_decay_every == 0:
            lr.set_value(lr.get_value() * lr_decay)

        if (epoch + 1) % snapshot == 0:
            snapshot_path_string = _save_snapshot(epoch + 1)
            last_snapshot_epoch = epoch + 1

except KeyboardInterrupt:
    # Deliberate: an interrupt ends training early but must not lose the model.
    print("Training interrupted; saving current parameters.")

# Save model after num_epochs or KeyboardInterrupt, unless the snapshot for
# this very epoch was already written inside the loop (avoids a duplicate).
# Tracking the last written snapshot, rather than re-testing the modulo, also
# covers an interrupt that lands in a snapshot epoch before its save ran.
if last_snapshot_epoch != epoch + 1:
    snapshot_path_string = _save_snapshot(epoch + 1)

Starting training...

Epoch	Train Loss	Valid Loss	Time	Learning rate
1	1.654835	0.462565	0.537s	0.00999999977648
2	1.098222	0.322888	0.492s	0.00999999977648
3	0.993433	0.357139	0.480s	0.00999999977648
4	0.959984	0.366446	0.474s	0.00999999977648
5	0.895165	0.334674	0.496s	0.00999999977648
6	0.839535	0.330638	0.490s	0.00999999977648
7	0.794487	0.317508	0.506s	0.00999999977648
8	0.753340	0.295795	0.499s	0.00999999977648
9	0.727279	0.301817	0.490s	0.00999999977648
10	0.708752	0.313290	0.491s	0.00999999977648
11	0.693198	0.318950	0.487s	0.00300000002608
12	0.679026	0.326207	0.480s	0.00300000002608
13	0.674395	0.339983	0.493s	0.00300000002608
14	0.674608	0.340451	0.488s	0.00300000002608
15	0.660730	0.343588	0.473s	0.00300000002608
16	0.669289	0.337010	0.473s	0.00300000002608
17	0.663192	0.340105	0.483s	0.00300000002608
18	0.665150	0.345344	0.475s	0.00300000002608
19	0.658888	0.356052	0.474s	0.00300000002608
20	0.658248	0.367638	0.472s	0.00300000002608
21	0.657248	0.377309	0.476s	0.0009000000

In [21]:
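# To restore a snapshot later, load it like this (the snippet expects one array
# per parameter, stored in the .npz file under the keys arr_0, arr_1, ...):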
#with np.load('../snapshot_models/23alex16042023213910.npz') as f:
#    param_values = [f['arr_%d' % i] for i in range(len(f.files))]
# lasagne.layers.set_all_param_values(network, param_values)

In [22]:
# Hand the recorded history to the project's plot_loss helper, together with
# an image path derived from the most recent snapshot's path prefix.
loss_plot_path = snapshot_path_string + '_loss.png'
plot_loss(training_history, loss_plot_path)

In [23]:
# Pass the layer at index 1 of the network's layer list to plot_conv_weights
# (presumably the first convolutional layer, given the '_conv1weights_'
# suffix -- confirm against build_model), with a path prefix derived from the
# most recent snapshot.
layer_1 = lasagne.layers.get_all_layers(network)[1]
plot_conv_weights(layer_1, snapshot_path_string + '_conv1weights_')