# Convolutional Networks
This notebook demonstrates convolutional neural networks trained to classify the CIFAR-10 dataset.

# Load libraries and data

In [6]:
# As usual, a bit of setup

import time
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.append('../')
from Model.convnet import *
from Dataset.data_utils import get_CIFAR10_data
from Solver.solver import Solver

%matplotlib inline
# Notebook-wide matplotlib defaults, applied in a single update call.
plt.rcParams.update({
    'figure.figsize': (10.0, 8.0),      # default size of plots
    'image.interpolation': 'nearest',   # interpolation mode used when showing images
    'image.cmap': 'gray',               # default colormap for images
})

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

def rel_error(x, y):
  """Return the largest element-wise relative error between x and y.

  Each element's absolute difference |x - y| is divided by |x| + |y|.
  The denominator is floored at 1e-8 so that positions where both inputs
  are zero do not cause a division by zero.
  """
  abs_diff = np.abs(x - y)
  scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
  return np.max(abs_diff / scale)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
# Load the (preprocessed) CIFAR10 data and list the shape of every entry.

data = get_CIFAR10_data()
# items() and a single-argument print() work on both Python 2 and Python 3;
# the format string reproduces the "<name>:  <shape>" text of the original
# Python 2 print statement.
for k, v in data.items():
  print('%s:  %s' % (k, v.shape))

X_val:  (1000, 3, 32, 32)
X_train:  (49000, 3, 32, 32)
X_test:  (1000, 3, 32, 32)
y_val:  (1000,)
y_train:  (49000,)
y_test:  (1000,)


# Train the first model

In [19]:
# Hyperparameters shared by the model and the solver
learning_rate = 10**(-5)  # initial learning rate handed to the optimizer
weight_scale = 10**(-3)   # scale used for weight initialization

# Build the convolutional network. Its structure is:
# {conv-spatialbatchnorm-relu-max pool}x2-{affine-batchnorm-relu-dropout}x1-affine-softmax
#   num_filters: number of filters for each convolution layer; a '0' entry
#                stands for a max-pool layer at that position
#   hidden_dim:  dimension of each fully connected hidden layer
# NOTE(review): seed=None passes no fixed seed, so runs are presumably not
# reproducible -- confirm in Model/convnet.py.
model1 = ConvNet(
    input_dim=[3, 32, 32],
    num_filters=[32, 0, 64, 0],
    filter_size=3,
    hidden_dim=[256],
    num_classes=10,
    filter_stride=1,
    pad=None,
    pool_size=2,
    pool_stride=2,
    weight_scale=weight_scale,
    reg=0.01,
    dtype=np.float32,
    seed=None,
    use_batchnorm=True,
    dropout=0.5,
)

# Wrap the model and the data in a solver that uses the 'adam' update rule
solver1 = Solver(
    model1,
    data,
    num_epochs=50,
    batch_size=200,
    update_rule='adam',
    optim_config={'learning_rate': learning_rate},
    lr_decay=0.95,
    verbose=True,
    print_every=20,  # report the loss every 20 iterations
)

# Run the optimization
solver1.train()

(Iteration 1 / 12250) loss: 2.308168
(Epoch 0 / 50) train acc: 0.114000; val_acc: 0.118000
(Iteration 21 / 12250) loss: 2.268943
(Iteration 41 / 12250) loss: 2.249681
(Iteration 61 / 12250) loss: 2.237493
(Iteration 81 / 12250) loss: 2.215893
(Iteration 101 / 12250) loss: 2.208433
(Iteration 121 / 12250) loss: 2.180398
(Iteration 141 / 12250) loss: 2.155566
(Iteration 161 / 12250) loss: 2.143117
(Iteration 181 / 12250) loss: 2.120898
(Iteration 201 / 12250) loss: 2.118120
(Iteration 221 / 12250) loss: 2.102424
(Iteration 241 / 12250) loss: 2.054974
(Epoch 1 / 50) train acc: 0.492000; val_acc: 0.516000
(Iteration 261 / 12250) loss: 2.064706
(Iteration 281 / 12250) loss: 2.042702
(Iteration 301 / 12250) loss: 2.038244
(Iteration 321 / 12250) loss: 2.003797
(Iteration 341 / 12250) loss: 1.977176
(Iteration 361 / 12250) loss: 1.983368
(Iteration 381 / 12250) loss: 1.936832
(Iteration 401 / 12250) loss: 1.973487
(Iteration 421 / 12250) loss: 1.950850
(Iteration 441 / 12250) loss: 1.947986
(

# Test your model1

In [20]:
# Pull the held-out splits out of the data dict.
X_val = data['X_val']
y_val = data['y_val']
X_test = data['X_test']
y_test = data['y_test']

# The predicted class is the arg-max along axis 1 of what loss() returns.
# NOTE(review): loss() called without labels is assumed to return per-class
# scores -- confirm in Model/convnet.py.
y_test_pred = np.argmax(model1.loss(X_test), axis=1)
y_val_pred = np.argmax(model1.loss(X_val), axis=1)

# Single-argument print() with %-formatting emits the same text on Python 2
# and Python 3 (the original print statement was Python 2 only).
print('Model 1 Validation set accuracy:  %s' % (y_val_pred == y_val).mean())
print('Model 1 Test set accuracy:  %s' % (y_test_pred == y_test).mean())

Model 1 Validation set accuracy:  0.753
Model 1 Test set accuracy:  0.742


# Train the second model

In [21]:
# Hyperparameters shared by the model and the solver
learning_rate = 10**(-5)  # initial learning rate handed to the optimizer
weight_scale = 10**(-3)   # scale used for weight initialization

# Build a deeper convolutional network than the first model.
#   num_filters: number of filters for each convolution layer; a '0' entry
#                stands for a max-pool layer at that position
#   hidden_dim:  dimension of each fully connected hidden layer
# With num_filters=[32,32,0,64,64,0] and hidden_dim=[256,256] this is two
# groups of two convolution layers, each group followed by a max pool, then
# two fully connected hidden layers before the final affine-softmax.
# NOTE(review): presumably each conv layer is conv-spatialbatchnorm-relu and
# each hidden layer is affine-batchnorm-relu-dropout -- confirm in
# Model/convnet.py. seed=None passes no fixed seed, so runs are presumably
# not reproducible.
model2 = ConvNet(
    input_dim=[3, 32, 32],
    num_filters=[32, 32, 0, 64, 64, 0],
    filter_size=3,
    hidden_dim=[256, 256],
    num_classes=10,
    filter_stride=1,
    pad=None,
    pool_size=2,
    pool_stride=2,
    weight_scale=weight_scale,
    reg=0.01,
    dtype=np.float32,
    seed=None,
    use_batchnorm=True,
    dropout=0.5,
)

# Wrap the model and the data in a solver that uses the 'adam' update rule
solver2 = Solver(
    model2,
    data,
    num_epochs=50,
    batch_size=200,
    update_rule='adam',
    optim_config={'learning_rate': learning_rate},
    lr_decay=0.95,
    verbose=True,
    print_every=100,  # report the loss every 100 iterations
)

# Run the optimization
solver2.train()

(Iteration 1 / 12250) loss: 2.308429
(Epoch 0 / 50) train acc: 0.121000; val_acc: 0.110000
(Iteration 101 / 12250) loss: 2.211185
(Iteration 201 / 12250) loss: 2.103051
(Epoch 1 / 50) train acc: 0.467000; val_acc: 0.513000
(Iteration 301 / 12250) loss: 2.001738
(Iteration 401 / 12250) loss: 1.886993
(Epoch 2 / 50) train acc: 0.577000; val_acc: 0.579000
(Iteration 501 / 12250) loss: 1.830736
(Iteration 601 / 12250) loss: 1.793744
(Iteration 701 / 12250) loss: 1.714917
(Epoch 3 / 50) train acc: 0.646000; val_acc: 0.617000
(Iteration 801 / 12250) loss: 1.578736
(Iteration 901 / 12250) loss: 1.548976
(Epoch 4 / 50) train acc: 0.666000; val_acc: 0.653000
(Iteration 1001 / 12250) loss: 1.500658
(Iteration 1101 / 12250) loss: 1.480080
(Iteration 1201 / 12250) loss: 1.403755
(Epoch 5 / 50) train acc: 0.705000; val_acc: 0.675000
(Iteration 1301 / 12250) loss: 1.388039
(Iteration 1401 / 12250) loss: 1.335991
(Epoch 6 / 50) train acc: 0.729000; val_acc: 0.692000
(Iteration 1501 / 12250) loss: 1.3

# Test your model2

In [22]:
# Pull the held-out splits out of the data dict.
X_val = data['X_val']
y_val = data['y_val']
X_test = data['X_test']
y_test = data['y_test']

# The predicted class is the arg-max along axis 1 of what loss() returns.
# NOTE(review): loss() called without labels is assumed to return per-class
# scores -- confirm in Model/convnet.py.
y_test_pred = np.argmax(model2.loss(X_test), axis=1)
y_val_pred = np.argmax(model2.loss(X_val), axis=1)

# Single-argument print() with %-formatting emits the same text on Python 2
# and Python 3 (the original print statement was Python 2 only).
print('Model 2 Validation set accuracy:  %s' % (y_val_pred == y_val).mean())
print('Model 2 Test set accuracy:  %s' % (y_test_pred == y_test).mean())

Model 2 Validation set accuracy:  0.771
Model 2 Test set accuracy:  0.753


# Ensemble the two models

In [23]:
# Ensemble: average the outputs of the two models, then predict the arg-max
# along axis 1.
# NOTE(review): loss() called without labels is assumed to return per-class
# scores -- confirm in Model/convnet.py.
# The splits are read straight from `data` so this cell does not depend on
# variables left behind by the per-model evaluation cells.
avgscores = (model1.loss(data['X_val']) + model2.loss(data['X_val'])) / 2

# Validation predictions get their own name; the original stored them in
# y_test_pred, which is misleading. y_test_pred is used only for the test set.
ensemble_val_pred = np.argmax(avgscores, axis=1)
# Single-argument print() emits the same text on Python 2 and Python 3.
print('Ensemble Model Validation set accuracy:  %s' % (ensemble_val_pred == data['y_val']).mean())

avgscores = (model1.loss(data['X_test']) + model2.loss(data['X_test'])) / 2

y_test_pred = np.argmax(avgscores, axis=1)
print('Ensemble Model Test set accuracy:  %s' % (y_test_pred == data['y_test']).mean())

Ensemble Model Validation set accuracy:  0.776
Ensemble Model Test set accuracy:  0.78
