Deep Learning
=============

Assignment 4
------------

Previously in `2_fullyconnected.ipynb` and `3_regularization.ipynb`, we trained fully connected networks to classify [notMNIST](http://yaroslavvb.blogspot.com/2011/09/notmnist-dataset.html) characters.

The goal of this assignment is to make the neural network convolutional.

In [1]:
import matplotlib.pyplot as plt
# Config the matplotlib backend as plotting inline in IPython
%matplotlib inline
from one_layer_covnet import OneLayerCovnet
from one_layer_fully_connected import OneLayerFullyConnected
from pooling import Pooling
from dropout import Dropout
from covnet import Covnet
from six.moves import cPickle as pickle

  from ._conv import register_converters as _register_converters


In [2]:
pickle_file = '../lesson_2/notMNIST.pickle'

# Read the pickled dictionary holding the notMNIST splits.
# NOTE(review): unpickling can execute arbitrary code, so this path should only
# ever point at a file produced locally by the earlier lesson.
with open(pickle_file, 'rb') as f:
  save = pickle.load(f)

# Unpack the three splits into the notebook-level names used by later cells.
train_dataset, train_labels = save['train_dataset'], save['train_labels']
valid_dataset, valid_labels = save['valid_dataset'], save['valid_labels']
test_dataset, test_labels = save['test_dataset'], save['test_labels']
del save  # drop the dictionary reference so the gc can reclaim it

# Report the shape of every split as a quick sanity check.
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)


In [3]:
def accuracy(predictions, labels):
    """
    Percentage of rows whose arg-max prediction equals the arg-max label.

    Both arguments are indexed along axis 1, so each is expected to be a
    2-D array with one row per sample (e.g. class scores and one-hot labels).
    The result is a value between 0 and 100. Nothing here is specific to
    convolutional networks.

    Note: `np` is imported in a later cell of this notebook, so that cell
    must have been run before this function is called.
    """
    predicted_classes = np.argmax(predictions, 1)
    true_classes = np.argmax(labels, 1)
    nb_correct = np.sum(predicted_classes == true_classes)
    return 100.0 * nb_correct / predictions.shape[0]

In [4]:
# Baseline architecture: two strided convolutions followed by two fully
# connected layers. Each layer's input size is read from the previous layer
# so the dimensions stay consistent.

# Convolution 1: 28x28 single-channel input, 5x5 patches, 16 output channels.
# Stride 2 is what reduces the spatial size here (no pooling is used).
cvn_layer_1 = OneLayerCovnet(
    height=28, width=28, depth=1, nb_labels=10,
    patch_height=5, patch_width=5, conv_depth=16,
    stride_height=2, stride_width=2, padding='SAME',
)

# Convolution 2: same patch/stride settings, fed by the output of convolution 1.
cvn_layer_2 = OneLayerCovnet(
    height=cvn_layer_1.output_height,
    width=cvn_layer_1.output_width,
    depth=cvn_layer_1.conv_depth,
    nb_labels=10,
    patch_height=5, patch_width=5, conv_depth=16,
    stride_height=2, stride_width=2, padding='SAME',
)

# Fully connected 1: takes the flattened output of convolution 2
# (height * width * channels) and maps it to 64 hidden units.
fcn_layer_1 = OneLayerFullyConnected(
    input_dim=(cvn_layer_2.output_height
               * cvn_layer_2.output_width
               * cvn_layer_2.conv_depth),
    nb_hidden=64,
    nb_labels=10,
)

# Fully connected 2: output layer, one unit per label.
fcn_layer_2 = OneLayerFullyConnected(
    input_dim=fcn_layer_1.nb_hidden,
    nb_hidden=10,  # = number of labels
    nb_labels=10,
)


In [None]:
# Assemble the baseline network from its layers, in forward order.
one_layers = [
    cvn_layer_1,
    cvn_layer_2,
    fcn_layer_1,
    fcn_layer_2,
]
cvn = Covnet(batch_size=16, one_layers=one_layers)

Reformat into a TensorFlow-friendly shape:
- convolutions need the image data formatted as a cube (height by width by #channels)
- labels as float 1-hot encodings.

In [5]:
import numpy as np
def reformat(dataset, labels, height, width, depth, nb_labels):
  """
  Reshape images into a 4-D float32 array and one-hot encode the labels.

  dataset is reshaped to (-1, height, width, depth), i.e. one
  height x width x depth cube per image. labels is indexed as a 1-D array
  of integer class ids and becomes a float32 array with nb_labels columns,
  holding 1.0 in the column of the class and 0.0 elsewhere.

  Returns the tuple (reshaped dataset, one-hot labels).
  """
  images = dataset.reshape((-1, height, width, depth)).astype(np.float32)
  # Broadcasting a column of class ids against the row 0..nb_labels-1
  # yields a boolean one-hot matrix, which is then cast to float32.
  class_ids = np.arange(nb_labels)
  one_hot = (labels[:, None] == class_ids).astype(np.float32)
  return images, one_hot

# Reshape every split to (N, height, width, depth) and one-hot encode its
# labels, using the input geometry declared on the first convolution layer.
#
# The results overwrite the raw arrays, so this cell must not transform the
# data twice: a second pass would hand already one-hot (2-D) labels back to
# reformat and produce labels of the wrong shape. Raw labels are 1-D, so the
# guard below makes re-running the cell a no-op apart from the prints.
if train_labels.ndim == 1:
  train_dataset, train_labels = reformat(
      train_dataset, train_labels,
      cvn_layer_1.height, cvn_layer_1.width, cvn_layer_1.depth, cvn_layer_1.nb_labels)
  valid_dataset, valid_labels = reformat(
      valid_dataset, valid_labels,
      cvn_layer_1.height, cvn_layer_1.width, cvn_layer_1.depth, cvn_layer_1.nb_labels)
  test_dataset, test_labels = reformat(
      test_dataset, test_labels,
      cvn_layer_1.height, cvn_layer_1.width, cvn_layer_1.depth, cvn_layer_1.nb_labels)

# Report the reformatted shapes as a sanity check.
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28, 1) (200000, 10)
Validation set (10000, 28, 28, 1) (10000, 10)
Test set (10000, 28, 28, 1) (10000, 10)


Let's build the small network defined above: two convolutional layers, followed by one fully connected hidden layer and a fully connected output layer. Convolutional networks are more expensive computationally, so we'll limit its depth and number of fully connected nodes.

In [None]:
# Build the graph for the baseline network. The validation and test images are
# handed over here, at graph-construction time; their labels are only supplied
# later, to run_session.
# NOTE(review): the datasets must already be reformatted to 4-D — confirm that
# the reformat cell has been run before this one.
graph = cvn.populate_graph(valid_dataset, test_dataset)

In [None]:
# Train the baseline network. Validation and test labels are passed so the
# session can score those splits (presumably at each summary point — confirm
# in covnet.py).
cvn.run_session(
    train_dataset, train_labels, valid_labels, test_labels,
    num_steps=1001, num_summary_points=100,
)

---
Problem 0
---------

Add a dropout layer with a keep probability of 0.8 after each of the first two convolution layers.

---

In [6]:
# One dropout layer per convolution, each sized from the output of the
# convolution it follows and keeping activations with probability 0.8.
# NOTE(review): the recorded output of this cell prints a to-do about removing
# dropout for validation and test, so evaluation presumably still runs with
# dropout applied — confirm in dropout.py.
dropout_1 = Dropout(
    height=cvn_layer_1.output_height,
    width=cvn_layer_1.output_width,
    depth=cvn_layer_1.conv_depth,
    nb_labels=10,
    keep_prob=0.8,
)
dropout_2 = Dropout(
    height=cvn_layer_2.output_height,
    width=cvn_layer_2.output_width,
    depth=cvn_layer_2.conv_depth,
    nb_labels=10,
    keep_prob=0.8,
)

To do: remove Dropout for validation and test!
To do: remove Dropout for validation and test!


In [None]:
# Same layers as the baseline network, with a dropout layer inserted after
# each convolution.
one_layers_dropout = [
    cvn_layer_1,
    dropout_1,
    cvn_layer_2,
    dropout_2,
    fcn_layer_1,
    fcn_layer_2,
]
cvn_dropout = Covnet(batch_size=16, one_layers=one_layers_dropout)

In [None]:
# Build the graph for the dropout variant, handing over the validation and
# test images at graph-construction time.
graph_dropout = cvn_dropout.populate_graph(valid_dataset, test_dataset)

In [None]:
# Train the dropout variant with the same step count and summary settings as
# the baseline run, so the two can be compared.
cvn_dropout.run_session(
    train_dataset, train_labels, valid_labels, test_labels,
    num_steps=1001, num_summary_points=100,
)

---
Problem 1
---------

The first convolutional model above uses convolutions with stride 2 to reduce the dimensionality. Replace the strides by a max pooling operation (`nn.max_pool()`) of stride 2 and kernel size 2.

---

In [None]:
# Pooling architecture: the convolutions now use stride 1, and each one is
# followed by a 2x2 pooling layer with stride 2 that takes over the
# down-sampling (max pooling per the problem statement — confirm in pooling.py).

# first covnet layer: 28x28 single-channel input, 5x5 patches, 16 channels
cvn_layer_pool_1 = OneLayerCovnet(
    height=28, width=28, depth=1, nb_labels=10,
    patch_height=5, patch_width=5, conv_depth=16,
    stride_height=1, stride_width=1, padding='SAME',
)

# first pooling layer, sized from the output of the first convolution
pool_layer_1 = Pooling(
    height=cvn_layer_pool_1.output_height,
    width=cvn_layer_pool_1.output_width,
    depth=cvn_layer_pool_1.conv_depth,
    nb_labels=10,
    pool_height=2, pool_width=2, pool_depth=1,
    stride_height=2, stride_width=2, stride_depth=1,
    padding='SAME',
)

# second covnet layer, fed by the first pooling layer
cvn_layer_pool_2 = OneLayerCovnet(
    height=pool_layer_1.output_height,
    width=pool_layer_1.output_width,
    depth=pool_layer_1.output_depth,
    nb_labels=10,
    patch_height=5, patch_width=5, conv_depth=16,
    stride_height=1, stride_width=1, padding='SAME',
)

# second pooling layer, sized from the output of the second convolution
pool_layer_2 = Pooling(
    height=cvn_layer_pool_2.output_height,
    width=cvn_layer_pool_2.output_width,
    depth=cvn_layer_pool_2.conv_depth,
    nb_labels=10,
    pool_height=2, pool_width=2, pool_depth=1,
    stride_height=2, stride_width=2, stride_depth=1,
    padding='SAME',
)

# 1st fully connected: flattened output of the second pooling layer
# (height * width * depth) mapped to 64 hidden units
fcn_layer_pool_1 = OneLayerFullyConnected(
    input_dim=(pool_layer_2.output_height
               * pool_layer_2.output_width
               * pool_layer_2.output_depth),
    nb_hidden=64,
    nb_labels=10,
)

# 2nd fully connected: output layer, one unit per label
fcn_layer_pool_2 = OneLayerFullyConnected(
    input_dim=fcn_layer_pool_1.nb_hidden,
    nb_hidden=10,  # = number of labels
    nb_labels=10,
)

# Assemble the pooling network from its layers, in forward order.
one_layers_pool = [
    cvn_layer_pool_1,
    pool_layer_1,
    cvn_layer_pool_2,
    pool_layer_2,
    fcn_layer_pool_1,
    fcn_layer_pool_2,
]
cvn_pool = Covnet(batch_size=16, one_layers=one_layers_pool)

In [None]:
# Build the graph for the pooling variant, handing over the validation and
# test images at graph-construction time.
graph_pool = cvn_pool.populate_graph(valid_dataset, test_dataset)

In [None]:
# Train the pooling variant with the same step count and summary settings as
# the other runs, so the results can be compared.
cvn_pool.run_session(
    train_dataset, train_labels, valid_labels, test_labels,
    num_steps=1001, num_summary_points=100,
)

---
Problem 2
---------

Try to get the best performance you can using a convolutional net. Look for example at the classic [LeNet5](http://yann.lecun.com/exdb/lenet/) architecture, adding Dropout, and/or adding learning rate decay.

---