Deep Learning
=============

Assignment 4
------------

Previously in `2_fullyconnected.ipynb` and `3_regularization.ipynb`, we trained fully connected networks to classify [notMNIST](http://yaroslavvb.blogspot.com/2011/09/notmnist-dataset.html) characters.

The goal of this assignment is to make the neural network convolutional.

In [17]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
import cPickle as pickle
import numpy as np
import tensorflow as tf
import time
import math

Reformat into a TensorFlow-friendly shape:
- convolutions need the image data formatted as a cube (width by height by #channels)
- labels as float 1-hot encodings.

In [4]:
pickle_file = 'notMNIST.pickle'

IMAGE_PIXELS = 784

# NOTE: unpickling can execute arbitrary code; only load a pickle file you
# produced yourself or otherwise trust.
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)

# The file is only needed for the load itself, so everything below runs
# after it has been closed.
train_dataset = save['train_dataset']
train_labels = save['train_labels']
valid_dataset = save['valid_dataset']
valid_labels = save['valid_labels']
test_dataset = save['test_dataset']
test_labels = save['test_labels']
del save  # hint to help gc free up memory

# Flatten every image into a row of IMAGE_PIXELS float32 values.
train_dataset = train_dataset.reshape((-1, IMAGE_PIXELS)).astype(np.float32)
valid_dataset = valid_dataset.reshape((-1, IMAGE_PIXELS)).astype(np.float32)
test_dataset = test_dataset.reshape((-1, IMAGE_PIXELS)).astype(np.float32)

# Single-argument print calls produce the same text under Python 2 and 3.
print('Training set %s %s' % (train_dataset.shape, train_labels.shape))
print('Validation set %s %s' % (valid_dataset.shape, valid_labels.shape))
print('Test set %s %s' % (test_dataset.shape, test_labels.shape))

Training set (190000, 784) (190000,)
Validation set (10000, 784) (10000,)
Test set (18724, 784) (18724,)


Let's build a small network with two convolutional layers, followed by one fully connected layer. Convolutional networks are more expensive computationally, so we'll limit its depth and number of fully connected nodes.

In [22]:
class BaseTensorFlow(object):
    """Training and evaluation scaffolding shared by the notMNIST models.

    A subclass supplies the network by overriding ``model``. This class loads
    the pickled data, builds the loss, optimiser and accuracy ops, slices
    mini-batches and drives the training loop.
    """

    def __init__(self, batch_size=16, starting_learning_rate=0.01, learning_rate_decay=0.99, num_steps=1001):
        """Store the hyper-parameters and the fixed data geometry.

        Args:
            batch_size: number of examples per step; it also fixes the first
                dimension of both placeholders.
            starting_learning_rate: initial learning rate for gradient descent.
            learning_rate_decay: decay factor handed to
                ``tf.train.exponential_decay``.
            num_steps: number of iterations performed by ``run_training``.
        """
        self.batch_size = batch_size
        self.image_size = 28
        self.NUM_CLASSES = 10
        self.num_channels = 1
        self.starting_learning_rate = starting_learning_rate
        self.learning_rate_decay = learning_rate_decay
        self.train_dir = './output'
        self.num_steps = num_steps
        # Length of one flattened image (28 * 28 * 1 = 784).
        self.IMAGE_PIXELS = self.image_size * self.image_size * self.num_channels

    def model(self, images, output_size, isEval=None):
        """Build the network; must be overridden by subclasses.

        The signature matches the way ``process`` calls it. ``process``
        expects ``(logits, regularizer)`` when ``isEval`` is falsy
        (``regularizer`` may be ``None``) and just ``logits`` when ``isEval``
        is true.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError('Not implemented')

    def loadData(self):
        """Load the six notMNIST arrays from ``notMNIST.pickle`` onto ``self``.

        Each dataset is flattened to rows of ``IMAGE_PIXELS`` float32 values
        and the resulting shapes are printed.
        """
        pickle_file = 'notMNIST.pickle'

        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(pickle_file, 'rb') as f:
            save = pickle.load(f)

        self.train_dataset = save['train_dataset']
        self.train_labels = save['train_labels']
        self.valid_dataset = save['valid_dataset']
        self.valid_labels = save['valid_labels']
        self.test_dataset = save['test_dataset']
        self.test_labels = save['test_labels']
        del save  # hint to help gc free up memory

        self.train_dataset = self.train_dataset.reshape((-1, self.IMAGE_PIXELS)).astype(np.float32)
        self.valid_dataset = self.valid_dataset.reshape((-1, self.IMAGE_PIXELS)).astype(np.float32)
        self.test_dataset = self.test_dataset.reshape((-1, self.IMAGE_PIXELS)).astype(np.float32)

        print('Training set %s %s' % (self.train_dataset.shape, self.train_labels.shape))
        print('Validation set %s %s' % (self.valid_dataset.shape, self.valid_labels.shape))
        print('Test set %s %s' % (self.test_dataset.shape, self.test_labels.shape))

    def loss_function(self, logits, labels):
        """Return the mean softmax cross-entropy between logits and labels.

        Args:
            logits: network output for one batch.
            labels: 1-D tensor of ``batch_size`` integer class ids.
        """
        # Build the one-hot matrix by hand: pair every row index with its
        # class id and write 1.0 at those (row, class) coordinates.
        labels = tf.expand_dims(labels, 1)
        indices = tf.expand_dims(tf.range(0, self.batch_size), 1)
        concated = tf.concat(1, [indices, labels])
        onehot_labels = tf.sparse_to_dense(
            concated, tf.pack([self.batch_size, self.NUM_CLASSES]), 1.0, 0.0)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits,
                                                                onehot_labels,
                                                                name='xentropy')
        loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
        return loss

    def training(self, loss, train_size):
        """Return the op that performs one gradient-descent step on ``loss``.

        Also registers a scalar summary for the loss. The learning rate is
        decayed in a staircase using ``global_step * batch_size`` as the
        progress counter and ``train_size`` as the decay interval.
        """
        tf.scalar_summary(loss.op.name, loss)
        global_step = tf.Variable(0)
        learning_rate = tf.train.exponential_decay(
            self.starting_learning_rate,
            global_step * self.batch_size,
            train_size,
            self.learning_rate_decay,
            staircase=True)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train_op = optimizer.minimize(loss, global_step=global_step)
        return train_op

    def evaluation(self, logits, labels):
        """Return an int32 op counting the examples whose top-1 class is right."""
        correct = tf.nn.in_top_k(logits, labels, 1)
        return tf.reduce_sum(tf.cast(correct, tf.int32))

    def preapare_placeholder_inputs(self):
        """Create the fixed-size image and label placeholders on ``self``."""
        self.images_placeholder = tf.placeholder(
            tf.float32,
            shape=(self.batch_size, self.image_size, self.image_size, self.num_channels))
        # The trailing comma makes this a real 1-tuple rather than a bare int.
        self.labels_placeholder = tf.placeholder(tf.int32, shape=(self.batch_size,))

    def fill_feed_dict(self, dataset, labels, step):
        """Return the feed dict holding the mini-batch for ``step``.

        Args:
            dataset: array whose first axis indexes examples.
            labels: array of labels aligned with ``dataset``.
            step: batch counter; the start offset is ``step * batch_size``
                wrapped into the range of valid start offsets.
        """
        # Valid start offsets are 0 .. n - batch_size inclusive, so the
        # modulus must be n - batch_size + 1. Using n - batch_size made the
        # last full batch wrap back to offset 0.
        num_offsets = labels.shape[0] - self.batch_size + 1
        if num_offsets > 0:
            offset = (step * self.batch_size) % num_offsets
        else:
            offset = 0
        images_feed = dataset[offset:(offset + self.batch_size), :]
        images_feed = images_feed.reshape(
            (self.batch_size, self.image_size, self.image_size, self.num_channels)).astype(np.float32)

        labels_feed = labels[offset:(offset + self.batch_size)]
        feed_dict = {
            self.images_placeholder: images_feed,
            self.labels_placeholder: labels_feed,
        }
        return feed_dict

    def do_eval(self, sess, eval_correct, dataset, labels):
        """Evaluate ``eval_correct`` over every full batch and print a summary.

        Trailing examples that do not fill a whole batch are skipped.

        Returns:
            The fraction of evaluated examples classified correctly
            (0.0 when there is not even one full batch).
        """
        true_count = 0
        steps_per_epoch = labels.shape[0] // self.batch_size
        num_examples = steps_per_epoch * self.batch_size
        for step in range(steps_per_epoch):
            feed_dict = self.fill_feed_dict(dataset, labels, step)
            true_count += sess.run(eval_correct, feed_dict=feed_dict)

        # Guard against a dataset smaller than one batch.
        precision = float(true_count) / num_examples if num_examples else 0.0
        print('  Num examples: %d  Num correct: %d  Precision @ 1: %0.04f' %
              (num_examples, true_count, precision))
        return precision

    def run_training(self, sess, eval_correct, train_op, loss):
        """Run ``num_steps`` training steps with periodic logging and evaluation.

        Every 50 steps the loss is printed and a summary is written. Every
        1000 steps, and after the final step, a checkpoint is saved and the
        model is evaluated on one training batch and the validation set.
        """
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(self.train_dir, graph_def=sess.graph_def)
        saver = tf.train.Saver()

        feed_dict = self.fill_feed_dict(self.train_dataset, self.train_labels, 0)

        try:
            for step in range(self.num_steps):
                start_time = time.time()
                _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

                # Prepare the batch for the next step right away; the summary
                # and the training-batch evaluation below therefore see the
                # upcoming batch, not the one just trained on.
                feed_dict = self.fill_feed_dict(self.train_dataset, self.train_labels, step + 1)

                duration = time.time() - start_time
                if step % 50 == 0:
                    print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, step)

                if (step + 1) % 1000 == 0 or (step + 1) == self.num_steps:
                    saver.save(sess, self.train_dir, global_step=step)
                    print('Training Data Eval:')
                    # Only the single prepared batch is evaluated here.
                    self.do_eval(sess, eval_correct,
                                 feed_dict[self.images_placeholder],
                                 feed_dict[self.labels_placeholder])
                    print('Validation Data Eval:')
                    self.do_eval(sess, eval_correct, self.valid_dataset, self.valid_labels)
        finally:
            # Flush pending events and release the event file.
            summary_writer.close()

    def process(self):
        """Build the graph, train the model and report test-set accuracy.

        ``loadData`` must have been called first, because the training,
        validation and test arrays are read from ``self``.
        """
        with tf.Graph().as_default():
            self.preapare_placeholder_inputs()

            logits_train, regularizer = self.model(self.images_placeholder, self.NUM_CLASSES)
            loss = self.loss_function(logits_train, self.labels_placeholder)
            # Compare against None explicitly: a Tensor must not be used as
            # a Python boolean.
            if regularizer is not None:
                loss += 5e-4 * regularizer

            train_op = self.training(loss, self.train_dataset.shape[0])

            # Second call with isEval=True builds the evaluation network.
            logits_eval = self.model(self.images_placeholder, self.NUM_CLASSES, isEval=True)
            eval_correct = self.evaluation(logits_eval, self.labels_placeholder)

            with tf.Session() as sess:
                init = tf.initialize_all_variables()
                sess.run(init)

                self.run_training(sess, eval_correct, train_op, loss)
                print('Test Data Eval:')
                self.do_eval(sess,
                             eval_correct,
                             self.test_dataset, self.test_labels)

In [23]:
class inference2(BaseTensorFlow):
    """Small convolutional classifier.

    Two conv + ReLU + 2x2 max-pool layers are followed by a fully connected
    hidden layer (with dropout while training) and a linear output layer.
    """

    def __init__(self, batch_size=16, starting_learning_rate=0.01, learning_rate_decay=0.99, num_steps=1001):
        """Forward the training hyper-parameters and set the network sizes."""
        BaseTensorFlow.__init__(self, batch_size, starting_learning_rate, learning_rate_decay, num_steps)
        self.SEED = 66478
        self.patch_size = 5
        self.depth = 16
        self.num_hidden = 64

    def _weights(self, shape):
        """Get or create the current scope's ``weights`` variable (N(0, 0.1) init)."""
        return tf.get_variable(
            "weights", shape,
            initializer=tf.random_normal_initializer(0.0, 0.1, seed=self.SEED))

    def _biases(self, size, value):
        """Get or create the current scope's ``biases`` variable, constant-initialised."""
        return tf.get_variable("biases", [size], initializer=tf.constant_initializer(value))

    def _conv_relu_pool(self, inputs, in_depth, out_depth, bias_init):
        """Apply a stride-1 SAME convolution, ReLU, then 2x2 stride-2 max-pooling."""
        weights = self._weights([self.patch_size, self.patch_size, in_depth, out_depth])
        biases = self._biases(out_depth, bias_init)

        conv = tf.nn.conv2d(inputs, weights, [1, 1, 1, 1], padding='SAME')
        relu = tf.nn.relu(tf.nn.bias_add(conv, biases))
        return tf.nn.max_pool(relu,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME')

    def model(self, images, output_size, isEval=None):
        """Build the network on ``images``.

        Args:
            images: 4-D tensor with a fully defined static shape; the last
                dimension is read as the channel count.
            output_size: number of output logits per example.
            isEval: when true, the variable scopes are opened with
                ``reuse=True`` and dropout is skipped.

        Returns:
            ``logits`` when ``isEval`` is true; otherwise the tuple
            ``(logits, regularizers)``, where ``regularizers`` is the sum of
            the L2 losses of the two fully connected weight matrices.
        """
        num_channels = images.get_shape().as_list()[3]

        with tf.variable_scope('layer1', reuse=isEval):
            pool = self._conv_relu_pool(images, num_channels, self.depth, 0.0)

        with tf.variable_scope('layer2', reuse=isEval):
            pool = self._conv_relu_pool(pool, self.depth, self.depth, 1.0)

        with tf.variable_scope('layer3', reuse=isEval):
            # Take the flattened width from the pooled tensor itself rather
            # than from image_size / 4 arithmetic: it stays an int under
            # true division and is right for sizes not divisible by 4.
            shape = pool.get_shape().as_list()
            flat_dim = shape[1] * shape[2] * shape[3]

            weights = self._weights([flat_dim, self.num_hidden])
            biases = self._biases(self.num_hidden, 1.0)

            flat = tf.reshape(pool, [shape[0], flat_dim])
            hidden = tf.nn.relu(tf.matmul(flat, weights) + biases)
            if not isEval:
                hidden = tf.nn.dropout(hidden, 0.5, seed=self.SEED)
            regularizers = tf.nn.l2_loss(weights)

        with tf.variable_scope('layer4', reuse=isEval):
            weights = self._weights([self.num_hidden, output_size])
            biases = self._biases(output_size, 1.0)

            logits = tf.matmul(hidden, weights) + biases
            regularizers = regularizers + tf.nn.l2_loss(weights)

        if isEval:
            return logits
        return (logits, regularizers)
            

In [None]:
# Random-noise tile used as filler; it has to exist before the plots below
# reference it.
blank = np.random.rand(28, 28)

# Reshape to 28x28 so the images can be stacked next to `blank`; this is a
# no-op if the rows are already stored as 28x28 arrays.
samples = list(train_dataset[0:4].reshape((-1, 28, 28)))

# NOTE(review): `pylab` is not imported in the visible part of the notebook;
# confirm it is in scope before running this cell.
pylab.gray();

pylab.subplot(1, 2, 1); pylab.axis('off'); pylab.imshow(np.hstack(samples + [blank]))
pylab.subplot(1, 2, 2); pylab.axis('off'); pylab.imshow(np.hstack([blank] + samples[1:4] + [blank]))
#pylab.subplot(1, 4, 3); pylab.axis('off'); pylab.imshow(np.hstack(train_dataset[10:15]))
#pylab.subplot(1, 4, 4); pylab.axis('off'); pylab.imshow(np.hstack(train_dataset[15:20]))
pylab.show()

In [24]:
#if __name__ == '__main__':
# Configure the convolutional model for this run: a lower starting learning
# rate than the class default and ten times as many steps.
model1 = inference2(
    num_steps=10001,
    learning_rate_decay=0.99,
    starting_learning_rate=0.005,
    batch_size=16,
)

# The data has to be loaded onto the instance before the graph is built and
# trained.
model1.loadData()
model1.process()

Step 0: loss = 3.91 (0.084 sec)
Step 50: loss = 0.72 (0.009 sec)
Training Data Eval:
  Num examples: 16  Num correct: 8  Precision @ 1: 0.5000
Validation Data Eval:
  Num examples: 10000  Num correct: 6533  Precision @ 1: 0.6533
Step 100: loss = 1.63 (0.011 sec)
Step 150: loss = 0.83 (0.009 sec)
Training Data Eval:
  Num examples: 16  Num correct: 12  Precision @ 1: 0.7500
Validation Data Eval:
  Num examples: 10000  Num correct: 7702  Precision @ 1: 0.7702
Step 200: loss = 0.50 (0.016 sec)
Step 250: loss = 0.56 (0.008 sec)
Training Data Eval:
  Num examples: 16  Num correct: 10  Precision @ 1: 0.6250
Validation Data Eval:
  Num examples: 10000  Num correct: 7727  Precision @ 1: 0.7727
Step 300: loss = 1.32 (0.019 sec)
Step 350: loss = 0.83 (0.009 sec)
Training Data Eval:
  Num examples: 16  Num correct: 16  Precision @ 1: 1.0000
Validation Data Eval:
  Num examples: 10000  Num correct: 7920  Precision @ 1: 0.7920
Step 400: loss = 0.12 (0.010 sec)
Step 450: loss = 0.67 (0.008 sec)
Trai

---
Problem 1
---------

The convolutional model above uses convolutions with stride 2 to reduce the dimensionality. Replace the strides with a max pooling operation (`nn.max_pool()`) of stride 2 and kernel size 2.

---

In [27]:
def inference_hidden5(images, isEval=None):
    """Build a two-conv-layer network with max pooling and return its logits.

    Each convolutional layer is a stride-1 SAME convolution followed by ReLU
    and a 2x2, stride-2 max pool. A fully connected ReLU layer and a linear
    output layer follow.

    Args:
        images: 4-D input tensor with a fully defined static shape.
        isEval: passed as ``reuse`` to every variable scope, so a true value
            reuses the variables created by an earlier call.

    Returns:
        The logits tensor produced by the final layer.

    Reads the module-level names ``patch_size``, ``num_channels``, ``depth``,
    ``num_hidden``, ``NUM_CLASSES`` and ``SEED``.
    """
    with tf.variable_scope('layer1', reuse=isEval):
        weights = tf.get_variable("weights", [patch_size, patch_size, num_channels, depth],
            initializer=tf.random_normal_initializer(0.0, 0.1, seed=SEED))
        biases = tf.get_variable("biases", [depth], initializer=tf.constant_initializer(0.0))

        conv = tf.nn.conv2d(images, weights, [1, 1, 1, 1], padding='SAME')
        relu = tf.nn.relu(tf.nn.bias_add(conv, biases))
        pool = tf.nn.max_pool(relu,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME')

    with tf.variable_scope('layer2', reuse=isEval):
        weights = tf.get_variable("weights", [patch_size, patch_size, depth, depth],
            initializer=tf.random_normal_initializer(0.0, 0.1, seed=SEED))
        biases = tf.get_variable("biases", [depth], initializer=tf.constant_initializer(1.0))

        conv = tf.nn.conv2d(pool, weights, [1, 1, 1, 1], padding='SAME')
        relu = tf.nn.relu(tf.nn.bias_add(conv, biases))
        pool = tf.nn.max_pool(relu,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME')

    with tf.variable_scope('layer3', reuse=isEval):
        # Size the weights from the pooled tensor's own static shape instead
        # of image_size / 4 arithmetic: the result is always an int and it
        # matches the tensor even when the image size is not divisible by 4.
        shape = pool.get_shape().as_list()
        flat_dim = shape[1] * shape[2] * shape[3]

        weights = tf.get_variable("weights", [flat_dim, num_hidden],
            initializer=tf.random_normal_initializer(0.0, 0.1, seed=SEED))
        biases = tf.get_variable("biases", [num_hidden], initializer=tf.constant_initializer(1.0))

        reshape = tf.reshape(pool, [shape[0], flat_dim])
        relu = tf.nn.relu(tf.matmul(reshape, weights) + biases)

    with tf.variable_scope('layer4', reuse=isEval):
        weights = tf.get_variable("weights", [num_hidden, NUM_CLASSES],
            initializer=tf.random_normal_initializer(0.0, 0.1, seed=SEED))
        biases = tf.get_variable("biases", [NUM_CLASSES], initializer=tf.constant_initializer(1.0))

        logits = tf.matmul(relu, weights) + biases

    return logits

In [28]:
# Run the training driver with inference_hidden5 as the model function.
# NOTE(review): `process` is not defined in the visible part of this
# notebook (only the BaseTensorFlow.process method is) — presumably it is an
# earlier function-based version of the driver; confirm it is in scope
# before running this cell.
process(train_dataset, train_labels, inference_hidden5)

Step 0: loss = 4.14 (0.074 sec)
Step 50: loss = 0.89 (0.023 sec)
Training Data Eval:
  Num examples: 16  Num correct: 8  Precision @ 1: 0.5000
Validation Data Eval:
  Num examples: 10000  Num correct: 6451  Precision @ 1: 0.6451
Step 100: loss = 1.52 (0.039 sec)
Step 150: loss = 0.87 (0.033 sec)
Training Data Eval:
  Num examples: 16  Num correct: 10  Precision @ 1: 0.6250
Validation Data Eval:
  Num examples: 10000  Num correct: 7672  Precision @ 1: 0.7672
Step 200: loss = 0.59 (0.042 sec)
Step 250: loss = 0.59 (0.024 sec)
Training Data Eval:
  Num examples: 16  Num correct: 9  Precision @ 1: 0.5625
Validation Data Eval:
  Num examples: 10000  Num correct: 7793  Precision @ 1: 0.7793
Step 300: loss = 1.13 (0.029 sec)
Step 350: loss = 0.75 (0.022 sec)
Training Data Eval:
  Num examples: 16  Num correct: 14  Precision @ 1: 0.8750
Validation Data Eval:
  Num examples: 10000  Num correct: 8122  Precision @ 1: 0.8122
Step 400: loss = 0.31 (0.027 sec)
Step 450: loss = 0.51 (0.023 sec)
Train

---
Problem 2
---------

Try to get the best performance you can using a convolutional net. Look, for example, at the classic [LeNet5](http://yann.lecun.com/exdb/lenet/) architecture, adding Dropout, and/or adding learning rate decay.

---

In [None]:
# Network hyper-parameters read by inference_hidden6 below:
#   patch_size - side length of the square convolution kernels
#   depth      - number of output channels of the first convolution
#   num_hidden - width of the fully connected hidden layer
patch_size, depth, num_hidden = 5, 16, 64

def inference_hidden6(images, isEval=None):
    """Build a two-conv-layer network whose second layer halves the depth.

    Layer 1 produces ``depth`` channels and layer 2 produces ``depth // 2``.
    Each is a stride-1 SAME convolution followed by ReLU and a 2x2, stride-2
    max pool. A fully connected ReLU layer and a linear output layer follow.

    Args:
        images: 4-D input tensor with a fully defined static shape.
        isEval: passed as ``reuse`` to every variable scope, so a true value
            reuses the variables created by an earlier call.

    Returns:
        The logits tensor produced by the final layer.

    Reads the module-level names ``patch_size``, ``num_channels``, ``depth``,
    ``num_hidden``, ``NUM_CLASSES`` and ``SEED``.
    """
    # Integer division keeps the channel count an int under true division.
    half_depth = depth // 2

    with tf.variable_scope('layer1', reuse=isEval):
        weights = tf.get_variable("weights", [patch_size, patch_size, num_channels, depth],
            initializer=tf.random_normal_initializer(0.0, 0.1, seed=SEED))
        biases = tf.get_variable("biases", [depth], initializer=tf.constant_initializer(0.0))

        conv = tf.nn.conv2d(images, weights, [1, 1, 1, 1], padding='SAME')
        relu = tf.nn.relu(tf.nn.bias_add(conv, biases))
        pool = tf.nn.max_pool(relu,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME')

    with tf.variable_scope('layer2', reuse=isEval):
        weights = tf.get_variable("weights", [patch_size, patch_size, depth, half_depth],
            initializer=tf.random_normal_initializer(0.0, 1.0 / math.sqrt(patch_size * patch_size), seed=SEED))
        biases = tf.get_variable("biases", [half_depth], initializer=tf.constant_initializer(1.0))

        conv = tf.nn.conv2d(pool, weights, [1, 1, 1, 1], padding='SAME')
        relu = tf.nn.relu(tf.nn.bias_add(conv, biases))
        pool = tf.nn.max_pool(relu,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME')

    with tf.variable_scope('layer3', reuse=isEval):
        # The pooled tensor has depth // 2 channels, so the fully connected
        # input width must come from its real shape. Sizing the weights with
        # `depth` channels made the matmul dimensions disagree.
        shape = pool.get_shape().as_list()
        flat_dim = shape[1] * shape[2] * shape[3]

        # Standard deviation of 1 / sqrt(fan_in), using the actual fan-in.
        weights = tf.get_variable("weights", [flat_dim, num_hidden],
            initializer=tf.random_normal_initializer(0.0, 1.0 / math.sqrt(flat_dim), seed=SEED))
        biases = tf.get_variable("biases", [num_hidden], initializer=tf.constant_initializer(1.0))

        reshape = tf.reshape(pool, [shape[0], flat_dim])
        relu = tf.nn.relu(tf.matmul(reshape, weights) + biases)

    with tf.variable_scope('layer4', reuse=isEval):
        weights = tf.get_variable("weights", [num_hidden, NUM_CLASSES],
            initializer=tf.random_normal_initializer(0.0, 0.1, seed=SEED))
        biases = tf.get_variable("biases", [NUM_CLASSES], initializer=tf.constant_initializer(1.0))

        logits = tf.matmul(relu, weights) + biases

    return logits