# Convolutional Neural Networks 2

Previously we trained fully connected networks to classify [notMNIST](http://yaroslavvb.blogspot.com/2011/09/notmnist-dataset.html) characters.

The goal of this assignment is to make the neural network convolutional.

In [2]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range

In [3]:
pickle_file = '/input/notMNIST.pickle'

# NOTE(review): unpickling can execute arbitrary code embedded in the file;
# only load a notMNIST.pickle that you produced yourself or otherwise trust.
with open(pickle_file, 'rb') as f:
  save = pickle.load(f)

# Unpack the six arrays stored under their string keys.
train_dataset, train_labels = save['train_dataset'], save['train_labels']
valid_dataset, valid_labels = save['valid_dataset'], save['valid_labels']
test_dataset, test_labels = save['test_dataset'], save['test_labels']
del save  # drop our reference to the container so it can be collected

# Report the shape of every split as a quick sanity check.
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)


Reformat into a TensorFlow-friendly shape:
- convolutions need the image data formatted as a cube (width by height by #channels)
- labels as float 1-hot encodings.

In [6]:
image_size = 28
num_labels = 10
num_channels = 1 # grayscale

import numpy as np

def reformat(dataset, labels):
  """Convert images and labels into the layout the convolutional model expects.

  Args:
    dataset: array-like of images whose total size is a multiple of
      image_size * image_size * num_channels.
    labels: either a 1-D array-like of integer class ids in
      [0, num_labels), or a 2-D array that is already one-hot encoded with
      num_labels columns.

  Returns:
    A (dataset, labels) tuple where dataset is a float32 array of shape
    (-1, image_size, image_size, num_channels) and labels is a float32
    one-hot array of shape (-1, num_labels).
  """
  dataset = np.asarray(dataset).reshape(
    (-1, image_size, image_size, num_channels)).astype(np.float32)
  labels = np.asarray(labels)
  if labels.ndim == 2 and labels.shape[1] == num_labels:
    # Already one-hot (for example when this notebook cell is re-run):
    # encoding again would broadcast into a meaningless 3-D array, so only
    # normalise the dtype.
    labels = labels.astype(np.float32)
  else:
    # Broadcasting compares every class id against each label, which yields
    # one boolean row per example with a single True at the label's index.
    labels = (np.arange(num_labels) == labels[:, None]).astype(np.float32)
  return dataset, labels
# Replace each split with its reformatted version (the originals are
# overwritten), going through reformat() for all three so they share a layout.
train_dataset, train_labels = reformat(dataset=train_dataset,
                                       labels=train_labels)
valid_dataset, valid_labels = reformat(dataset=valid_dataset,
                                       labels=valid_labels)
test_dataset, test_labels = reformat(dataset=test_dataset,
                                     labels=test_labels)

# Show the resulting shapes for a quick visual check.
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

NameError: name 'train_dataset' is not defined

---
Problem 2
---------

Try to get the best performance you can using a convolutional net. Look, for example, at the classic [LeNet5](http://yann.lecun.com/exdb/lenet/) architecture, and consider adding dropout and/or learning rate decay.

---

In [77]:
def conv2d(x, W, b, strides=1):
    """Apply a 2-D convolution, add a bias and pass the result through ReLU.

    Args:
        x: input tensor handed to tf.nn.conv2d.
        W: convolution filter tensor.
        b: bias added to the convolution output via tf.nn.bias_add.
        strides: step used for both middle entries of the 4-element stride
            list; the first and last entries are fixed at 1.

    Returns:
        The ReLU-activated tensor. 'SAME' padding is used for the convolution.
    """
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME')
    biased = tf.nn.bias_add(convolved, b)
    return tf.nn.relu(biased)

def maxpool2d(x, k=2):
    """Max-pool `x` with a k-by-k window moved in steps of k.

    The window size and the stride use the same [1, k, k, 1] specification,
    so pooling windows do not overlap. 'SAME' padding is used.
    """
    window = [1, k, k, 1]
    step = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

In [111]:
def conv_net(x, weights, biases, dropout):
    """Build the network: two conv/pool stages, one hidden layer, one output layer.

    Args:
        x: input tensor fed to the first convolution.
        weights: mapping with keys 'wc1', 'wc2', 'wd1' and 'out'.
        biases: mapping with keys 'bc1', 'bc2', 'bd1' and 'out'.
        dropout: value passed as the second positional argument of
            tf.nn.dropout (presumably the keep probability under the
            TensorFlow 1.x API this notebook uses -- confirm against the
            installed version).

    Returns:
        The un-normalised output of the final matmul (no softmax applied).
    """
    # Two identical stages: convolution + ReLU, then 2x2 max-pooling.
    features = x
    for w_key, b_key in (('wc1', 'bc1'), ('wc2', 'bc2')):
        features = conv2d(features, weights[w_key], biases[b_key])
        features = maxpool2d(features, k=2)

    # Flatten everything except the leading dimension for the dense layer.
    dims = features.get_shape().as_list()
    flat_width = dims[1] * dims[2] * dims[3]
    flat = tf.reshape(features, [-1, flat_width])

    # Hidden fully connected layer with ReLU, followed by dropout.
    hidden = tf.nn.relu(tf.matmul(flat, weights['wd1']) + biases['bd1'])
    hidden = tf.nn.dropout(hidden, dropout)

    # Output layer.
    return tf.matmul(hidden, weights['out']) + biases['out']

In [144]:
image_size = 28
num_labels = 10
num_channels = 1 # grayscale
dropout = 0.75  # value fed to the keep_prob placeholder while training

# Parameters
patch_size = 5
batch_size = 128
learning_rate = 0.001
conv_depth = 16   # number of filters in each convolutional layer
fc_units = 1024   # width of the hidden fully connected layer
# conv2d keeps the spatial size ('SAME' padding, stride 1) and each of the two
# 2x2 'SAME' max-pools halves it rounding up, so the final feature map is
# ceil(image_size / 4) on each side (7 for 28x28 inputs).
pooled_size = (image_size + 3) // 4

graph = tf.Graph()

with graph.as_default():

    # Input data: the batch dimension is left open so the same placeholders
    # serve minibatches as well as the full validation and test sets.
    x = tf.placeholder(tf.float32, shape=[None, image_size, image_size, num_channels])
    y = tf.placeholder(tf.float32, shape=[None, num_labels])

    # Fed separately so dropout can be switched off (1.0) for evaluation.
    keep_prob = tf.placeholder(tf.float32)

    # Variables.
    weights = {
        # patch_size x patch_size conv, num_channels inputs, conv_depth outputs
        'wc1': tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, num_channels, conv_depth], stddev=0.1)),
        # patch_size x patch_size conv, conv_depth inputs, conv_depth outputs
        'wc2': tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, conv_depth, conv_depth], stddev=0.1)),
        # fully connected, pooled_size*pooled_size*conv_depth inputs,
        # fc_units outputs
        'wd1': tf.Variable(tf.truncated_normal(
            [pooled_size * pooled_size * conv_depth, fc_units], stddev=0.1)),
        # fc_units inputs, num_labels outputs (class prediction)
        'out': tf.Variable(tf.truncated_normal([fc_units, num_labels], stddev=0.1))
    }

    biases = {
        'bc1': tf.Variable(tf.random_normal([conv_depth])),
        'bc2': tf.Variable(tf.random_normal([conv_depth])),
        'bd1': tf.Variable(tf.random_normal([fc_units])),
        'out': tf.Variable(tf.random_normal([num_labels]))
    }

    # Construct model
    pred = conv_net(x, weights, biases, keep_prob)

    # Define loss and optimizer
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # Evaluate model: a prediction is correct when the highest-scoring class
    # matches the position of the 1 in the one-hot label.
    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # Initializing the variables
    init = tf.global_variables_initializer()
    

[None, 7, 7, 16]


In [146]:
def next_batch(batch_size, x, y, step):
    """Return the training minibatch for the given step.

    The batch is a contiguous slice of the module-level `train_dataset` and
    `train_labels`; its start offset advances by `batch_size` per step and
    wraps around before running off the end of the data.

    Args:
        batch_size: number of examples per batch.
        x: unused; kept so existing callers keep working.
        y: unused; kept so existing callers keep working.
        step: zero-based training step used to pick the slice.

    Returns:
        A (batch_x, batch_y) tuple of slices. If the training set holds no
        more than `batch_size` examples, the whole set is returned.
    """
    num_examples = train_labels.shape[0]
    # Clamp to 1 so a training set no larger than one batch neither divides
    # by zero nor produces a negative (wrapped) offset.
    span = max(num_examples - batch_size, 1)
    offset = (step * batch_size) % span
    end = offset + batch_size
    batch_x = train_dataset[offset:end, :, :, :]
    batch_y = train_labels[offset:end, :]
    return batch_x, batch_y

num_steps = 3001
# How often (in steps) to report minibatch and validation metrics. Defined
# here so the cell does not fail with a NameError on a fresh kernel.
display_step = 100

with tf.Session(graph=graph) as sess:
    sess.run(init)
    # Keep training until reach max iterations
    for step in range(num_steps):
        batch_x, batch_y = next_batch(batch_size, x, y, step)

        # Run optimization op (backprop) with dropout active.
        sess.run(optimizer, feed_dict={x: batch_x,
                                       y: batch_y,
                                       keep_prob: dropout})
        if step % display_step == 0:
            # Evaluate with keep_prob 1.0 so dropout is disabled.
            loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,
                                                              y: batch_y,
                                                              keep_prob: 1.})
            valid_acc = sess.run(accuracy, feed_dict={x: valid_dataset,
                                                       y: valid_labels,
                                                       keep_prob: 1.})
            print("Iter {}, Minibatch Loss= {:.6f}, Training Accuracy= {:.5f}"
                  ", Validation Accuracy ={:.5f}".format(
                      step, loss, acc, valid_acc))
        # No manual `step += 1` here: range() already advances the counter,
        # and rebinding the loop variable had no effect on the iteration.
    print("Optimization Finished!")

    # Final evaluation on the held-out test set, again without dropout.
    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict={x: test_dataset,
                                        y: test_labels,
                                        keep_prob: 1.}))


Iter 0, Minibatch Loss= 5.522115, Training Accuracy= 0.15625, Validation Accuracy =0.13100
Iter 100, Minibatch Loss= 0.769351, Training Accuracy= 0.80469, Validation Accuracy =0.81260
Iter 200, Minibatch Loss= 0.379498, Training Accuracy= 0.89844, Validation Accuracy =0.84740
Iter 300, Minibatch Loss= 0.391994, Training Accuracy= 0.89844, Validation Accuracy =0.85430
Iter 400, Minibatch Loss= 0.466789, Training Accuracy= 0.85938, Validation Accuracy =0.86530
Iter 500, Minibatch Loss= 0.348914, Training Accuracy= 0.88281, Validation Accuracy =0.87220
Iter 600, Minibatch Loss= 0.407786, Training Accuracy= 0.87500, Validation Accuracy =0.87720
Iter 700, Minibatch Loss= 0.445098, Training Accuracy= 0.86719, Validation Accuracy =0.87920
Iter 800, Minibatch Loss= 0.461619, Training Accuracy= 0.85938, Validation Accuracy =0.88430
Iter 900, Minibatch Loss= 0.512550, Training Accuracy= 0.87500, Validation Accuracy =0.88740
Iter 1000, Minibatch Loss= 0.339348, Training Accuracy= 0.89844, Validat