In [4]:
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range
import time

In [5]:
pickle_file = 'notMNIST.pickle'

with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    train_dataset = save['train_dataset']
    train_labels = save['train_labels']
    valid_dataset = save['valid_dataset']
    valid_labels = save['valid_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save  # hint to help gc free up memory
    print('Training set', train_dataset.shape, train_labels.shape)
    print('Validation set', valid_dataset.shape, valid_labels.shape)
    print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)


In [6]:
image_size = 28
num_labels = 10
num_channels = 1 # grayscale

def reformat(dataset, labels):
    dataset = dataset.reshape(
        (-1, image_size, image_size, num_channels)).astype(np.float32)
    labels = (np.arange(num_labels) == labels[:, None]).astype(np.float32)
    return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

def accuracy(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

Training set (200000, 28, 28, 1) (200000, 10)
Validation set (10000, 28, 28, 1) (10000, 10)
Test set (10000, 28, 28, 1) (10000, 10)


In [4]:
# 2 convolutional layers + 1 fully connected layer

batch_size = 16
patch_size = 5 # kernel size
depth = 16
num_hidden = 64
sd = 0.1

conv_graph = tf.Graph()

with conv_graph.as_default():
    
    # input data
    # training data (batch => use of placeholder instead of variable)
    # input tensor shape : [batch, in_height, in_width, in_channels]
    tf_train_dataset = tf.placeholder(
            tf.float32, shape = (batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(
            tf.float32, shape = (batch_size, num_labels))
    # validation and test data
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    
    # Variables
    # filter / kernel tensor shape:  [filter_height, filter_width, in_channels, out_channels]
    
    # 2 convolutional layers
    cv1_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, num_channels, depth], stddev = sd)
    )
    cv1_biases = tf.Variable(tf.zeros(shape = [depth]))
    
    cv2_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, depth, depth], stddev = sd)
    )
    cv2_biases = tf.Variable(tf.constant(1.0, shape = [depth]))
    
    # 1 fully-connected layer
    fc_weights = tf.Variable(tf.truncated_normal(
            [(image_size//4)**2 * depth, num_hidden], stddev = sd)
            # stride = 2
    )
    fc_biases =  tf.Variable(tf.constant(1.0, shape = [num_hidden]))
    
    # ouput layer
    output_weights = tf.Variable(tf.truncated_normal(
            [num_hidden, num_labels], stddev = sd)
    )
    output_biases = tf.Variable(tf.constant(1.0, shape = [num_labels]))
    
    # tf.nn.conv2d(input, filter, strides, padding, use_cudnn_on_gpu=None, data_format=None, name=None)
    # Computes a 2-D convolution given 4-D input and filter tensors.
    # input tensor of shape : [batch, in_height, in_width, in_channels]
    # filter / kernel tensor of shape : [filter_height, filter_width, in_channels, out_channels]
    # Strides : strides[0] = strides[3] = 1.
    # => [1, stride, stride, 1].
    
    # Model
    def model (data):
        conv = tf.nn.conv2d(data, cv1_weights, [1, 2, 2, 1], padding = 'SAME') # stride = 2
        hidden = tf.nn.relu(conv + cv1_biases)
        conv = tf.nn.conv2d(hidden, cv2_weights, [1, 2, 2, 1], padding = 'SAME')
        hidden = tf.nn.relu(conv + cv2_biases)
        shape = hidden.get_shape().as_list()
        reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
        hidden = tf.nn.relu(tf.matmul(reshape, fc_weights) + fc_biases)
        return tf.matmul(hidden, output_weights) + output_biases
    
    # Training
    logits = model(tf_train_dataset)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels)
    )
    
    # Optimizer
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
    
    # Predictions for the training, validation and test data
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
    test_prediction = tf.nn.softmax(model(tf_test_dataset))

In [5]:
num_steps = 1001

with tf.Session(graph = conv_graph) as sess:
    tf.initialize_all_variables().run()
    print('initialized')
    for step in range (num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        _, l, predictions = sess.run(
          [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 50 == 0):
          print('Minibatch loss at step %d: %f' % (step, l))
          print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
          print('Validation accuracy: %.1f%%' % accuracy(
            valid_prediction.eval(), valid_labels))
    print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

initialized
Minibatch loss at step 0: 3.211586
Minibatch accuracy: 12.5%
Validation accuracy: 12.1%
Minibatch loss at step 50: 1.653821
Minibatch accuracy: 50.0%
Validation accuracy: 42.0%
Minibatch loss at step 100: 0.531900
Minibatch accuracy: 87.5%
Validation accuracy: 67.6%
Minibatch loss at step 150: 0.501262
Minibatch accuracy: 87.5%
Validation accuracy: 75.2%
Minibatch loss at step 200: 0.798493
Minibatch accuracy: 75.0%
Validation accuracy: 76.0%
Minibatch loss at step 250: 0.807040
Minibatch accuracy: 81.2%
Validation accuracy: 78.9%
Minibatch loss at step 300: 0.398531
Minibatch accuracy: 93.8%
Validation accuracy: 79.6%
Minibatch loss at step 350: 1.151908
Minibatch accuracy: 75.0%
Validation accuracy: 79.5%
Minibatch loss at step 400: 0.279726
Minibatch accuracy: 100.0%
Validation accuracy: 80.8%
Minibatch loss at step 450: 0.287894
Minibatch accuracy: 100.0%
Validation accuracy: 80.6%
Minibatch loss at step 500: 0.761354
Minibatch accuracy: 75.0%
Validation accuracy: 81.1%

### Problem 1

In [7]:
def fcinputsize (i, k, s, n):
    m = 1
    #i = (i-k) // s + 1
    #if n==1:
    #    if o % s == 0:
    #        return o
    #    else:
    #        return o+1
    #else:
    while (m<=n):
        
        
        if (i-k) % s != 0:
            i = (i-k) // s + 1 +1
            return i
        i = (i-k) // s + 1
        m += 1
    return i
    

print(fcinputsize(28, 3, 2, 3))

14


In [8]:
# max pooling
k=2
s=2
strides = [1, s, s, 1] # strides = 2
ksize = [1, k, k, 1]  # kernel size = 2

# 2 convolutional layers + 1 fully connected layer

batch_size = 16
patch_size = 5 # kernel size
depth = 16
num_hidden = 64
sd = 0.1
num_convlayer = 2

conv_graph = tf.Graph()

with conv_graph.as_default():
    
    # input data
    # training data (batch => use of placeholder instead of variable)
    # input tensor shape : [batch, in_height, in_width, in_channels]
    tf_train_dataset = tf.placeholder(
            tf.float32, shape = (batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(
            tf.float32, shape = (batch_size, num_labels))
    # validation and test data
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    
    # Variables
    # filter / kernel tensor shape:  [filter_height, filter_width, in_channels, out_channels]
    
    # 2 convolutional layers
    cv1_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, num_channels, depth], stddev = sd)
    )
    cv1_biases = tf.Variable(tf.zeros(shape = [depth]))
    
    cv2_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, depth, depth], stddev = sd)
    )
    cv2_biases = tf.Variable(tf.constant(1.0, shape = [depth]))
    
    # 1 fully-connected layer
    fc_weights = tf.Variable(tf.truncated_normal(
            [fcinputsize(image_size, k, s, num_convlayer)**2 * depth, num_hidden], stddev = sd)
            # stride = 2
            # o = (i-k // s) +1                             
    )
    fc_biases =  tf.Variable(tf.constant(1.0, shape = [num_hidden]))
    
    # ouput layer
    output_weights = tf.Variable(tf.truncated_normal(
            [num_hidden, num_labels], stddev = sd)
    )
    output_biases = tf.Variable(tf.constant(1.0, shape = [num_labels]))
    


    
    # tf.nn.max_pool(value, ksize, strides, padding, data_format='NHWC', name=None)
    # input tensor of shape : [batch, in_height, in_width, in_channels]
    # ksize: A list of ints that has length >= 4. The size of the window for each dimension of the input tensor.
    # strides: A list of ints that has length >= 4.
    # The stride of the sliding window for each dimension of the input tensor.
    # padding: A string, either 'VALID' or 'SAME'. The padding algorithm.
    
    # strides = s
    # ksize = k
    # Model
    def model (data):
        
        # conv-layer 1
        conv = tf.nn.conv2d(data, cv1_weights, [1, 1, 1, 1], padding='SAME')
        hidden = tf.nn.relu(conv + cv1_biases)
        hidden = tf.nn.max_pool(hidden, ksize, strides, padding = 'SAME')
        
        # conv-layer 2
        conv = tf.nn.conv2d(hidden, cv2_weights, [1, 1, 1, 1], padding = 'SAME')
        hidden = tf.nn.relu(conv + cv2_biases)
        hidden = tf.nn.max_pool(hidden, ksize, strides, padding = 'SAME')
        
        shape = hidden.get_shape().as_list()
        reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
        print(shape)
        print(fc_weights.get_shape().as_list())
        hidden = tf.nn.relu(tf.matmul(reshape, fc_weights) + fc_biases)
        return tf.matmul(hidden, output_weights) + output_biases
    
    # Training
    logits = model(tf_train_dataset)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels)
    )
    
    # Optimizer
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
    
    # Predictions for the training, validation and test data
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
    test_prediction = tf.nn.softmax(model(tf_test_dataset))
    
num_steps = 1001

with tf.Session(graph = conv_graph) as sess:
    tf.initialize_all_variables().run()
    print('initialized')
    for step in range (num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        _, l, predictions = sess.run(
          [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 50 == 0):
          print('Minibatch loss at step %d: %f' % (step, l))
          print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
          print('Validation accuracy: %.1f%%' % accuracy(
            valid_prediction.eval(), valid_labels))
    print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

[16, 7, 7, 16]
[784, 64]
[10000, 7, 7, 16]
[784, 64]
[10000, 7, 7, 16]
[784, 64]
initialized
Minibatch loss at step 0: 3.913937
Minibatch accuracy: 6.2%
Validation accuracy: 10.3%
Minibatch loss at step 50: 2.035770
Minibatch accuracy: 25.0%
Validation accuracy: 38.5%
Minibatch loss at step 100: 0.964966
Minibatch accuracy: 62.5%
Validation accuracy: 57.4%
Minibatch loss at step 150: 0.742281
Minibatch accuracy: 75.0%
Validation accuracy: 72.9%
Minibatch loss at step 200: 0.706681
Minibatch accuracy: 81.2%
Validation accuracy: 76.4%
Minibatch loss at step 250: 0.866497
Minibatch accuracy: 81.2%
Validation accuracy: 77.4%
Minibatch loss at step 300: 0.268470
Minibatch accuracy: 93.8%
Validation accuracy: 79.7%
Minibatch loss at step 350: 0.893159
Minibatch accuracy: 75.0%
Validation accuracy: 80.7%
Minibatch loss at step 400: 0.213691
Minibatch accuracy: 100.0%
Validation accuracy: 81.1%
Minibatch loss at step 450: 0.368662
Minibatch accuracy: 93.8%
Validation accuracy: 81.3%
Minibatch 

### Problem 2

In [None]:
# 3 - max pooling layers
# 2 - fully connected layers
# learning rate decay
# regularization

k=3
s=2

strides = [1, s, s, 1] # max pooling strides = 2
ksize = [1, k, k, 1]  # max pooling kernel size = 2

# 2 convolutional layers + 1 fully connected layer

batch_size = 20
patch_size = 5 # conv - kernel size
depth = 16
num_hidden_1 = 64
num_hidden_2 = 20
sd = 0.1
num_steps = 1001

beta_cv1 = 0.00001
beta_cv2 = 0.00001
beta_cv3 = 0.00001
beta_fc1 = 0.00001
beta_fc2 = 0.00001
beta_out = 0.00001



num_convlayer = 3

conv_graph = tf.Graph()

with conv_graph.as_default():
    
    # input data
    # training data (batch => use of placeholder instead of variable)
    # input tensor shape : [batch, in_height, in_width, in_channels]
    tf_train_dataset = tf.placeholder(
            tf.float32, shape = (batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(
            tf.float32, shape = (batch_size, num_labels))
    # validation and test data
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    
    # Variables
    # filter / kernel tensor shape:  [filter_height, filter_width, in_channels, out_channels]
    
    # 3 convolutional layers
    cv1_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, num_channels, depth], stddev = sd)
    )
    cv1_biases = tf.Variable(tf.zeros(shape = [depth]))
    
    cv2_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, depth, depth], stddev = sd)
    )
    cv2_biases = tf.Variable(tf.constant(1.0, shape = [depth]))
    
    cv3_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, depth, depth], stddev = sd)
    )
    cv3_biases = tf.Variable(tf.constant(1.0, shape = [depth]))
    
    # 2 fully-connected layer
    fc1_weights = tf.Variable(tf.truncated_normal(
            [fcinputsize(image_size, k, s, num_convlayer)**2 * depth, num_hidden_1], stddev = sd)
            # stride = 2
            # o = (i-k // s) +1
    )
    
    fc1_biases =  tf.Variable(tf.constant(1.0, shape = [num_hidden_1]))
    
    fc2_weights = tf.Variable(tf.truncated_normal(
            [num_hidden_1, num_hidden_2], stddev = sd)
    )
    fc2_biases =  tf.Variable(tf.constant(1.0, shape = [num_hidden_2]))
    
    
    # ouput layer
    output_weights = tf.Variable(tf.truncated_normal(
            [num_hidden_2, num_labels], stddev = sd)
    )
    output_biases = tf.Variable(tf.constant(1.0, shape = [num_labels]))
    
    # tf.nn.max_pool(value, ksize, strides, padding, data_format='NHWC', name=None)
    # input tensor of shape : [batch, in_height, in_width, in_channels]
    # ksize: A list of ints that has length >= 4. The size of the window for each dimension of the input tensor.
    # strides: A list of ints that has length >= 4.
    # The stride of the sliding window for each dimension of the input tensor.
    # padding: A string, either 'VALID' or 'SAME'. The padding algorithm.
    
    # strides = 2
    # ksize = 2
    # Model
    def model (data):
        
        # conv-layer 1
        conv = tf.nn.conv2d(data, cv1_weights, [1, 1, 1, 1], padding='SAME')
        hidden = tf.nn.relu(conv + cv1_biases)
        hidden = tf.nn.max_pool(hidden, ksize, strides, padding = 'SAME')
        
        # conv-layer 2
        conv = tf.nn.conv2d(hidden, cv2_weights, [1, 1, 1, 1], padding = 'SAME')
        hidden = tf.nn.relu(conv + cv2_biases)
        hidden = tf.nn.max_pool(hidden, ksize, strides, padding = 'SAME')
        
        # conv-layer 3
        conv = tf.nn.conv2d(hidden, cv3_weights, [1, 1, 1, 1], padding = 'SAME')
        hidden = tf.nn.relu(conv + cv3_biases)
        hidden = tf.nn.max_pool(hidden, ksize, strides, padding = 'SAME')
        
        shape = hidden.get_shape().as_list()
        reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
        print(shape)
        print(fc1_weights.get_shape().as_list())
        print(fc2_weights.get_shape().as_list())
        
        # fc-layer 1
        hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
        
        # fc-layer 2
        hidden = tf.nn.relu(tf.matmul(hidden, fc2_weights) + fc2_biases)
        
        return tf.matmul(hidden, output_weights) + output_biases
    
    # Training
    logits = model(tf_train_dataset)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels)
    ) + 0.5 * (#beta_cv1 * tf.nn.l2_loss(cv1_weights)
               #+ beta_cv2 * tf.nn.l2_loss(cv2_weights)
               #+ beta_cv3 * tf.nn.l2_loss(cv3_weights)
               beta_fc1 * tf.nn.l2_loss(fc1_weights)
               + beta_fc2 * tf.nn.l2_loss(fc2_weights)
               + beta_out * tf.nn.l2_loss(output_weights))
    
    
    
    # learning rate decay
    global_step = tf.Variable(0)
    starter_learning_rate = 0.4
    learning_rate = tf.train.exponential_decay(
        starter_learning_rate, global_step, 600, 0.96, staircase = True)
    
    # Optimizer
    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate).minimize(loss, global_step = global_step)
    train_prediction = tf.nn.softmax(logits)
    
    # Predictions for the training, validation and test data
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
    test_prediction = tf.nn.softmax(model(tf_test_dataset))

tic = time.clock()

with tf.Session(graph = conv_graph) as sess:
    tf.initialize_all_variables().run()
    print('initialized')
    for step in range (num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        _, l, predictions = sess.run(
          [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 50 == 0):
          print('Minibatch loss at step %d: %f' % (step, l))
          print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
          print('Validation accuracy: %.1f%%' % accuracy(
            valid_prediction.eval(), valid_labels))
    print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
    
toc = time.clock()
toc-tic