In [None]:
import numpy as np
import cPickle
import os
import gzip
import time
import tensorflow as tf
import matplotlib

# Second exercise: Classifying MNIST with Tensorflow Network


# Data Loading
We first define a function for downloading and loading MNIST.
**WARNING**: Executing it will obviously use up some space on your machine ;). 

In [None]:
def mnist(datasets_dir='./data'):
    """Download (if necessary) and load the pickled MNIST dataset.

    The dataset is cached as ``mnist.pkl.gz`` inside ``datasets_dir`` and is
    only downloaded when that file does not exist yet.

    Args:
        datasets_dir: Directory used to cache the downloaded file. It is
            created (including missing parent directories) when absent.

    Returns:
        A list ``[(train_x, train_y), (valid_x, valid_y), (test_x, test_y)]``.
        Every ``*_x`` is a float32 array reshaped to ``(N, 1, 28, 28)`` and
        every ``*_y`` is an int32 array.
    """
    # Resolved locally so the function runs on both Python 2 and Python 3.
    try:
        import cPickle as pickle  # Python 2
    except ImportError:
        import pickle  # Python 3

    if not os.path.isdir(datasets_dir):
        os.makedirs(datasets_dir)
    data_file = os.path.join(datasets_dir, 'mnist.pkl.gz')
    if not os.path.exists(data_file):
        print('... downloading MNIST from the web')
        try:
            from urllib.request import urlretrieve  # Python 3
        except ImportError:
            from urllib import urlretrieve  # Python 2
        url = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        # Download under a temporary name first: an interrupted transfer must
        # not leave a truncated file that later runs would treat as the cache.
        partial_file = data_file + '.part'
        try:
            urlretrieve(url, partial_file)
            os.rename(partial_file, data_file)
        finally:
            if os.path.exists(partial_file):
                os.remove(partial_file)

    print('... loading data')
    # NOTE(review): unpickling can execute arbitrary code and the file is
    # fetched over plain HTTP -- only use this with a source you trust.
    with gzip.open(data_file, 'rb') as f:
        try:
            train_set, valid_set, test_set = pickle.load(f, encoding="latin1")
        except TypeError:
            # Python 2's loader does not accept the ``encoding`` keyword.
            f.seek(0)
            train_set, valid_set, test_set = pickle.load(f)

    def _as_images(split):
        """Convert one (x, y) split to float32 images and int32 labels."""
        x, y = split
        x = x.astype('float32').reshape(x.shape[0], 1, 28, 28)
        return x, y.astype('int32')

    rval = [_as_images(train_set), _as_images(valid_set), _as_images(test_set)]
    print('... done loading data')
    return rval

# Build up the network


The goal was to build a CNN with TensorFlow and observe how changing some parameters/configurations affects the results.

Most of the code could be copied from the tensorflow tutorial. Only minor changes were needed.
Source: https://www.tensorflow.org/get_started/mnist/pros

What changed in comparison to the tutorial online:
    1. the datasource was changed to the mnist set from the previous exercise
    2. the filter sizes were adjusted
    3. removed drop out (as it's not required in the exercise)
    4. added a variable number of filters

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import sys
import tempfile

import tensorflow as tf

FLAGS = None

def deepnn(x, no_filters):
  """Assemble the graph of a small convolutional net for digit classification.

  Two stages of 3x3 convolution + ReLU followed by 2x2 max pooling feed a
  128-unit fully connected ReLU layer and a final 10-way linear layer.

  Args:
    x: an input tensor with the dimensions (N_examples, 784), where 784 is the
    number of pixels in a standard MNIST image.
    no_filters: number of output channels of each of the two convolutional
    layers.
  Returns:
    A tensor of shape (N_examples, 10) holding the logits of classifying the
    digit into one of 10 classes (the digits 0-9).
  """
  # Size of one flattened feature map stack after both pooling stages.
  flat_dim = 7 * 7 * no_filters

  # The trailing dimension is the channel count: 1 because the images are
  # grayscale (it would be 3 for RGB, 4 for RGBA, ...).
  with tf.name_scope('reshape'):
    images = tf.reshape(x, [-1, 28, 28, 1])

  # Convolution stage 1: 1 input channel -> no_filters channels.
  with tf.name_scope('conv1'):
    kernel1 = weight_variable([3, 3, 1, no_filters])
    offset1 = bias_variable([no_filters])
    conv1_out = tf.nn.relu(conv2d(images, kernel1) + offset1)

  # Downsampling stage 1.
  with tf.name_scope('pool1'):
    pooled1 = max_pool_2x2(conv1_out)

  # Convolution stage 2: no_filters channels -> no_filters channels.
  with tf.name_scope('conv2'):
    kernel2 = weight_variable([3, 3, no_filters, no_filters])
    offset2 = bias_variable([no_filters])
    conv2_out = tf.nn.relu(conv2d(pooled1, kernel2) + offset2)

  # Downsampling stage 2.
  with tf.name_scope('pool2'):
    pooled2 = max_pool_2x2(conv2_out)

    print(pooled2.shape)

  # Dense layer 1 -- two rounds of downsampling shrink the 28x28 image to
  # 7x7x(no_filters) feature maps, which are mapped to 128 features here.
  with tf.name_scope('fc1'):
    dense1_w = weight_variable([flat_dim, 128])
    dense1_b = bias_variable([128])

    flattened = tf.reshape(pooled2, [-1, flat_dim])
    dense1_out = tf.nn.relu(tf.matmul(flattened, dense1_w) + dense1_b)

  # Dense layer 2 -- 128 features to 10 class scores, one per digit.
  with tf.name_scope('fc2'):
    dense2_w = weight_variable([128, 10])
    dense2_b = bias_variable([10])
    logits = tf.matmul(dense1_out, dense2_w) + dense2_b

  return logits


def conv2d(x, W):
  """Convolve `x` with the filters `W` using stride 1 and 'SAME' padding."""
  unit_strides = [1, 1, 1, 1]
  return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')


def max_pool_2x2(x):
  """Max-pool `x` over 2x2 windows with stride 2 and 'SAME' padding."""
  window = [1, 2, 2, 1]
  step = [1, 2, 2, 1]
  return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')


def weight_variable(shape):
  """Create a variable of `shape` drawn from a truncated normal (stddev 0.1)."""
  return tf.Variable(tf.truncated_normal(shape, stddev=0.1))


def bias_variable(shape):
  """Create a variable of `shape` whose entries all start at 0.1."""
  return tf.Variable(tf.constant(0.1, shape=shape))


def one_hot(labels, n_classes=None):
  """Create a one-hot encoding from an array of integer class labels.

  i.e. given y = [0,2,1]
  it creates y_one_hot = [[1,0,0], [0,0,1], [0,1,0]]

  Args:
    labels: array-like of non-negative integer class indices (any shape).
    n_classes: width of the encoding. Defaults to ``max(labels) + 1`` so that
      a class missing from `labels` no longer breaks the encoding; pass it
      explicitly when different label sets must share the same width.
  Returns:
    A float64 array of shape ``labels.shape + (n_classes,)``.
  Raises:
    ValueError: if a label is negative or not smaller than `n_classes`.
  """
  labels = np.asarray(labels)
  if labels.size == 0:
    # Nothing to encode; keep the trailing class axis for consistency.
    return np.zeros(labels.shape + (n_classes or 0,))
  if labels.min() < 0:
    raise ValueError('labels must be non-negative class indices')
  if n_classes is None:
    n_classes = int(labels.max()) + 1
  elif labels.max() >= n_classes:
    raise ValueError('label %d does not fit into %d classes'
                     % (int(labels.max()), n_classes))
  # Row c of the identity matrix is the one-hot vector of class c.
  return np.eye(n_classes)[labels]



def main(_):
  """Train the CNN once per learning rate and log the validation accuracy.

  For every learning rate a fresh network with 16 filters per convolutional
  layer is trained with plain gradient descent for 7500 passes over the
  training set (mini-batches of 5000 examples). Before each pass the accuracy
  on the full validation set is evaluated, printed and appended as one line
  to '/home/johannes/file<learning rate>.txt'.

  Args:
    _: unused; the argv list passed in by tf.app.run.
  """
  Dtrain, Dval, Dtest = mnist()  # the test split is not used here
  X_train, y_train = Dtrain
  x_val, y_val = Dval

  # Drop the channel axis and flatten each 28x28 image to a 784-vector.
  X_training = X_train[:, 0, :, :].reshape(X_train.shape[0], 784)
  Y_training = one_hot(y_train)

  X_validation = x_val[:, 0, :, :].reshape(x_val.shape[0], 784)
  Y_validation = one_hot(y_val)
  print(X_training.shape)

  hyperparams_learning_rate = [0.1, 0.01, 0.001, 0.0001]
  no_filters = 16  # kept fixed while the learning rate is varied
  n_epochs = 7500
  batch_size = 5000
  n_batches = X_training.shape[0] // batch_size

  for learning_rate in hyperparams_learning_rate:
    # Build every model in its own graph; otherwise each iteration would add
    # another complete network to the shared default graph.
    with tf.Graph().as_default():
      # Create the model
      x = tf.placeholder(tf.float32, [None, 784])
      # Define loss and optimizer
      y_ = tf.placeholder(tf.float32, [None, 10])
      # Build the graph for the deep net
      y_conv = deepnn(x, no_filters)

      with tf.name_scope('loss'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                                                logits=y_conv)
      cross_entropy = tf.reduce_mean(cross_entropy)

      with tf.name_scope('stochastic_gradient_optimizer'):
        train_step = tf.train.GradientDescentOptimizer(
            learning_rate).minimize(cross_entropy)

      with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        correct_prediction = tf.cast(correct_prediction, tf.float32)
      accuracy = tf.reduce_mean(correct_prediction)

      graph_location = tempfile.mkdtemp()
      print('Saving graph to: %s' % graph_location)
      train_writer = tf.summary.FileWriter(graph_location)
      train_writer.add_graph(tf.get_default_graph())
      train_writer.close()  # flush the graph to disk and release the file

      path = '/home/johannes/file' + str(learning_rate) + '.txt'

      # The log file is closed even if training is interrupted.
      with open(path, 'w') as learning, tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(n_epochs):
          validation_accuracy = accuracy.eval(feed_dict={
              x: X_validation, y_: Y_validation})
          t1 = time.time()
          for ii in range(n_batches):
            start = batch_size * ii
            stop = batch_size * (ii + 1)
            train_step.run(feed_dict={x: X_training[start:stop, :],
                                      y_: Y_training[start:stop, :]})
          t2 = time.time()
          print('step %d, validation accuracy %g' % (i, validation_accuracy))
          print(t2-t1)
          learning.write(str(validation_accuracy)+'\n')
          # Keep the results written so far if the long run is aborted.
          learning.flush()

if __name__ == '__main__':
  # Parse the flags known to this script; everything else is handed on to
  # tf.app.run together with the program name.
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--data_dir',
      default='/tmp/tensorflow/mnist/input_data',
      type=str,
      help='Directory for storing input data')
  FLAGS, unparsed = parser.parse_known_args()
  tf.app.run(argv=[sys.argv[0]] + unparsed, main=main)

# Tensorflow

For exercise 2.3 I copied the code above and made small changes to measure the execution time.

In [None]:
def main(_):
  """Measure the time of 500 training steps for each number of filters.

  For every filter count a fresh network is built and trained on the CPU
  (the session is configured with zero GPU devices) with gradient descent at
  learning rate 0.01. The elapsed wall-clock time of the 500 steps is printed.

  Args:
    _: unused; the argv list passed in by tf.app.run.
  """
  Dtrain, Dval, Dtest = mnist()  # only the training split is needed here
  X_train, y_train = Dtrain

  # Drop the channel axis and flatten each 28x28 image to a 784-vector.
  X_training = X_train[:, 0, :, :].reshape(X_train.shape[0], 784)
  Y_training = one_hot(y_train)
  print(X_training.shape)

  hyperparams_no_filters = [8, 16, 32, 64, 128, 256]
  n_steps = 500
  batch_size = 500

  # Measuring the runtime does not require whole epochs: every step reuses
  # the same mini-batch (examples 500..999), sliced once outside the timed
  # loop to keep Python overhead out of the measurement.
  batch_x = X_training[batch_size:2 * batch_size, :]
  batch_y = Y_training[batch_size:2 * batch_size, :]

  config = tf.ConfigProto(
      device_count = {'GPU': 0}
  )

  for no_filters in hyperparams_no_filters:
    # Build every model in its own graph; otherwise each iteration would add
    # another complete network to the shared default graph.
    with tf.Graph().as_default():
      # Create the model
      x = tf.placeholder(tf.float32, [None, 784])

      # Define loss and optimizer
      y_ = tf.placeholder(tf.float32, [None, 10])

      # Build the graph for the deep net
      y_conv = deepnn(x, no_filters)

      with tf.name_scope('loss'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                                                logits=y_conv)
      cross_entropy = tf.reduce_mean(cross_entropy)

      with tf.name_scope('stochastic_gradient_optimizer'):
        train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

      graph_location = tempfile.mkdtemp()
      print('Saving graph to: %s' % graph_location)
      train_writer = tf.summary.FileWriter(graph_location)
      train_writer.add_graph(tf.get_default_graph())
      train_writer.close()  # flush the graph to disk and release the file

      with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        feed = {x: batch_x, y_: batch_y}
        t1 = time.time()
        for step in range(n_steps):
          train_step.run(feed_dict=feed)
        t2 = time.time()
        print(t2-t1)
    

if __name__ == '__main__':
  # Flags this script does not recognise are forwarded, after the program
  # name, to tf.app.run, which then invokes main.
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--data_dir',
      help='Directory for storing input data',
      default='/tmp/tensorflow/mnist/input_data',
      type=str)
  FLAGS, unparsed = parser.parse_known_args()
  tf.app.run(argv=[sys.argv[0]] + unparsed, main=main)

# Plot generation (only for completeness)
The data were copied by hand or stored in text files. The actual plots can be found in the pdf.

Plot 1 (Learning rates):

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
hyperparams_learning_rate = [0.1, 0.01, 0.001, 0.0001]
data = []

for hyp_l_r in range(4):
    path = '/home/johannes/file' + str(hyperparams_learning_rate[hyp_l_r]) + '.txt'
    with open(path) as f:
        content = f.readlines()
    content = [float(x.strip())*100 for x in content]
    data.append(content)

x = list(range(0, len(content)))

plt.plot(x, data[0], label="0.1")
plt.plot(x, data[1], label="0.01")
plt.plot(x, data[2], label="0.001")
plt.plot(x, data[3], label="0.0001")
plt.axis([0, 7500,0,100])
plt.xlabel('Epochs')
plt.ylabel('Validation Error in %')

plt.legend(bbox_to_anchor=(1.01, 1), loc=2, borderaxespad=0., title='Learning rate')

plt.show()

Plot 2 (Scatter GPU/CPU):

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

GPU_tuple = [(4.2710609436, 8) , (5.86049294472, 16) , (8.88112902641, 32) , (17.1756880283, 64), (34.8885371685, 128), (77.2009470463, 256)]
CPU_tuple = [(43.9258110523, 8), (68.2844769955, 16), (173.806190968, 32), (446.362864017,64)]

x = [8, 16, 32, 64, 128, 256]
x2 = [8, 16, 32, 64]
y = [4.2710609436 , 5.86049294472, 8.88112902641, 17.1756880283, 34.8885371685, 77.2009470463]
y2 = [43.9258110523, 68.2844769955, 173.806190968, 446.362864017]
color = ['r','r','r','r','r','r','b','b','b','b']
sc1 = plt.scatter(x, y, c='r', alpha=0.5)
sc2 = plt.scatter(x2, y2, c='b', alpha=0.5)

plt.xlabel('Filter')
plt.ylabel('Ausführungszeit in Sekunden')

plt.legend((sc1, sc2),('GPU','CPU'), title='Berechnet auf:')

plt.show()