In [3]:
__doc__ = """
    This file was first used for working on the Single/Cropped Images and contains 
    the data preparation steps for that dataset. However, the really interesting neural networks 
    bits are present in SingleImages_Explore. The implementation here is more of a sanity check.
    
    Please look at SingleImages_Explore for a more comprehensive study of the dataset.
"""

In [None]:
### import the required libraries

import numpy as np
import scipy.ndimage as ndimage
import scipy.io as sio
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
import math
import os
import pylab as P
import skimage
from sklearn.cross_validation import train_test_split as ttsplit

In [None]:
## Locate the directory that holds the data files.

# The data is expected in a `data` folder that is a sibling of the directory
# this notebook is run from (i.e. <parent of cwd>/data).
data_parent = os.path.split(os.getcwd())[0]
data_path = os.path.join(data_parent, 'data')
if not os.path.exists(data_path):
    # Stop right here with an explicit exception that names the path that was tried,
    # so the problem is obvious before any of the loading cells run.
    raise IOError(
        "please define the data_path variable to where data sits "
        "(tried {!r})".format(data_path))
        

In [None]:
# Build the path of each of the three .mat files inside data_path ...
train_mat, test_mat, extra_mat = [
    os.path.join(data_path, mat_name)
    for mat_name in ('train_32x32.mat', 'test_32x32.mat', 'extra_32x32.mat')
]
# ... and load every file into its own dictionary.
train_data, test_data, extra_data = [
    sio.loadmat(mat_file) for mat_file in (train_mat, test_mat, extra_mat)
]

In [None]:
# Keys under which each loaded dictionary stores its images and its labels.
IMAGE_KEY = 'X'
LABEL_KEY = 'y'

# Print the image-array shape followed by the label-array shape of every split.
for loaded in (train_data, test_data, extra_data):
    print(loaded[IMAGE_KEY].shape)
    print(loaded[LABEL_KEY].shape)

# Number of labelled samples per split = length of the first axis of the label array.
TRAIN_DATASET_SIZE, TEST_DATASET_SIZE, EXTRA_DATASET_SIZE = [
    loaded[LABEL_KEY].shape[0] for loaded in (train_data, test_data, extra_data)
]

## Get an idea of the labels we are dealing with: collect the distinct values per split.
train_label_set = set(train_data[LABEL_KEY].reshape(TRAIN_DATASET_SIZE))
test_label_set = set(test_data[LABEL_KEY].reshape(TEST_DATASET_SIZE))
extra_label_set = set(extra_data[LABEL_KEY].reshape(EXTRA_DATASET_SIZE))

# Every split must use exactly the same set of label values as the training split.
for other_label_set in (test_label_set, extra_label_set):
    assert(train_label_set == other_label_set)

In [None]:
## The three splits use the same label values, so look at what those values are.
print(train_label_set)

## The labels are from 1 to 10, as already explained on the SVHN page.
## We replace 10 by 0.
def _labels_with_zero(raw_labels, size):
    """Return a flat copy of `raw_labels` in which every 10 has been replaced by 0.

    raw_labels -- label array as stored in a loaded .mat dictionary
    size       -- number of labels; the array is reshaped to this flat length

    The copy matters: `reshape` may hand back a view, and writing through a view
    would rewrite the labels inside the loaded dictionary. That would make the
    label-set sanity checks fail when the earlier cell is re-run, because some
    splits would hold 0..9 while others still hold 1..10.
    """
    flat = raw_labels.reshape(size).copy()
    flat[flat == 10] = 0
    return flat

train_labels_replace = _labels_with_zero(
    train_data[LABEL_KEY], TRAIN_DATASET_SIZE).astype(np.int32)

# extra_labels keeps the original dtype; extra_labels_replace is the int32 version.
extra_labels = _labels_with_zero(extra_data[LABEL_KEY], EXTRA_DATASET_SIZE)
extra_labels_replace = extra_labels.astype(np.int32)

test_labels = _labels_with_zero(
    test_data[LABEL_KEY], TEST_DATASET_SIZE).astype(np.int32)

# After the remap all three splits must again share one set of label values.
print(set(train_labels_replace))
assert(set(train_labels_replace) == set(test_labels))
assert(set(extra_labels_replace) == set(train_labels_replace))

# Number of distinct classes (size of the label set gathered before the remap).
num_labels = len(train_label_set)

In [None]:
## The labels are cleaned up; now let's work with the images themselves.
## The image array is provided as 32, 32, 3, 73257 (sample index last) -> let's see how this looks.

display_size = 10
# Pick display_size random training samples and draw them side by side,
# each titled with its (remapped) label.
rand_indices = np.random.randint(0, TRAIN_DATASET_SIZE, size=display_size)
fig = P.figure(figsize=(20, 20))
for position, sample_idx in enumerate(rand_indices, 1):
    a = fig.add_subplot(1, display_size, position)
    P.imshow(train_data[IMAGE_KEY][:, :, :, sample_idx])
    a.set_title(str(train_labels_replace[sample_idx]))
    

In [None]:
## Reorder the axes into a layout that is easier to reason about:
## first index selects the image, second the row, third the column
## and fourth the colour (r,g,b).

## The images are all aligned correctly, so it is enough to move the last axis to the front.
AXES_SAMPLE_FIRST = (3, 0, 1, 2)
train_dataset_unnorm = np.transpose(train_data[IMAGE_KEY], AXES_SAMPLE_FIRST).astype(np.float32)
test_images_unnorm = np.transpose(test_data[IMAGE_KEY], AXES_SAMPLE_FIRST).astype(np.float32)
extra_images_unnorm = np.transpose(extra_data[IMAGE_KEY], AXES_SAMPLE_FIRST).astype(np.float32)

In [None]:
# Show the colour values of the top-left pixel of the first image in each split.
for unnormalised in (train_dataset_unnorm, test_images_unnorm, extra_images_unnorm):
    print(unnormalised[0, 0, 0, :])

def convert_to_grayscale(images):
    """
        Collapse the trailing colour axis of `images` into one weighted channel.

        The last axis (length 3) is dotted with the column vector
        (0.299, 0.587, 0.114), so an input of shape (..., 3) yields an
        output of shape (..., 1).

        Weights taken from:
        https://groups.google.com/forum/#!topic/sci.image.processing/Jf-aTjPEgjc
    """
    channel_weights = [[0.299], [0.587], [0.114]]
    return np.dot(images, channel_weights)

def normalize(ds):
    """
        Convert `ds` to grayscale, then standardise it with its own statistics.

        The mean and the standard deviation are computed over the whole
        grayscale array at once (all images and pixels together), not per image.
        NOTE(review): every split passed in is therefore scaled by its own
        mean/std rather than by the training split's -- confirm this is intended.
    """
    gray = convert_to_grayscale(ds)
    centred = gray - np.mean(gray)
    return centred / np.std(gray)

train_dataset = normalize(train_dataset_unnorm).astype(np.float32)
test_images = normalize(test_images_unnorm).astype(np.float32)
extra_images = normalize(extra_images_unnorm).astype(np.float32)

print np.mean(train_dataset), np.std(train_dataset)
print np.mean(test_images), np.std(test_images)
print np.mean(extra_images), np.std(extra_images)

print train_dataset[0, 0, 0, :]
print test_images[0, 0, 0, :]
print extra_images[0, 0, 0, :]
print train_dataset.shape

In [None]:
# Number of training samples carrying each label value 0 .. len(train_label_set)-1.
for digit in range(len(train_label_set)):
    matching = train_labels_replace[train_labels_replace == digit]
    print(digit, len(matching))
    

In [None]:
## Show a fresh random sample of the normalised images to confirm that the
## sample index really is the first axis now.
rand_indices = np.random.randint(0, TRAIN_DATASET_SIZE, size=display_size)
fig = P.figure(figsize=(20, 20))
for position, sample_idx in enumerate(rand_indices, 1):
    a = fig.add_subplot(1, display_size, position)
    # the image id is the leftmost index; channel 0 is the only (gray) channel
    P.imshow(train_dataset[sample_idx, :, :, 0], cmap='Greys_r')
    a.set_title(str(train_labels_replace[sample_idx]))

In [None]:
## As we can see, these numbers are sometimes in white and other times in black.
## we don't want this to interfere with the decision as it is quite irrelevant

## lets try some edge detectors

from skimage import feature

# Run the Canny edge detector on the same random samples and show the edge maps.
fig = P.figure(figsize=(20, 20))
for position, sample_idx in enumerate(rand_indices, 1):
    a = fig.add_subplot(1, display_size, position)
    edge_map = feature.canny(train_dataset[sample_idx, :, :, 0])  ## image id is the leftmost index
    P.imshow(edge_map, cmap='Greys_r')
    a.set_title(str(train_labels_replace[sample_idx]))

In [None]:
# OK, so we have the edges now. The 1s appear to be of different thicknesses -- perhaps dilation would help? Below we try skeletonizing the edge maps instead.


from skimage import morphology

# Skeletonize the Canny edge map of each sampled image and show the result.
fig = P.figure(figsize=(20, 20))
for position, sample_idx in enumerate(rand_indices, 1):
    a = fig.add_subplot(1, display_size, position)
    edge_map = feature.canny(train_dataset[sample_idx, :, :, 0])  ## image id is the leftmost index
    final_image = morphology.skeletonize(edge_map)
    P.imshow(final_image, cmap='Greys_r')
    a.set_title(str(train_labels_replace[sample_idx]))


In [None]:
## Split the normalised training images and their labels into a training part and
## a validation part (10% held out, fixed random_state).
## The test data is used as is.

train_images, valid_images, train_labels, valid_labels = ttsplit(
    train_dataset, train_labels_replace, test_size=0.1, random_state=7)
    

In [None]:
# For every split print shape and dtype of the images, then of the labels.
for split_images, split_labels in ((train_images, train_labels),
                                   (valid_images, valid_labels),
                                   (test_images, test_labels)):
    for split_array in (split_images, split_labels):
        print(split_array.shape)
        print(split_array.dtype)

In [None]:
# Validation samples per label value.
for digit in range(len(train_label_set)):
    matching = valid_labels[valid_labels == digit]
    print(len(matching))

# Split the extra data as well: 0.5% goes to validation, the rest to training.
(train_additional_images, valid_additional_images,
 train_additional_labels, valid_additional_labels) = ttsplit(
    extra_images, extra_labels_replace, test_size=0.005, random_state=7)

In [None]:
# Samples per label value in the validation part taken from the extra data.
for digit in range(len(train_label_set)):
    matching = valid_additional_labels[valid_additional_labels == digit]
    print(len(matching))
    
    

In [None]:
# Append the extra-data parts to the matching train / validation arrays.
# NOTE: the results are assigned back to the same names, so running this cell a
# second time appends the extra data once more.
train_images = np.concatenate([train_images, train_additional_images])
train_labels = np.concatenate([train_labels, train_additional_labels])
valid_images = np.concatenate([valid_images, valid_additional_images])
valid_labels = np.concatenate([valid_labels, valid_additional_labels])

# For every split print shape and dtype of the images, then of the labels.
for split_images, split_labels in ((train_images, train_labels),
                                   (valid_images, valid_labels),
                                   (test_images, test_labels)):
    for split_array in (split_images, split_labels):
        print(split_array.shape)
        print(split_array.dtype)

In [None]:
# Number of samples per training mini-batch. The training set is trimmed to a
# multiple of this value below, and it is also the fixed first dimension of the
# training placeholders in the graph definition.
batch_size = 64

In [None]:
# Drop trailing training samples so that the training set size is an exact
# multiple of batch_size.
remainder = train_images.shape[0] % batch_size
if remainder:
    keep = slice(None, -remainder)
    train_images = train_images[keep]
    train_labels = train_labels[keep]

In [None]:
def randomize(images, labels):
    """Shuffle `images` and `labels` along their first axis with one shared permutation.

    Both arrays must have the same length along axis 0 (checked with an assert).
    The permutation is drawn from NumPy's global random state. Returns the
    shuffled pair (images, labels) as new arrays; the inputs are not modified.
    """
    n_samples = images.shape[0]
    assert(n_samples == labels.shape[0])
    order = np.random.permutation(n_samples)
    shuffled_images = images[order]
    shuffled_labels = labels[order]
    return shuffled_images, shuffled_labels

In [None]:
# Shuffle the validation split first and the training split second; each call keeps
# images and labels paired. The order of the two calls determines which random
# permutation each split receives.
valid_images, valid_labels = randomize(valid_images, valid_labels)
# The test split is not shuffled (its call is kept here, commented out).
#test_images, test_labels = randomize(test_images, test_labels)
train_images, train_labels = randomize(train_images, train_labels)

In [None]:
# Shape of the final training images, and what is left over after dividing the
# number of training labels by batch_size.
print(train_images.shape)
leftover = train_labels.shape[0] % batch_size
print(leftover)

In [None]:
# Whole mini-batches in one pass over the training set (integer division).
batches_per_epoch = train_labels.shape[0] // batch_size
print("num batches per epoch: {}".format(batches_per_epoch))

In [None]:
## save the data into a pickle file

import cPickle as pickle

# Output file name -> array that is pickled into that file.
f_to_data = dict([
    ('train_single_data.pkl', train_images),
    ('train_single_labels.pkl', train_labels),
    ('valid_single_data.pkl', valid_images),
    ('valid_single_labels.pkl', valid_labels),
    ('test_single_data.pkl', test_images),
    ('test_single_labels.pkl', test_labels),
])

In [None]:
# Write every array to its own pickle file inside data_path.
# A file that already exists is left untouched, so delete stale files by hand
# if the arrays need to be regenerated.
for k, v in f_to_data.items():
    f_path = os.path.join(data_path, k)
    if not os.path.exists(f_path):
        # Pickle output is a byte stream: open the file in binary mode so that no
        # platform-specific newline translation can touch it.
        with open(f_path, 'wb') as f:
            print("writing {}".format(f_path))
            pickle.dump(v, f)

In [None]:
## Define the evaluation metric before building the model that will be judged by it.
def accuracy(predictions, labels):
  """Percentage of rows of `predictions` whose arg-max column equals the matching label.

  predictions -- 2-D array with one row of per-class scores per sample
  labels      -- sequence of class indices, one per row of `predictions`

  Returns a value between 0.0 and 100.0. Raises AssertionError when the
  number of rows and the number of labels differ.
  """
  n_rows = predictions.shape[0]
  assert(n_rows == len(labels))
  predicted_classes = np.argmax(predictions, 1)
  n_correct = np.sum(predicted_classes == labels)
  return 100.0 * n_correct / n_rows

In [None]:
# Hyper-parameters of the convolutional network:
#   patch_size   -- side length of the square convolution kernels
#   depth        -- number of output channels of each convolution
#   num_hidden   -- units in the fully connected hidden layer
#   image_size   -- height and width of the input images
#   num_channels -- channels per input image (1: the images were converted to grayscale)
patch_size = 5
depth = 16
num_hidden = 64
image_size = 32
num_channels = 1

graph = tf.Graph()

with graph.as_default():

  # Input data.
  # Training batches are fed through placeholders with a fixed batch dimension;
  # the complete validation and test sets are embedded in the graph as constants.
  tf_train_dataset = tf.placeholder(
    tf.float32, shape=(batch_size, image_size, image_size, num_channels))
  tf_train_labels = tf.placeholder(tf.int32, shape=(batch_size))
  tf_valid_dataset = tf.constant(valid_images)
  tf_test_dataset = tf.constant(test_images)
  
  # Variables.
  # Weights are drawn from a truncated normal (stddev 0.1); layer-1 biases start at 0,
  # all other biases start at 1.0.
  layer1_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, num_channels, depth], stddev=0.1)) ## kernel 5 x 5 x 1 x 16; 32 x 32 input -> 28 x 28 x 16 (VALID, stride 1)
  layer1_biases = tf.Variable(tf.zeros([depth]))

  layer2_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, depth, depth], stddev=0.1))       ## kernel 5 x 5 x 16 x 16; 14 x 14 pooled input -> 5 x 5 x 16 (VALID, stride 2)
  layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))

  layer3_weights = tf.Variable(tf.truncated_normal(
      # Input size must equal the flattened output of the second conv stage in model().
      # NOTE(review): patch_size * patch_size * depth fits only because that output is
      # 5 x 5 x depth for 32 x 32 inputs -- it is not a general formula; re-derive it
      # if kernel sizes, strides or pooling change.
      # (earlier variant: [image_size // 4 * image_size // 4 * depth, num_hidden])
    [patch_size * patch_size * depth, num_hidden], stddev=0.1)) 
  layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))

  layer4_weights = tf.Variable(tf.truncated_normal(
      [num_hidden, num_labels], stddev=0.1))
  layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
  
  # Model.
  def model(data, is_training = False):
    """Build the forward pass for `data` and return the un-normalised class scores (logits).

    Stages: conv (VALID, stride 1) -> 4x4 average pool (stride 2, SAME) -> bias + ReLU
            -> conv (VALID, stride 2) -> 1x1 max pool (stride 1) -> bias + ReLU
            -> flatten -> fully connected + ReLU -> [dropout] -> fully connected.

    Dropout (keep_prob=0.5) is inserted only when `is_training` is true.
    All calls share the layer*_weights / layer*_biases variables defined above.
    `data` needs a fully known static shape because the flatten step reads it
    from get_shape().
    """
    conv = tf.nn.conv2d(data, layer1_weights, [1, 1, 1, 1], padding='VALID')
    pool = tf.nn.avg_pool(conv, [1, 4, 4, 1], [1,2,2,1], padding = 'SAME')
    hidden = tf.nn.relu(pool + layer1_biases)
    
    conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='VALID')
    # window 1x1 with stride 1: this pooling step leaves the tensor unchanged
    pool = tf.nn.max_pool(conv, [1,1,1,1], [1,1,1,1], padding = 'SAME')
    hidden = tf.nn.relu(pool + layer2_biases)
    # flatten (batch, rows, cols, channels) into (batch, rows * cols * channels)
    shape = hidden.get_shape().as_list()
    reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
    hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
    if is_training:
        hidden  = tf.nn.dropout(hidden, keep_prob=0.5)
    return tf.matmul(hidden, layer4_weights) + layer4_biases
  
  # Training computation: mean cross-entropy between the training logits (dropout on)
  # and the integer class labels.
  # NOTE(review): the arguments are passed positionally as (logits, labels), matching
  # the early TensorFlow API referenced below; later releases expect keyword
  # arguments here -- confirm against the installed version.
  logits = model(tf_train_dataset, True)
  loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits, tf_train_labels))
    
  # Optimizer.
  global_step = tf.Variable(0)  # count the number of steps taken.
    
  # params for exponential_decay API
  # https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#exponential_decay
  # values for learning_rate [0.05, 0.15, 0.2, 0.25, 0.5]

  # start at 0.15 and decay by a factor of 0.99 per 300 steps of global_step
  learning_rate = tf.train.exponential_decay(0.15, global_step, 300, 0.99)
  optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
  #optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(loss, global_step=global_step)

  # Predictions for the training, validation, and test data.
  # The validation and test passes call model() with is_training left at False (no dropout).
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
  test_prediction = tf.nn.softmax(model(tf_test_dataset))

In [None]:
# Total number of mini-batch updates to run.
num_steps = 75001

with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print('Initialized')
  for step in range(num_steps):
    # Walk through the (already shuffled) training set in consecutive windows of
    # batch_size samples; the modulo wraps the window start around once the end
    # of the data is reached, so a full batch can always be sliced.
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_data = train_images[offset:(offset + batch_size), :, :, :]
    batch_labels = train_labels[offset:(offset + batch_size)]
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    # One optimisation step; also fetch the loss and the softmax output for this batch.
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    # Every 2500 steps report the batch loss/accuracy and evaluate the whole validation set.
    if (step % 2500 == 0):
      print('Minibatch loss at step %d: %f' % (step, l))
      print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
      print('Validation accuracy: %.1f%%' % accuracy(
        valid_prediction.eval(), valid_labels))
  # After training, evaluate once on the test set; final_predictions is kept for
  # use after the session has closed.
  final_predictions = test_prediction.eval()
  print('Test accuracy: %.1f%%' % accuracy(final_predictions, test_labels))

In [None]:
print(final_predictions.shape)
# Arg-max over the class axis gives one predicted label per test image.
# The result is 1-D, so no transpose is needed.
predictions = np.argmax(final_predictions, 1)
print(predictions.shape)

# Show display_size random test images, each titled with the predicted label.
rand_indices = np.random.randint(0, test_images.shape[0], size=display_size)
fig = P.figure(figsize=(20, 20))
for idx, orig_idx in enumerate(rand_indices):
    a = fig.add_subplot(1, display_size, idx + 1)
    # The test images are single-channel; draw them with the gray colormap used by
    # the other grayscale previews instead of matplotlib's default colormap.
    P.imshow(test_images[orig_idx, :, :, 0], cmap='Greys_r')
    a.set_title(str(predictions[orig_idx]))