# Build Model for Training, and Test accuracy

In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
import tarfile
from IPython.display import display, Image
from PIL import Image
from scipy import ndimage
from sklearn.linear_model import LogisticRegression
from six.moves.urllib.request import urlretrieve
import cPickle as pickle
import tensorflow as tf

# Configure the matplotlib backend so that figures render inline in IPython.
%matplotlib inline
# Autosave the notebook every 300 seconds.
%autosave 300
# Print NumPy arrays in full instead of summarising them with "...".
# Every array printed later in the notebook (for example by accuracy())
# is therefore written out element by element.
np.set_printoptions(threshold=np.inf)

  inline backend."""
  'retina', 'jpeg', 'svg', 'pdf'.""")
  use `figure_formats` instead)""")
  """
  """)
  def _config_changed(self, name, old, new):


Autosaving every 300 seconds


### Deserialize

In [2]:
# Deserialize the training labels (one row per image).
train_pickle_file = 'SVHN_basic_train_labels.pickle'

# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open(train_pickle_file, 'rb') as f:
  # Index the unpickled dict directly so that no name keeps the whole payload
  # alive once the label array has been pulled out.
  train_labels = pickle.load(f)['train_image_labels']

print('Training set', train_labels.shape)
print(train_labels[0:2,:])

Training set (33402, 6)
[[ 2.  0.  0.  0.  1.  9.]
 [ 2.  0.  0.  0.  2.  3.]]


In [3]:
# Deserialize the training images.
train_pickle_file = 'SVHN_basic_train_data_basic.pickle'

# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open(train_pickle_file, 'rb') as f:
  # Index the unpickled dict directly so that no name keeps the whole payload
  # alive once the image array has been pulled out.
  train_dataset = pickle.load(f)['train_dataset']

print('Training set',train_dataset.shape)
# The first two label rows are echoed again here (labels, not pixels).
print(train_labels[0:2,:])

Training set (33402, 32, 32)
[[ 2.  0.  0.  0.  1.  9.]
 [ 2.  0.  0.  0.  2.  3.]]


In [4]:
# Deserialize the test labels, keeping only the first 1000 rows.
test_pickle_file = 'SVHN_basic_test_labels.pickle'

# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open(test_pickle_file, 'rb') as f:
  test_labels = pickle.load(f)['test_image_labels'][0:1000]

print('Test set', test_labels.shape)

# Deserialize the matching test images, again keeping the first 1000.
test_pickle_file = 'SVHN_basic_test_data_basic.pickle'

with open(test_pickle_file, 'rb') as f:
  test_dataset = pickle.load(f)['test_dataset'][0:1000]

print('Test set', test_dataset.shape)

Test set (1000, 6)
Test set (1000, 32, 32)


#### TODO: Randomize and Shuffle
#### TODO: Use Boosting
#### TODO: Use Simulated Annealing / Decay

### Reformat

In [5]:
# Geometry of every input image: 32 x 32 pixels with one greyscale channel.
image_size = 32
num_channels = 1

def reformat(dataset):
  """Return `dataset` as float32 with an explicit trailing channel axis.

  The array is reshaped to (-1, image_size, image_size, num_channels), so
  the leading dimension (number of images) is inferred from the data.
  """
  target_shape = (-1, image_size, image_size, num_channels)
  reshaped = dataset.reshape(target_shape)
  return reshaped.astype(np.float32)

# Give both image sets their channel axis, then report every resulting shape.
train_dataset, test_dataset = reformat(train_dataset), reformat(test_dataset)

print("Reformatted shapes of datasets\n")
print("train_dataset.shape:", train_dataset.shape,
      ", train_labels.shape:", train_labels.shape)
print("test_dataset.shape:", test_dataset.shape,
      ", test_labels.shape:", test_labels.shape)

Reformatted shapes of datasets

train_dataset.shape: (33402, 32, 32, 1) , train_labels.shape: (33402, 6)
test_dataset.shape: (1000, 32, 32, 1) , test_labels.shape: (1000, 6)


### Define Accuracy

In [6]:
def accuracy(predictions, labels, verbose=True):
    """Percentage of samples whose length and every real digit are all correct.

    Args:
      predictions: array indexed as [head, sample, class]. Head 0 holds the
        length scores; the graph left-pads them with 4 zero columns, which is
        why 4 is subtracted from their argmax. Heads 1-5 hold the class scores
        of digit positions 1-5.
      labels: array with one row per sample; column 0 is the length and
        columns 1-5 are the digit labels.
      verbose: when True (the default, matching the historical behaviour)
        the labels, the raw and masked predictions and the per-head hit
        matrix are printed. Pass False to silence this debug output.

    Returns:
      100 * (number of fully correct rows) / len(labels).

    A digit position p is treated as padding when p <= 5 - predicted_length.
    Padded positions have their prediction forced to 0 and are always counted
    as correct, so a row is correct when the length matches and every
    non-padded digit matches.
    """
    labels = np.asarray(labels)
    if verbose:
        print("labels:\n", labels)

    len_preds = np.argmax(predictions[0, :, :], axis=1) - 4
    # Shape (samples, 5): column k is the predicted class of digit k + 1.
    digit_preds = np.argmax(predictions[1:6, :, :], axis=2).T

    if verbose:
        print("len_preds:", len_preds)
        for position in range(5):
            print("digit_%d_preds:\n" % (position + 1), digit_preds[:, position])

    # padding[i, k] is True when digit k + 1 lies in front of a number of the
    # predicted length. The same mask drives both the zeroing and the
    # "count as correct" step; the original used two loops whose ranges
    # disagreed by one, leaving the last padded slot un-zeroed.
    padding = np.arange(5)[np.newaxis, :] < (5 - len_preds)[:, np.newaxis]
    digit_preds[padding] = 0

    if verbose:
        print("\n------------ AFTER --------------\n")
        for position in range(5):
            print("digit_%d_preds:\n" % (position + 1), digit_preds[:, position])

    len_hits = len_preds == labels[:, 0]
    digit_hits = (digit_preds == labels[:, 1:6]) | padding

    # One row per sample: [length ok, digit 1 ok, ..., digit 5 ok].
    complete_accuracy = np.column_stack((len_hits, digit_hits))
    if verbose:
        print("complete_accuracy:\n", complete_accuracy.shape, complete_accuracy)
    return 100.0 * np.sum(np.all(complete_accuracy, axis=1)) / len(labels)

In [7]:
def LecunLCN(X, image_shape, threshold=1e-4, radius=7, use_divisor=True):
    """Local Contrast Normalization.

    Reference: http://yann.lecun.com/exdb/publis/pdf/jarrett-iccv-09.pdf

    Args:
      X: image batch; converted to a float32 tensor and passed to
        tf.nn.conv2d with unit strides and 'SAME' padding.
      image_shape: sequence whose element [3] is the number of input
        channels of X.
      threshold: lower bound applied to the divisor so the division can
        never be by a value smaller than this.
      radius: side length of the square Gaussian window (the filter is
        radius x radius).
      use_divisor: when False only the subtractive step is applied.

    Returns:
      X minus its Gaussian-weighted local mean, optionally divided by the
      local standard deviation estimate.
    """
    # One Gaussian window per input channel, collapsed to a single output map.
    filter_shape = (radius, radius, image_shape[3], 1)
    filters = gaussian_filter(filter_shape)
    X = tf.convert_to_tensor(X, dtype=tf.float32)

    # Subtractive step: remove the Gaussian-weighted local average.
    local_mean = tf.nn.conv2d(X, filters, [1,1,1,1], 'SAME')
    centered_X = tf.sub(X, local_mean)

    if not use_divisor:
        return centered_X

    # Divisive step: convolving the squared, centred signal with the same
    # window gives a local variance estimate; its square root is the local
    # standard deviation.
    local_var = tf.nn.conv2d(tf.square(centered_X), filters, [1,1,1,1], 'SAME')
    local_std = tf.sqrt(local_var)

    # NOTE(review): reduce_mean is called without axes, so this is one scalar
    # over the whole tensor (every image in X), not a per-image mean as the
    # original variable name `per_img_mean` suggested. Behaviour is kept.
    mean_std = tf.reduce_mean(local_std)
    divisor = tf.maximum(mean_std, local_std)
    return tf.truediv(centered_X, tf.maximum(divisor, threshold))


def gaussian_filter(kernel_shape):
    """Build a normalised Gaussian convolution filter as a float32 tensor.

    Args:
      kernel_shape: (height, width, in_channels, out_channels). Only output
        channel 0 is filled; any further output channels stay zero.

    Returns:
      A tensor of shape `kernel_shape` whose entries sum to 1 over the whole
      filter (all input channels together).
    """
    # Offsets of each tap from the window centre. Rows and columns are
    # centred independently so that non-square windows are handled too
    # (the original reused the row midpoint for the columns).
    row_offsets = np.arange(kernel_shape[0]) - np.floor(kernel_shape[0] / 2.)
    col_offsets = np.arange(kernel_shape[1]) - np.floor(kernel_shape[1] / 2.)
    window = gauss(row_offsets[:, np.newaxis], col_offsets[np.newaxis, :])

    x = np.zeros(kernel_shape, dtype = float)
    # The same 2-D window is used for every input channel.
    x[:, :, :, 0] = window[:, :, np.newaxis]

    return tf.convert_to_tensor(x / np.sum(x), dtype=tf.float32)

def gauss(x, y, sigma=3.0):
    """Value of an isotropic 2-D Gaussian density at offset (x, y).

    The density is centred on the origin and has standard deviation `sigma`
    along both axes.
    """
    normaliser = 2 * np.pi * sigma ** 2
    exponent = -(x ** 2 + y ** 2) / (2. * sigma ** 2)
    return 1. / normaliser * np.exp(exponent)

#### Create Model

In [8]:
# Number of training samples per step; also fixes the placeholder shapes.
batch_size = 64
# Side length of every (square) convolution kernel.
patch_size = 5
# Output channel count of convolution layers 1-10, in order.
depth1 = 16
depth2 = 24
depth3 = 32
depth4 = 48
depth5 = 64
depth6 = 128
depth7 = 128
depth8 = 256
depth9 = 512
depth10 = 512
# Widths of the two fully connected layers that follow the convolutions.
num_hidden1 = 1024
num_hidden2 = 512
# NOTE(review): the label batches printed by this notebook contain the value
# 10 in digit columns, which is outside the class ids 0-9 that a 10-way
# softmax can represent -- confirm how the digit 0 is encoded in the pickles.
num_labels = 10 # 10 for 0-9
len_labels=6 # 6 for lengths 0-5; numbers longer than 5 digits are not supported yet
# Fed to the `keep_prob` placeholder during training, i.e. this is the
# probability of KEEPING a unit in tf.nn.dropout.
dropout = 0.50



graph = tf.Graph()

with graph.as_default():
  # Keep probability handed to tf.nn.dropout on the training path; the
  # training loop feeds it with `dropout`.
  keep_prob = tf.placeholder(tf.float32)

  # Input data.
  tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, num_channels))
  # One row per sample: column 0 is the length, columns 1-5 the digit labels.
  tf_train_labels = tf.placeholder(tf.int32, shape=(batch_size, len_labels))

  # Per-sample 0/1 weights, one placeholder per digit position; each one is
  # multiplied into the loss of the matching digit head.
  tf_digit_masks_1= tf.placeholder(tf.float32, shape=(batch_size))
  tf_digit_masks_2= tf.placeholder(tf.float32, shape=(batch_size))
  tf_digit_masks_3= tf.placeholder(tf.float32, shape=(batch_size))
  tf_digit_masks_4= tf.placeholder(tf.float32, shape=(batch_size))
  tf_digit_masks_5= tf.placeholder(tf.float32, shape=(batch_size))

  # The whole test set is embedded in the graph as a constant.
  tf_test_dataset = tf.constant(test_dataset)


  # Variables.
  # Ten convolution layers: Xavier-initialised kernels of size
  # patch_size x patch_size and biases initialised to 0.01.
  cnv_lyr1_weights = tf.get_variable("W1", shape=[patch_size, patch_size, num_channels, depth1], initializer=tf.contrib.layers.xavier_initializer_conv2d())
  cnv_lyr1_biases = tf.Variable(tf.constant(0.01, shape=[depth1]))

  cnv_lyr2_weights = tf.get_variable("W2", shape=[patch_size, patch_size, depth1, depth2], initializer=tf.contrib.layers.xavier_initializer_conv2d())
  cnv_lyr2_biases = tf.Variable(tf.constant(0.01, shape=[depth2]))

  cnv_lyr3_weights = tf.get_variable("W3", shape=[patch_size, patch_size, depth2, depth3], initializer=tf.contrib.layers.xavier_initializer_conv2d())
  cnv_lyr3_biases = tf.Variable(tf.constant(0.01, shape=[depth3]))

  cnv_lyr4_weights = tf.get_variable("W4", shape=[patch_size, patch_size, depth3, depth4], initializer=tf.contrib.layers.xavier_initializer_conv2d())
  cnv_lyr4_biases = tf.Variable(tf.constant(0.01, shape=[depth4]))

  cnv_lyr5_weights = tf.get_variable("W5", shape=[patch_size, patch_size, depth4, depth5], initializer=tf.contrib.layers.xavier_initializer_conv2d())
  cnv_lyr5_biases = tf.Variable(tf.constant(0.01, shape=[depth5]))

  cnv_lyr6_weights = tf.get_variable("W6", shape=[patch_size, patch_size, depth5, depth6], initializer=tf.contrib.layers.xavier_initializer_conv2d())
  cnv_lyr6_biases = tf.Variable(tf.constant(0.01, shape=[depth6]))

  cnv_lyr7_weights = tf.get_variable("W7", shape=[patch_size, patch_size, depth6, depth7], initializer=tf.contrib.layers.xavier_initializer_conv2d())
  cnv_lyr7_biases = tf.Variable(tf.constant(0.01, shape=[depth7]))

  cnv_lyr8_weights = tf.get_variable("W8", shape=[patch_size, patch_size, depth7, depth8], initializer=tf.contrib.layers.xavier_initializer_conv2d())
  cnv_lyr8_biases = tf.Variable(tf.constant(0.01, shape=[depth8]))

  cnv_lyr9_weights = tf.get_variable("W9", shape=[patch_size, patch_size, depth8, depth9], initializer=tf.contrib.layers.xavier_initializer_conv2d())
  cnv_lyr9_biases = tf.Variable(tf.constant(0.01, shape=[depth9]))

  cnv_lyr10_weights = tf.get_variable("W10", shape=[patch_size, patch_size, depth9, depth10], initializer=tf.contrib.layers.xavier_initializer_conv2d())
  cnv_lyr10_biases = tf.Variable(tf.constant(0.01, shape=[depth10]))


  # First fully connected layer. With the depths above the row count
  # evaluates to 8192, which is the flattened width that model() prints as
  # "conv_result_reshape".
  fc_weights_1 = tf.get_variable("WFC1", shape=[depth9 * depth9 // 64  * depth10 // 256, num_hidden1], initializer=tf.contrib.layers.xavier_initializer())
  fc_biases_1 = tf.Variable(tf.constant(0.1, shape=[num_hidden1]))

  fc_weights_2 = tf.get_variable("WFC2", shape=[num_hidden1  , num_hidden2], initializer=tf.contrib.layers.xavier_initializer())
  fc_biases_2 = tf.Variable(tf.constant(0.1, shape=[num_hidden2]))

  # Output heads: one for the sequence length (len_labels classes) and one
  # per digit position (num_labels classes each), all reading the same
  # num_hidden2-wide feature vector.
  length_weights = tf.get_variable("WL", shape=[num_hidden2 , len_labels], initializer=tf.contrib.layers.xavier_initializer())
  length_biases = tf.Variable(tf.constant(0.1, shape=[len_labels]))

  digit1_weights = tf.get_variable("WD1", shape=[num_hidden2 , num_labels], initializer=tf.contrib.layers.xavier_initializer())
  digit1_biases = tf.Variable(tf.constant(0.1, shape=[num_labels]))

  digit2_weights = tf.get_variable("WD2", shape=[num_hidden2 , num_labels],initializer=tf.contrib.layers.xavier_initializer())
  digit2_biases = tf.Variable(tf.constant(0.1, shape=[num_labels]))

  digit3_weights = tf.get_variable("WD3", shape=[num_hidden2 , num_labels], initializer=tf.contrib.layers.xavier_initializer())
  digit3_biases = tf.Variable(tf.constant(0.1, shape=[num_labels]))

  digit4_weights = tf.get_variable("WD4", shape=[num_hidden2, num_labels], initializer=tf.contrib.layers.xavier_initializer())
  digit4_biases = tf.Variable(tf.constant(0.1, shape=[num_labels]))

  digit5_weights = tf.get_variable("WD5", shape=[num_hidden2 , num_labels], initializer=tf.contrib.layers.xavier_initializer())
  digit5_biases = tf.Variable(tf.constant(0.1, shape=[num_labels]))

  def max_pool_2x2(x):
    """Max-pool `x` over 2x2 windows with stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
  
  # Model.
  def model(data, digit_masks_1,digit_masks_2,digit_masks_3,digit_masks_4,digit_masks_5, is_train, dropout_prob):
    """Build the forward pass and return the six heads' logits.

    Args:
      data: image batch tensor fed to the first convolution.
      digit_masks_1 .. digit_masks_5: accepted for interface compatibility;
        not used inside the forward pass (the masks are applied to the
        losses by the caller).
      is_train: accepted for interface compatibility; not used here.
      dropout_prob: keep probability passed to tf.nn.dropout.

    Returns:
      (logit_length, logit_1, logit_2, logit_3, logit_4, logit_5): raw,
      un-normalised scores for the length head and the five digit heads.
    """
    print("data.shape:",data.get_shape())

    # (weights, biases, pool_after) for each convolution layer, in order.
    # Pooling follows layers 2, 4 and 10 only. The original code also built
    # pooling ops after layers 6 and 8, but their outputs were never fed
    # forward; those dead ops are dropped here, so the data flow and the
    # flattened width expected by fc_weights_1 are unchanged.
    conv_layers = [
        (cnv_lyr1_weights, cnv_lyr1_biases, False),
        (cnv_lyr2_weights, cnv_lyr2_biases, True),
        (cnv_lyr3_weights, cnv_lyr3_biases, False),
        (cnv_lyr4_weights, cnv_lyr4_biases, True),
        (cnv_lyr5_weights, cnv_lyr5_biases, False),
        (cnv_lyr6_weights, cnv_lyr6_biases, False),
        (cnv_lyr7_weights, cnv_lyr7_biases, False),
        (cnv_lyr8_weights, cnv_lyr8_biases, False),
        (cnv_lyr9_weights, cnv_lyr9_biases, False),
        (cnv_lyr10_weights, cnv_lyr10_biases, True),
    ]

    activations = data
    for weights, biases, pool_after in conv_layers:
      # conv -> ReLU -> local response normalisation (-> optional 2x2 pool)
      conv = tf.nn.conv2d(activations, weights, [1, 1, 1, 1], padding='SAME')
      hidden = tf.nn.relu(conv + biases)
      activations = tf.nn.local_response_normalization(hidden)
      if pool_after:
        activations = max_pool_2x2(activations)

    # Flatten everything except the batch dimension.
    shape = activations.get_shape().as_list()
    conv_result_reshape = tf.reshape(activations, [shape[0], shape[1] * shape[2] * shape[3]])
    print("conv_result_reshape:",conv_result_reshape.get_shape())

    hidden = tf.nn.relu(tf.matmul(conv_result_reshape, fc_weights_1) + fc_biases_1)
    hidden = tf.nn.relu(tf.matmul(hidden, fc_weights_2) + fc_biases_2)

    # Apply Dropout
    hidden = tf.nn.dropout(hidden, dropout_prob)

    logit_length = tf.matmul(hidden, length_weights) + length_biases

    # The digit heads return raw affine scores, like the length head. The
    # original wrapped them in ReLU, which clamps every negative logit to 0
    # before softmax cross-entropy and blocks gradients for those classes.
    logit_1 = tf.matmul(hidden, digit1_weights) + digit1_biases
    logit_2 = tf.matmul(hidden, digit2_weights) + digit2_biases
    logit_3 = tf.matmul(hidden, digit3_weights) + digit3_biases
    logit_4 = tf.matmul(hidden, digit4_weights) + digit4_biases
    logit_5 = tf.matmul(hidden, digit5_weights) + digit5_biases

    return logit_length, logit_1,logit_2,logit_3,logit_4,logit_5
  
  # Training computation.
  # Forward pass on the training placeholders with dropout driven by
  # `keep_prob`.
  logit_len, logit_digit_1,logit_digit_2,logit_digit_3,logit_digit_4,logit_digit_5 \
    = model(tf_train_dataset, tf_digit_masks_1, tf_digit_masks_2, tf_digit_masks_3, tf_digit_masks_4, \
            tf_digit_masks_5, True, keep_prob)

  # Length head: column 0 of the labels is the target class.
  loss_len     = tf.nn.sparse_softmax_cross_entropy_with_logits(logit_len, tf_train_labels[:,0])

  # Digit heads: per-sample cross-entropy against label column k, multiplied
  # by the matching mask so that samples whose mask is 0 contribute no loss
  # for that digit position.
  # NOTE(review): the label batches printed by this notebook contain the
  # value 10 in digit columns, while these heads have num_labels = 10 classes
  # (ids 0-9) -- confirm the label encoding.
  loss_digit_1 = tf.nn.sparse_softmax_cross_entropy_with_logits(logit_digit_1, tf_train_labels[:,1])
  loss_digit_1 = loss_digit_1 * tf_digit_masks_1

  loss_digit_2 = tf.nn.sparse_softmax_cross_entropy_with_logits(logit_digit_2, tf_train_labels[:,2])
  loss_digit_2 =  loss_digit_2 * tf_digit_masks_2

  loss_digit_3 = tf.nn.sparse_softmax_cross_entropy_with_logits(logit_digit_3, tf_train_labels[:,3])
  loss_digit_3 =  loss_digit_3 * tf_digit_masks_3

  loss_digit_4 = tf.nn.sparse_softmax_cross_entropy_with_logits(logit_digit_4, tf_train_labels[:,4])
  loss_digit_4 =  loss_digit_4 * tf_digit_masks_4

  loss_digit_5 = tf.nn.sparse_softmax_cross_entropy_with_logits(logit_digit_5, tf_train_labels[:,5])
  # Debug output: prints the tensor descriptions (not values) at graph-build time.
  print("loss_digit_5 : BEFORE :", loss_digit_5,", tf_digit_masks_5: ", tf_digit_masks_5)
  loss_digit_5 =  loss_digit_5 * tf_digit_masks_5
  print("loss_digit_5 : AFTER :", loss_digit_5)

  # Total loss: sum over the six heads of each head's batch mean. Masked-out
  # samples still count in the denominator of the digit means.
  loss = tf.reduce_mean(loss_len) + tf.reduce_mean(loss_digit_1) + tf.reduce_mean(loss_digit_2) \
        + tf.reduce_mean(loss_digit_3) + tf.reduce_mean(loss_digit_4) + tf.reduce_mean(loss_digit_5)


  # Optimizer.
  # Plain gradient descent. The learning rate starts at 0.1 and is multiplied
  # by 0.97 every 10000 steps (staircase); minimize() increments global_step.
  global_step = tf.Variable(0, trainable=False)
  starter_learning_rate = 0.1
  learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                           10000, 0.97, staircase=True)
  optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss,global_step=global_step)
  #optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

  # Predictions for TRAIN
  # The length softmax has len_labels (6) columns; it is left-padded with 4
  # zero columns to 10 so it can be packed with the five 10-column digit
  # softmaxes. accuracy() undoes this shift by subtracting 4 from the argmax.
  len_preds = tf.nn.softmax(logit_len)
  pads = tf.constant([[0, 0], [4, 0]])
  len_preds=tf.pad(len_preds, pads, "CONSTANT")
  train_prediction = tf.pack([ len_preds, tf.nn.softmax(logit_digit_1), tf.nn.softmax(logit_digit_2), tf.nn.softmax(logit_digit_3), \
                              tf.nn.softmax(logit_digit_4), tf.nn.softmax(logit_digit_5) ])

  #Predictions for TEST
  # Same network and variables applied to the constant test set, with a keep
  # probability of 1 (no dropout). The mask placeholders are passed only
  # because model() takes them as parameters.
  logit_len_test, logit_digit_1_test, logit_digit_2_test, logit_digit_3_test, logit_digit_4_test, logit_digit_5_test \
    = model(tf_test_dataset, tf_digit_masks_1, tf_digit_masks_2, tf_digit_masks_3, tf_digit_masks_4, tf_digit_masks_5, False, 1.)
  len_preds_test = tf.nn.softmax(logit_len_test)
  pads_test = tf.constant([[0, 0], [4, 0]])
  len_preds_test=tf.pad(len_preds_test, pads_test, "CONSTANT")
  test_prediction = tf.pack([ len_preds_test, tf.nn.softmax(logit_digit_1_test), tf.nn.softmax(logit_digit_2_test), \
                             tf.nn.softmax(logit_digit_3_test), tf.nn.softmax(logit_digit_4_test), \
                             tf.nn.softmax(logit_digit_5_test) ])
   

data.shape: (64, 32, 32, 1)
conv_result_reshape: (64, 8192)
loss_digit_5 : BEFORE : Tensor("SparseSoftmaxCrossEntropyWithLogits_5:0", shape=(64,), dtype=float32) , tf_digit_masks_5:  Tensor("Placeholder_7:0", shape=(64,), dtype=float32)
loss_digit_5 : AFTER : Tensor("mul_4:0", shape=(64,), dtype=float32)
data.shape: (1000, 32, 32, 1)
conv_result_reshape: (1000, 8192)


In [9]:
# Create one mask array per digit position (5 for the 6-column labels used here).
# Entry N of the X-th array is 0 when the X-th digit position is not occupied
# by the N-th number in the batch, and 1 otherwise.
def explode(batch_labels_arr):
    """Build per-digit-position 0/1 masks from a batch of labels.

    Args:
      batch_labels_arr: 2-D array with one row per sample; column 0 is the
        number's length and the remaining columns are its digit slots.

    Returns:
      A tuple with one int32 array per digit slot (5 for 6-column labels).
      For a sample of length L the first `slots - L` entries are 0 and the
      last L entries are 1.

    Lengths are cast to int (labels may arrive as floats) and clipped to
    [0, slots], so a length larger than the number of slots yields an
    all-ones row instead of a wrongly sliced mask.
    """
    labels = np.asarray(batch_labels_arr)
    num_digits = labels.shape[1] - 1
    lengths = np.clip(labels[:, 0].astype(np.int64), 0, num_digits)

    # Index of the first occupied slot in every row.
    first_real = num_digits - lengths
    slot_index = np.arange(num_digits)[np.newaxis, :]
    mask_arr = (slot_index >= first_real[:, np.newaxis]).astype(np.int32)
    return tuple(mask_arr[:, slot] for slot in range(num_digits))
    
# Sanity check: show seven label rows, then the masks explode() derives
# from those same rows (displayed as the cell's result).
tmp_arr = train_labels[20:27,:]
print(tmp_arr)
explode(tmp_arr)

[[  1.   0.   0.   0.   0.   2.]
 [  3.   0.   0.   5.   1.   5.]
 [  2.   0.   0.   0.   4.   7.]
 [  2.   0.   0.   0.   8.   9.]
 [  3.   0.   0.   6.  10.   1.]
 [  2.   0.   0.   0.   2.   4.]
 [  2.   0.   0.   0.   5.   6.]]




(array([0, 0, 0, 0, 0, 0, 0], dtype=int32),
 array([0, 0, 0, 0, 0, 0, 0], dtype=int32),
 array([0, 1, 0, 0, 1, 0, 0], dtype=int32),
 array([0, 1, 1, 1, 1, 1, 1], dtype=int32),
 array([1, 1, 1, 1, 1, 1, 1], dtype=int32))

In [10]:
num_steps = 20001

with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print('Initialized')
  for step in range(num_steps):
    # Slide a batch-sized window through the training set, wrapping around
    # before the window could run past the last sample.
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_end = offset + batch_size
    batch_data = train_dataset[offset:batch_end, :, :,:]
    batch_labels = train_labels[offset:batch_end, :].astype(int)
    digit_1_mask, digit_2_mask,digit_3_mask,digit_4_mask,digit_5_mask = explode(batch_labels)
    feed_dict = {
        tf_train_dataset : batch_data,
        tf_train_labels : batch_labels,
        tf_digit_masks_1 : digit_1_mask,
        tf_digit_masks_2 : digit_2_mask,
        tf_digit_masks_3 : digit_3_mask,
        tf_digit_masks_4 : digit_4_mask,
        tf_digit_masks_5 : digit_5_mask,
        keep_prob : dropout,
    }
    _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)

    # Report only on every 500th step, skipping steps 0 and 1.
    if step <= 1 or step % 500 != 0:
      continue
    print("batch_labels:",batch_labels)
    print(" digit_1_mask, digit_2_mask,digit_3_mask,digit_4_mask,digit_5_mask:", digit_1_mask, digit_2_mask, \
        digit_3_mask,digit_4_mask,digit_5_mask)
    print('Minibatch loss at step %d: %f' % (step, l))
    print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))

  # Evaluate once on the constant test set after the last training step.
  print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

Initialized
batch_labels: [[ 2  0  0  0  1 10]
 [ 2  0  0  0  1  1]
 [ 2  0  0  0  5 10]
 [ 3  0  0  1  4  2]
 [ 2  0  0  0  5  9]
 [ 1  0  0  0  0  1]
 [ 1  0  0  0  0  8]
 [ 2  0  0  0  5  1]
 [ 2  0  0  0  6 10]
 [ 3  0  0  1  1  8]
 [ 2  0  0  0  7  3]
 [ 1  0  0  0  0  3]
 [ 2  0  0  0  9  8]
 [ 1  0  0  0  0  9]
 [ 2  0  0  0  8  1]
 [ 3  0  0  1  3  4]
 [ 3  0  0  3 10  5]
 [ 2  0  0  0  9  3]
 [ 2  0  0  0  3  5]
 [ 2  0  0  0  8  4]
 [ 1  0  0  0  0  4]
 [ 2  0  0  0  7 10]
 [ 2  0  0  0  1 10]
 [ 3  0  0  2  1  6]
 [ 1  0  0  0  0  7]
 [ 2  0  0  0  4  5]
 [ 2  0  0  0  3  1]
 [ 1  0  0  0  0  5]
 [ 2  0  0  0  9 10]
 [ 2  0  0  0  1  9]
 [ 2  0  0  0  8  6]
 [ 2  0  0  0  1  2]
 [ 2  0  0  0  2  2]
 [ 2  0  0  0  7  3]
 [ 4  0  2  5 10  6]
 [ 1  0  0  0  0  8]
 [ 3  0  0  2 10  1]
 [ 1  0  0  0  0  7]
 [ 3  0  0  1 10  6]
 [ 2  0  0  0  2  8]
 [ 3  0  0  1  1  8]
 [ 2  0  0  0  1  3]
 [ 4  0  2  2  6  8]
 [ 2  0  0  0  7  6]
 [ 2  0  0  0  4  7]
 [ 2  0  0  0  1  2]
 [ 2  0 

#### 