# Build a model to decode sequences of digits from images

In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range

## Load data

In [24]:
# Seed NumPy's global random generator so that the np.random.shuffle call in
# load_data produces the same train/validation split on every run.
np.random.seed(133)

def load_data(file_name, valid_size=0):
    """Load images and labels from a pickle file, optionally holding out a validation set.

    The pickle must contain a mapping with the keys ``"image"`` and ``"label"``.

    Args:
        file_name: path of the pickle file to read.
        valid_size: number of examples to hold out for validation. When it is
            falsy (the default) the data is returned as stored, without
            shuffling or splitting.

    Returns:
        ``(images, labels)`` when ``valid_size`` is falsy, otherwise
        ``(train_images, train_labels, valid_images, valid_labels)`` as four
        lists taken from a shuffled copy of the (image, label) pairs.
    """
    # Pickles are binary data: text mode ('r') fails on Python 3 and can
    # corrupt the stream on Windows.
    # NOTE: only unpickle files from a trusted source; pickle can execute code.
    with open(file_name, 'rb') as f:
        data = pickle.load(f)
    labels = data["label"]
    images = data["image"]
    if not valid_size:
        return images, labels

    # zip() is lazy on Python 3; np.random.shuffle and slicing need a real list.
    all_data = list(zip(images, labels))
    np.random.shuffle(all_data)
    valid_data = all_data[:valid_size]
    train_data = all_data[valid_size:]
    # Build each list explicitly so an empty split yields [] instead of
    # failing to unpack the result of zip(*[]).
    valid_images = [image for image, _ in valid_data]
    valid_labels = [label for _, label in valid_data]
    train_images = [image for image, _ in train_data]
    train_labels = [label for _, label in train_data]
    return train_images, train_labels, valid_images, valid_labels
valid_size = 1670  # about 5% of the data
train_images, train_labels, valid_images, valid_labels = load_data(
    "train.pickle", valid_size)
test_images, test_labels = load_data("test.pickle")

# Report every split as "<name>: <number of images> <number of labels>".
for split_name, split_images, split_labels in (
        ("train:", train_images, train_labels),
        ("validation:", valid_images, valid_labels),
        ("test:", test_images, test_labels)):
    print(split_name, len(split_images), len(split_labels))

train: 31732 31732
validation: 1670 1670
test: 13068 13068


In [35]:
# Number of values in one flattened image (presumably 32x32 pixels, 3 channels).
img_size = 32 * 32 * 3
# Label values 1..11 as float32; 11 represents "no more digit".
distinct_labels = np.arange(1, 12, dtype=np.float32)
distinct_labels_size = distinct_labels.shape[0]
# The blank ("no more digit") label is the last entry of distinct_labels.
blanc_label = distinct_labels[-1]

def reshape_image(image):
    """Return ``image`` flattened to a 1-D float32 array of ``img_size`` values."""
    flat = np.reshape(image, img_size)
    return flat.astype(np.float32)

def reshape_label(label):
    """Return the one-hot float32 encoding of ``label`` over ``distinct_labels``.

    Args:
        label: a label value to compare against every entry of
            ``distinct_labels``.

    Returns:
        A float32 array with the same length as ``distinct_labels`` holding
        1.0 where the entry equals ``label`` and 0.0 elsewhere.
    """
    # Fixed: the original read the misspelled name `distinc_labels`, which
    # raises NameError in a fresh interpreter session.
    return (distinct_labels == label).astype(np.float32)

def print_label(label):
    """Return the text shown for a label value.

    Label 10 is displayed as the digit "0", label 11 as a blank " ", and any
    other label as its integer value.
    """
    special_cases = ((10., "0"), (11.0, " "))
    for value, text in special_cases:
        if label == value:
            return text
    return str(int(label))

# Sanity checks: the label values, the one-hot encoding of 5, the display
# text of every label, and the blank label.
print(distinct_labels)
print(reshape_label(5.0))
print(list(map(print_label, distinct_labels)))
print(blanc_label)

[  1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.]
[ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.]
['1', '2', '3', '4', '5', '6', '7', '8', '9', '0', ' ']
11.0


## Build the graph

In [None]:
num_nodes = 64
train_subset = 5000

graph = tf.Graph()
with graph.as_default():

    def _random_weights(rows, cols):
        """Create a [rows, cols] variable drawn from a truncated normal (stddev 0.1)."""
        return tf.Variable(tf.truncated_normal([rows, cols], stddev=0.1))

    def _ones_row(size):
        """Create a [1, size] variable initialised to ones."""
        return tf.Variable(tf.ones([1, size]))

    # Parameters. Each group holds weights applied to the input (*x), weights
    # applied to the previous output (*m) and a bias (*b).
    # Input gate.
    ix = _random_weights(img_size, num_nodes)
    im = _random_weights(num_nodes, num_nodes)
    ib = _ones_row(num_nodes)
    # Forget gate.
    fx = _random_weights(img_size, num_nodes)
    fm = _random_weights(num_nodes, num_nodes)
    fb = _ones_row(num_nodes)
    # Memory cell update.
    cx = _random_weights(img_size, num_nodes)
    cm = _random_weights(num_nodes, num_nodes)
    cb = _ones_row(num_nodes)
    # Output gate.
    ox = _random_weights(img_size, num_nodes)
    om = _random_weights(num_nodes, num_nodes)
    ob = _ones_row(num_nodes)

    # Classifier weights and biases, mapping a cell output to one score per label.
    weights = _random_weights(num_nodes, distinct_labels_size)
    biases = tf.Variable(tf.ones([distinct_labels_size]))
        
    # Definition of the cell computation.
    
    def lstm_cell(i, o, state):
        """Apply one LSTM step and return ``(new_output, new_state)``.

        See e.g. http://arxiv.org/pdf/1402.1128v1.pdf. This formulation omits
        the connections between the previous state and the gates.

        Args:
            i: the input fed to the cell.
            o: the previous cell output.
            state: the previous cell state.
        """
        def pre_activation(input_weights, output_weights, bias):
            # Shared affine form: input term + previous-output term + bias.
            return tf.matmul(i, input_weights) + tf.matmul(o, output_weights) + bias

        input_gate = tf.sigmoid(pre_activation(ix, im, ib))
        forget_gate = tf.sigmoid(pre_activation(fx, fm, fb))
        candidate = pre_activation(cx, cm, cb)
        state = forget_gate * state + input_gate * tf.tanh(candidate)
        output_gate = tf.sigmoid(pre_activation(ox, om, ob))
        return output_gate * tf.tanh(state), state
    
    def lstm_cell_image(image,labels):
        """Unroll the LSTM cell over one image, one step per expected label.

        The same flattened image is fed at every step. The steps are the
        image's labels followed by one final blank label. Each call creates
        its own pair of non-trainable variables for the saved output/state.

        Args:
            image: pixel data accepted by ``reshape_image``.
            labels: sequence of label values for this image.

        Returns:
            ``(outputs, output_labels, dependencies)``: the cell output of
            every step (each of shape ``[1, num_nodes]``), the matching
            one-hot label vectors, and the assign ops that store the final
            output and state.
        """
        # Variables saving state across unrollings.
        saved_output = tf.Variable(tf.zeros([1, num_nodes]), trainable=False)
        saved_state = tf.Variable(tf.zeros([1, num_nodes]), trainable=False)
        # Fixed: tf.matmul needs matrix operands, so feed the flattened image
        # as a single row of shape [1, img_size] instead of a rank-1 vector.
        i = tf.constant(np.reshape(reshape_image(image), (1, img_size)))
        # The last prediction should be the blank label.
        step_labels = list(labels) + [blanc_label]
        outputs = []
        output_labels = []
        output, state = saved_output, saved_state
        for label in step_labels:
            output, state = lstm_cell(i, output, state)
            outputs.append(output)
            # Fixed: store one-hot rows. The softmax cross-entropy loss and
            # accuracy() both need labels of shape [n, distinct_labels_size],
            # not raw label values.
            output_labels.append((distinct_labels == label).astype(np.float32))
        # State saving.
        dependencies = [saved_output.assign(output), saved_state.assign(state)]
        return outputs, output_labels, dependencies
    
    def lstm_cell_data(images,labels):
        """Unroll the LSTM cell over every image of a dataset.

        Args:
            images: sequence of images.
            labels: sequence holding, for each image, its own label sequence.

        Returns:
            ``(outputs, output_labels, dependencies)``: the concatenated list
            of per-step cell outputs, a constant tensor built from all
            per-step labels, and the list of all state-saving assign ops.
        """
        outputs = []
        output_labels = []
        dependencies = []
        for image, image_labels in zip(images, labels):
            # Fixed: pass this image's own labels. The original passed the
            # whole `labels` collection and left `image_labels` unused.
            image_outputs, image_output_labels, image_dependencies = (
                lstm_cell_image(image, image_labels))
            outputs.extend(image_outputs)
            output_labels.extend(image_output_labels)
            dependencies.extend(image_dependencies)
        return outputs, tf.constant(output_labels), dependencies
    
    # Unroll the LSTM over each split; only the first `train_subset` training
    # examples are used.
    train_data, train_digits, train_dependencies = lstm_cell_data(
        train_images[:train_subset], train_labels[:train_subset])
    valid_data, valid_digits, valid_dependencies = lstm_cell_data(
        valid_images, valid_labels)
    # Fixed: the test split must go through lstm_cell_data like the others.
    # lstm_cell takes three arguments and returns only two values.
    test_data, test_digits, test_dependencies = lstm_cell_data(
        test_images, test_labels)
    
    def model(data):
        """Return the classifier logits for a batch of LSTM outputs.

        Args:
            data: either a single ``[n, num_nodes]`` tensor or a list/tuple of
                ``[1, num_nodes]`` tensors (what ``lstm_cell_data`` returns).

        Returns:
            A ``[n, distinct_labels_size]`` tensor of unnormalised scores.
        """
        if isinstance(data, (list, tuple)):
            # Fixed: tf.matmul cannot use a Python list of row tensors as a
            # matrix, so stack the rows along axis 0 first. The argument order
            # is the pre-1.0 TensorFlow one (concat_dim, values), matching the
            # rest of this file.
            data = tf.concat(0, data)
        return tf.matmul(data, weights) + biases
    
    
    # Make the state-saving assigns run before the training loss is evaluated.
    with tf.control_dependencies(train_dependencies):
        # Classifier: logits for every training step, then the mean softmax
        # cross-entropy against the training labels.
        logits = model(train_data)
        per_step_loss = tf.nn.softmax_cross_entropy_with_logits(
            logits, train_digits)
        loss = tf.reduce_mean(per_step_loss)
        
    
    # Optimizer: gradient descent with a staircase-decayed learning rate
    # (10.0, multiplied by 0.1 every 5000 steps) and gradients clipped to a
    # global norm of 1.25.
    global_step = tf.Variable(0)
    learning_rate = tf.train.exponential_decay(
        learning_rate=10.0,
        global_step=global_step,
        decay_steps=5000,
        decay_rate=0.1,
        staircase=True)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    grads_and_vars = optimizer.compute_gradients(loss)
    gradients, v = zip(*grads_and_vars)
    gradients, _ = tf.clip_by_global_norm(gradients, 1.25)
    # `optimizer` is rebound to the training op that applies the clipped
    # gradients and increments global_step.
    optimizer = optimizer.apply_gradients(
        zip(gradients, v), global_step=global_step)
    
    
    # Predictions: softmax probabilities for every split.
    # Fixed: train_prediction is fetched by the training loop but was never
    # defined. It is the softmax of the training logits.
    train_prediction = tf.nn.softmax(logits)
    with tf.control_dependencies(valid_dependencies):
        valid_prediction = tf.nn.softmax(model(valid_data))
    with tf.control_dependencies(test_dependencies):
        test_prediction = tf.nn.softmax(model(test_data))
  
  # TODO: sampling and validation eval (batch 1, no unrolling).

In [36]:
# Number of training iterations. Progress is reported when step % 100 == 0,
# so 801 steps (0..800) include a final report at step 800.
num_steps = 801

def accuracy(predictions, labels):
    """Return the percentage of rows whose predicted class matches the label.

    Both arguments are 2-D arrays with one row per example; the class of a
    row is the index of its largest entry.
    """
    predicted_classes = np.argmax(predictions, 1)
    expected_classes = np.argmax(labels, 1)
    hits = np.sum(predicted_classes == expected_classes)
    return 100.0 * hits / predictions.shape[0]

# Train for num_steps iterations, reporting loss and accuracies every 100
# steps, then report the accuracy on the test split.
with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')
    # Fixed: the loop body was not indented under the `for`, which is an
    # IndentationError.
    for step in range(num_steps):
        # One optimisation step. The training predictions and their labels
        # are fetched as numpy arrays in the same run.
        # NOTE: `train_prediction` must be defined in the graph-building cell.
        _, l, predictions, labels = session.run(
            [optimizer, loss, train_prediction, train_digits])
        if step % 100 == 0:
            print('Loss at step %d: %f' % (step, l))
            # Fixed: compare against the fetched numpy `labels`, not the
            # `train_digits` tensor, which np.argmax cannot evaluate.
            print('Training accuracy: %.1f%%' % accuracy(predictions, labels))
            # session.run() evaluates the validation predictions and labels
            # and returns them as numpy arrays, recomputing their graph
            # dependencies.
            print('Validation accuracy: %.1f%%' % accuracy(
                *session.run([valid_prediction, valid_digits])))
    print('Test accuracy: %.1f%%' % accuracy(
        *session.run([test_prediction, test_digits])))

[1, 2, 3, 5]
[1, 2, 3]
[1, 2, 3, 6, 7]
