# Build a model to decode sequences of digits from images

In [None]:
%matplotlib inline

from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range
import sys
import matplotlib.pyplot as plt
import os

## Load data

In [None]:
# Seed NumPy's global RNG so the np.random calls below (shuffle, choice) are repeatable.
np.random.seed(133)

def load_data(file_name,valid_ratio=0):
    """Load a pickled image/label dataset and optionally split off a validation set.

    The pickle is expected to hold a mapping with an "image" entry and a
    "label" entry; the two are paired element by element.

    Args:
        file_name: path of the pickle file to read.
        valid_ratio: fraction of the samples to move into the validation
            set. When it is not positive (the default) no split is made.

    Returns:
        ``(images, labels)`` exactly as stored when ``valid_ratio`` is not
        positive, otherwise
        ``(train_images, train_labels, valid_images, valid_labels)`` as four
        lists, shuffled with NumPy's global RNG.

    NOTE(review): unpickling can execute arbitrary code; only load files
    that come from a trusted source.
    """
    # Pickles are binary data: text mode breaks on Python 3 (and on Windows).
    with open(file_name, 'rb') as f:
        data=pickle.load(f)
    labels=data["label"]
    images=data["image"]
    if valid_ratio<=0:
        return images,labels

    valid_size=int(len(labels)*valid_ratio)
    # zip() is a lazy iterator on Python 3; materialize it so that it can be
    # shuffled in place and sliced.
    all_data=list(zip(images,labels))
    np.random.shuffle(all_data)

    def unzip(pairs):
        # zip(*[]) yields nothing to unpack, so an empty split is handled explicitly.
        if not pairs:
            return [],[]
        pair_images,pair_labels=zip(*pairs)
        return list(pair_images),list(pair_labels)

    valid_images,valid_labels=unzip(all_data[:valid_size])
    train_images,train_labels=unzip(all_data[valid_size:])
    return train_images,train_labels,valid_images,valid_labels
# Hold out about 5% of the training pickle as a validation set.
valid_ratio=.05
(loaded_train_data,loaded_train_labels,
 loaded_valid_data,loaded_valid_labels)=load_data("train.pickle",valid_ratio)
loaded_test_data,loaded_test_labels=load_data("test.pickle")

# Report the size of each split: number of images, then number of labels.
print("train:",*map(len,(loaded_train_data,loaded_train_labels)))
print("validation:",*map(len,(loaded_valid_data,loaded_valid_labels)))
print("test:",*map(len,(loaded_test_data,loaded_test_labels)))

In [None]:
# Keep only the first 100000 training samples so that the data fits in memory.
loaded_train_data,loaded_train_labels=(
    loaded_train_data[:100000],loaded_train_labels[:100000])

### Preprocess data

In [None]:
# Input geometry: square images of image_size pixels per side, num_channels channels.
image_size=32
num_channels=3

# Only the first max_labels digit positions of every sequence are decoded.
max_labels=2
# Eleven classes per position: indices 0..9 plus one trailing "blank" class.
num_labels=11
distinct_labels=np.arange(num_labels,dtype=np.float32)
# The blank (padding) class is the last candidate, i.e. 10.0.
blanc_label=distinct_labels[num_labels-1]

def to_one_hot(label,distincts=None):
    """Encode ``label`` as a float32 1-hot vector over the candidate values.

    ``distincts`` is the array of candidate values; when omitted the
    module-level ``distinct_labels`` is used. A label that matches no
    candidate produces an all-zero vector.
    """
    candidates=distinct_labels if distincts is None else distincts
    matches=(candidates == label)
    return matches.astype(np.float32)

    
def rgb2gray(rgb):
    """Collapse the last axis of an RGB(A) array into one gray value per pixel.

    Only the first three entries of the last axis are used, weighted
    0.299 / 0.587 / 0.114.
    See http://stackoverflow.com/questions/12201577/how-can-i-convert-an-rgb-image-into-grayscale-in-python
    """
    luma_weights=[0.299, 0.587, 0.114]
    colour_planes=rgb[...,:3]
    return np.dot(colour_planes, luma_weights)

def from_one_hot(label):
    """Return the index of the largest entry of a 1-hot encoded label."""
    hottest=np.argmax(label)
    return hottest

def preprocess_labels(labels):
    """Pad every label sequence to ``max_labels`` positions and 1-hot encode it.

    For each position, a missing digit becomes the blank label and a digit
    equal to the blank value (10) is replaced by 0. Position 0 is encoded
    over the candidates without the blank class, later positions over all
    of ``distinct_labels``.

    Returns:
        A list of ``max_labels`` float32 arrays, one per digit position,
        each holding one 1-hot row per input sequence.
    """
    # The first digit cannot be blank, so it only has the non-blank candidates.
    candidates_by_position=[
        distinct_labels[:-1] if position==0 else distinct_labels
        for position in range(max_labels)
    ]
    encoded=[[] for _ in range(max_labels)]
    for digits in labels:
        for position,(bucket,candidates) in enumerate(zip(encoded,candidates_by_position)):
            if position>=len(digits):
                digit=blanc_label  # sequence is shorter than max_labels: pad
            elif digits[position]==blanc_label:
                digit=0  # replace 10 by 0
            else:
                digit=digits[position]
            bucket.append(to_one_hot(digit,candidates))
    return [np.asanyarray(column,dtype=np.float32) for column in encoded]

def preprocess_images(images):
    """Pack the images into one float32 array shaped (-1, image_size, image_size, num_channels).

    No grayscale conversion is applied: every channel is kept.
    """
    batch_shape=(-1, image_size, image_size, num_channels)
    as_floats=np.asanyarray(images,dtype=np.float32)
    return as_floats.reshape(batch_shape).astype(np.float32)
    
def preprocess_data(images,labels):
    """Run the image and the label preprocessing and return both results as a pair."""
    processed_images=preprocess_images(images)
    processed_labels=preprocess_labels(labels)
    return processed_images,processed_labels
 

In [None]:
# Convert every split from the raw loaded lists into model-ready arrays.
train_data,train_labels=preprocess_data(images=loaded_train_data,labels=loaded_train_labels)
valid_data,valid_labels=preprocess_data(images=loaded_valid_data,labels=loaded_valid_labels)
test_data,test_labels=preprocess_data(images=loaded_test_data,labels=loaded_test_labels)

#### Display some labels and images to make sure that preprocessing ran fine

In [None]:
def display_image(image):
    """Show a single image in a new matplotlib figure."""
    # Open a fresh figure first so the image is not drawn into an earlier one.
    plt.figure()
    # NOTE(review): imshow expects float RGB data in [0, 1]; the arrays built by
    # preprocess_images are float32 - confirm the pixel value range.
    plt.imshow(image)
    plt.show()
    
def display_images(data,labels,name=None,num_images=5):
    """Print the array shapes, then show randomly picked samples with their decoded labels.

    Args:
        data: array of images; its first axis indexes the samples.
        labels: sequence of 1-hot label arrays, each indexed by sample.
        name: text inserted into the heading line.
        num_images: how many samples to draw (drawn with replacement).
    """
    print ("showing some labels and images for %s"%name)
    print ("data shape: ",data.shape)
    label_shapes=[position_labels.shape for position_labels in labels]
    print("labels shape: ",label_shapes)
    sample_count=data.shape[0]
    picked=np.random.choice(sample_count,num_images)
    for sample in picked:
        decoded=[from_one_hot(position_labels[sample]) for position_labels in labels]
        print(decoded)
        display_image(data[sample])
            
# display_images takes (data, labels, name, num_images); the raw loaded_* lists
# are not parameters of it, and passing them raised a TypeError.
display_images(train_data,train_labels,"train")
display_images(valid_data,valid_labels,"valid")
display_images(test_data,test_labels,"test")

In [None]:
class IndexGenerator(object):
    """Hands out successive start offsets of fixed-size windows over a sequence.

    Each call advances an internal step counter. While the sequence is
    longer than the batch, the offset wraps around so that
    ``offset + batch_size`` stays below the sequence length.
    """
    def __init__(self, length):
        """Remember the sequence length and start at step 0."""
        self._step=0
        self._length=length
    def _next(self,batch_size):
        """Return the start offset of the next window of ``batch_size`` items.

        When the sequence is not longer than ``batch_size`` there is no room
        to move the window, so 0 is returned instead of dividing by zero
        (or wrapping with a negative modulus).
        """
        span=self._length - batch_size
        offset=(self._step * batch_size) % span if span > 0 else 0
        self._step=self._step+1
        return offset

# One independent offset cursor per data split.
train_index_generator=IndexGenerator(length=len(train_data))
valid_index_generator=IndexGenerator(length=len(valid_data))
test_index_generator=IndexGenerator(length=len(test_data))

In [None]:
class BatchGenerator(object):
    """Produces batches of sample indices in which every label class is equally represented."""
    def __init__(self,labels):
        """Group the sample indices by class and shuffle each group.

        Args:
            labels: 2-D array of 1-hot rows, one per sample; the class of a
                row is its argmax along axis 1.
        """
        self.step=0
        labs=np.argmax(labels,1)
        self.indices=[np.where(labs==lab)[0] for lab in np.unique(labs)]
        for index_set in self.indices:
            np.random.shuffle(index_set)

    def _next(self,batch_size):
        """Return a shuffled list of sample indices for the next batch.

        Every class contributes ``batch_size // number_of_classes`` indices
        (fewer when the class itself has fewer samples), so the batch can
        be smaller than ``batch_size``.
        """
        if not self.indices:
            # No classes at all (empty label array): nothing to sample from.
            return []
        indices=[]
        # Floor division: "/" yields a float on Python 3, which is not a valid slice bound.
        batch=batch_size//len(self.indices)
        for index_set in self.indices:
            length=len(index_set)
            span=length - batch
            # A class with no more than `batch` samples has a single window,
            # starting at 0; this also avoids a zero or negative modulus.
            offset=(self.step * batch) % span if span > 0 else 0
            indices.extend(index_set[offset:offset+batch])
        self.step+=1
        np.random.shuffle(indices)
        return indices
        
        
    
# One balanced batch generator per digit position.
train_batch_generators=list(map(BatchGenerator,train_labels))
        
        

In [None]:
# Preview one balanced batch from each per-position generator.
for index,generator in enumerate(train_batch_generators):
    indices=generator._next(128)
    data=train_data[indices,:]
    labels=train_labels[index][indices,:]
    # Pass the heading by keyword only: giving it both positionally and by
    # keyword raised "got multiple values for argument 'name'".
    display_images(data,[labels],name="some images from the generated batches by generator %d"%(index))
    
    

In [None]:
# Sanity check: count how often each class occurs in one batch of the first generator.
generator=train_batch_generators[0]
generated_labels=train_labels[0][generator._next(128)].argmax(axis=1)
counts={}
for label in generated_labels:
    counts[label]=counts.get(label,0)+1

print(counts)


## Build the graph

In [None]:
# Convolution stack: square kernels of patch_size, and the output depth of each layer.
patch_size=5
conv1_size,conv2_size,conv3_size=48,64,128

# Width of the dense layer that ends the convolution stack (convolution features).
num_hidden=64
# Width of every LSTM gate / state vector.
lstm_num_nodes=128

# Pixel count of one image plane.
img_size=squared_image_size=image_size*image_size
# Build the whole model inside its own graph: a shared 3-layer convolution
# stack feeds one shared LSTM cell, and every digit position gets its own
# softmax classifier on top of the LSTM output.
# NOTE(review): the variables below are created without explicit names, so
# checkpoint compatibility presumably depends on their creation order -
# avoid reordering these statements.
graph = tf.Graph()
with graph.as_default():  
    
    #conv parameters
    conv1_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, num_channels, conv1_size], stddev=0.1))
    conv1_biases = tf.Variable(tf.zeros([conv1_size]))
    conv2_weights = tf.Variable(tf.truncated_normal(
          [patch_size, patch_size, conv1_size, conv2_size], stddev=0.1))
    conv2_biases = tf.Variable(tf.constant(1.0, shape=[conv2_size]))  
    conv3_weights = tf.Variable(tf.truncated_normal(
          [patch_size, patch_size, conv2_size, conv3_size], stddev=0.1))
    conv3_biases = tf.Variable(tf.zeros([conv3_size]))
    # The row count is evaluated left to right:
    # ((image_size // 4) * image_size) // 16 * conv3_size, i.e. 2048 for
    # image_size=32 and conv3_size=128. It must equal the flattened size of
    # pool3 in conv() below (presumably 4 * 4 * 128 after three stride-2 pools).
    layer4_weights = tf.Variable(tf.truncated_normal(
            [image_size // 4 * image_size // 16 * conv3_size, num_hidden], stddev=0.1))
    layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
    
    def conv(data):
        """Apply the three conv + relu + max-pool layers, then the dense relu layer.

        Returns a 2-D tensor with num_hidden features per input image. Every
        call adds new ops to the graph but reuses the same weight variables.
        """
        conv1 = tf.nn.conv2d(data, conv1_weights, [1, 1, 1, 1], padding='SAME')
        hidden1 = tf.nn.relu(conv1 + conv1_biases)
        pool1 = tf.nn.max_pool(hidden1, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')
        
        conv2 = tf.nn.conv2d(pool1, conv2_weights, [1, 1, 1, 1], padding='SAME')
        hidden2 = tf.nn.relu(conv2 + conv2_biases)
        pool2 = tf.nn.max_pool(hidden2, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')
        
        conv3 = tf.nn.conv2d(pool2, conv3_weights, [1, 1, 1, 1], padding='SAME')
        hidden3 = tf.nn.relu(conv3 + conv3_biases)
        pool3 = tf.nn.max_pool(hidden3, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')
        # Flatten everything except the batch axis before the dense layer.
        shape = pool3.get_shape().as_list()
        #print("shape: ",shape)
        reshape = tf.reshape(pool3, [-1, shape[1] * shape[2] * shape[3]])
        #print("reshaped: ",reshape.get_shape().as_list())
        return tf.nn.relu(tf.matmul(reshape, layer4_weights) + layer4_biases)
    
    # LSTM parameters, shared by every time step and by both the training
    # and the prediction paths. All biases start at one.
    # Input gate: input, previous output, and bias.
    ix = tf.Variable(tf.truncated_normal([num_hidden, lstm_num_nodes], stddev=0.1))
    im = tf.Variable(tf.truncated_normal([lstm_num_nodes, lstm_num_nodes], stddev=0.1))
    ib = tf.Variable(tf.ones([1, lstm_num_nodes]))
    # Forget gate: input, previous output, and bias.
    fx = tf.Variable(tf.truncated_normal([num_hidden, lstm_num_nodes], stddev=0.1))
    fm = tf.Variable(tf.truncated_normal([lstm_num_nodes, lstm_num_nodes], stddev=0.1))
    fb = tf.Variable(tf.ones([1, lstm_num_nodes]))
    # Memory cell: input, state and bias.                     
    cx = tf.Variable(tf.truncated_normal([num_hidden, lstm_num_nodes], stddev=0.1))
    cm = tf.Variable(tf.truncated_normal([lstm_num_nodes, lstm_num_nodes], stddev=0.1))
    cb = tf.Variable(tf.ones([1, lstm_num_nodes]))
    # Output gate: input, previous output, and bias.
    ox = tf.Variable(tf.truncated_normal([num_hidden, lstm_num_nodes], stddev=0.1))
    om = tf.Variable(tf.truncated_normal([lstm_num_nodes, lstm_num_nodes], stddev=0.1))
    ob = tf.Variable(tf.ones([1, lstm_num_nodes]))
    
    def lstm_cell(i, o, state):
        """Create a LSTM cell. See e.g.: http://arxiv.org/pdf/1402.1128v1.pdf
        Note that in this formulation, we omit the various connections between the
        previous state and the gates.

        Takes the input i, the previous output o and the previous state, and
        returns the pair (new output, new state)."""
        input_gate = tf.sigmoid(tf.matmul(i, ix) + tf.matmul(o, im) + ib)        
        forget_gate = tf.sigmoid(tf.matmul(i, fx) + tf.matmul(o, fm) + fb)
        update = tf.matmul(i, cx) + tf.matmul(o, cm) + cb
        state = forget_gate * state + input_gate * tf.tanh(update)
        output_gate = tf.sigmoid(tf.matmul(i, ox) + tf.matmul(o, om) + ob)
        return output_gate * tf.tanh(state), state
   
    #data to predict labels from
    tf_data = tf.placeholder(tf.float32, [None, image_size, image_size, num_channels])
    conv_data=conv(tf_data)
    
    # Zero initial LSTM output/state whose first dimension follows the
    # run-time batch size of tf_data.
    # NOTE(review): tf.concat(0, values) is the argument order of old
    # TensorFlow releases - confirm against the installed version.
    state_shape=tf.concat(0,[tf.slice(tf.shape(tf_data),[0],[1]),[lstm_num_nodes]])
    state = tf.zeros(state_shape)
    output = tf.zeros(state_shape)
    
    # Per digit position: training placeholders, prediction tensor and loss.
    tf_train_data=[]
    tf_train_labels=[]
    tf_predictions=[]
    losses=[]
    for i in range(max_labels) :
        num_labels_i= (num_labels-1) if i==0 else num_labels # first label has only 10 candidates
        
        # train data for the ith prediction model
        tf_data_i=tf.placeholder(tf.float32, [None, image_size, image_size, num_channels])
        tf_train_data.append(tf_data_i)
        conv_data_i=conv(tf_data_i)
        tf_labels_i=tf.placeholder(tf.float32, [None, num_labels_i])
        tf_train_labels.append(tf_labels_i)
        
        # The training path restarts from a zero output/state for every position.
        state_i_shape=tf.concat(0,[tf.slice(tf.shape(tf_data_i),[0],[1]),[lstm_num_nodes]])
        state_i = tf.zeros(state_i_shape)
        output_i = tf.zeros(state_i_shape)
        
        #compute rnn for the prediction data
        # output/state are carried over from the previous loop iteration, so
        # the i-th prediction has gone through i+1 cell steps on conv_data.
        output,state=lstm_cell(conv_data,output,state)
        
        #pass the i th data through the previous steps rnn
        # Same number of cell steps (i+1) as the prediction path above.
        for _ in range(i+1):
            output_i,state_i=lstm_cell(conv_data_i,output_i,state_i)
        
        # i th weights and biases.
        weights_i = tf.Variable(tf.truncated_normal([lstm_num_nodes, num_labels_i], stddev=0.1))
        biases_i = tf.Variable(tf.ones([num_labels_i]))
        
        def model(data):
            """Return the logits of the i-th classifier for the LSTM output `data`."""
            # NOTE(review): `hidden` is computed but never used - the logits
            # are taken from `data` directly. Confirm whether the relu was
            # meant to be applied.
            hidden=tf.nn.relu(data)
            return tf.matmul(data, weights_i) + biases_i
        
        # i th digit prediction
        prediction_i = tf.nn.softmax(model(output))
        tf_predictions.append(prediction_i)
        
        # i th loss
        logits_i = model(output_i)
        loss_i = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits_i, tf_labels_i))
        losses.append(loss_i)
        
    # The training objective is the sum of the per-position losses.
    loss=sum(losses)
    # NOTE(review): a learning rate of 0.5 is far above AdamOptimizer's
    # documented default of 0.001 - confirm that this is intentional.
    optimizer = tf.train.AdamOptimizer(0.5).minimize(loss)
    saver = tf.train.Saver()

In [None]:
# Batch sizes for the training, validation and test passes.
train_batch_size,valid_batch_size,test_batch_size=128,64,128

# Checkpoints are looked up in and written to this directory.
train_dir="/notebooks"
checkpoint_path=os.path.join(train_dir,"predict_digits.ckpt")

def accuracy(predictions, labels):
    """Return the percentage of rows whose arg-max class agrees between predictions and labels."""
    predicted_classes=np.argmax(predictions, axis=1)
    target_classes=np.argmax(labels, axis=1)
    hits=np.sum(predicted_classes == target_classes)
    return 100.0 * hits / predictions.shape[0]

def print_labels(labels_to_print,predictions_to_print):
    """Print the classes of the first five samples: targets first, then predictions.

    Both arguments are sequences of 2-D arrays (1-hot labels or class
    scores); each array is reduced with argmax along axis 1.
    """
    def first_classes(arrays):
        # Class index per row, limited to the first five rows of each array.
        return [np.argmax(scores,1)[:5] for scores in arrays]

    # The heading was previously misspelled as "tartet: ".
    print("target: ")
    print(first_classes(labels_to_print))
    print("predicted: ")
    print(first_classes(predictions_to_print))

# Train until the user interrupts the cell: restore or initialise the
# variables, then loop over balanced batches, periodically reporting losses
# and validation accuracy and saving checkpoints. On KeyboardInterrupt the
# model is evaluated on one batch of test data.
with tf.Session(graph=graph) as session:
    # Resume from the checkpoint recorded in train_dir when its file exists.
    ckpt = tf.train.get_checkpoint_state(train_dir)
    if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        saver.restore(session, ckpt.model_checkpoint_path)
    else:
        print("Created model with fresh parameters.")
        # NOTE(review): initialize_all_variables is presumably deprecated in
        # newer TensorFlow releases - confirm against the installed version.
        tf.initialize_all_variables().run()
    print('Initialized')
    try:
        step=0
        # No stopping criterion: the loop only ends through an exception.
        while True:
            # Every digit position draws its own balanced batch, so each
            # position is fed different images and labels.
            train_feed_dict={}
            for i,generator in enumerate(train_batch_generators):
                indices=generator._next(train_batch_size)
                data_i=train_data[indices,:]
                train_feed_dict[tf_train_data[i]]=data_i
                labels_i=train_labels[i][indices,:]
                train_feed_dict[tf_train_labels[i]]=labels_i
            session.run([optimizer],feed_dict=train_feed_dict)
            if (step % 20 == 0):
                # The losses are evaluated in a second run, i.e. after the
                # update above has been applied, on the same batch.
                train_loss=session.run(losses,feed_dict=train_feed_dict)
                print('Loss at step %d:' % (step), train_loss)
                
                # Validate on the next window of validation samples.
                valid_offset=valid_index_generator._next(valid_batch_size)
                valid_data_sample=valid_data[valid_offset:valid_offset+valid_batch_size]
                valid_predictions=session.run(tf_predictions,{tf_data:valid_data_sample})
                valid_labels_sample=[label_list[valid_offset:valid_offset+valid_batch_size] for label_list in valid_labels]
                print('Validation accuracy(%): ', [accuracy(predictions, labels) for predictions, labels
                                                   in zip(valid_predictions,valid_labels_sample)])
            # valid_labels_sample / valid_predictions come from the branch
            # above; 60 is a multiple of 20, so they are always set here.
            if (step % 60 == 0):
                print("showing some predictions")
                print_labels(valid_labels_sample,valid_predictions)
            # Note that this also saves at step 0.
            if(step % 1000==0):
                print("saving the variables")
                saver.save(session, checkpoint_path, global_step=step)
            step+=1
    except KeyboardInterrupt:        
        # Interrupting the cell ends training; report the accuracy on a
        # single batch of test data before the session is closed.
        test_offset=test_index_generator._next(test_batch_size)
        test_data_sample=test_data[test_offset:test_offset+test_batch_size]
        test_predictions=session.run(tf_predictions,{tf_data:test_data_sample})
        test_labels_sample=[label_list[test_offset:test_offset+test_batch_size] for label_list in test_labels]
        print('Test accuracy(%): ', [accuracy(predictions, labels) for predictions, labels
                                      in zip(test_predictions,test_labels_sample)])
        print("showing some test predictions")
        print_labels(test_labels_sample,test_predictions)