# Build a model to decode sequences of digits from images

In [None]:
%matplotlib inline

# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range
import sys
import matplotlib.pyplot as plt

## Load data

In [None]:
# Seed NumPy's global RNG so the shuffles and random draws below are repeatable.
np.random.seed(133)

def load_data(file_name, valid_size=0):
    """Load images and labels from a pickle file, optionally holding out a validation set.

    The pickle must contain a mapping with the keys ``"image"`` and ``"label"``
    that hold two parallel sequences.

    Args:
        file_name: Path of the pickle file to read.
        valid_size: Number of randomly chosen samples to hold out for
            validation. ``0`` (the default) disables the split.

    Returns:
        ``(images, labels)`` exactly as stored when ``valid_size`` is falsy,
        otherwise the four lists
        ``(train_images, train_labels, valid_images, valid_labels)``.
    """
    # NOTE(security): unpickling can execute arbitrary code; only load trusted files.
    # Pickles are binary data, so the file has to be opened in binary mode.
    with open(file_name, 'rb') as f:
        data = pickle.load(f)
    labels = data["label"]
    images = data["image"]
    if not valid_size:
        return images, labels

    # Shuffle positions instead of the (image, label) pairs themselves: this
    # works for lists and arrays alike and does not rely on zip() returning a
    # list (it is a one-shot iterator on Python 3).
    order = np.random.permutation(len(images))
    valid_positions = order[:valid_size]
    train_positions = order[valid_size:]
    valid_images = [images[i] for i in valid_positions]
    valid_labels = [labels[i] for i in valid_positions]
    train_images = [images[i] for i in train_positions]
    train_labels = [labels[i] for i in train_positions]
    return train_images, train_labels, valid_images, valid_labels
# Number of training samples held out for validation.
valid_size=1670 #about 5% of the data
# The training pickle is split into train/validation parts; the test pickle is loaded whole.
loaded_train_data,loaded_train_labels,loaded_valid_data,loaded_valid_labels=load_data("train.pickle",valid_size)
loaded_test_data,loaded_test_labels=load_data("test.pickle")
# Print the sample counts of every split as a quick sanity check.
print("train:",len(loaded_train_data),len(loaded_train_labels))
print("validation:",len(loaded_valid_data),len(loaded_valid_labels))
print("test:",len(loaded_test_data),len(loaded_test_labels))

### Preprocess data

In [None]:
# Number of digit positions that are encoded (and later predicted) per image.
max_labels = 3
# Number of label classes, counting the blank class used for padding.
num_labels = 11
# Every class id from 0 to num_labels - 1 as float32; one-hot vectors are built against it.
distinct_labels = np.arange(num_labels, dtype=np.float32)
# Class 0 is the blank label that pads sequences shorter than max_labels.
blanc_label = distinct_labels[0]
# Images are flattened to image_size * image_size values during preprocessing.
image_size = 32

def to_one_hot(label, distincts=None):
    """Encode ``label`` as a float32 one-hot vector over ``distincts``.

    When ``distincts`` is omitted the module-level ``distinct_labels`` is used.
    The result has a 1.0 wherever the candidate equals ``label`` and 0.0
    everywhere else.
    """
    candidates = distinct_labels if distincts is None else distincts
    matches = candidates == label
    return matches.astype(np.float32)


def from_one_hot(label):
    """Recover the class id from a one-hot (or score) vector.

    Vectors with ``num_labels`` entries map directly to their argmax; any other
    length is treated as an encoding without the blank class, so the argmax is
    shifted up by one.
    """
    position = np.argmax(label)
    shift = 0 if len(label) == num_labels else 1
    return position + shift

def preprocess_labels(labels):
    """Pad each label sequence to ``max_labels`` entries and one-hot encode it.

    Returns a list with one float32 array per digit position; row ``k`` of the
    array at position ``p`` is the one-hot vector of the ``p``-th label of
    sample ``k`` (the blank label when the sequence is shorter).
    """
    # The first label is never blank, so position 0 is encoded against the
    # candidate list without the blank class.
    candidates_by_position = [
        distinct_labels[1:] if position == 0 else distinct_labels
        for position in range(max_labels)
    ]
    columns = [[] for _ in candidates_by_position]
    for sequence in labels:
        for position, (column, candidates) in enumerate(zip(columns, candidates_by_position)):
            digit = sequence[position] if position < len(sequence) else blanc_label
            column.append(to_one_hot(digit, candidates))
    return [np.asanyarray(column, dtype=np.float32) for column in columns]
        

def rgb2gray(rgb):
    """Collapse the colour axis of an RGB(A) array into a single gray value.

    Only the first three channels of the last axis are used; they are combined
    with the weights 0.299 / 0.587 / 0.114.
    See http://stackoverflow.com/questions/12201577/how-can-i-convert-an-rgb-image-into-grayscale-in-python
    """
    channel_weights = [0.299, 0.587, 0.114]
    color_channels = rgb[..., :3]
    return np.dot(color_channels, channel_weights)

def preprocess_images(images):
    """Convert images to flattened float32 grayscale rows.

    The input is turned into a float32 array, reduced to gray with
    ``rgb2gray`` and reshaped so that every image becomes one row of
    ``image_size * image_size`` values.
    """
    pixels_per_image = image_size * image_size
    as_array = np.asanyarray(images, dtype=np.float32)
    flat_gray = rgb2gray(as_array).reshape((-1, pixels_per_image))
    return flat_gray.astype(np.float32)
    
def preprocess_data(images, labels):
    """Preprocess images and labels together and return them as a pair."""
    processed_images = preprocess_images(images)
    processed_labels = preprocess_labels(labels)
    return processed_images, processed_labels
 

In [None]:
# Preprocess every split: images become flattened grayscale rows and labels
# become a list of one-hot arrays (one array per digit position).
train_data,train_labels=preprocess_data(loaded_train_data,loaded_train_labels)
valid_data,valid_labels=preprocess_data(loaded_valid_data,loaded_valid_labels)
test_data,test_labels=preprocess_data(loaded_test_data,loaded_test_labels)

#### Display some labels and images to make sure that preprocessing ran fine

In [None]:
def display_image(image):
    """Show a single image in a new matplotlib figure."""
    plt.figure()
    plt.imshow(image)
    plt.show()
    
def display_images(data, labels, loaded_data, loaded_labels, name=None, num_images=5):
    """Print and plot a few random samples to eyeball the preprocessing.

    Args:
        data: 2-D array with one flattened ``image_size * image_size`` image per row.
        labels: List of one-hot (or score) arrays, one per digit position, whose
            rows line up with the rows of ``data``.
        loaded_data: Original images aligned with ``data``, or ``None`` to skip
            showing the originals.
        loaded_labels: Original labels aligned with ``loaded_data``; only read
            when ``loaded_data`` is not ``None``.
        name: Text inserted into the heading line.
        num_images: How many random samples to show (drawn with replacement).
    """
    print("showing some labels and images for %s" % name)
    print("data shape: ", data.shape)
    print("labels shape: ", [label.shape for label in labels])
    total_images = data.shape[0]
    for i in np.random.choice(total_images, num_images):
        print([from_one_hot(label[i]) for label in labels])
        display_image(data[i].reshape(image_size, image_size))
        # Identity check: `!= None` is evaluated element-wise on NumPy arrays
        # and the resulting array cannot be used as an `if` condition.
        if loaded_data is not None:
            print(loaded_labels[i])
            display_image(loaded_data[i])
            
# Show a few preprocessed samples next to their originals for every split.
display_images(train_data,train_labels,loaded_train_data,loaded_train_labels,"train")
display_images(valid_data,valid_labels,loaded_valid_data,loaded_valid_labels,"valid")
display_images(test_data,test_labels,loaded_test_data,loaded_test_labels,"test")

In [None]:
class IndexGenerator(object):
    """Produce successive start offsets for slicing fixed-size batches.

    Every call to ``_next`` returns an offset such that
    ``data[offset:offset + batch_size]`` stays inside a sequence of ``length``
    items; the offsets advance by ``batch_size`` and wrap around.
    """

    def __init__(self, length, batch_size):
        """Remember the sequence length and the batch size; start at step 0."""
        self._step = 0
        self._length = length
        self._batch_size = batch_size

    def _next(self):
        """Return the start offset of the next batch and advance the step counter."""
        span = self._length - self._batch_size
        if span > 0:
            offset = (self._step * self._batch_size) % span
        else:
            # The data holds at most one batch: the only usable offset is 0.
            # (A zero span would otherwise divide by zero and a negative span
            # would produce negative offsets.)
            offset = 0
        self._step += 1
        return offset
        
# Batch sizes for the training, validation and test samples.
train_batch_size=1024
valid_batch_size=128
test_batch_size=256
# One offset generator per split, each stepping through its data in batch-sized strides.
train_index_generator=IndexGenerator(len(train_data),train_batch_size)
valid_index_generator=IndexGenerator(len(valid_data),valid_batch_size)
test_index_generator=IndexGenerator(len(test_data),test_batch_size)
# Disabled debugging output: uncomment to print a few generated offsets.
#print("train indexes:",train_index_generator._next(),train_index_generator._next(),train_index_generator._next(),train_index_generator._next())
#for i in range(100):
#    print(valid_index_generator._next())

In [None]:
class BatchGenerator(object):
    """Generate batches of row indices, balancing blank labels against the rest.

    When the label array has a column for the blank class (class 0), every
    batch contains ``batch_size // num_classes`` blank rows and is filled up
    with non-blank rows. Label arrays without the blank column are simply
    walked through in a shuffled order.
    """

    def __init__(self, labels, batch_size, num_classes=None):
        """Partition the row indices of ``labels`` for balanced batching.

        Args:
            labels: 2-D one-hot array with one row per sample.
            batch_size: Number of indices returned by every ``_next`` call.
            num_classes: Column count of a label array that includes the blank
                class. Defaults to the module-level ``num_labels``.
        """
        if num_classes is None:
            num_classes = num_labels
        self.step = 0
        self.batch_size = batch_size
        self.length = labels.shape[0]
        self.with_zeros = (labels.shape[1] == num_classes)
        indices = np.arange(labels.shape[0])
        self.indices = indices
        np.random.shuffle(indices)
        if self.with_zeros:
            # The mask has to follow the shuffled order: indexing the shuffled
            # indices with a mask computed in row order would select arbitrary
            # rows instead of the blank ones.
            is_blank = (np.argmax(labels, 1) == 0)[indices]
            self.zero_indices = indices[is_blank]
            self.other_indices = indices[~is_blank]
            self.zero_count = self.zero_indices.shape[0]
            self.other_count = self.other_indices.shape[0]
            # Floor division keeps the size an int on Python 3 so it can be
            # used as a slice bound.
            self.zeroes_batch_size = batch_size // num_classes
            self.other_batch_size = batch_size - self.zeroes_batch_size

    def _offset(self, size, count):
        """Return the wrapping start offset of the current step for chunks of ``size`` out of ``count``."""
        span = count - size
        # With at most one chunk available the only usable offset is 0; this
        # also avoids a modulo by zero or by a negative number.
        return (self.step * size) % span if span > 0 else 0

    def _next(self):
        """Return the next batch of row indices in random order.

        The result is a list when blank balancing is active and a NumPy array
        otherwise; both can be used to index the data and label arrays.
        """
        if self.with_zeros:
            indices = []
            zero_offset = self._offset(self.zeroes_batch_size, self.zero_count)
            indices.extend(self.zero_indices[zero_offset:zero_offset + self.zeroes_batch_size])
            other_offset = self._offset(self.other_batch_size, self.other_count)
            indices.extend(self.other_indices[other_offset:other_offset + self.other_batch_size])
            assert len(indices)==self.batch_size, "foung bad indices size: %d!=%d"%(len(indices),self.batch_size)
        else:
            offset = self._offset(self.batch_size, self.length)
            # Copy the slice so that shuffling the batch does not reorder the
            # stored index array through the view.
            indices = self.indices[offset:offset + self.batch_size].copy()
        self.step += 1
        np.random.shuffle(indices)
        return indices
    
# One batch generator per digit position, built from that position's one-hot training labels.
train_batch_generators=[BatchGenerator(labels,train_batch_size) for labels in train_labels]
        
        

In [None]:
# Draw one batch from every per-position generator and display a few of its
# samples; no original images/labels are passed, so only the preprocessed view is shown.
for index,generator in enumerate(train_batch_generators):  
    #generator.step=31732
    indices=generator._next()
    data=train_data[indices,:]
    labels=train_labels[index][indices,:]
    display_images(data,[labels],None,None,name="some images from the generated batches by generator %d"%(index))
    
    

In [None]:
# Print the class ids found at label position 1 for a batch drawn by the position-2 generator.
# NOTE(review): the generator index (2) and the label index (1) differ — confirm this is intentional.
generator=train_batch_generators[2]
print(np.argmax(train_labels[1][generator._next(),:],1))


## Build the graph

In [None]:
# Build the TensorFlow graph: a single LSTM cell (shared weights) is applied
# once per digit position and every position gets its own softmax classifier.
# NOTE(review): tf.concat(0, values) and the positional
# softmax_cross_entropy_with_logits(logits, labels) call follow the pre-1.0
# TensorFlow API — confirm the installed version.
squared_image_size=image_size*image_size
img_size=squared_image_size
lstm_num_nodes=128
graph = tf.Graph()
with graph.as_default():   
    
    # Input gate: input, previous output, and bias.
    ix = tf.Variable(tf.truncated_normal([img_size, lstm_num_nodes], stddev=0.1))
    im = tf.Variable(tf.truncated_normal([lstm_num_nodes, lstm_num_nodes], stddev=0.1))
    ib = tf.Variable(tf.ones([1, lstm_num_nodes]))
    # Forget gate: input, previous output, and bias.
    fx = tf.Variable(tf.truncated_normal([img_size, lstm_num_nodes], stddev=0.1))
    fm = tf.Variable(tf.truncated_normal([lstm_num_nodes, lstm_num_nodes], stddev=0.1))
    fb = tf.Variable(tf.ones([1, lstm_num_nodes]))
    # Memory cell: input, state and bias.                     
    cx = tf.Variable(tf.truncated_normal([img_size, lstm_num_nodes], stddev=0.1))
    cm = tf.Variable(tf.truncated_normal([lstm_num_nodes, lstm_num_nodes], stddev=0.1))
    cb = tf.Variable(tf.ones([1, lstm_num_nodes]))
    # Output gate: input, previous output, and bias.
    ox = tf.Variable(tf.truncated_normal([img_size, lstm_num_nodes], stddev=0.1))
    om = tf.Variable(tf.truncated_normal([lstm_num_nodes, lstm_num_nodes], stddev=0.1))
    ob = tf.Variable(tf.ones([1, lstm_num_nodes]))
    
    def lstm_cell(i, o, state):
        """Create a LSTM cell. See e.g.: http://arxiv.org/pdf/1402.1128v1.pdf
        Note that in this formulation, we omit the various connections between the
        previous state and the gates.

        Takes the input batch ``i``, the previous output ``o`` and the previous
        cell ``state``; returns the pair ``(new_output, new_state)``."""        
        input_gate = tf.sigmoid(tf.matmul(i, ix) + tf.matmul(o, im) + ib)        
        forget_gate = tf.sigmoid(tf.matmul(i, fx) + tf.matmul(o, fm) + fb)
        update = tf.matmul(i, cx) + tf.matmul(o, cm) + cb
        state = forget_gate * state + input_gate * tf.tanh(update)
        output_gate = tf.sigmoid(tf.matmul(i, ox) + tf.matmul(o, om) + ob)
        return output_gate * tf.tanh(state), state
   
    #data to predict labels from
    tf_data = tf.placeholder(tf.float32, [None, squared_image_size])
    
    # Zero initial output/state for the prediction path; the batch dimension is
    # taken from tf_data's runtime shape so any number of rows can be fed.
    state_shape=tf.concat(0,[tf.slice(tf.shape(tf_data),[0],[1]),[lstm_num_nodes]])
    state = tf.zeros(state_shape)
    output = tf.zeros(state_shape)
    
    # Per-position placeholders, prediction tensors and losses, indexed by digit position.
    tf_train_data=[]
    tf_train_labels=[]
    tf_predictions=[]
    losses=[]
    for i in range(max_labels) :
        num_labels_i= (num_labels-1) if i==0 else num_labels # first label has only 10 candidates
        
        # train data for the ith prediction model
        tf_data_i=tf.placeholder(tf.float32, [train_batch_size, squared_image_size])
        tf_train_data.append(tf_data_i)
        tf_labels_i=tf.placeholder(tf.float32, [train_batch_size, num_labels_i])
        tf_train_labels.append(tf_labels_i)
        
        # the training path of every position starts again from a zero output/state
        state_i = tf.zeros([train_batch_size, lstm_num_nodes])
        output_i = tf.zeros([train_batch_size, lstm_num_nodes])
        
        #compute rnn for the prediction data
        # (output/state are carried over between positions, so position i has seen i+1 cell steps)
        output,state=lstm_cell(tf_data,output,state)
        
        #pass the i th data through the previous steps rnn
        # (the same training batch is fed to the cell i+1 times)
        for _ in range(i+1):
            output_i,state_i=lstm_cell(tf_data_i,output_i,state_i)
        
        # i th weights and biases.
        weights_i = tf.Variable(tf.truncated_normal([lstm_num_nodes, num_labels_i], stddev=0.1))
        biases_i = tf.Variable(tf.ones([num_labels_i]))
        
        # i th digit prediction
        # (computed from the prediction path, with the same classifier weights as the loss)
        prediction_i = tf.nn.softmax(tf.matmul(output, weights_i) + biases_i)
        tf_predictions.append(prediction_i)
        
        # i th loss
        # (mean softmax cross-entropy of the training path against this position's labels)
        logits_i = tf.matmul(output_i, weights_i) + biases_i
        loss_i = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits_i, tf_labels_i))
        losses.append(loss_i)
        
    # Total loss is the sum of the per-position losses, minimised by plain
    # gradient descent with a learning rate of 0.5.
    loss=sum(losses)
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

In [None]:
# Upper bound on the number of training steps; the training loop below can
# also be stopped early with a KeyboardInterrupt.
num_steps = 1000001

def accuracy(predictions, labels):
    """Return the percentage of rows whose predicted class matches the label.

    Both arguments are 2-D arrays with one row per sample; the class of a row
    is the position of its largest value.
    """
    predicted_classes = np.argmax(predictions, 1)
    expected_classes = np.argmax(labels, 1)
    hits = np.sum(predicted_classes == expected_classes)
    return 100.0 * hits / predictions.shape[0]

# Train the model, reporting the loss and the validation accuracy every 10 steps.
# NOTE(review): the test-set evaluation lives in the KeyboardInterrupt handler,
# so it only runs when training is interrupted; if the loop finishes all
# num_steps normally it is skipped — confirm this is intended.
with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')
    try:
        for step in range(num_steps):
            # Every digit position gets its own batch drawn by its own generator.
            train_feed_dict={}
            for i,generator in enumerate(train_batch_generators):  
                #generator.step=31732
                indices=generator._next()
                data_i=train_data[indices,:]
                train_feed_dict[tf_train_data[i]]=data_i
                labels_i=train_labels[i][indices,:]
                train_feed_dict[tf_train_labels[i]]=labels_i
            session.run([optimizer],feed_dict=train_feed_dict)
            if (step % 10 == 0):
                # The loss is re-evaluated on the batch that was just trained on.
                train_loss=session.run([loss],feed_dict=train_feed_dict)
                print('Loss at step %d:' % (step), train_loss)
                
                # Score one validation batch; accuracy is reported per digit position.
                valid_offset=valid_index_generator._next()
                valid_data_sample=valid_data[valid_offset:valid_offset+valid_batch_size]
                valid_predictions=session.run(tf_predictions,{tf_data:valid_data_sample})
                valid_labels_sample=[label_list[valid_offset:valid_offset+valid_batch_size] for label_list in valid_labels]
                print('Validation accuracy(%): ', [accuracy(predictions, labels) for predictions, labels
                                                   in zip(valid_predictions,valid_labels_sample)])
    except KeyboardInterrupt:        
        # On interrupt: score one test batch and show some of its predictions
        # next to the original images and labels.
        test_offset=test_index_generator._next()
        test_data_sample=test_data[test_offset:test_offset+test_batch_size]
        test_predictions=session.run(tf_predictions,{tf_data:test_data_sample})
        test_labels_sample=[label_list[test_offset:test_offset+test_batch_size] for label_list in test_labels]
        print('Test accuracy(%): ', [accuracy(predictions, labels) for predictions, labels
                                      in zip(test_predictions,test_labels_sample)])
        display_images(test_data_sample,test_predictions,
                      loaded_test_data[test_offset:test_offset+test_batch_size],
                       loaded_test_labels[test_offset:test_offset+test_batch_size],"displaying predicted labels")

In [None]:
# Scratch cell: check that a list of independent lists can be filled per slot
# and that an out-of-range index falls back to a default value.
processed=[list() for i in range(3)]
processed[1].append(2)
print(processed)
i=4
# i is past the end of the list, so this expression evaluates to 0.
processed[i] if i<len(processed) else 0

In [None]:
# Scratch cell: print the documentation of the sigmoid cross-entropy loss helper.
# NOTE(review): tf.contrib only exists in older TensorFlow releases — confirm the version in use.
help(tf.contrib.losses.sigmoid_cross_entropy)

In [None]:
# Scratch cell: experiments with tf.contrib.losses.cosine_distance on small
# constant tensors, plus a dynamic-shape experiment at the end.
with tf.Session() as session:

    # testing cosine distance
    a = tf.constant([[[1.0, 0.0],[0.0,1.0]],
                    [[0.0, 1.0],[0.0,1.0]]])
    b = tf.constant([[[0.0, 1.0],[0.0,1.0]],
                    [[0.0, 1.0],[0.0,1.0]]])
    c = tf.constant([[[0.95,0.05],[0.05,0.95]],
                    [[0.05,0.95],[0.05,0.95]]])

    dist =tf.contrib.losses.cosine_distance
    print("dim:",a.get_shape())
    # Distances are taken along dimension 2 (the last axis of the 2x2x2 constants).
    print("aa: ",dist(a,a,2).eval())
    print("ab: ",dist(a,b,2).eval())
    print("ac: ",dist(a,c,2).eval())
    print("bc: ",dist(b,c,2).eval())
    print("cb: ",dist(c,b,2).eval())
    
    print("loga:",tf.log(a).eval())

    # The two helpers below are defined but not called anywhere in this cell.
    def reshape_and_normalize(vector):
        # NOTE(review): reshaping to [1] only fits a one-element tensor — confirm the intended shape.
        reshaped=tf.reshape(vector,[1,])
        length=vector.get_shape()[1]
        return reshaped/tf.cast(length,tf.float32)

    def reshaped_dist(pred,tar):
        return dist(reshape_and_normalize(pred),reshape_and_normalize(tar),1)
    # NOTE(review): rnn_cell is not defined in the visible part of this file —
    # confirm where it comes from, otherwise this line raises a NameError.
    state_size=tf.constant(rnn_cell.state_size)
    print(state_size)
    print(tf.concat(0,[tf.slice(tf.shape(a),[0],[1]),[state_size]]).eval()[1])


In [None]:
# Scratch cell: number of per-position label arrays in the validation labels.
len(valid_labels)