# Tensorflow explained to myself
Based on the 'Deep MNIST tutorial for experts' tutorial. <br>
Some things were not crystal clear when reading the tutorial, so I figured I might write my own tutorial so I can quickly look up things I forgot.

In [1]:
from __future__ import division
import numpy as np
import pandas as pd
import tensorflow as tf

# Data
Importing data

In [2]:
# TRAIN
train = pd.read_csv("/Users/edouardcuny/Downloads/train.csv")
x_train = train.iloc[:,1:]
y_train = train.iloc[:,0]
x_train = x_train.as_matrix()
x_train = x_train/255

y_train = y_train.as_matrix()
x_train = x_train.astype('float64')
y_train = y_train.astype('float64')

# y_train from labels to one hot
y = np.zeros([y_train.shape[0],10])
for i in range(y.shape[0]):
    y[i, int(y_train[i])]=1
y_train = y


# TEST
test = pd.read_csv("/Users/edouardcuny/Downloads/test.csv")
x_test = test.as_matrix()
x_test = x_test/255

Custom functions to iterate through the training data. <br>
Need to do this since data is imported in a numpy array.


In [3]:
#_______ ITERATE THROUGH BATCH _______ #

def next_batch_x_train(batch_size):
    global index_batch_x_train
    array = x_train
    
    if batch_size > array.shape[0]:
        raise IndexError
    
    if (index_batch_x_train+1)*batch_size > array.shape[0]:
        index = index_batch_x_train
        index_batch_x_train = 0
        return array[index*batch_size:,:]
    
    else:
        index = index_batch_x_train
        index_batch_x_train += 1
        return array[index*batch_size:(index+1)*batch_size]

def next_batch_y_train(batch_size):
    global index_batch_y_train
    array = y_train
    
    if batch_size > array.shape[0]:
        raise IndexError
    
    if (index_batch_y_train+1)*batch_size > array.shape[0]:
        index = index_batch_y_train
        index_batch_y_train = 0
        return array[index*batch_size:,:]
    
    else:
        index = index_batch_y_train
        index_batch_y_train += 1
        return array[index*batch_size:(index+1)*batch_size]

# Feedforward

First we begin by creating custom functions to generate the variables to have cleaner code.

In [4]:
def weight_variable(shape):
  initial = tf.truncated_normal(shape, stddev=0.1)
  return tf.Variable(initial)

def bias_variable(shape):
  initial = tf.constant(0.1, shape=shape)
  return tf.Variable(initial)

Creating x (the input) is pretty straighforward. <br>
It's a placeholder. <br>
Its size is [None, 784] : 
* None = flexible number of examples; None means that it can be anything
* 784 = image size


Then we reshape x to pass it in the convolutional layer. <br>
Expected format is [batch, in_height, in_width, in_channels] with : 
* **batch** is the number of examples we want it to be flexible so : **-1**
* **in_height** is the height of the picture sqrt(784) = **28**
* **in_width** = **28**
* **in_channels** is the number of input channels / feature maps = **1**

In [5]:
x = tf.placeholder(tf.float32, [None, 784])
x_image = tf.reshape(x, [-1,28,28,1])

## First convolutional layer

Then we set the shape of the variables. <br>
There is one bias per feature map (we have 32 feature maps) <br>

The weight/filter is in the format [filter_height, filter_width, in_channels, out_channels] <br>
We're doing a 5x5 filter with 32 feature maps so :
* **filter_height** = 5
* **filter_height** = 5
* **in_channels** = 1
* **out_channels** = 32

In [6]:
b_conv1 = bias_variable([32])
W_conv1 = weight_variable([5, 5, 1, 32])

Now we can define our first convolutional layer. <br>

A word about the strides. <br>
It defines how we iterate through x so the strides follow the same format : [batch, in_height, in_width, in_channels]

* We're using a stride of one for the convolutional layer. <br>
* And a 2x2 max pooling layer with no overlap (stride = window/ksize)

As always we define two functions to make the code cleaner. <br>


In [7]:
def conv2d(x, W):
  return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
  return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')

We can then compute our outputs (we're using relu activation functions).

In [8]:
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

## Second convolutional layer
Nothing to explain.

In [9]:
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

## Densely connected layer
We now do a fully connected layer taking all the neurons and outputing 1024 neurons. <br> 
* Our image has a size of 7x7 (size is unchanged after the convolutional part but is reduced by 2 by the pooling step). 
* We have 64 feature maps/

We have thus have 7x7x64 input neurons and 1024 output neurons.

In [10]:
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

We need to reshape the output of the second maxpooling layer. <br>
As usual the -1 indicated that we can any number of input examples

In [11]:
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

## Dropout
Dropout layer with a probabilty that is a parameter of our network. <br>
You can see the tensorflow workflow as one big function and placeholders are arguments of this function. <br>
We want the probability of a neuron to be dropped out to be an argument so its a placeholder.

In [12]:
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

## Readout layer
Nothing to say.

In [13]:
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2


## Loss & Optimizer
We use a cross entropy cost minimized by an Adam Optimizer.

In [14]:
y_ = tf.placeholder(tf.float32, [None, 10])

cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))

train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

## Training and scoring everything

In [18]:
# SESSION TENSORFLOW
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# TRAIN
batch_size = 50
epochs = 24
train_size = train.shape[0] 

index_batch_x_train = 0
index_batch_y_train = 0

from time import time
start = time()
#for i in range(int((epochs*train_size)/batch_size)):
for i in range(10):
  if i%100==0:
    print(int(i/100),'/201 ### ELAPSED TIME = ', int((time()-start)/60), 'minutes') 
  batch_xs = next_batch_x_train(batch_size)
  batch_ys = next_batch_y_train(batch_size)
  sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys, keep_prob : 0.5})
      
# TEST
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: x_train,
                                    y_: y_train}))

#%% TRAINING IS NOW DONE

# FIRST ONE
prediction = sess.run(tf.argmax(y_conv,1), feed_dict={x: x_test[:1000,:], keep_prob : 1})
prediction = pd.DataFrame(prediction)

# MIDDLE ONES
i = 1
while (i+1)*1000 < x_test.shape[0]-1:
  prediction2 = sess.run(tf.argmax(y_conv,1), feed_dict={x: x_test[i*1000:(i+1)*1000,:], keep_prob : 1})
  prediction2 = pd.DataFrame(prediction2)
  prediction = pd.concat([prediction, prediction2], axis=0)
  i+=1

# LAST ONE
prediction2 = sess.run(tf.argmax(y_conv,1), feed_dict={x: x_test[i*1000:,:], keep_prob : 1})
prediction2 = pd.DataFrame(prediction2)
prediction = pd.concat([prediction, prediction2], axis=0)

prediction[1] = [x+1 for x in range(prediction.shape[0])]
prediction = prediction.iloc[:,[1,0]]
prediction.columns = ['ImageId', 'Label']
prediction.to_csv('/Users/edouardcuny/Desktop/ml/tf/submission.csv', index=False)

0 /201 ### ELAPSED TIME =  0 minutes
1.0


# Score

Ran this code on another machine. <br>
Final score on the Kaggle competition : 
* 
* 