In [None]:
import tensorflow as tf
from time import time
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import mlutils
%matplotlib inline

# Lab: Convolutional network with TensorFlow low level API


### RECOMMENDATION

- close all applications
- install Maxthon browser http://www.maxthon.com
- open only VirtualBox and Maxthon


**Using a small dataset based on [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html)**


In [None]:
!wget -nc https://s3.amazonaws.com/rlx/mini_cifar.h5
import h5py
with h5py.File('mini_cifar.h5','r') as h5f:
    x_cifar = h5f["x"][:]
    y_cifar = h5f["y"][:]

In [None]:
from sklearn.model_selection import train_test_split

# Fixed seed: without it every "Restart Kernel -> Run All" produces a different
# train/test partition, so class counts and learning curves are not comparable
# between runs (nor between Part 1 and a re-executed Part 2).
SPLIT_SEED = 42

# Hold out 25% of the samples for testing.
x_train, x_test, y_train, y_test = train_test_split(
    x_cifar, y_cifar, test_size=.25, random_state=SPLIT_SEED
)
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

# Number of samples per class in each partition, to check they are not badly unbalanced.
print("\ndistribution of train classes")
print(pd.Series(y_train).value_counts())
print("\ndistribution of test classes")
print(pd.Series(y_test).value_counts())

# Part 1: Build a CNN with TF Low Level API

### Build the convolutional network model

with the same architecture as in the corresponding notebook:

    Layer (type)                 Output Shape              Param #   
    =================================================================
    input_1 (InputLayer)         (None, 32, 32, 3)         0         
    _________________________________________________________________
    conv2d (Conv2D)              (None, 32, 32, 15)        735       
    _________________________________________________________________
    flatten (Flatten)            (None, 15360)             0         
    _________________________________________________________________
    dense (Dense)                (None, 16)                245776    
    _________________________________________________________________
    output_1 (Dense)             (None, 3)                 51        
    =================================================================
    Total params: 246,562
    Trainable params: 246,562
    Non-trainable params: 0
    _________________________________________________________________

#### understand carefully the example [here](http://www.jessicayung.com/explaining-tensorflow-code-for-a-convolutional-neural-network/). 

Complete the following function. You will have to:

1. Declare tensor symbolic variables for inputs and model parameters:

    - Define placeholders for X and y
    - Define tf variables for W's and b's. You will have to think carefully about their shapes.


2. Build the computational graph

    - Use [tf.random_normal](https://www.tensorflow.org/api_docs/python/tf/random/normal) with mean 0 and standard deviation `init_stddev` (the variable already defined at the top of `build_model`) as initialization distribution for all W's and b's
    - Use [tf.nn.conv2d](https://www.tensorflow.org/api_docs/python/tf/nn/conv2d) for the convolutional layer (`h_conv1`)
    - Use [tf.reshape](https://www.tensorflow.org/api_docs/python/tf/reshape) to transition from the convolutional layer to the dense layer (`h_conv1_flat`)
    - Model the dense layer with TF matrix multiplication and relu activation (`h_dense`)
    - Model the output with three output neurons and softmax activation (`y_proba`)

The shapes of the weights you define must be equal to the ones printed out in the corresponding notebook.




In [None]:
def build_model(n_filters, filter_size, dense_size, img_size, n_channels):
    """Build the TensorFlow (1.x API) graph of the lab's convolutional network.

    This is an exercise template: every `...` placeholder must be replaced
    with real tensors before the function can run.

    The function resets the default graph, then declares the input
    placeholders, the parameters of the convolutional, dense and output
    layers, the forward pass, a cross-entropy loss, an Adam training op, an
    accuracy metric and the variable initializer. The shapes of the six
    parameter tensors are printed before returning.

    Parameters
    ----------
    n_filters, filter_size, dense_size, img_size, n_channels
        Architecture hyper-parameters. None of them is referenced by the code
        already present in the template; presumably they size the tensors to
        be written in the `...` placeholders -- TODO confirm once completed.

    Returns
    -------
    tuple
        ``(tX, ty, init, accuracy, training_op, loss)``.
    """

    # NOTE(review): not used by the code already in the template; presumably
    # meant as the stddev of the random initializers below -- confirm.
    init_stddev = 0.01
    
    # Discard any previously built graph so that the function can be called
    # several times in the same kernel.
    tf.reset_default_graph()
    
    # TODO(student): input placeholders (images and integer labels)
    tX = ...
    ty = ...

    # TODO(student): parameters of the convolutional layer
    w_conv1 = ...
    b_conv1 = ...

    # TODO(student): parameters of the dense layer
    w_dense = ...
    b_dense = ...

    # TODO(student): parameters of the output layer
    w_out   = ...
    b_out   = ...


    with tf.name_scope("cnn"):
        # TODO(student): forward pass
        h_conv1      = ...
        h_conv1_flat = ...
        h_dense      = ...
        y_proba      = ...

    with tf.name_scope("cross_entropy"):
        # Predicted class index per sample; not returned nor used further below.
        y_hat        = tf.argmax(y_proba, axis=1)
        # NOTE(review): y_proba is passed as `logits`. The lab text asks for
        # y_proba to be a softmax output, while this op is documented as
        # expecting unscaled logits (it applies softmax itself) -- confirm
        # whether the resulting double softmax is intended.
        xentropy     = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_proba, labels=ty)

    with tf.name_scope("optimization"):
        # Scalar loss: mean cross-entropy over the samples fed in
        loss         = tf.reduce_mean(xentropy)
        # Adam with its default hyper-parameters (none are passed)
        optimizer    = tf.train.AdamOptimizer()
        training_op  = optimizer.minimize(loss)

    with tf.name_scope("eval"):
        # k=1: a sample counts as correct when its label is the top-scored class
        correct = tf.nn.in_top_k(y_proba,ty,1)
        # Fraction of correct samples (booleans cast to 0./1. then averaged)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    # Despite the scope name, only the initializer is created here (no saver).
    with tf.name_scope("init_and_save"):
        init = tf.global_variables_initializer()

    # Printed so the shapes can be compared with the expected ones
    print ("-- weights shape --")
    print (w_conv1.shape)
    print (b_conv1.shape)
    print (w_dense.shape)
    print (b_dense.shape)
    print (w_out.shape)
    print (b_out.shape )

    return tX, ty, init, accuracy, training_op, loss

In [None]:
# Number of distinct labels present in the full data set
n_classes = np.unique(y_cifar).size
print("using", n_classes, "classes")

# Network architecture
n_filters, filter_size, dense_size = 15, 4, 16
img_size, n_channels = 32, 3

# Training schedule
n_epochs, batch_size = 30, 32

In [None]:
# Build the graph for Part 1 and keep the handles needed by the training loop.
tX, ty, init, accuracy, training_op, loss = build_model(
    n_filters=n_filters,
    filter_size=filter_size,
    dense_size=dense_size,
    img_size=img_size,
    n_channels=n_channels,
)

## Create the optimization loop

keep track of accuracy and loss in both train and test. Base your implementation on the notebook describing TF low level API.

Note that you must record one accuracy value per epoch: the average of the accuracies obtained over all the batches of that epoch. Likewise for the loss.

Plot the accuracy and loss curves for test and train separately, which should look like the following

![](Images/lab_batch_01.png)



In [None]:

# Part 1 training loop (exercise template: replace every `...` before running).
num_examples = len(x_train)
# Metric histories; the print at the end of each epoch reads their last element
acc_train, acc_test = [], []
loss_train, loss_test = [], []

with tf.Session() as sess:
    
    # Run the variable initializer returned by build_model
    sess.run(init)
    for epoch in range(n_epochs):
        
        # Fresh random ordering of the training indices for this epoch
        idxs = np.random.permutation(len(x_train))
        # Per-batch accuracy / loss collected during this epoch
        eacc, eloss = [], []
        
        # Integer division: a trailing partial batch is never visited
        for iteration in range(num_examples // batch_size):
            
            # TODO(student): presumably select the samples of this batch through `idxs`
            X_batch = ...
            y_batch = ...
            # TODO(student): must yield three values (the first one is discarded);
            # presumably a sess.run over training_op, accuracy and loss
            _, _eacc, _eloss = ...
            
            eacc += [_eacc]
            eloss += [_eloss]
            
        # TODO(student): `+=` extends the list, so each right-hand side must be
        # an iterable (e.g. a one-element list holding the epoch value)
        acc_train += ...
        acc_test  += ...
        loss_train += ...
        loss_test  += ...
        
        print ("epoch: %3d"%(epoch+1), "  train accuracy: %.4f"%acc_train[-1], "  test accuracy: %.4f"%acc_test[-1], "  train loss: %.4f"%loss_train[-1], "  test loss: %.4f"%loss_test[-1])

In [None]:
 ... plot accuracy and loss for train and test ...

# Part 2

Modify the optimization loop so that each batch is normalized before feeding it to the optimization step according to the following spec:

- consider only $X^{i}$ in the current batch
- $X^{i}$: image $i$
- $X^{i}_{j|k}$: channel $k$ of pixel $j$ in image $i$
- $S^i$: image $i$ standardized

In pixel wise standardization, each pixel has zero mean and std=1 across the dataset:

- $N$: total number of values $X^{i}_{j|k}$ entering the sums below
- $\mu = \frac{1}{N}\sum_{i,j,k} X^{i}_{j|k}$
- $\sigma = \sqrt{\frac{1}{N}\sum_{i,j,k}(X^{i}_{j|k}-\mu)^2}$

So that:

$$S^{i}_{j|k} = \frac{1}{\sigma + 10^{-6}}(X^{i}_{j|k} - \mu)$$


The $10^{-6}$ is to avoid the case of zero variance

you must also plot:

- accuracy and loss curves for train and test separately, which should look better than the previous

![](Images/lab_batch_02.png)

- for only train, the accuracy and loss curves of both experiments, looking like this

![](Images/lab_batch_03.png)


In [None]:
# Rebuild the graph for Part 2 (build_model resets the default graph first),
# rebinding the handles used by the training loop.
tX, ty, init, accuracy, training_op, loss = build_model(
    n_filters=n_filters,
    filter_size=filter_size,
    dense_size=dense_size,
    img_size=img_size,
    n_channels=n_channels,
)

In [None]:
# Standardize every test image on its own: subtract the image mean and divide
# by the image standard deviation.
# The small constant keeps the division finite for a constant image (std == 0),
# as required by the standardization formula of this lab.
norm_eps = 1e-6
sx_test = np.array([(img - np.mean(img)) / (np.std(img) + norm_eps) for img in x_test])

In [None]:
# Part 2 training loop (exercise template: replace every `...` before running).
num_examples = len(x_train)
# Metric histories of the normalized-batch experiment; the print at the end of
# each epoch reads their last element, so each needs one new entry per epoch.
bacc_train, bacc_test = [], []
bloss_train, bloss_test = [], []
with tf.Session() as sess:
    # Run the variable initializer returned by build_model
    sess.run(init)
    for epoch in range(n_epochs):
        # TODO(student): per-epoch setup
        ...
        # Integer division: a trailing partial batch is never visited
        for iteration in range(num_examples // batch_size):
            # TODO(student): presumably normalize the batch, then run one optimization step
            ...
            
        # TODO(student): append this epoch's metrics to the four histories;
        # presumably the test metrics use the standardized test set `sx_test` -- confirm
        ...
        print ("epoch: %3d"%(epoch+1), "  train accuracy: %.4f"%bacc_train[-1], "  test accuracy: %.4f"%bacc_test[-1], "  train loss: %.4f"%bloss_train[-1], "  test loss: %.4f"%bloss_test[-1])

In [None]:
 ... plot accuracy and loss for train in both experiments ...