# CIFAR-10 Object Classification

The CIFAR-10 dataset contains 60k 32x32 pixel color images from 10 different classes.

The classes are:
- airplane 
- automobile 
- bird 
- cat 
- deer 
- dog 
- frog 
- horse 
- ship 
- truck

Tasks:

- implement the TODOs
- train a MLP to achieve >40% test accuracy
- add TensorBoard summaries
- train a CNN to achieve >80% test accuracy

Help:
- use the TensorFlow API Documentation [https://www.tensorflow.org/api_docs/](https://www.tensorflow.org/api_docs/)

<hr>

# Download data

In [1]:
%%sh
# download CIFAR-10
wget -q https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
# unpack
tar xzf cifar-10-python.tar.gz
# remove tar.gz
rm cifar-10-python.tar.gz

# Imports

In [1]:
import tensorflow as tf
import numpy as np
import pickle

# Preparations

In [3]:
# function to unpickle data files
def unpickle(file):
    with open(file, 'rb') as fo:
        obj = pickle.load(fo, encoding='bytes')
    return obj

# function to store  data in pickle file
def store(obj, filename):
    pickle.dump(obj, open('cifar-10-batches-py/' + filename, 'wb'))

Test-code:
```python
x_train_b = unpickle('cifar-10-batches-py/data_batch_' + str(1)).get(bytes('data', 'ascii'))
img = x_train_b[1]
print(img.shape)
r = img[0:1024]
g = img[1024:2048]
b = img[2048:3072]
print(r.shape)
print(g.shape)
print(b.shape)
rgb = np.dstack((r,g,b))
```

In [4]:
# TODO: decode pickle data as images
# see https://www.cs.toronto.edu/~kriz/cifar.html
def decode_as_image(img_flat):
    img_R = img_flat[0:1024]
    img_G = img_flat[1024:2048]
    img_B = img_flat[2048:3072]
    return np.dstack((img_R, img_G, img_B))

In [5]:
# load train data and save to disk for later usage
# note: you might need to give Docker more memory
# alternatively, execute separately
x_train = []
for i in range(1, 6):
    x_train_b = unpickle('cifar-10-batches-py/data_batch_' + str(i)).get(bytes('data', 'ascii'))
    for img in x_train_b:
        img = decode_as_image(img)
        x_train.append(img)

# reshape the data
x_train = np.array(x_train).reshape(5*10000, 32*32, 3)

# save to disk
store(x_train, 'x_train')

In [6]:
# load test data and save to disk for later usage
x_test = []
x_test_b = unpickle('cifar-10-batches-py/test_batch').get(bytes('data', 'ascii'))
for img in x_test_b:
    img = decode_as_image(img)
    x_test.append(img)

# reshape the data
x_test = np.array(x_test).reshape(1*10000, 32*32, 3)

# save to disk
store(x_test, 'x_test')

In [7]:
# load train labels and save to disk
y_train = []
for i in range(1, 6):
    y_train_b = unpickle('cifar-10-batches-py/data_batch_' + str(i)).get(bytes('labels', 'ascii'))
    for img in y_train_b:
        y_train.append(img)
        
# reshape the data
y_train = np.array(y_train).flatten()

# save to disk
store(y_train, 'y_train')

In [8]:
# load test labels and save to disk
y_test = []
y_test_b = unpickle('cifar-10-batches-py/test_batch').get(bytes('labels', 'ascii'))
for img in y_test_b:
    y_test.append(img)
        
# reshape the data
y_test = np.array(y_test).flatten()

# save to disk
store(y_test, 'y_test')

# Load prepared data

In [4]:
x_train = unpickle("cifar-10-batches-py/x_train")
x_test = unpickle("cifar-10-batches-py/x_test")
y_train = unpickle("cifar-10-batches-py/y_train")
y_test = unpickle("cifar-10-batches-py/y_test")

In [5]:
# mapping from label number to label
label_mapping = ['airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck']

def get_label(i):
    return label_mapping[i]

In [6]:
# plots the first 3 entries in the train set
import matplotlib.pyplot as plt

rand = np.random.randint(50000 - 1)
i = 0
for idx in range(rand, rand + 3):
    plt.subplot(1, 3, i + 1)
    plt.title("Class: {}".format(get_label(int(y_train[idx]))))
    plt.imshow(x_train[idx].reshape(32,32,3))
    i += 1
    plt.show()

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

In [7]:
x_train = x_train.reshape(-1, 32*32*3)
x_test = x_test.reshape(-1, 32*32*3)

In [8]:
# TODO: normalize data and cast to float32
x_train = x_train.astype(np.float32) / 255.0
x_test = x_test.astype(np.float32) / 255.0
print(x_train.shape)

(50000, 3072)


# Defining the inputs

In [9]:
# TODO: define network parameters
n_input = 3072 # image shape
n_channels = 3 # number of channels
n_classes = 10 # number of CIFAR-10 classes

In [10]:
# one hot encoding of labels
def one_hot_encode(a, length):
    temp = np.zeros((a.shape[0], length))
    temp[np.arange(a.shape[0]), a] = 1
    return temp

print(y_train)
y_train = one_hot_encode(y_train.astype(np.int), n_classes)
y_test = one_hot_encode(y_test, n_classes)

[6 9 9 ... 9 1 1]


In [11]:
# TODO: define placeholder
x = tf.placeholder(tf.float32, [None, n_input], "x")
y = tf.placeholder(tf.float32, [None, n_classes], "y")

In [12]:
# TODO: define hyper parameters
learning_rate = 0.0001
training_iters = 1000000
batch_size = 256
display_step = 100

Check [this](https://towardsdatascience.com/cifar-10-image-classification-in-tensorflow-5b501f7dc77c) tutorial for the CNN

In [19]:
def mlp(x):
    # TODO: define MLP
    flatten = tf.layers.flatten(x)
    hidden1 = tf.layers.dense(inputs=flatten, units=255, activation=tf.nn.relu)
    dropout = tf.layers.dropout(inputs=hidden1, rate=0.4)
    hidden2 = tf.layers.dense(inputs=dropout, units=255, activation=tf.nn.relu)
    pred = tf.layers.dense(hidden2, 10, activation=tf.nn.softmax)
    
    return pred

def cnn(x):
    # TODO: define CNN
    tf.reset_default_graph()
    x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3), name='input_x')
    y =  tf.placeholder(tf.float32, shape=(None, 10), name='output_y')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    
    conv1_filter = tf.Variable(tf.truncated_normal(shape=[3, 3, 3, 64], mean=0, stddev=0.08))
    conv2_filter = tf.Variable(tf.truncated_normal(shape=[3, 3, 64, 128], mean=0, stddev=0.08))
    conv3_filter = tf.Variable(tf.truncated_normal(shape=[5, 5, 128, 256], mean=0, stddev=0.08))
    conv4_filter = tf.Variable(tf.truncated_normal(shape=[5, 5, 256, 512], mean=0, stddev=0.08))

    # 1, 2
    conv1 = tf.nn.conv2d(x, conv1_filter, strides=[1,1,1,1], padding='SAME')
    conv1 = tf.nn.relu(conv1)
    conv1_pool = tf.nn.max_pool(conv1, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
    conv1_bn = tf.layers.batch_normalization(conv1_pool)

    # 3, 4
    conv2 = tf.nn.conv2d(conv1_bn, conv2_filter, strides=[1,1,1,1], padding='SAME')
    conv2 = tf.nn.relu(conv2)
    conv2_pool = tf.nn.max_pool(conv2, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')    
    conv2_bn = tf.layers.batch_normalization(conv2_pool)
  
    # 5, 6
    conv3 = tf.nn.conv2d(conv2_bn, conv3_filter, strides=[1,1,1,1], padding='SAME')
    conv3 = tf.nn.relu(conv3)
    conv3_pool = tf.nn.max_pool(conv3, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')  
    conv3_bn = tf.layers.batch_normalization(conv3_pool)
    
    # 7, 8
    conv4 = tf.nn.conv2d(conv3_bn, conv4_filter, strides=[1,1,1,1], padding='SAME')
    conv4 = tf.nn.relu(conv4)
    conv4_pool = tf.nn.max_pool(conv4, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
    conv4_bn = tf.layers.batch_normalization(conv4_pool)
    
    # 9
    flat = tf.contrib.layers.flatten(conv4_bn)  

    # 10
    full1 = tf.contrib.layers.fully_connected(inputs=flat, num_outputs=128, activation_fn=tf.nn.relu)
    full1 = tf.nn.dropout(full1, keep_prob)
    full1 = tf.layers.batch_normalization(full1)
    
    # 11
    full2 = tf.contrib.layers.fully_connected(inputs=full1, num_outputs=256, activation_fn=tf.nn.relu)
    full2 = tf.nn.dropout(full2, keep_prob)
    full2 = tf.layers.batch_normalization(full2)
    
    # 12
    full3 = tf.contrib.layers.fully_connected(inputs=full2, num_outputs=512, activation_fn=tf.nn.relu)
    full3 = tf.nn.dropout(full3, keep_prob)
    full3 = tf.layers.batch_normalization(full3)    
    
    # 13
    full4 = tf.contrib.layers.fully_connected(inputs=full3, num_outputs=1024, activation_fn=tf.nn.relu)
    full4 = tf.nn.dropout(full4, keep_prob)
    full4 = tf.layers.batch_normalization(full4)        
    
    # 14
    pred = tf.contrib.layers.fully_connected(inputs=full3, num_outputs=10, activation_fn=None)
    
    #x = tf.reshape(x, [-1, 3, 32,32]).transpose(0,2,3,1)
    #conv1 = tf.layers.conv2d(inputs=x, filters=32, kernel_size=[5,5], data_format="channels_last", activation=tf.nn.relu)
    #pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=(3,3), strides=1)
    #flatten = tf.layers.flatten(inputs=pool1)
    #pred = tf.layers.dense(flatten, 10, activation=tf.nn.softmax)
    #pred = tf.layers.dense(x, 10, activation=tf.nn.softmax)
    
    return pred

In [17]:
# build network
#pred = mlp(x)
pred = cnn(x)

# define cost function and optimizer
cost = tf.reduce_mean(tf.losses.softmax_cross_entropy(y, pred))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# TODO: define tensorboard summaries
#ith tf.name_scope('performance'):
#   tf_loss_ph = tf.placeholder(tf.float32,shape=None,name='loss_summary')
#   tf.summary.scalar('loss', tf_loss_ph)
#  tf_accuracy_ph = tf.placeholder(tf.float32,shape=None, name='accuracy_summary')
#   tf_accuracy_summary = tf.summary.scalar('accuracy', tf_accuracy_ph)

train_acc = tf.summary.scalar('accuracy', accuracy)
train_cost = tf.summary.scalar('cost', cost)

merged = tf.summary.merge_all() # merges train acc and cost summaries

test_acc = tf.summary.scalar('accuracy_test', accuracy)

Instructions for updating:
Use keras.layers.conv2d instead.
Instructions for updating:
Use keras.layers.max_pooling2d instead.


ValueError: Shapes (?, 32, 32, 10) and (?, 10) are incompatible

# Train and evaluate

In [15]:
# initializing the variables
init = tf.global_variables_initializer()

progbar = tf.keras.utils.Progbar(training_iters, stateful_metrics=["loss", "acc"])

with tf.Session() as sess:
    sess.run(init)
    train_writer = tf.summary.FileWriter('tf-summary/train', sess.graph)
    test_writer = tf.summary.FileWriter('tf-summary/test')
    step = 1
    
    # training loop
    while step * batch_size < training_iters:
        indices = np.random.randint(x_train.shape[0], size=batch_size)
        batch_x = x_train[indices]
        batch_y = y_train[indices]
        # run optimization op (backprop)
        if step % display_step == 0:
            #print("Step:", step)
            # calculate train batch loss and accuracy
            loss, acc, summary = sess.run([cost, accuracy, merged], feed_dict={x: batch_x, y: batch_y})
            #loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x, y: batch_y})
            train_writer.add_summary(summary, step*batch_size)
            
            progbar.update(step*batch_size, values=[("loss", loss), ("acc", acc)])
            
            # TODO: calculate test accuracy of random test batch
            indices = np.random.randint(x_test.shape[0], size=batch_size)
            test_batch_x = x_test[indices]
            test_batch_y = y_test[indices]
            acc = sess.run(test_acc, feed_dict={x: test_batch_x, y: test_batch_y})
            test_writer.add_summary(acc, step*batch_size)
        else:
            #print("Step:", step)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        step += 1
    
    print("\n")
    print ("Optimization Finished!")
    
    # calculate accuracy for MNIST test images
    print ("Testing Accuracy:", \
        sess.run(accuracy, feed_dict={x: x_test,
                                      y: y_test}))


Optimization Finished!
Testing Accuracy: 0.4257
