In this project, we will work with the CIFAR-10 image dataset. 
The starter code to download the dataset using Keras is given below. 
Test the project on Google Colab running on a CPU, a GPU and a TPU.
 

# For every line of code, please write a comment briefly explaining what that line does.
Your grade will be based on your understanding of the code you write! 


In [None]:
%tensorflow_version 2.x
from __future__ import absolute_import, division, print_function, unicode_literals
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
from keras import layers

import numpy as np
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
import os

%matplotlib inline

# Task 1
Convert the features into a form that can be given as input to the TensorFlow library/functions.

In this task you will perform data augmentation, that is, pre-process the data to make the model more robust. Experiment with common data augmentation techniques such as rotation, translation, horizontal flips, scaling and histogram equalization. 

In [None]:
from keras.datasets import cifar10
# Download CIFAR-10 and unpack it into a training split and a test split,
# each consisting of images (x_*) and their class labels (y_*).
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [None]:
# Create the augmentation generator used when training the CNN:
# random rotations (rotation_range=20) and random horizontal flips.
datagen = ImageDataGenerator(rotation_range=20, horizontal_flip=True)

# Task 2
Build a Deep Learning Neural Network model, train on the features and report the accuracy.
Report your observations on the time taken on a CPU and GPU (with and without CuDNN kernel).  


1.   Create a CNN based model with 4 hidden layers with 64, 128, 256, 512 units in each successive layer. 
2.   Create an LSTM based model with 1 LSTM layer with 256  units. 



## CNN Model

In [None]:
# Build a CNN with 4 convolutional stages of 64, 128, 256 and 512 filters,
# followed by a small dense classification head.
def create_model(input_shape=None, num_classes=10):
  """Build the (uncompiled) CNN classifier.

  Each convolutional stage is BatchNormalization -> 5x5 'same' convolution
  with ELU activation -> 2x2 max pooling -> 25% dropout.

  Args:
    input_shape: Shape of a single input image, e.g. (32, 32, 3). When
      omitted, `x_train.shape[1:]` is used, which matches the original
      behaviour of this function.
    num_classes: Size of the softmax output layer (10 by default).

  Returns:
    An uncompiled `tf.keras.models.Sequential` model.
  """
  if input_shape is None:
    # Fall back to the notebook-level training images.
    input_shape = x_train.shape[1:]

  model = tf.keras.models.Sequential()

  for stage, filters in enumerate((64, 128, 256, 512)):
    if stage == 0:
      # Only the first layer of a Sequential model declares the input shape.
      model.add(tf.keras.layers.BatchNormalization(input_shape=input_shape))
    else:
      # Later stages receive the previous stage's output, so no input shape
      # is given here (the original repeated the image shape, which was
      # misleading because the feature maps no longer have that shape).
      model.add(tf.keras.layers.BatchNormalization())
    # Convolve with a 5x5 kernel, keeping the spatial size ('same' padding).
    model.add(tf.keras.layers.Conv2D(filters, (5, 5), padding='same', activation='elu'))
    # Downsample with a 2x2 max-pooling window.
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
    # Randomly drop 25% of the activations during training.
    model.add(tf.keras.layers.Dropout(0.25))

  # Flatten the feature maps into one vector per image.
  model.add(tf.keras.layers.Flatten())

  # Dense classification head.
  model.add(tf.keras.layers.Dense(256))
  model.add(tf.keras.layers.Activation('elu'))
  model.add(tf.keras.layers.Dropout(0.5))
  model.add(tf.keras.layers.Dense(num_classes))
  model.add(tf.keras.layers.Activation('softmax'))
  return model

### Running on CPU

In [None]:
%%time
model = create_model()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3, ),
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'])

CPU times: user 304 ms, sys: 4.05 ms, total: 308 ms
Wall time: 277 ms


In [None]:
%%time
model.fit(
    datagen.flow(x_train.astype(np.float32), y_train.astype(np.float32)),
    epochs=5,
    batch_size =50,
    steps_per_epoch=500,
    validation_data=(x_test.astype(np.float32), y_test.astype(np.float32)),
    validation_freq=17
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CPU times: user 1h 12min 13s, sys: 49.6 s, total: 1h 13min 3s
Wall time: 37min 41s


<tensorflow.python.keras.callbacks.History at 0x7ff470da6828>

In [None]:
# Print out the accuracy of the model on the test set
%%time
scores = model.evaluate(x_test, y_test)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

Test loss: 1.3001537322998047
Test accuracy: 0.5162000060081482
CPU times: user 1min 49s, sys: 665 ms, total: 1min 50s
Wall time: 58.6 s


### Running on GPU

In [None]:
%%time
model = create_model()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3, ),
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'])

CPU times: user 187 ms, sys: 6.55 ms, total: 193 ms
Wall time: 194 ms


In [None]:
%%time
model.fit(
    datagen.flow(x_train.astype(np.float32), y_train.astype(np.float32)),
    epochs=5,
    batch_size =50,
    steps_per_epoch=500,
    validation_data=(x_test.astype(np.float32), y_test.astype(np.float32)),
    validation_freq=17
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CPU times: user 1min 5s, sys: 7.18 s, total: 1min 12s
Wall time: 48.3 s


<tensorflow.python.keras.callbacks.History at 0x7f38e8205cf8>

In [None]:
# Print out the accuracy of the model on the test set
%%time
scores = model.evaluate(x_test, y_test)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

Test loss: 1.1735551357269287
Test accuracy: 0.5871000289916992
CPU times: user 1.2 s, sys: 123 ms, total: 1.32 s
Wall time: 1.92 s


## LSTM Model

In [None]:
# Number of samples per training batch
batch_size = 50
# Each batch of single-channel CIFAR-10 images is a tensor of shape (batch_size, 32, 32).
# Each input sequence will be of size (32, 32) (height is treated like time).
input_dim = 32   # Features per time step (one image row)

units = 256  # Number of units in the LSTM layer
output_size = 10  # labels are from 0 to 9

# Build the LSTM classifier
def build_model(allow_cudnn_kernel=True):
  """Return an uncompiled recurrent classifier.

  The network is one LSTM layer with `units` units, followed by batch
  normalization and a softmax layer with `output_size` outputs. It accepts
  sequences of any length whose steps have `input_dim` features.

  Args:
    allow_cudnn_kernel: When True, use `tf.keras.layers.LSTM`, which can use
      the CuDNN kernel. When False, wrap an `LSTMCell` in a generic `RNN`
      layer, which does not use CuDNN (CuDNN is only available at the layer
      level, not at the cell level).
  """
  sequence_shape = (None, input_dim)
  if not allow_cudnn_kernel:
    # Generic RNN wrapper around a cell: runs on the non-CuDNN kernel.
    recurrent = tf.keras.layers.RNN(
        tf.keras.layers.LSTMCell(units),
        input_shape=sequence_shape)
  else:
    # The LSTM layer with default options uses CuDNN.
    recurrent = tf.keras.layers.LSTM(units, input_shape=sequence_shape)

  model = tf.keras.models.Sequential()
  model.add(recurrent)
  model.add(tf.keras.layers.BatchNormalization())
  model.add(tf.keras.layers.Dense(output_size, activation='softmax'))
  return model


### Transformations
We apply the transformations up front and append the results to the image list because the `flow` method expects 4-dimensional input, whereas the LSTM model expects 3-dimensional input. Because of this mismatch we could not find a way to use `flow` while training the LSTM model, so our solution was to apply the transformations before feeding the data to the model.

In [None]:
# Create a data generator with no augmentation options; it is only used for
# its `apply_transform` method below.
# NOTE: this rebinds `datagen`, replacing the augmentation generator that the
# CNN training cells use.
datagen = ImageDataGenerator()
# Flip every training image horizontally and rotate it (theta=40), collecting
# the transformed copies in a new list.
x_train_transform = [
    datagen.apply_transform(image, {'flip_horizontal':True, 'theta': 40})
    for image in x_train
]

In [None]:
# Stack the original images and their transformed copies along the sample axis
new_x_train = np.concatenate((x_train, x_train_transform), axis=0)
new_x_train.shape

(100000, 32, 32, 3)

In [None]:
# Repeat the labels once more so they line up with the doubled image array
new_y_train = np.concatenate((y_train, y_train), axis=0)
new_y_train.shape

(100000, 1)

### Splitting each image into 3 single-channel images (R, G, B)

In [None]:
# Training Set
# Turn every RGB image into three single-channel images (red, green, blue, in
# that order) so that each sample is a 2-D array the LSTM can read row by row.
x_train_rgblist = [
    image[:, :, channel]
    for image in new_x_train
    for channel in range(3)
]
# Every channel image inherits the label of the image it was taken from.
y_train_rgblist = [
    label
    for label in new_y_train
    for _ in range(3)
]
x_train_rgb = np.asarray(x_train_rgblist)
y_train_rgb = np.asarray(y_train_rgblist)

In [None]:
# Testing Set
# Turn every RGB test image into three single-channel images (red, green,
# blue, in that order), mirroring the preparation of the training set.
x_test_rgblist = []
y_test_rgblist = []
for image, label in zip(x_test, y_test):
  for channel in range(3):
    x_test_rgblist.append(image[:, :, channel])
    # Use the label of the *test* image. The original code read y_train[i]
    # here, which paired test images with unrelated training labels.
    y_test_rgblist.append(label)
x_test_rgb = np.asarray(x_test_rgblist)
y_test_rgb = np.asarray(y_test_rgblist)

In [None]:
# Display the shape of the single-channel training array as a sanity check
x_train_rgb.shape

(300000, 32, 32)

### Running on CPU

In [None]:
# Build the LSTM model on the non-CuDNN code path (RNN wrapping an LSTMCell)
slow_model = build_model(allow_cudnn_kernel=False)

# Integer labels -> sparse categorical cross-entropy; plain SGD optimizer
slow_model.compile(optimizer='sgd',
                   loss='sparse_categorical_crossentropy',
                   metrics=['accuracy'])
# Train for 5 epochs of 500 steps; with batch_size samples per step this is
# 500 * 50 = 25,000 samples per epoch. Validate on the test set after each epoch.
slow_model.fit(x=x_train_rgb, y=y_train_rgb,
               epochs=5,
               steps_per_epoch=500,
               batch_size=batch_size,
               validation_data=(x_test_rgb, y_test_rgb))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f5e860f5828>

In [None]:
# Print out the accuracy of the model on the test set
%%time
scores = slow_model.evaluate(x_test_rgb, y_test_rgb)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

Test loss: 2.3778388500213623
Test accuracy: 0.09989999979734421
CPU times: user 43.2 s, sys: 2.81 s, total: 46 s
Wall time: 30.7 s


### Running on GPU

In [None]:
# Build the LSTM model with the plain LSTM layer, which can use the CuDNN kernel
model = build_model(allow_cudnn_kernel=True)

# Integer labels -> sparse categorical cross-entropy; plain SGD optimizer
model.compile(optimizer='sgd',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# Train for 5 epochs of 500 steps; with batch_size samples per step this is
# 500 * 50 = 25,000 samples per epoch. Validate on the test set after each epoch.
model.fit(x=x_train_rgb, y=y_train_rgb,
          epochs=5,
          steps_per_epoch=500,
          batch_size=batch_size,
          validation_data=(x_test_rgb, y_test_rgb))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fc07767eac8>

In [None]:
# Print out the accuracy of the model on the test set
%%time
scores = model.evaluate(x_test_rgb, y_test_rgb)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

Test loss: 2.590157985687256
Test accuracy: 0.09456666558980942
CPU times: user 3.63 s, sys: 322 ms, total: 3.95 s
Wall time: 3.46 s


# Task 3
(Bonus - you will have to do this by yourself)
Run the LSTM solution on a TPU and report performance.

We were able to connect to the TPU and get a faster result.

In [None]:
import tensorflow as tf
# Get a handle to the attached TPU; its address is read from the
# COLAB_TPU_ADDR environment variable.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
    tpu='grpc://{}'.format(os.environ['COLAB_TPU_ADDR']))

# Connect to the TPU cluster, then initialise the TPU system
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)





INFO:tensorflow:Initializing the TPU system: grpc://10.26.54.10:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.26.54.10:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


<tensorflow.python.tpu.topology.Topology at 0x7f1c90e9be48>

In [None]:
# Create the distribution strategy for the TPU found by `resolver`
strategy = tf.distribute.TPUStrategy(resolver)

INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


In [None]:
# Build and compile the LSTM model inside the TPU strategy scope
with strategy.scope():
  # The RNN(LSTMCell) variant is used here (no CuDNN kernel)
  model = build_model(allow_cudnn_kernel=False)

  # Integer labels -> sparse categorical cross-entropy; plain SGD optimizer
  model.compile(optimizer='sgd',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])


In [None]:
# Train on the TPU for 5 epochs of 500 steps; all arrays are cast to float32
# before being handed to Keras. Validate on the test set after each epoch.
model.fit(x=x_train_rgb.astype(np.float32),
          y=y_train_rgb.astype(np.float32),
          epochs=5,
          steps_per_epoch=500,
          batch_size=batch_size,
          validation_data=(x_test_rgb.astype(np.float32), y_test_rgb.astype(np.float32)))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fc0d017ddd8>

In [None]:
# Print out the accuracy of the model on the test set
%%time
scores = model.evaluate(x_test_rgb.astype(np.float32), y_test_rgb.astype(np.float32))
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

Test loss: 2.3974902629852295
Test accuracy: 0.09726666659116745
CPU times: user 3.54 s, sys: 286 ms, total: 3.82 s
Wall time: 3.27 s
