In [1]:
import numpy as np
import tensorflow as tf
import time
import math
import random
from numpy.random import seed
seed(1) #this is for fixing the random values
from tensorflow import set_random_seed #this is for fixing the random values
set_random_seed(2)

''' Here the things that we are going to do in this script:
1. Define your graph:
      a) Create Convolutional Layers.
      b) Create Fully-connected and Flatten Layers
      c) Create Other optimizer, loss function and others
2. Prepare your input dataset for training. 
3. Evaluating your own results. 
''' 

Let us firstly now try to read and format the data according to our requirement.....
The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images. 

The dataset is divided into five training batches and one test batch, each with 10000 images. 

The test batch contains exactly 1000 randomly-selected images from each class. The training batches contain the remaining images in random order, but some training batches may contain more images from one class than another. 

Between them, the training batches contain exactly 5000 images from each class.
#### We are going to use batch 1-4 as training dataset, batch 5 as validation dataset and test batch for testing
The Cifar-10 dataset downloaded is in the pickle format of python, so firstly we need to unpickle it using the following function

In [2]:
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

In [3]:
train_batch_1=unpickle('data_batch_1')
train_batch_2=unpickle('data_batch_2')
train_batch_3=unpickle('data_batch_3')
train_batch_4=unpickle('data_batch_4')
val_batch=unpickle('data_batch_5')
test_batch=unpickle('test_batch')

for key in train_batch_1:
    print(key,len(train_batch_1[b'data']))

b'filenames' 10000
b'batch_label' 10000
b'data' 10000
b'labels' 10000


Below function converts key of the dictonary from bytes to string

In [4]:
def convert(data):
    if isinstance(data, bytes):  return data.decode('ascii')
    if isinstance(data, dict):   return dict(map(convert, data.items()))
    if isinstance(data, tuple):  return map(convert, data)
    return data

In [5]:
train_batch_1=convert(train_batch_1)
train_batch_2=convert(train_batch_2)
train_batch_3=convert(train_batch_3)
train_batch_4=convert(train_batch_4)
val_batch=convert(val_batch)
test_batch=convert(test_batch)

### Separation of the Data and labels

In [6]:
train_Data=np.concatenate((train_batch_1['data'],train_batch_2['data'],train_batch_3['data'],train_batch_4['data']),axis=0).astype(np.float32)
train_Labels=np.concatenate((train_batch_1['labels'],train_batch_2['labels'],train_batch_3['labels'],train_batch_4['labels']),axis=0).astype(np.float32)
val_data=np.array(val_batch['data']).astype(np.float32)
val_label=np.array(val_batch['labels']).astype(np.float32)
test_data=np.array(test_batch['data']).astype(np.float32)
test_label=np.array(test_batch['labels']).astype(np.float32)

In [7]:
def label_format_changer(labels):
    label_changed=np.zeros((labels.shape[0],10))    
    for i in range(labels.shape[0]):
           label_changed[i][int(labels[i])]=1.0
    return label_changed   

train_Labels=label_format_changer(train_Labels)
val_label=label_format_changer(val_label)
test_label=label_format_changer(test_label)


Reshaping of the Data in to Images of 40000x32x32x3 for training  
Reshaping of the Data in to Images of 10000x32x32x3 for validation  
Reshaping of the Data in to Images of 10000x32x32x3 for testing

In [8]:
train_Data = train_Data.reshape(40000,3,32,32).transpose(0,2,3,1)
val_data = val_data.reshape(10000,3,32,32).transpose(0,2,3,1)
test_data = test_data.reshape(10000,3,32,32).transpose(0,2,3,1)
#print(train_Data.shape)

In [9]:
class DatasetHandler():
    def __init__(self, images, labels):
        self._num_examples = images.shape[0]
        self._images = images
        self._labels = labels
        self._epochs_done = 0
        self._index_in_epoch = 0
        
    def next_batch(self, batch_size):
        """Return the next `batch_size` examples from this data set."""
        start = self._index_in_epoch
        self._index_in_epoch += batch_size

        if self._index_in_epoch > self._num_examples:
          # After each epoch we update this
            self._epochs_done += 1
            start = 0
            self._index_in_epoch = batch_size
            assert batch_size <= self._num_examples
        end = self._index_in_epoch
        #print(self._index_in_epoch)

        return self._images[start:end], self._labels[start:end]

In [10]:
batch_size=50
img_size=32
num_channels=3
number_classes=10

In [11]:
session=tf.Session()
x=tf.placeholder(tf.float32,[None,img_size,img_size,num_channels])

#labels
y_true=tf.placeholder(tf.float32,[None,number_classes])
y_true_cls=tf.argmax(y_true,axis=1)

### Network Graph Parameters

In [12]:
filter_size_conv1=5
num_filters_conv1=6

filter_size_conv2=5
num_filters_conv2=10

fc_layer_1_size=120
fc_layer_2_size=84
fc_last_layer=10

In [13]:
def create_weights(shape):
    return tf.Variable(tf.truncated_normal(shape,stddev=0.05))

In [14]:
def create_biases(size):
    return tf.Variable(tf.constant(0.05,shape=[size]))

In [15]:
def create_convolutional_layer(input,
                        num_input_channels,
                        conv_filter_size,
                        num_filters):
    # we shall define the weights that will be trained using create_weights function
    weights=create_weights(shape=[conv_filter_size,conv_filter_size,num_input_channels,num_filters])
    ## We create biases using the create_biases function. These are also trained
    biases=create_biases(num_filters)
    
    ## Creating the convolutional layer
    layer=tf.nn.conv2d(input=input,
                       filter=weights,
                       strides=[1,1,1,1],
                       padding='SAME')
    layer+=biases
    #order of relu and maxpooling doesn't matter in terms of answer, but it matters in computational efficiency
    #max-pooling-> relu is more time effiecient
    #We shall be using max-pooling
    layer=tf.nn.max_pool(value=layer,
                         ksize=[1,2,2,1],
                         strides=[1,2,2,1],
                         padding='SAME')
    ## output of pooling is fed to Relu which is the activation function for us
    layer=tf.nn.relu(layer)
    return layer
                                

In [16]:
def create_flatten_layer(layer):
    #we know that the shape of the layer will be  [batch_size img_size img_size num_channels]
    #but lets's get it from the previous layer
    layer_shape=layer.get_shape()
    print(layer_shape)
     ## Number of features will be img_height * img_width* num_channels. But we shall calculate it in place of hard-coding it.
    num_features=layer_shape[1:4].num_elements()
    
    ##Now, we Flatten the layer so we shall have to reshape to num_features
    layer=tf.reshape(layer,[-1,num_features])
    print('shape of the flat layer')
    print(layer.shape)
    
    return layer

In [17]:
def create_fc_layer(input,
                    num_input,
                    num_outputs,
                    use_relu=True):
    #Let's define trainable weights and biases
    weights=create_weights(shape=[num_input,num_outputs])
    biases=create_biases(num_outputs)
    
    #Fully Connected layer takes input x and produces wx+b.since, these are matrices, we use matmul function in Tensorflow
    layer=tf.matmul(input,weights)+biases
    if use_relu:
        layer=tf.nn.relu(layer)
    print('shape of the fully connected layer')
    print(layer.shape)
    return layer

In [18]:
layer_conv1=create_convolutional_layer(input=x,
                                      num_input_channels=num_channels,
                                      conv_filter_size=filter_size_conv1,
                                      num_filters=num_filters_conv1)

layer_conv2=create_convolutional_layer(input=layer_conv1,
                                      num_input_channels=num_filters_conv1,
                                      conv_filter_size=filter_size_conv2,
                                      num_filters=num_filters_conv2)

layer_flat=create_flatten_layer(layer_conv2)

layer_fc1=create_fc_layer(input=layer_flat,
                         num_input=layer_flat.get_shape()[1:4].num_elements(),
                         num_outputs=fc_layer_1_size,
                         use_relu=True)


layer_fc2=create_fc_layer(input=layer_fc1,
                         num_input=fc_layer_1_size,
                         num_outputs=fc_layer_2_size,
                         use_relu=True)


layer_fc3=create_fc_layer(input=layer_fc2,
                         num_input=fc_layer_2_size,
                         num_outputs=number_classes,
                         use_relu=False)
y_pred=tf.nn.softmax(layer_fc3,name='y_pred')

y_pred_cls=tf.argmax(y_pred,axis=1)

session.run(tf.global_variables_initializer())

cross_entropy=tf.nn.softmax_cross_entropy_with_logits(logits=layer_fc3,
                                                         labels=y_true)

cost=tf.reduce_mean(cross_entropy)

optimizer=tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)

correct_prediction=tf.equal(y_pred_cls,y_true_cls)

accuracy=tf.reduce_mean(tf.cast(correct_prediction,tf.float32))

session.run(tf.global_variables_initializer())

(?, 8, 8, 10)
shape of the flat layer
(?, 640)
shape of the fully connected layer
(?, 120)
shape of the fully connected layer
(?, 84)
shape of the fully connected layer
(?, 10)


In [19]:
def show_progress(epoch, feed_dict_train, feed_dict_validate, val_loss):
    acc = session.run(accuracy, feed_dict=feed_dict_train)
    val_acc = session.run(accuracy, feed_dict=feed_dict_validate)
    msg = "Training Epoch {0} --- Training Accuracy: {1:>6.1%}, Validation Accuracy: {2:>6.1%},  Validation Loss: {3:.3f}"
    print(msg.format(epoch + 1, acc, val_acc, val_loss))

In [20]:
total_iterations = 0
saver=tf.train.Saver()

In [22]:
data_Train=DatasetHandler(train_Data,train_Labels)
data_Val=DatasetHandler(val_data,val_label)
def train (num_iteration):
    global total_iterations
    
    for i in range(total_iterations,total_iterations+num_iteration):
        
        x_batch,y_true_batch=data_Train.next_batch(batch_size)
      
        #print(data_Val._index_in_epoch)
        feed_dict_tr={x:x_batch,
                      y_true:y_true_batch}
        
        
        session.run(optimizer,feed_dict=feed_dict_tr)
        
        if i % int(data_Train._num_examples/batch_size) == 0:
            epoch = int(i / int(data_Train._num_examples/batch_size))
            x_valid_batch,y_valid_batch=data_Val.next_batch(val_label.shape[0])
            feed_dict_val={x:x_valid_batch,
                                y_true:y_valid_batch}
            val_loss = session.run(cost, feed_dict=feed_dict_val)           
            show_progress(epoch, feed_dict_tr, feed_dict_val, val_loss)
    total_iterations+=num_iteration

train(num_iteration=96000)
        
        

Training Epoch 2 --- Training Accuracy:  52.0%, Validation Accuracy:  42.0%,  Validation Loss: 1.635
Training Epoch 3 --- Training Accuracy:  52.0%, Validation Accuracy:  47.0%,  Validation Loss: 1.503
Training Epoch 4 --- Training Accuracy:  58.0%, Validation Accuracy:  50.2%,  Validation Loss: 1.419
Training Epoch 5 --- Training Accuracy:  58.0%, Validation Accuracy:  51.7%,  Validation Loss: 1.373
Training Epoch 6 --- Training Accuracy:  60.0%, Validation Accuracy:  53.0%,  Validation Loss: 1.336
Training Epoch 7 --- Training Accuracy:  60.0%, Validation Accuracy:  53.9%,  Validation Loss: 1.307
Training Epoch 8 --- Training Accuracy:  58.0%, Validation Accuracy:  55.0%,  Validation Loss: 1.280
Training Epoch 9 --- Training Accuracy:  60.0%, Validation Accuracy:  55.9%,  Validation Loss: 1.257
Training Epoch 10 --- Training Accuracy:  60.0%, Validation Accuracy:  56.5%,  Validation Loss: 1.238
Training Epoch 11 --- Training Accuracy:  62.0%, Validation Accuracy:  57.1%,  Validation 

KeyboardInterrupt: 