# **The CIFAR-10 dataset**

### The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images.

### The dataset is divided into five training batches and one test batch, each with 10000 images. The test batch contains exactly 1000 randomly-selected images from each class. The training batches contain the remaining images in random order, but some training batches may contain more images from one class than another. Between them, the training batches contain exactly 5000 images from each class. 

### Here are the classes in the dataset:

* airplane

* automobile

* bird

* cat
 
* deer

* dog 

* frog

* horse

* ship

* truck



In [1]:
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from tensorflow.keras.optimizers import SGD
from numpy import mean
from numpy import std
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from tensorflow.keras.utils import plot_model


from keras.datasets import cifar10

In [2]:
# Load train and test dataset

def load_dataset():
  """Load CIFAR-10 and one-hot encode its class labels.

  Prints the shapes of the raw image and label arrays as returned by
  ``cifar10.load_data()`` (before the labels are one-hot encoded).

  Returns:
    Tuple ``(trainX, trainY, testX, testY)`` where the image arrays are
    returned exactly as loaded and the label arrays are one-hot encoded
    with 10 columns.
  """
  (trainX, trainY), (testX, testY) = cifar10.load_data()

  print("Shape of training data:")
  print(trainX.shape)
  print(trainY.shape)
  print("Shape of test data:")
  print(testX.shape)
  print(testY.shape)

  # One-hot encode the integer class labels (0 to 9), e.g. label 1 becomes
  # [0 1 0 0 0 0 0 0 0 0]. The width is pinned to 10 so both splits always
  # line up with the 10-unit softmax output, rather than being inferred
  # separately from whichever labels happen to be present in each array.
  trainY = to_categorical(trainY, num_classes=10)
  testY = to_categorical(testY, num_classes=10)

  return trainX, trainY, testX, testY

In [4]:
# Scale Pixels 
def prep_pixels(train,test):
  """Scale two pixel arrays to float32 values divided by 255.

  Each input is cast to float32 and divided by 255.0, which maps 8-bit
  channel intensities (0-255) onto the range 0-1.

  Args:
    train: training images; must expose ``astype`` (e.g. a NumPy array).
    test: test images; same requirements as ``train``.

  Returns:
    Tuple ``(train_norm, test_norm)`` holding the scaled arrays.
  """
  def _to_unit_range(pixels):
    # cast first so the division is carried out in floating point
    return pixels.astype('float32') / 255.0

  return _to_unit_range(train), _to_unit_range(test)

In [5]:
# define CNN model
def define_model(input_shape=(32,32,3), num_classes=10):
  """Build and compile the CNN classifier.

  The network stacks three convolutional blocks with 32, 64 and 128
  filters. Each block is two 3x3 ReLU convolutions ('same' padding,
  he_uniform initialisation) followed by 2x2 max pooling. The flattened
  features feed a 128-unit ReLU dense layer and a softmax output layer.
  The model is compiled with SGD (learning rate 0.001, momentum 0.9),
  categorical cross-entropy loss and the accuracy metric.

  Side effects: prints the model summary and attempts to write an
  architecture diagram to 'CNN_seq.png'.

  Args:
    input_shape: shape of one input image; defaults to (32,32,3).
    num_classes: number of units in the softmax output layer; defaults
      to 10.

  Returns:
    The compiled ``Sequential`` model.
  """
  model = Sequential()

  # Settings shared by every convolution in the network.
  conv_kwargs = dict(activation='relu', padding='same', kernel_initializer='he_uniform')

  for block_index, filters in enumerate((32, 64, 128)):
    # Only the very first layer declares the input shape.
    first_layer_kwargs = {'input_shape': input_shape} if block_index == 0 else {}
    model.add(Conv2D(filters, (3,3), **conv_kwargs, **first_layer_kwargs))
    model.add(Conv2D(filters, (3,3), **conv_kwargs))
    # No stride is given, so it defaults to the pool size (2).
    model.add(MaxPooling2D((2,2)))

  # Flatten the feature maps before the fully connected classifier head.
  model.add(Flatten())
  model.add(Dense(128 , activation='relu', kernel_initializer='he_uniform'))
  # One output unit per class.
  model.add(Dense(num_classes, activation='softmax'))

  # Stochastic gradient descent; categorical cross-entropy because this is
  # multi-class classification with one-hot targets.
  opt = SGD(learning_rate=0.001, momentum=0.9)
  model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

  model.summary()

  # The diagram is a convenience only: plot_model needs pydot and graphviz,
  # and a missing optional dependency should not prevent building the model.
  try:
    plot_model(model, to_file = 'CNN_seq.png')
  except ImportError as err:
    print('Skipping model diagram (CNN_seq.png): %s' % err)

  return model

In [6]:
# run the test harness for evaluating a model
def run_test_harness(epochs=10, batch_size=32, model_path='final_model.h5'):
  """Train the CNN on CIFAR-10, save it, and report test accuracy.

  Loads the dataset with ``load_dataset``, scales the pixels with
  ``prep_pixels``, builds the network with ``define_model``, fits it on
  the training split, saves the fitted model to ``model_path`` and then
  prints the accuracy measured on the test split.

  Args:
    epochs: number of training epochs; defaults to 10.
    batch_size: mini-batch size used for fitting; defaults to 32.
    model_path: where the fitted model is saved; defaults to
      'final_model.h5'.
  """
  # load dataset
  trainX, trainY, testX, testY = load_dataset()
  # prepare pixel data
  trainX, testX = prep_pixels(trainX, testX)
  # define model
  model = define_model()
  # fit model
  model.fit(trainX, trainY, epochs=epochs, batch_size=batch_size, verbose=2)
  # save model to use in other files (done before evaluation so the trained
  # weights are kept even if evaluation fails)
  model.save(model_path)
  # score the held-out test split; evaluate returns [loss, accuracy] because
  # the model is compiled with the single 'accuracy' metric
  _, test_acc = model.evaluate(testX, testY, verbose=0)
  print('Test accuracy: %.2f%%' % (test_acc * 100.0))


In [7]:
# Entry point: load CIFAR-10, train the CNN and save it to 'final_model.h5'.
run_test_harness()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
Shape of training data:
(50000, 32, 32, 3)
(50000, 1)
Shape of test data:
(10000, 32, 32, 3)
(10000, 1)
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 32, 32, 32)        896       
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 32, 32)        9248      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 16, 16, 32)       0         
 )                                                               
                                                                 
 conv2d_2 (Conv2D)           (None, 16, 16, 64)        18496     
                                                                 
 conv2d_3 (Conv2D)           (None, 16, 16, 64)        36928     
      