# CIFAR-10 Image Classification 

## References
### https://towardsdatascience.com/exploratory-data-analysis-ideas-for-image-classification-d3fc6bbfb2d2

# Import Libraries
import warnings
warnings.filterwarnings('ignore')
import os
import matplotlib.pyplot as plt
import time
import calendar
from six.moves import cPickle as pickle
import numpy as np
import cv2
from skimage.feature import hog
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix
from sklearn import svm
from sklearn.metrics import classification_report,accuracy_score
from math import ceil
import plotly.express as px

In [None]:
# General Parameters

# Image side length in pixels, colour channels per image, number of classes
imageSize, channels, classes = 32, 3, 10

# Number of images in the training set and in the test set
trainingDataSize, testDataSize = 50000, 10000

# Pickled batch files that together make up the training set
trainingDataFiles = (
    './dataset/cifar-10-batches-py/data_batch_1',
    './dataset/cifar-10-batches-py/data_batch_2',
    './dataset/cifar-10-batches-py/data_batch_3',
    './dataset/cifar-10-batches-py/data_batch_4',
    './dataset/cifar-10-batches-py/data_batch_5',
)

# Pickled batch file holding the test set
testDataFile = './dataset/cifar-10-batches-py/test_batch'

In [None]:
def loadData(filename):
    '''
    Load one pickled data file and return its images and labels

    Parameters:
    -----------
    filename: string
        The name of the file containing the data to load

    Returns:
    --------
    theSet['data']:     array of images
    theSet['labels']:   array of labels

    Raises:
    -------
    OSError:    if the file cannot be opened
    KeyError:   if the unpickled object has no 'data' or 'labels' entry
    '''
    # The context manager closes the file even when unpickling fails,
    # which the previous explicit open()/close() pair did not guarantee.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(filename, 'rb') as f:
        theSet = pickle.load(f, encoding='latin1')

    return theSet['data'], theSet['labels']

In [None]:
def convertImages(origImages):
    '''
    Turn flat CIFAR-10 rows into images of size imageSize X imageSize X channels

    Parameters:
    -----------
    origImages: array
        array of images in the CIFAR-10 format (one flat row per image)

    Returns:
    --------
    images:     array of images, each with the channel axis last
    '''
    # First view every row as (channel, row, column) ...
    channelFirst = np.reshape(origImages, (-1, channels, imageSize, imageSize))

    # ... then move the channel axis to the end: (image, row, column, channel)
    return channelFirst.transpose(0, 2, 3, 1)

In [None]:
def loadTrainingData(filenames):
    '''
    Read every training batch file and stack the results into two arrays

    Parameters:
    -----------
    filenames: array of string
        The names of the files containing the training batches

    Returns:
    --------
    trainingImages: array of the training set images
    trainingLabels: array of the training set labels
    '''

    # Reserve room for the whole training set up front
    imagesShape = [trainingDataSize, imageSize, imageSize, channels]
    trainingImages = np.zeros(shape=imagesShape, dtype=np.uint8)
    trainingLabels = np.zeros(shape=[trainingDataSize], dtype=int)

    offset = 0
    for batchFile in filenames:
        rawBatch, batchLabels = loadData(batchFile)
        batchImages = convertImages(rawBatch)

        # Copy this batch into the next free slice of the output arrays
        stop = offset + len(batchImages)
        trainingImages[offset:stop, :] = batchImages
        trainingLabels[offset:stop] = batchLabels
        offset = stop

    return trainingImages, trainingLabels

In [None]:
def loadTestData(filename):
    '''
    Read the test batch file and convert its images

    Parameters:
    -----------
    filename: string
        The name of the file containing the test data

    Returns:
    --------
    testImages: array of images of the test data
    testLabels: array of labels of the test data
    '''

    rawImages, testLabels = loadData(filename)

    # Labels are passed through unchanged; only the images need converting
    return convertImages(rawImages), testLabels

In [None]:
def currentTime():
    '''
    Give the current time as whole seconds since the epoch
    Used to measure how much time each phase took

    Returns:
    --------
    the current time in seconds since the epoch
    '''

    # Take the current UTC time, then convert it back to a timestamp
    utcNow = time.gmtime()
    return calendar.timegm(utcNow)

In [None]:
# Mount Google Drive at /content/Drive (Colab-only helper).
# force_remount=True is passed so the mount is redone on every run of this cell.
from google.colab import drive
drive.mount('/content/Drive', force_remount=True)

Mounted at /content/Drive


In [None]:
# Create the dataset directory, then unpack the CIFAR-10 archive from Drive into it.
# NOTE(review): the recorded output of this cell shows tar could not open the
# archive at this path - confirm where the archive lives on the mounted Drive.
!mkdir -p "/content/dataset"
!tar -xzf "/content/Drive/SharedDrives/CIFAR-10/cifar-10-python.tar.gz" -C "/content/dataset"

tar (child): /content/Drive/SharedDrives/CIFAR-10/cifar-10-python.tar.gz: Cannot open: No such file or directory
tar (child): Error is not recoverable: exiting now
tar: Child returned status 2
tar: Error is not recoverable: exiting now


In [1]:
# Class values: label index -> class name
label = dict(enumerate((
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
)))

In [None]:
# Load every training batch and report how long it took.
# (The trailing comma after the first print was a Python 2 idiom; in Python 3
# it only built a throw-away tuple, so it has been removed.)
print("Loading the training set...")
tik = currentTime()
trainingImages, trainingLabels = loadTrainingData(trainingDataFiles)
print("Took: " + str(currentTime() - tik) + " sec")

Loading the training set...


FileNotFoundError: ignored

## 5 images per image class

In [None]:
# One empty list per class index (0-9); each one is a distinct object
(list_0, list_1, list_2, list_3, list_4,
 list_5, list_6, list_7, list_8, list_9) = ([] for _ in range(10))

In [None]:
# Lookup table from label value to the list collecting that class
listForLabel = {
    0: list_0, 1: list_1, 2: list_2, 3: list_3, 4: list_4,
    5: list_5, 6: list_6, 7: list_7, 8: list_8, 9: list_9,
}

# Append every training image to the list of its class;
# labels outside 0-9 are skipped
for sample, sampleLabel in zip(trainingImages, trainingLabels):
  target = listForLabel.get(int(sampleLabel))
  if target is not None:
    target.append(sample)

In [None]:
num = 5

# Keep only the first `num` images of every class
(list_0, list_1, list_2, list_3, list_4,
 list_5, list_6, list_7, list_8, list_9) = (
    bucket[:num]
    for bucket in (list_0, list_1, list_2, list_3, list_4,
                   list_5, list_6, list_7, list_8, list_9)
)

## Average Image

In [None]:
def find_mean_img(full_mat, title):
    '''
    Display the element-wise mean of a collection of images

    Parameters:
    -----------
    full_mat: sequence of images
        The images to average; the mean must reshape to 32 X 32 X 3
    title: string
        Text appended to "Average " in the plot title

    Returns:
    --------
    Nothing; the figure is shown with plt.show()
    '''
    # Integer mean over the first axis (one entry per image), as 32 X 32 X 3
    averaged = np.mean(full_mat, axis=0, dtype=int).reshape((32, 32, 3))

    # NOTE(review): matplotlib documents cmap as ignored for RGB input - confirm it is needed
    plt.imshow(averaged, cmap='Greys_r')
    plt.title(f'Average {title}')
    plt.axis('off')
    plt.show()

In [None]:
# Mean image of the samples held in list_0, titled with the class-0 name
find_mean_img(list_0,label[0])

In [None]:
# Mean image of the samples held in list_1, titled with the class-1 name
find_mean_img(list_1,label[1])

In [None]:
# Mean image of the samples held in list_2, titled with the class-2 name
find_mean_img(list_2,label[2])

In [None]:
# Mean image of the samples held in list_3, titled with the class-3 name
find_mean_img(list_3,label[3])  

In [None]:
# Mean image of the samples held in list_4, titled with the class-4 name
find_mean_img(list_4,label[4])

In [None]:
# Mean image of the samples held in list_5, titled with the class-5 name
find_mean_img(list_5,label[5])

In [None]:
# Mean image of the samples held in list_6, titled with the class-6 name
find_mean_img(list_6,label[6])

In [None]:
# Mean image of the samples held in list_7, titled with the class-7 name
find_mean_img(list_7,label[7])

In [None]:
# Mean image of the samples held in list_8, titled with the class-8 name
find_mean_img(list_8,label[8])

In [None]:
# Mean image of the samples held in list_9, titled with the class-9 name
find_mean_img(list_9,label[9])



```
# This is formatted as code
```

## HOG Feature descriptors

### HOG

In [None]:
def hog_vis(img):
  '''
  Show an image and a per-cell map of one HOG orientation bin

  Computes an OpenCV HOG descriptor for the image, averages the histogram of
  every cell over all blocks that contain it, then displays the image
  followed by a grey-scale map of a single orientation bin.

  Parameters:
  -----------
  img: array
      The image to analyse; img.shape[0] and img.shape[1] are used as its
      height and width
      (assumes a format accepted by cv2.HOGDescriptor.compute - TODO confirm)

  Returns:
  --------
  Nothing; two figures are shown with plt.show()
  '''

  cell_size = (8, 8)  # h x w in pixels
  block_size = (4, 4)  # h x w in cells
  nbins = 9  # number of orientation bins

  # winSize is the size of the image cropped to an multiple of the cell size
  # (OpenCV sizes are given as (width, height), hence index 1 before index 0)
  hog = cv2.HOGDescriptor(_winSize=(img.shape[1] // cell_size[1] * cell_size[1],
                                    img.shape[0] // cell_size[0] * cell_size[0]),
                          _blockSize=(block_size[1] * cell_size[1],
                                      block_size[0] * cell_size[0]),
                          _blockStride=(cell_size[1], cell_size[0]),
                          _cellSize=(cell_size[1], cell_size[0]),
                          _nbins=nbins)

  # Number of whole cells along (rows, columns)
  n_cells = (img.shape[0] // cell_size[0], img.shape[1] // cell_size[1])
  # The flat descriptor is viewed as
  # (blocks along x, blocks along y, block_size[0], block_size[1], bins)
  # and the first two axes are then swapped.
  hog_feats = hog.compute(img)\
                .reshape(n_cells[1] - block_size[1] + 1,
                          n_cells[0] - block_size[0] + 1,
                          block_size[0], block_size[1], nbins) \
                .transpose((1, 0, 2, 3, 4))  # index blocks by rows first
  # hog_feats now contains the gradient amplitudes for each direction,
  # for each cell of its group for each group. Indexing is by rows then columns.

  # Per-cell accumulator for the histograms (one vector of nbins per cell)
  gradients = np.zeros((n_cells[0], n_cells[1], nbins))

  # count cells (border cells appear less often across overlapping groups)
  cell_count = np.full((n_cells[0], n_cells[1], 1), 0, dtype=int)

  # For every cell position inside a block, add that cell's histogram from
  # all blocks onto the matching region of the cell grid and count the hit
  for off_y in range(block_size[0]):
      for off_x in range(block_size[1]):
          gradients[off_y:n_cells[0] - block_size[0] + off_y + 1,
                    off_x:n_cells[1] - block_size[1] + off_x + 1] += hog_feats[:, :, off_y, off_x, :]
          cell_count[off_y:n_cells[0] - block_size[0] + off_y + 1,
                    off_x:n_cells[1] - block_size[1] + off_x + 1] += 1

  # Average gradients
  gradients /= cell_count

  # Preview
  plt.figure()
  plt.imshow(img)
  plt.show()

  # Orientation bin to display.
  # NOTE(review): `bin` shadows the builtin of the same name inside this function.
  # NOTE(review): OpenCV's HOGDescriptor uses unsigned gradients by default, so
  # the bins may span 180 degrees rather than 360 - confirm the formula below.
  bin = 5  # angle is 360 / nbins * direction
  plt.pcolor(gradients[:, :, bin], cmap='Greys')
  plt.gca().invert_yaxis()  # put row 0 at the top, like the image above
  plt.gca().set_aspect('equal', adjustable='box')
  plt.colorbar()
  plt.show()

In [None]:
# HOG preview for the sample at index 0 of list_0, then print the class-0 name
hog_vis(list_0[0])
print(label[0])

In [None]:
# HOG preview for the sample at index 0 of list_1, then print the class-1 name
hog_vis(list_1[0])
print(label[1])

In [None]:
# HOG preview for the sample at index 2 of list_2, then print the class-2 name
hog_vis(list_2[2])
print(label[2])


In [None]:
# HOG preview for the sample at index 2 of list_3, then print the class-3 name
hog_vis(list_3[2])
print(label[3])


In [None]:
# HOG preview for the sample at index 2 of list_4, then print the class-4 name
hog_vis(list_4[2])
print(label[4])

In [None]:
# HOG preview for the sample at index 1 of list_5, then print the class-5 name
hog_vis(list_5[1])
print(label[5])

In [None]:
# HOG preview for the sample at index 0 of list_6, then print the class-6 name
hog_vis(list_6[0])
print(label[6])

In [None]:
# HOG preview for the sample at index 0 of list_7, then print the class-7 name
hog_vis(list_7[0])
print(label[7])

In [None]:
# HOG preview for the sample at index 2 of list_8, then print the class-8 name
hog_vis(list_8[2])
print(label[8])

In [None]:
# HOG preview for the sample at index 0 of list_9, then print the class-9 name
hog_vis(list_9[0])
print(label[9])

## Eigen images

In [None]:
def plot_pca(imageSet):
  '''
  Show the first three principal components of the images as a 3-D scatter plot

  Parameters:
  -----------
  imageSet: array
      2-D array with one flattened image per row; exactly 10 rows are
      expected, row i being coloured with the name of class i

  Returns:
  --------
  Nothing; the figure is displayed with fig.show()
  '''

  pca = PCA()
  components = pca.fit_transform(imageSet)

  # Axis titles: component number plus the percentage of variance it explains.
  # Keys are the column indices as strings, matching the columns of `components`.
  labels = {
      str(i): f"PC {i+1} ({var:.1f}%)"
      for i, var in enumerate(pca.explained_variance_ratio_ * 100)
  }

  # One class name per row of imageSet
  classLabels = [label[i] for i in range(10)]

  # Plot the PCA projection (components 0, 1 and 2). The previous version
  # passed the raw imageSet with x=1, y=2, z=3, i.e. three raw pixel columns,
  # and never used the projection or the axis titles computed above.
  fig = px.scatter_3d(components, x=0, y=1, z=2, color=classLabels, labels=labels)
  fig.show()

In [None]:
def plot_pca_matrix(imageSet):
  '''
  Show pairwise scatter plots of the first four principal components

  Parameters:
  -----------
  imageSet: array
      2-D array with one flattened image per row; 10 rows are expected,
      row i being coloured with the name of class i

  Returns:
  --------
  Nothing; the figure is displayed with fig.show()
  '''

  model = PCA()
  projected = model.fit_transform(imageSet)

  # Axis titles: component number plus the percentage of variance it explains
  axisTitles = {}
  for i, var in enumerate(model.explained_variance_ratio_ * 100):
    axisTitles[str(i)] = f"PC {i+1} ({var:.1f}%)"

  # One class name per row of imageSet
  classLabels = [label[k] for k in range(10)]

  fig = px.scatter_matrix(projected,
                          labels=axisTitles,
                          dimensions=range(4),
                          color=classLabels)
  # Hide the diagonal panels of the matrix
  fig.update_traces(diagonal_visible=False)
  fig.show()

In [None]:
# One row per class, each wide enough for a flattened 32 X 32 X 3 image
imageSet = np.zeros(shape=(10, 32 * 32 * 3), dtype=int)

In [None]:
# One hand-picked sample per class, in class order.
# NOTE(review): classes 5 and 7 use index 2 here while the HOG cells use
# list_5[1] and list_7[0] - confirm whether the same samples were intended.
pickedSamples = (list_0[0], list_1[0], list_2[2], list_3[2], list_4[2],
                 list_5[2], list_6[0], list_7[2], list_8[2], list_9[0])

# Flatten each sample into its row of imageSet
for rowIndex, picked in enumerate(pickedSamples):
  imageSet[rowIndex] = picked.flatten()

In [None]:
# 3-D scatter plot for the ten selected images (one per class)
plot_pca(imageSet)

In [None]:
# Scatter-plot matrix of the principal components for the ten selected images
plot_pca_matrix(imageSet)