In [None]:
import time
import warnings
# Silence every warning category for the remainder of the session.
# NOTE(review): this also hides deprecation and data-conversion warnings
# raised by the libraries used further down.
warnings.simplefilter('ignore')

# Loading the dataset

In [None]:
from keras.datasets import cifar10

# Time the download / cache read of CIFAR-10.
# Only the first (training) tuple is kept; the second tuple is bound to `_`.
start = time.time()
train_split, _ = cifar10.load_data()
end = time.time()
data, labels = train_split
print(f'Dataset loaded in {round(end-start,2)} seconds')

# Bundle images and labels in one dict, sklearn-dataset style
c10 = dict(data=data, target=labels)

# Dataset overview: sample count, per-image shape, number of distinct labels
samples = len(c10['data'])
features_per_sample = c10['data'].shape[1:]
classes = len(set(c10['target'].ravel()))

print(
    f'Samples : {samples}',
    f'Features per sample : {features_per_sample}',
    f'Classes : {classes}',
    sep='\n',
)

Dataset loaded in 1.2725331783294678 seconds
Samples : 50000
Features per sample : (32, 32, 3)
Classes : 10


# Normalization and Flattening

In [None]:
import numpy as np

# Normalization of data between 0 and 1.
# The raw `data` array from the loading cell is used as the input (instead of
# c10['data']) so that re-running this cell does not divide by 255 a second time.
normalized = data.astype('float32') / 255.0

# Flattening of images: one row per image, all remaining axes collapsed
c10['data'] = normalized.reshape(len(normalized), -1)

# Dataset overview after flattening
samples, features_per_sample = c10['data'].shape
print(f'Samples : {samples}')
print(f'Features per sample : {features_per_sample}')

Samples : 50000
Features per sample : 3072


# Extracting HoG (Histogram of Oriented Gradients) features

In [None]:
from skimage.feature import hog
from skimage.color import rgb2gray

hog_features = []

# Compute the HoG descriptor of every image (the whole pass is timed)
start = time.time()

for image in c10['data']:
  # each row is reshaped to (32, 32, 3) before the grayscale conversion
  image = image.reshape((32,32,3))
  gray_scaled_image = rgb2gray(image)
  # finding the hog vector from gray-scaled image.
  # The deprecated `multichannel` keyword is intentionally not passed: the
  # input is already 2-D, and the keyword no longer exists in recent
  # scikit-image releases (it was superseded by `channel_axis`).
  hog_vector = hog(gray_scaled_image,orientations = 9,pixels_per_cell=(8,8),cells_per_block=(2,2),visualize=False)
  hog_features.append(hog_vector)

end = time.time()

print(f'Extraction completed in {round(end-start,2)} seconds')
# Stack the per-image descriptors into a single 2-D feature matrix
c10_hog_features = np.array(hog_features)

print(f'Samples : {c10_hog_features.shape[0]}')
print(f'Features per sample : {c10_hog_features.shape[1]}')

Extraction completed in 44.72910761833191 seconds
Samples : 50000
Features per sample : 324


# Train-Test Split (80:20)

In [None]:
from sklearn.model_selection import train_test_split

# Splitting into training and testing data.
# The labels are flattened to 1-D first: scikit-learn classifiers expect
# y of shape (n_samples,), and a column vector triggers a
# DataConversionWarning on every fit. The split itself is unchanged.
X_train,X_test,y_train,y_test = train_test_split(c10_hog_features,c10['target'].ravel(),train_size=0.8,random_state=42)

print(f'Size of training data : {X_train.shape}')
print(f'Size of testing data : {X_test.shape}')

Size of training data : (40000, 324)
Size of testing data : (10000, 324)


# SVM Classifiers with different kernels

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Kernels to compare; every other SVC setting is left at its default
kernels = ['linear','poly','rbf','sigmoid']

for kernel in kernels:
  # the timed region covers training, prediction and scoring
  start = time.time()
  clf = SVC(kernel=kernel).fit(X_train,y_train)
  y_pred = clf.predict(X_test)
  accuracy = accuracy_score(y_test,y_pred)
  end = time.time()
  print(f'For kernel {kernel}, accuracy : {accuracy} Completed in {round(end-start,2)} seconds')

# Grid Search for hyperparameter tuning on RBF (Radial Basis Function) kernel

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate values for 'C' and 'gamma' (4 x 5 = 20 combinations)
hyperparameters = dict(
    C=[0.1, 1, 10, 100],
    gamma=[0.001, 0.01, 0.1, 1, 10],
)

rbf_clf = SVC(kernel='rbf')

# Exhaustive search over the grid, scored by accuracy with cv=5,
# parallelised with n_jobs=-1
grid_search = GridSearchCV(
    estimator=rbf_clf,
    param_grid=hyperparameters,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Report the winning ('C', 'gamma') pair and its mean cross-validation score
print(f'The ideal hyperparameters are : {grid_search.best_params_}')

print(f'Cross validation accuracy : {grid_search.best_score_}')

# Evaluate the best estimator found by the search on the held-out test split
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test,y_pred)
print(f'Accuracy after hyper-parameter tuning : {accuracy}')