<a href="https://colab.research.google.com/github/joeragaa/Image-classification/blob/main/cvision.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Image classification on the CIFAR100 dataset
This notebook compares the performance of multiple classifiers on the task of image classification with the CIFAR100 dataset. 
The classifiers used are SVM, KNN, Kmeans and CNN.
The KNN and Kmeans classifiers were built using NumPy and are imported from a custom helper file. The SVM classifier comes from Scikit-learn, and lastly the convolutional neural network model was built and trained using TensorFlow.

In [None]:
# Mount Google Drive at /content/drive so the helper module can be imported later on.
# NOTE(review): nothing visible in this notebook adds a Drive folder to sys.path —
# confirm how `import helper` is expected to resolve on a fresh kernel.
from google.colab import drive
drive.mount('/content/drive')
  

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# The first classifier used is the CNN.
import tensorflow as tf
# pyplot is the module conventionally aliased as `plt`; the previous
# `import matplotlib as plt` bound the top-level package instead.
import matplotlib.pyplot as plt

# Training hyperparameters
batch_size = 50
no_epochs = 50
validation_split = 0.2  # fraction of the training data passed to fit() as validation_split
verbosity = 1

# Load the CIFAR-100 dataset
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar100.load_data()

# Scale values to the 0-1 range
X_train = X_train / 255
X_test = X_test / 255

# Build the convolutional network:
# two stages of convolution and max pooling, followed by a hidden layer with
# 256 nodes and a 50% dropout to prevent overfitting, followed by an output
# layer with 100 nodes representing the 100 classes of the dataset.
model = tf.keras.Sequential(
    [
     tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation="relu", input_shape=(32, 32, 3)),
     tf.keras.layers.MaxPooling2D((2, 2), strides=2),

     tf.keras.layers.Conv2D(64, (3, 3), padding='same', activation="relu"),
     tf.keras.layers.MaxPooling2D((2, 2), strides=2),

     tf.keras.layers.Flatten(),
     tf.keras.layers.Dense(256, activation="relu"),
     tf.keras.layers.Dropout(0.5),
     tf.keras.layers.Dense(100, activation="softmax")
    ]
)
# summary() prints the table itself and returns None, so it is not wrapped
# in print() (which would add a stray "None" line).
model.summary()
model.compile(optimizer='adam',
              # The output layer already applies softmax, so the loss receives
              # probabilities rather than logits: from_logits must be False.
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy']
              )

# Train the model
history = model.fit(X_train, y_train,
            batch_size=batch_size,
            epochs=no_epochs,
            verbose=verbosity,
            validation_split=validation_split)

# Save the trained model
model.save("cnn_model.h5")

# Evaluate on the held-out test set; score is [loss, accuracy]
score = model.evaluate(X_test, y_test, verbose=0)
print(f'Test loss: {score[0]} / Test accuracy: {score[1]}')








Downloading data from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 32, 32, 32)        896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 16, 16, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 16, 16, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 8, 8, 64)         0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 4096)              0         
                                           

  return dispatch_target(*args, **kwargs)


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test loss: 4.246292591094971 / Test accuracy: 0.350600004196167


In [None]:
# For the classifiers that follow, HOG features are extracted first and each
# classifier is then trained on the produced features.
# The feature vectors are loaded into pandas dataframes and saved to csv files
# for further computation outside the notebook if desired.
import tensorflow as tf
import pandas as pd
from skimage.feature import hog

dataset = tf.keras.datasets.cifar100
(x_train, y_train), (x_test, y_test) = dataset.load_data()

# One HOG descriptor per image, same settings for both splits.
# NOTE(review): newer scikit-image releases replace `multichannel` with
# `channel_axis` — confirm against the installed version.
x_train_features = [
    hog(image, orientations=9, pixels_per_cell=(4, 4),
        cells_per_block=(2, 2), visualize=False, multichannel=True)
    for image in x_train
]
x_test_features = [
    hog(image, orientations=9, pixels_per_cell=(4, 4),
        cells_per_block=(2, 2), visualize=False, multichannel=True)
    for image in x_test
]

# Training split: features plus a 'label' column, written to csv
dataframe = pd.DataFrame(x_train_features)
dataframe['label'] = y_train
dataframe.to_csv('cifar100_train_hog.csv')

# Test split: same layout, separate file
dataframe_test = pd.DataFrame(x_test_features)
dataframe_test['label'] = y_test
dataframe_test.to_csv('cifar100_test_hog.csv')

In [None]:
from sklearn.model_selection import train_test_split
import helper
from sklearn.metrics import accuracy_score, confusion_matrix
from skimage.feature import hog
from sklearn import datasets
from sklearn import svm
import numpy as np
import tensorflow as tf

# Reload CIFAR-100 and replace the raw images with their HOG descriptors.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar100.load_data()

# NOTE(review): newer scikit-image releases replace `multichannel` with
# `channel_axis` — confirm against the installed version.
X_train = np.array([
    hog(image, orientations=9, pixels_per_cell=(4, 4),
        cells_per_block=(2, 2), visualize=False, multichannel=True)
    for image in X_train
])
X_test = np.array([
    hog(image, orientations=9, pixels_per_cell=(4, 4),
        cells_per_block=(2, 2), visualize=False, multichannel=True)
    for image in X_test
])

# KNN classifier from the custom helper module, with k=5, scored on the test split.
knn_classifier = helper.knn(x_train=X_train, y_train=y_train, k=5)
pred = knn_classifier.predict(x_test=X_test)
print(f"knn accuracy: {accuracy_score(y_test, pred)}")


knn accuracy: 0.2014


In [None]:
# Kmeans classifier from the custom helper module, fitted on the HOG features.
# NOTE(review): the positional 100 is presumably the number of clusters
# (one per CIFAR-100 class) — confirm against helper.kmeans.
kmeans_classifier = helper.kmeans(X_train, y_train, 100, iter=50)

# Predictions and labels are both flattened before being compared.
kmeans_pred = kmeans_classifier.predict(x_test=X_test).flatten()
kmeans_accuracy = accuracy_score(y_test.flatten(), kmeans_pred)
print(f"kmeans accuracy: {kmeans_accuracy}")


kmeans accuracy: 0.1134


In [None]:
# Linear SVM from scikit-learn, trained on the HOG features.
svm_classifier = svm.LinearSVC(C=5,max_iter=100)
# Flatten the labels before fitting, the same way y_test is flattened for
# scoring below: passing the unflattened label array made scikit-learn emit
# a column_or_1d warning.
svm_classifier.fit(X_train, y_train.flatten())
svm_predictions = svm_classifier.predict(X_test)
print(f"svm accuracy: {accuracy_score(y_test.flatten(), svm_predictions)}")


  y = column_or_1d(y, warn=True)


svm accuracy: 0.1541


