<a href="https://colab.research.google.com/github/carloidangelo/ECSE-415/blob/main/A3_Q1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read by later cells
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
import cv2
import numpy as np

%matplotlib inline

# Base directory on the mounted Google Drive; data file names are appended to this prefix
path = '/content/drive/My Drive/ECSE415/Assignments/A3/'

## Image Classification using RF and SVM

In [None]:
# function takes in a list of 2D images as input
# function delivers list of HoG features as output.
def findHOG(img_list, img_size=(64, 64), cell_size=(8, 8), block_size=(4, 4), nbins=4):
  """Compute one HoG feature vector per image.

  Args:
    img_list: iterable of image arrays; each image is cast to uint8 before
      the descriptor is computed.
    img_size: (h, w) of the images in pixels. The HoG window is this size
      cropped down to a multiple of the cell size.
    cell_size: (h, w) of one cell in pixels.
    block_size: (h, w) of one block in cells.
    nbins: number of orientation bins.

  Returns:
    2D array with one row per image, each row being that image's flattened
    HoG descriptor. An empty input yields an array with zero rows.
  """
  # create HoG Object
  # all arguments should be given in terms of number of pixels, in (w, h) order
  hog = cv2.HOGDescriptor(_winSize=(img_size[1] // cell_size[1] * cell_size[1],
                                    img_size[0] // cell_size[0] * cell_size[0]),
                          _blockSize=(block_size[1] * cell_size[1],
                                      block_size[0] * cell_size[0]),
                          _blockStride=(cell_size[1], cell_size[0]),
                          _cellSize=(cell_size[1], cell_size[0]),
                          _nbins=nbins)

  # iterate over the images directly; there is no need to copy the whole
  # list into a new array just to learn how many images it holds
  features = [hog.compute(np.asarray(img).astype(np.uint8)).reshape(1, -1)
              for img in img_list]

  # np.vstack rejects an empty list, so build the zero-row result explicitly
  if not features:
    return np.empty((0, hog.getDescriptorSize()), dtype=np.float32)
  return np.vstack(features)

In [None]:
# function calculates classification accuracy as a percentage
def getAccuracy(list1, list2, listSize=None):
  """Return the percentage of positions at which two label sequences agree.

  Args:
    list1: first indexable sequence of labels (e.g. predictions).
    list2: second indexable sequence of labels (e.g. ground truth).
    listSize: number of leading positions to compare. When omitted, both
      sequences must have the same length and every position is compared.

  Returns:
    Accuracy as a float in [0, 100].

  Raises:
    ValueError: if listSize is omitted and the lengths differ, or if the
      number of positions to compare is not positive.
    IndexError: if listSize exceeds the length of either sequence.
  """
  if listSize is None:
    if len(list1) != len(list2):
      raise ValueError("list1 and list2 must have the same length when listSize is omitted")
    listSize = len(list1)
  # accuracy over zero samples is undefined; fail clearly instead of dividing by zero
  if listSize <= 0:
    raise ValueError("listSize must be a positive number of samples")

  matches = sum(1 for i in range(listSize) if list1[i] == list2[i])
  return (matches / listSize) * 100

In [None]:
image_size = (128,128) # all images of size 128×128
image_size_new = (64,64) # must resize all images to this size

# open the archive once and close it again as soon as the arrays are read
with np.load(path + 'flower_subset.npz') as flower_data:
  train_images = flower_data['train_images']
  train_labels = flower_data['train_labels']
  test_images = flower_data['test_images']
  test_labels = flower_data['test_labels']

# derive the sample counts from the data instead of hard-coding them
train_image_amount = len(train_images)
test_image_amount = len(test_images)
assert len(train_labels) == train_image_amount, "train images/labels count mismatch"
assert len(test_labels) == test_image_amount, "test images/labels count mismatch"


def _resize_and_normalize(images, size):
  """Resize every image to `size` and min-max normalize it to [0, 255]."""
  prepared = []
  for img in images:
    # interpolation must be passed by keyword: the positional slots after
    # dsize are dst, fx, fy, so a positional flag never reaches `interpolation`
    resized = cv2.resize(img, size, interpolation=cv2.INTER_AREA)
    # normalize images [0,255]
    prepared.append(cv2.normalize(resized, None, 0, 255, cv2.NORM_MINMAX))
  return prepared


# resize training and test images to 64 × 64
train_images_resized = _resize_and_normalize(train_images, image_size_new)
test_images_resized = _resize_and_normalize(test_images, image_size_new)

# find HoG features of images
hog_train = findHOG(train_images_resized)
hog_test = findHOG(test_images_resized)

# SVM
# create non-linear SVM classifier with RBF kernel
# gamma='auto', C=1
clf_A_1 = svm.SVC(kernel = 'rbf', gamma='auto', C=1., random_state = 15)
# gamma='scale', C=10
clf_S_10 = svm.SVC(kernel = 'rbf', gamma='scale', C=10., random_state = 15)

# training
clf_A_1.fit(hog_train, train_labels)
clf_S_10.fit(hog_train, train_labels)

# predict labels of test images
predicted_labels_A_1 = clf_A_1.predict(hog_test)
predicted_labels_S_10 = clf_S_10.predict(hog_test)

# calculate classification accuracy
acc_A_1 = getAccuracy(predicted_labels_A_1, test_labels, test_image_amount)
print("Hyperparameters: gamma='auto', C=1")
print("Classification Accuracy: " + str(acc_A_1) + " %")
acc_S_10 = getAccuracy(predicted_labels_S_10, test_labels, test_image_amount)
print("Hyperparameters: gamma='scale', C=10")
print("Classification Accuracy: " + str(acc_S_10) + " %")

Hyperparameters: gamma='auto', C=1
Classification Accuracy: 11.11111111111111 %
Hyperparameters: gamma='scale', C=10
Classification Accuracy: 63.33333333333333 %


In [None]:
# RF
# Two random forest classifiers are compared; both split on entropy and
# share the same fixed seed so the run is repeatable.

def _show_accuracy(header, accuracy):
  """Print a hyperparameter header followed by the accuracy line."""
  print(header)
  print("Classification Accuracy: " + str(accuracy) + " %")

# small, shallow forest: n_estimators=10, max_depth=5, criterion='entropy'
clf_10_5 = RandomForestClassifier(criterion='entropy', random_state = 15,
                                  n_estimators=10, max_depth=5)
# larger, deeper forest: n_estimators=50, max_depth=10, criterion='entropy'
clf_50_10 = RandomForestClassifier(criterion='entropy', random_state = 15,
                                   n_estimators=50, max_depth=10)

# fit both forests on the training HoG features
for forest in (clf_10_5, clf_50_10):
  forest.fit(hog_train, train_labels)

# label the test images with each forest
predicted_labels_10_5 = clf_10_5.predict(hog_test)
predicted_labels_50_10 = clf_50_10.predict(hog_test)

# report classification accuracy for each configuration
acc_10_5 = getAccuracy(predicted_labels_10_5, test_labels, test_image_amount)
_show_accuracy("Hyperparameters: n_estimators=10, max_depth=5", acc_10_5)
acc_50_10 = getAccuracy(predicted_labels_50_10, test_labels, test_image_amount)
_show_accuracy("Hyperparameters: n_estimators=50, max_depth=10", acc_50_10)

Hyperparameters: n_estimators=10, max_depth=5
Classification Accuracy: 33.33333333333333 %
Hyperparameters: n_estimators=50, max_depth=10
Classification Accuracy: 53.333333333333336 %


The best SVM configuration (gamma='scale', C=10) reaches a classification accuracy of 63.3%, while the best RF configuration (n_estimators=50, max_depth=10) reaches 53.3%. Therefore, in this case (random_state = 15), the SVM classifier provides better results than the RF classifier.

In [None]:
# Comparison of SVM and RF
# The seeds are listed once so the number of runs and the divisor used for
# averaging can never drift apart.
random_states = list(range(0, 1000, 200))
num_random_states = len(random_states)
acc_SVM_list = []
acc_RF_list = []
for seed in random_states:
  # create classifiers
  # NOTE(review): scikit-learn documents SVC's random_state as ignored unless
  # probability=True, so the SVM is expected to give the same result for
  # every seed - confirm against the installed version.
  clf_SVM = svm.SVC(kernel = 'rbf', gamma='scale', C=10., random_state = seed)
  clf_RF = RandomForestClassifier(n_estimators=50, max_depth=10, criterion='entropy', random_state = seed)

  # training
  clf_SVM.fit(hog_train, train_labels)
  clf_RF.fit(hog_train, train_labels)

  # predict labels of test images
  predicted_labels_SVM = clf_SVM.predict(hog_test)
  predicted_labels_RF = clf_RF.predict(hog_test)

  # calculate classification accuracy
  acc_SVM = getAccuracy(predicted_labels_SVM, test_labels, test_image_amount)
  acc_SVM_list.append(acc_SVM)
  acc_RF = getAccuracy(predicted_labels_RF, test_labels, test_image_amount)
  acc_RF_list.append(acc_RF)

# average accuracy over all seeds
acc_SVM_set = sum(acc_SVM_list)/len(acc_SVM_list)
print("Classification Accuracy SVM: " + str(acc_SVM_set) + " %")
acc_RF_set = sum(acc_RF_list)/len(acc_RF_list)
print("Classification Accuracy RF: " + str(acc_RF_set) + " %")

Classification Accuracy SVM: 63.33333333333333 %
Classification Accuracy RM: 46.22222222222222 %


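These results show that the SVM classifier is more robust to the change in random state than the RF classifier: the SVM's average accuracy over the five seeds stays at 63.3%, identical to the single-seed result, while the RF's average drops to 46.2%. Note that `SVC` only uses `random_state` when `probability=True`, so the SVM is deterministic here by construction, whereas each random forest draws different bootstrap samples and feature subsets depending on the seed.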