In [1]:
import pickle
import cv2 as cv
import numpy as np
from scipy.cluster.vq import kmeans, vq
from sklearn.preprocessing import StandardScaler
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score

In [2]:
# ! pip install opencv-python==3.4.2.16
# ! pip install opencv-contrib-python==3.4.2.16



In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

In [5]:
dict1 = unpickle('/content/drive/MyDrive/Visual Recognition/cifar-10-batches-py/data_batch_1')
dict2 = unpickle('/content/drive/MyDrive/Visual Recognition/cifar-10-batches-py/data_batch_2')
dict3 = unpickle('/content/drive/MyDrive/Visual Recognition/cifar-10-batches-py/data_batch_3')
dict4 = unpickle('/content/drive/MyDrive/Visual Recognition/cifar-10-batches-py/data_batch_4')
dict5 = unpickle('/content/drive/MyDrive/Visual Recognition/cifar-10-batches-py/data_batch_5')

In [6]:
arr = np.array(dict1.get(b'data'))
arr = np.append(arr, dict2.get(b'data'), axis = 0)
arr = np.append(arr, dict3.get(b'data'), axis = 0)
arr = np.append(arr, dict4.get(b'data'), axis = 0)
arr = np.append(arr, dict5.get(b'data'), axis = 0)

In [7]:
# arr = np.array(dict.get(b'data'))
arr = np.array(arr)

In [8]:
# extractor = cv.xfeatures2d.SIFT_create()
extractor = cv.xfeatures2d.SURF_create()

def features(image, extractor):
    keypoints, descriptors = extractor.detectAndCompute(image, None)
    return [keypoints, descriptors]

In [9]:
descriptor_list = []
for i in range(len(arr)) : 
    img = arr[i].reshape(32, 32, 3)
    gray_img = cv.cvtColor(img, cv.COLOR_RGB2GRAY)
    gray_img = cv.resize(gray_img, (50, 50), interpolation = cv.INTER_AREA)
    keypoints, descriptors = features(gray_img, extractor)
    descriptor_list.append((i, descriptors))

In [10]:
descriptors = descriptor_list[0][1]
for image_path, descriptor in descriptor_list[1:] :
    if descriptor is not None :
        descriptors = np.vstack((descriptors, descriptor))

In [11]:
descriptors.shape

(302597, 64)

In [12]:
descriptors_float = descriptors.astype(np.float32)

In [13]:
k = 300
voc, variance = kmeans(descriptors_float, k, 1)
# voc i.e. the vocabulary is now our codebook

In [14]:
# Creating the training data features using vq which maps all the words matching from the vocabulary
im_features=np.zeros((len(arr), k), dtype = np.float32)
for i in range(len(arr)):
    if descriptor_list[i][1] is not None :
        words, distance = vq(descriptor_list[i][1], voc)
        for w in words :
            im_features[i][w]+=1

In [15]:
stdslr = StandardScaler().fit(im_features)
im_features = stdslr.transform(im_features)

In [16]:
# Getting all the labels for all the images of the training data
y_train = np.array(dict1.get(b'labels'))
y_train = np.append(y_train, dict2.get(b'labels'), axis = 0)
y_train = np.append(y_train, dict3.get(b'labels'), axis = 0)
y_train = np.append(y_train, dict4.get(b'labels'), axis = 0)
y_train = np.append(y_train, dict5.get(b'labels'), axis = 0)
print(y_train.shape)

(50000,)


In [17]:
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

clf = LogisticRegression(n_jobs = -1, max_iter = 3000)
# clf=LinearSVC(max_iter = 80000)
# clf=LinearSVC()
# clf = SVC(max_iter = 80000)
# clf = KNeighborsClassifier(n_neighbors = 10, n_jobs = -1)
clf.fit(im_features, np.array(y_train))

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=3000,
                   multi_class='auto', n_jobs=-1, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [18]:
# Loading the Test Data
dict_test = unpickle('/content/drive/MyDrive/Visual Recognition/cifar-10-batches-py/test_batch')
arr_test = np.array(dict_test.get(b'data'))
arr_test.shape

(10000, 3072)

In [19]:
des_list_test=[]

for i in range(len(arr_test)) : 
    img = arr_test[i].reshape(32, 32, 3)
    gray_img = cv.cvtColor(img, cv.COLOR_RGB2GRAY)
    gray_img = cv.resize(gray_img, (50, 50), interpolation = cv.INTER_AREA)
    keypoints_test, descriptors_test = features(gray_img, extractor)
    des_list_test.append((i, descriptors_test))

In [20]:
# Matching the test features with the vocabulary obtained earlier using KMeans withcodebook size of 'k'
test_features = np.zeros((len(arr_test), k), dtype ="float32")
for i in range(len(arr_test)):
    if des_list_test[i][1] is not None :
      words,distance = vq(des_list_test[i][1].astype(float), voc)
      for w in words:
          test_features[i][w]+=1

In [21]:
test_features = stdslr.transform(test_features)

In [22]:
predict_label = clf.predict(test_features)

In [23]:
df = pd.DataFrame()
df['Predicted'] = predict_label
df['Actual'] = dict_test.get(b'labels')

In [24]:
clf.score(test_features, dict_test.get(b'labels'))

0.2452

In [25]:
accuracy=accuracy_score(dict_test.get(b'labels'), predict_label)
print('Accuracy =', accuracy)

Accuracy = 0.2452


In [26]:
df.tail(50)

Unnamed: 0,Predicted,Actual
9950,7,3
9951,8,0
9952,7,5
9953,3,7
9954,0,0
9955,9,8
9956,6,0
9957,0,0
9958,5,9
9959,3,2
