In [1]:
import pickle
import cv2 as cv
import numpy as np
from scipy.cluster.vq import kmeans, vq
from sklearn.cluster import MiniBatchKMeans, KMeans
from sklearn.preprocessing import StandardScaler
import pandas as pd
from sklearn.metrics import confusion_matrix,accuracy_score
import warnings
warnings.filterwarnings('ignore')

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# ! pip install opencv-contrib-python==3.4.2.16

In [4]:
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

In [5]:
dict1 = unpickle('/content/drive/MyDrive/Visual Recognition/cifar-10-batches-py/data_batch_1')
dict2 = unpickle('/content/drive/MyDrive/Visual Recognition/cifar-10-batches-py/data_batch_2')
dict3 = unpickle('/content/drive/MyDrive/Visual Recognition/cifar-10-batches-py/data_batch_3')
dict4 = unpickle('/content/drive/MyDrive/Visual Recognition/cifar-10-batches-py/data_batch_4')
dict5 = unpickle('/content/drive/MyDrive/Visual Recognition/cifar-10-batches-py/data_batch_5')

In [6]:
arr = np.array(dict1.get(b'data'))
arr = np.append(arr, dict2.get(b'data'), axis = 0)
arr = np.append(arr, dict3.get(b'data'), axis = 0)
arr = np.append(arr, dict4.get(b'data'), axis = 0)
arr = np.append(arr, dict5.get(b'data'), axis = 0)
arr = np.array(arr)

In [7]:
# Loading the Test Data
dict_test = unpickle('/content/drive/MyDrive/Visual Recognition/cifar-10-batches-py/test_batch')
arr_test = np.array(dict_test.get(b'data'))
arr_test.shape

(10000, 3072)

In [8]:
extractor = cv.xfeatures2d.SURF_create()

def features(image, extractor):
    extractor.setHessianThreshold(400)
    keypoints, descriptors = extractor.detectAndCompute(image, None)
    return keypoints, descriptors

In [9]:
def getDescriptors(arr) : 
    descriptors = []
    
    for i in range(len(arr)) :
        img = arr[i].reshape(32, 32, 3)
        gray_img = cv.cvtColor(img, cv.COLOR_RGB2GRAY) 
        gray_img = cv.resize(gray_img, (160, 160), interpolation = cv.INTER_AREA)
        kp, des = features(gray_img, extractor)
        if des is not None : 
            descriptors.append(des)
            
    descriptors = np.concatenate(descriptors, axis=0)
    descriptors = np.asarray(descriptors)
        
    return descriptors

In [10]:
# descriptors = getDescriptors(arr)
# descriptors_test = getDescriptors(arr_test)
all_descriptors = getDescriptors(np.concatenate((arr, arr_test), axis = 0))

In [11]:
print('Descriptors shape =', all_descriptors.shape)

Descriptors shape = (7999252, 64)


In [12]:
def VLAD(X, visualDictionary) : 
    
    predictedLabels = visualDictionary.predict(X)
    centers = visualDictionary.cluster_centers_
    labels = visualDictionary.labels_
    k = visualDictionary.n_clusters
    
    m,d = X.shape
    V=np.zeros([k,d])
    #computing the differences

    # for all the clusters (visual words)
    for i in range(k):
        # if there is at least one descriptor in that cluster
        if np.sum(predictedLabels==i)>0:
            # add the diferences
            V[i]=np.sum(X[predictedLabels==i,:]-centers[i],axis=0)
    

    V = V.flatten()
    # power normalization, also called square-rooting normalization
    V = np.sign(V)*np.sqrt(np.abs(V))

    # L2 normalization
    V = V/np.sqrt(np.dot(V,V))
    
    return V

In [13]:
def getVLADDescriptors(images, images_lables, visualDic):
    descriptors = []
    labels = []
    
    count = 0
    for image in images : 
        image = image.reshape(32, 32, 3)
        image = cv.cvtColor(image, cv.COLOR_RGB2GRAY)
        image = cv.resize(image, (160, 160), interpolation = cv.INTER_AREA)
        kp, des = features(image, extractor)
        if des is not None : 
            v = VLAD(des, visualDic)
            descriptors.append(v)
            labels.append(images_lables[count])
        count += 1            
            
    descriptors = np.asarray(descriptors)
    return descriptors, labels

In [14]:
k = 20
visDic = MiniBatchKMeans(init='k-means++', n_clusters = k, max_iter=1000, batch_size=1000, n_init=10, max_no_improvement=10, 
                         verbose=0).fit(all_descriptors)
# visDic = KMeans(n_clusters = k, random_state=0).fit(all_descriptors)

In [15]:
y_train = np.array(dict1.get(b'labels'))
y_train = np.append(y_train, dict2.get(b'labels'), axis = 0)
y_train = np.append(y_train, dict3.get(b'labels'), axis = 0)
y_train = np.append(y_train, dict4.get(b'labels'), axis = 0)
y_train = np.append(y_train, dict5.get(b'labels'), axis = 0)
print(y_train.shape)

(50000,)


In [16]:
train_features, train_labels = getVLADDescriptors(arr, y_train, visDic)
test_features, test_labels = getVLADDescriptors(arr_test, dict_test.get(b'labels'), visDic)

In [17]:
print('For Training')
print('Features shape =', len(train_features))
print('Labels shape', len(train_labels))
print('***********************************')
print('For Testing')
print('Features shape =', len(test_features))
print('Labels shape', len(test_labels))

For Training
Features shape = 50000
Labels shape 50000
***********************************
For Testing
Features shape = 10000
Labels shape 10000


In [18]:
stdslr = StandardScaler().fit(train_features)
train_features = stdslr.transform(train_features)
test_features = stdslr.fit_transform(test_features) 

In [19]:
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

clf = LogisticRegression(n_jobs = -1)
# clf = LinearSVC(max_iter = 80000)
# clf=LinearSVC()
# clf = SVC(max_iter = 6000)
# clf = KNeighborsClassifier(n_neighbors = 3, n_jobs = -1)
clf.fit(train_features, np.array(train_labels))

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=-1, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [20]:
# Predicting the labels using the trained model obtained
predict_label = clf.predict(test_features)

In [21]:
accuracy = accuracy_score(test_labels, predict_label)
print(accuracy)

0.3896


In [22]:
df = pd.DataFrame({'True' : test_labels, 'Predicted' : predict_label})
df.head(50)

Unnamed: 0,True,Predicted
0,3,6
1,8,9
2,8,8
3,0,8
4,6,6
5,6,4
6,1,6
7,6,4
8,3,4
9,1,5
