In [11]:
from keras.datasets import cifar10
from matplotlib import pyplot
import cv2 as cv
import numpy as np
import os
from sklearn.cluster import KMeans
from scipy.cluster.vq import *
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import sklearn.metrics as metrics

In [12]:
rdir_path = './newData'

In [13]:
def loadDataset():
    imgs = []
    labels = []
    label = -1
    
    onlyDirs = [f for f in os.listdir(rdir_path) if os.path.isdir(os.path.join(rdir_path, f))]
    
    for directory in onlyDirs:
        label = label + 1
        path = rdir_path+'/'+directory
        onlyfiles = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))]
        
        for file in onlyfiles:
            imgPath = path+'/'+file
            image = cv.imread(imgPath)
            imgs.append(image)
            labels.append(label)
    
    return (imgs,labels)

In [14]:
# (trainX, trainy), (testX, testy) = cifar10.load_data()
dataset = loadDataset()
trainX,testX, trainy, testy = train_test_split(dataset[0], dataset[1], 
                                                train_size=0.75, random_state=42,shuffle = True,stratify = dataset[1])

In [15]:
def getFeatures(images,labels,method=None):

    if method == 'sift':
        descriptor = cv.SIFT_create()
    elif method == 'surf':
        descriptor = cv.SURF_create()

    desc_lst=[]
    labels_lst = []
    cnt = 0
    kpts_lst=[]

    for img in images:
      img = cv.cvtColor(img,cv.COLOR_BGR2GRAY)
      #img = cv.resize(img,(150,150),interpolation=cv.INTER_AREA)
      kpts, desc = descriptor.detectAndCompute(img,None)
      if desc is not None:
        desc_lst.append(desc)
        kpts_lst.append(kpts)
        labels_lst.append(int(labels[cnt]))
      cnt = cnt + 1
    return kpts_lst,desc_lst,labels_lst


In [16]:
kpts_lst,desc_lst,labels_lst = getFeatures(np.concatenate((trainX,testX),axis=0),np.concatenate((trainy,testy)),'sift')



In [17]:
def getDescriptorStack(desc_lst):
  descriptors_stacked = desc_lst[0]
  for desc in desc_lst[1:]:
    descriptors_stacked = np.vstack((descriptors_stacked,desc))
  return descriptors_stacked

In [18]:
descriptors_lst = getDescriptorStack(desc_lst)

In [19]:
#k means with k clusters on descriptors_lst
k = 80
voc,variance = kmeans(descriptors_lst,k,1)

In [20]:
imgfeatures = np.zeros((len(labels_lst),k),"float32")
for i in range(len(labels_lst)):
  words,distance = vq(desc_lst[i],voc)
  for j in words:
    imgfeatures[i][j] +=1

In [21]:
imgfeatures = StandardScaler().fit_transform(imgfeatures)

In [22]:
xtrain,xtest,ytrain,ytest = train_test_split(imgfeatures,labels_lst,test_size=0.1,random_state = 11)

In [23]:
#Using the logistic regression
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(random_state=0).fit(xtrain, ytrain)
ypred = clf.predict(xtest)
metrics.accuracy_score(ytest, ypred)

0.9444444444444444