In [1]:
import matplotlib.pyplot as plt
import cv2
import os
import numpy as np
from sklearn.cluster import KMeans
from sklearn import svm
from sklearn.model_selection import train_test_split
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn import linear_model
from sklearn.metrics import accuracy_score


In [2]:
data_path = './data'
no_clusters = 512

In [3]:
def loadDataset(): #Function to parse through the directory and append the required images and their labels
    imgs = []
    labels = []
    label = -1
    
    directories = [f for f in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, f))]
    
    for directory in directories:
        label = label + 1
        path = data_path+'/'+directory
        files = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))]
        
        for file in files:
            imgPath = path+'/'+file
            imgs.append(imgPath)
            labels.append(label)
    
    return (imgs,labels)


In [4]:
def getImages(imgsPath):
    imgs = []
    for imgName in imgsPath:
        img = cv2.imread(imgName)
        imgs.append(img)
    imgs = np.asarray(imgs,dtype=object)
    return imgs

In [5]:

def getDescriptors(sift, img): #Function to detect the key points and their descriptors
    kp, des = sift.detectAndCompute(img, None)
    return des



In [6]:
def extract(imgs): # Function to extract all the descriptors of images
    desc_list = []
    count = 0
    sift = cv2.SIFT_create()
    for img in imgs:
        desc = getDescriptors(sift,img)
        if(desc is not None):
            count = count+1
            desc_list.append(desc)
    return desc_list,count


In [7]:
def vstackDescriptors(descriptor_list): #Reorganizing the descriptor list
    descriptors = np.array(descriptor_list[0])
    for descriptor in descriptor_list[1:]:
        descriptors = np.vstack((descriptors, descriptor)) 

    return descriptors



In [8]:
def clusterDescriptors(descriptors, no_clusters): #Applying kmeans on the descriptors
    kmeans = KMeans(n_clusters = no_clusters).fit(descriptors)
    return kmeans



In [9]:
def extractFeatures(kmeans, descriptor_list, image_count, no_clusters): #Creating histograms which will later be used by the ML model as features
    im_features = np.array([np.zeros(no_clusters) for i in range(image_count)])
    for i in range(image_count):
        for j in range(len(descriptor_list[i])):
            feature = descriptor_list[i][j]
            feature = feature.reshape(1, 128)
            idx = kmeans.predict(feature)
            im_features[i][idx] += 1

    return im_features



In [10]:
def normalizeFeatures(scale, features): #Normalizing the features
    return scale.transform(features)

In [11]:
dataset = loadDataset()
trainImgPaths,testImgPaths, trainLabels, testLabels = train_test_split(dataset[0], dataset[1], 
                                                train_size=0.8, random_state=42,shuffle = True,stratify = dataset[1])

In [12]:
def train(trainImgPaths):
    trainImgs = getImages(trainImgPaths)
    desc_list,count=extract(trainImgs)
    descriptors = vstackDescriptors(desc_list)
    kmeans = clusterDescriptors(descriptors, no_clusters=no_clusters)
    im_features= extractFeatures(kmeans, desc_list, count, no_clusters=no_clusters)
    scale = StandardScaler().fit(im_features)    
    im_features = scale.transform(im_features)
    LRG = linear_model.LogisticRegression().fit(im_features, trainLabels)
    return kmeans,LRG,scale


In [13]:
kmeans,LRG,scale= train(trainImgPaths)

In [21]:
def test(testImgPaths,kmeans,LRG,scale):
    testImgs = getImages(testImgPaths)
    desc_list,count=extract(testImgs)
    descriptors = vstackDescriptors(desc_list)
    test_features = extractFeatures(kmeans, desc_list, count, no_clusters=no_clusters)
    test_features = scale.transform(test_features)
    pred=LRG.predict(test_features)
    return pred

In [23]:
pred = test(testImgPaths,kmeans,LRG,scale)

In [29]:
accuracy_score(pred, testLabels)

0.9722222222222222