In [151]:
%matplotlib inline
import numpy
import cv2
from matplotlib import pyplot as plt
from scipy.cluster.vq import vq, kmeans, whiten
import numpy as np
import os

def generateTrainOrTestData(path):
    """Extract SIFT descriptors for every image stored below ``path``.

    ``path`` is expected to contain one sub-directory per category. Every
    file found below a category directory (``.DS_Store`` excluded) is read
    as a grayscale image and passed to a SIFT detector.

    Parameters
    ----------
    path : str
        Root directory that holds the category sub-directories.

    Returns
    -------
    tuple
        ``(bagOfWords, categoryList)`` - two parallel lists.
        ``bagOfWords[i]`` is the descriptor result ``detectAndCompute``
        returned for one image, ``categoryList[i]`` is the name of the
        category directory that image belongs to. Files that cannot be read
        as an image, and images without any descriptors, are skipped (a
        notice is printed). A missing or empty ``path`` yields ``([], [])``.
    """
    bagOfWords = []
    categoryList = []

    # Only the direct children of `path` are categories. Taking just the
    # first os.walk() entry keeps nested folders from being mistaken for
    # categories of their own (and from being processed a second time).
    root, category_dirs, _ = next(os.walk(path), (path, [], []))

    # One detector is enough for the whole run. Newer OpenCV builds expose
    # SIFT as cv2.SIFT_create, older contrib builds only under xfeatures2d.
    if hasattr(cv2, 'SIFT_create'):
        sift = cv2.SIFT_create()
    else:
        sift = cv2.xfeatures2d.SIFT_create()

    # Sorted traversal makes the order of the returned lists reproducible.
    for category_dir in sorted(category_dirs):

        path_with_category = os.path.join(root, category_dir)
        print("Extracting features of category: " + category_dir)

        for image_dir, sub_dirs, imageNames in os.walk(path_with_category):
            sub_dirs.sort()

            for imageName in sorted(imageNames):
                if imageName == '.DS_Store':
                    continue

                # Join with the directory the file actually lives in, so
                # images in nested folders resolve to a valid path.
                pathToImage = os.path.join(image_dir, imageName)

                img1 = cv2.imread(pathToImage, 0)
                if img1 is None:
                    # imread signals "not a readable image" with None.
                    print("Skipping unreadable image: " + pathToImage)
                    continue

                kp1, des1 = sift.detectAndCompute(img1, None)
                if des1 is None:
                    # No keypoints found; there is nothing to cluster.
                    print("Skipping image without descriptors: " + pathToImage)
                    continue

                bagOfWords.append(des1)
                categoryList.append(category_dir)

    return (bagOfWords, categoryList)




In [152]:
# Generate Test and Train data

# The image root can be overridden with the CV_EX2_IMAGE_DIR environment
# variable so the notebook also runs on other machines; without it the
# original location is used. The root must contain 'train' and 'test'.
imageRootDir = os.environ.get(
    'CV_EX2_IMAGE_DIR',
    '/Users/dsoellinger/Documents/git/uni/Computer-Vision/EX2/images')
trainDataDir = os.path.join(imageRootDir, 'train')
testDataDir = os.path.join(imageRootDir, 'test')

print("Extracting features of train data...")
trainDataFeatures, trainDataCategories = generateTrainOrTestData(trainDataDir)
print("Finish extracting features of train data...")
print("Extracting features of test data...")
testDataFeatures, testDataCategories = generateTrainOrTestData(testDataDir)
print("Finish extracting features of test data...")


Extracting features of train data...
Extracting features of category: bedroom
Extracting features of category: CALsuburb
Extracting features of category: industrial
Extracting features of category: kitchen
Finish extracting features of train data...
Extracting features of test data...
Extracting features of category: bedroom
Extracting features of category: CALsuburb
Extracting features of category: industrial
Extracting features of category: kitchen
Finish extracting features of test data...


In [178]:
def listConversion(features,category):
    """Flatten per-image descriptor collections into one flat list.

    Parameters
    ----------
    features : sequence
        One entry per image. Each entry is an iterable of descriptors that
        offer ``tolist()``, or ``None`` for an image without descriptors.
    category : sequence
        ``category[i]`` is the label belonging to ``features[i]``.

    Returns
    -------
    tuple
        ``(convFeatures, convCategories)`` - two parallel lists with one
        entry per descriptor: the descriptor as a plain Python list and the
        label of the image it was taken from.
    """
    convFeatures = []
    convCategories = []

    for i, feature in enumerate(features):
        if feature is None:
            # An image without keypoints contributes no descriptors;
            # iterating None would raise a TypeError.
            continue

        for keypoint_des in feature:
            convFeatures.append(keypoint_des.tolist())
            convCategories.append(category[i])

    return (convFeatures, convCategories)
   
    
# Flatten the training descriptors: X holds one entry per descriptor,
# Y the category label of the image each descriptor came from.
(X, Y) = listConversion(features=trainDataFeatures,
                        category=trainDataCategories)

print("KMeans start")
# Build the visual vocabulary: 15 centroids requested, k-means is run
# 10 times and the codebook with the lowest distortion is kept.
(voc, distortion) = kmeans(X, k_or_guess=15, iter=10)
print("KMeans done")


KMeans start
KMeans done


In [179]:
from __future__ import division

def normalizeHistogram(histogram):
    """Scale a histogram so that its bins sum to 1.

    Parameters
    ----------
    histogram : list of numbers
        Raw bin counts.

    Returns
    -------
    list of float
        One value per input bin, each divided by the sum of all bins. An
        all-zero histogram is returned as all zeros instead of raising
        ``ZeroDivisionError``; an empty histogram yields an empty list.
    """
    total = sum(histogram)

    if total == 0:
        # Nothing was counted, so there is no mass to distribute.
        return [0.0 for _ in histogram]

    # Explicit float so the result is a true division even where
    # `from __future__ import division` is not in effect.
    total = float(total)

    return [x / total for x in histogram]

In [180]:
def computeImageHistogram(code, bins=20):
    """Count how often each code-book index occurs and normalize the counts.

    Parameters
    ----------
    code : iterable of int
        Code-book indices, one per descriptor of an image.
    bins : int, optional
        Number of histogram bins. Defaults to 20, the length that was
        previously hard-coded. Every index in ``code`` must be smaller than
        ``bins``, otherwise an ``IndexError`` is raised.

    Returns
    -------
    list
        The ``bins``-long histogram after it has been passed through
        ``normalizeHistogram``.
    """
    histogram = [0] * bins

    for c in code:
        histogram[c] += 1

    return normalizeHistogram(histogram)
    

In [181]:
# Assign every descriptor of each training image to its nearest centroid
# of the vocabulary and summarize the image as a normalized histogram.
histograms = []

for image in trainDataFeatures:
    # vq returns the centroid index per descriptor and the matching distance.
    (code, distance) = vq(image, voc)

    histograms.append(computeImageHistogram(code))



In [182]:
from sklearn import svm

# Linear SVM trained on one histogram per training image, using the
# category directory name of each image as its label.
svc = svm.LinearSVC()

print("SVC fit started")
svc.fit(X=histograms, y=trainDataCategories)
print("SVC fit finished")

SVC fit started
SVC fit finished


In [183]:
# Classify every test image and report the share of correct predictions.
correctClassification = 0
totalClassification = 0

for feature, expectedCategory in zip(testDataFeatures, testDataCategories):
    # Quantize the descriptors of the image against the vocabulary and
    # describe the image by its normalized histogram.
    code, distance = vq(feature, voc)
    normalizedHistogram = computeImageHistogram(code)

    # predict expects a list of samples; this one holds a single histogram.
    prediction = svc.predict([normalizedHistogram])[0]

    if prediction == expectedCategory:
        correctClassification += 1
    totalClassification += 1

if totalClassification:
    print("Classification Accuracy: " + str(correctClassification/totalClassification*100))
else:
    # Without test images the ratio is undefined; avoid dividing by zero.
    print("Classification Accuracy: n/a (no test images)")

Classification Accuracy: 53.7313432836
