In [13]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn import datasets, svm, metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sb
import os
import cv2
from random import shuffle


In [27]:
categories = ["dogs", "cats"]
def load_images_from_folder(folder):
    data = []
    for category in categories: 
        path = os.path.join(folder, category)
        class_num = categories.index(category)
        for img in os.listdir(path):
            img_array = cv2.imread(os.path.join(path,img) , 0)
            img_array = cv2.resize(img_array, (100, 100)).flatten() 
            data.append([img_array, class_num])
    return data

images = load_images_from_folder("data/training_set") 
test = load_images_from_folder("data/test_set")

In [28]:
shuffle(images)
shuffle(test)

X_Train = []
Y_Train = []
X_Test = []
Y_Test = []

for features, label in images:
    X_Train.append(features)
    Y_Train.append(label)
    
for features, label in test:
    X_Test.append(features)
    Y_Test.append(label)


In [31]:
print("Training Data : " + str(len(images)))
print("Testing Data : " + str(len(test)))

Training Data : 8000
Testing Data : 2000


In [30]:
#X_Train = np.array(X_Train).reshape(len(X_Train), -1)

classifier = svm.SVC(gamma=0.001).fit(X_Train, Y_Train)

In [None]:
cs = [1.0, 3.0, 5.0, 7.0, 9.0]
kernels = ["linear", "poly", "rbf", "sigmoid"]
best = [0, 0]
bestKernel = ""
for c in cs:
    for kernel in kernels:
        svc = svm.SVC(C = c, kernel = kernel, gamma = "scale")
        svc.fit(X_Train, Y_Train)
        accuracy = svc.score(X_Test, Y_Test)
        if(accuracy > best[0]):
            best[0] = accuracy
            best[1] = c
            bestKernel = kernel
        print("acc: ({}, {}): {:.2f}%".format(c, kernel, accuracy * 100))

acc: (1.0, linear): 53.20%
acc: (1.0, poly): 60.95%
acc: (1.0, rbf): 65.30%
acc: (1.0, sigmoid): 48.00%
acc: (3.0, linear): 53.20%
acc: (3.0, poly): 60.65%
acc: (3.0, rbf): 65.10%
acc: (3.0, sigmoid): 48.00%
acc: (5.0, linear): 53.20%


In [None]:
classifier = svm.SVC(C = best[1], kernel = kernel, gamma=0.001).fit(X_Train, Y_Train)

#X_Test = np.array(X_Test).reshape(len(X_Test),  -1)
Y_Pred = classifier.predict(X_Test)

In [None]:
print(classifier, classification_report(Y_Test, Y_Pred))
cf_matrix = confusion_matrix(Y_Test, Y_Pred)
print(cf_matrix)
sb.heatmap(cf_matrix, cmap = "Blues", annot = True)

#task: instead of using raw pixels as the features, you can explores other features descriptor e.g. HoG as the features