### Import Packages

In [1]:
from matplotlib import pyplot as plt
import numpy as np
import os.path as osp
import pickle
import scipy.misc

#### Load training dataset

In [2]:
from image_generator import load_train_images
train_images_wenda = load_train_images('training_set/wenda')
train_images_others = load_train_images('training_set/others')

In [3]:
training_set = []
training_set.extend(train_images_wenda)
training_set.extend(train_images_others)

In [4]:
train_labels = ['wenda'] * len(train_images_wenda)
train_labels.extend(['others'] * len(train_images_others))

### Generate Vocab

In [5]:
%load_ext autoreload
%autoreload 2

from image_generator import build_vocab
vocab_filename = 'vocab_wenda.pkl'
if not osp.isfile(vocab_filename):
    print('No existing visual word vocabulary found. Computing one from training images')
    vocab_size = 200  # Larger values will work better (to a point) but be slower to compute
    vocab = build_vocab(training_set, vocab_size)
    
    with open(vocab_filename, 'wb') as f:
        pickle.dump(vocab, f)
        print('{:s} saved'.format(vocab_filename))

### Generate Feature

In [6]:
import time

start_time = time.time()

from image_generator import bags_of_sifts_spm
train_image_feats = bags_of_sifts_spm(training_set, vocab_filename, 3)

In [7]:
from image_generator import get_images

from image_generator import get_train_val
train_image_ids, val_image_ids = get_train_val()
val_images = get_images(val_image_ids)
train_images = get_images(train_image_ids)

In [8]:
val_images_waldo = load_train_images('validation_set/waldo')
val_images_wenda = load_train_images('validation_set/wenda')
val_images_wizard = load_train_images('validation_set/wizard')
val_images_others = load_train_images('validation_set/others')

In [9]:
val_images_all = []
val_images_all.extend(val_images_waldo)
val_images_all.extend(val_images_wenda)
val_images_all.extend(val_images_wizard)
val_images_all.extend(val_images_others)

In [10]:
val_labels = ['others'] * len(val_images_waldo)
val_labels.extend(['wenda'] * len(val_images_wenda))
val_labels.extend(['others'] * len(val_images_wizard))
val_labels.extend(['others'] * len(val_images_others))

In [11]:
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

def colorConfusionMatrix(y_valid, y_pred_valid):
    cm = confusion_matrix(y_valid, y_pred_valid)
    df_cm = pd.DataFrame(cm, ['wenda', 'others'],
                        ['wenda', 'others'])
    plt.rcParams["figure.figsize"] = (5,5) # set the size of the image
    ax = plt.axes()
    sns.heatmap(df_cm, annot=True, fmt='g',square=True)
    ax.set_title("Confusion Matrix")
    ax.set_ylabel('Actual')  
    ax.set_xlabel('Predicted')  
    plt.show()

#### SVM

In [12]:
from sklearn.svm import SVC
svm = SVC(gamma="scale", decision_function_shape='ovo', probability=True, kernel="linear")
svm.fit(train_image_feats, train_labels)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovo', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [15]:
import time

start_time = time.time()

val_image_feats = bags_of_sifts_spm(val_images_all, vocab_filename, 3)
y_pred = svm.predict(val_image_feats)

In [16]:
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

print(time.time()-start_time)
print("Accuracy: {0}".format(accuracy_score(val_labels, y_pred)))
print("Recall: {0}".format(accuracy_score(val_labels, y_pred)))
print("F1: {0}".format(accuracy_score(val_labels, y_pred)))

47.09952521324158
Accuracy: 0.6363636363636364
Recall: 0.6363636363636364
F1: 0.6363636363636364
