In [1]:
import numpy as np
import random
import utils
import BoW
import pickle

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC, LinearSVC
from sklearn.multiclass import OneVsRestClassifier

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [3]:
# Pin both global RNGs (NumPy's legacy generator and the stdlib one) to the
# same fixed seed so that repeated runs draw identical random numbers.
np.random.seed(1)
random.seed(1)

## Set the parameters

In [3]:
# Location of the image repository on disk.
caltech_repo = "./256_ObjectCategories/"

# Patch-extraction settings forwarded to the BoW helpers.
# NOTE(review): both are 0 here; the meaning of 0 is defined inside BoW and is
# not visible from this notebook — confirm it is the intended setting.
step_size = patch_size = 0

# Value passed as `dim` to the BoW helpers; also used to derive the RBF gamma.
dim = 1024
# Value passed as the sample budget to BoW.generate_kmeans_model.
num_kmeans_samples = 20000

# Value handed to Dataset.train as its third argument.
train_number = 15

## Generate the codebook and initialize the feature function

In [None]:
# Building the Bag-of-Words dictionary does not need every image, and loading
# all of them at once is not possible, so the codebook is fitted on a reduced
# dataset created with drop_rate = 0.9.
bag_of_words_dataset = utils.Dataset_old(caltech_repo, drop_rate=0.9)

# Fit the k-means model that acts as the Bag-of-Words dictionary.
# NOTE(review): patch_size/step_size are passed here in the opposite order
# from the feature-function call below — confirm both match the BoW signatures.
kmeans_model = BoW.generate_kmeans_model(
    bag_of_words_dataset.get_data_X(),
    dim,
    patch_size,
    step_size,
    num_kmeans_samples,
)


def feature_func(x):
    """Return BoW.feature_function_model_unfeeded applied to `x` with the fitted k-means model."""
    return BoW.feature_function_model_unfeeded(
        x, dim, step_size, patch_size, kmeans_model
    )

## Load the dataset

In [None]:
# Full dataset, with every image mapped through `feature_func`.
BoW_dataset = utils.Dataset(caltech_repo, feature_function=feature_func)
BoW_dataset.generate_train_test_samples()

# Second copy of the dataset built with pyramid = 2 and the same feature
# function (presumably the spatial-pyramid variant — see utils.Dataset).
SPM_dataset = utils.Dataset(
    caltech_repo,
    feature_function=feature_func,
    pyramid=2,
)
SPM_dataset.generate_train_test_samples()

## Initialize the classifiers

In [None]:
# Default SVM hyper-parameters used by the classifiers built below:
# C is the penalty term, gamma the RBF kernel coefficient (set to 1 / dim).
C, gamma = 1, 1 / dim

In [None]:
# Feature scaler handed to Dataset.train / Dataset.test alongside the model.
scaler = StandardScaler()

# Three candidate classifiers, all sharing the same penalty term C:
#   RBF_SVC    - SVM with an RBF kernel and the gamma defined above
#   Linear_SVC - SVM with a linear kernel
#   HIV_SVC    - project-defined utils.HistIntersectionModel
RBF_SVC = SVC(C=C, kernel='rbf', gamma=gamma)
Linear_SVC = SVC(C=C, kernel='linear')
HIV_SVC = utils.HistIntersectionModel(C=C)

## Choose the dataset and model

In [None]:
# Select the feature representation and the classifier to evaluate.
# The original cell left both right-hand sides blank (a SyntaxError on
# Run All); a concrete default is assigned here — edit to switch.
dataset = BoW_dataset  # alternative: SPM_dataset
model = RBF_SVC  # alternatives: Linear_SVC, HIV_SVC

## Train on the dataset and model

In [None]:
# Train the selected model on the selected dataset, then evaluate it.
# Uses `dataset` and `train_number` as defined in the earlier cells (the
# previous spellings `data_set` / `training_number` were never defined).
training_acc = dataset.train(scaler, model, train_number)
validation_acc = dataset.test(scaler, model)

## Test the trained model

In [None]:
# Evaluate with validation disabled. `dataset` is the object chosen above
# (the previous spelling `data_set` was never defined).
# NOTE(review): this calls `train`, not `test`, and omits the training-number
# argument used in the training cell — confirm against utils.Dataset that this
# is the intended way to obtain the test accuracy.
test_acc = dataset.train(scaler, model, validation = False)

## Random Search

In [None]:
# Random search over (C, gamma) for the RBF-kernel SVM.
#
# Both hyper-parameters are sampled log-uniformly, i.e. as 10 ** u with u drawn
# uniformly: C from [1e-1, 1e1] and gamma from [1e-5, 1e-3]. (The previous
# expression raised the uniform sample to the 10th power instead, and relied on
# `np.pow`, which is absent from NumPy < 2.0.)
#
# `record` maps each (C, gamma) pair to (training accuracy, validation accuracy).
record = {}

Cs = 10.0 ** np.random.uniform(-1, 1, 10)
gammas = 10.0 ** np.random.uniform(-5, -3, 10)

# Loop-local names keep the notebook-level `C`, `gamma`, `model` and `RBF_SVC`
# untouched, so earlier cells still behave the same if re-run afterwards.
for C_try, gamma_try in zip(Cs, gammas):
    # Train the freshly built candidate; previously the stale `model` was
    # trained on every iteration, so all trials evaluated the same classifier.
    search_model = SVC(kernel='rbf', C=C_try, gamma=gamma_try)
    training_acc = dataset.train(scaler, search_model, train_number)
    validation_acc = dataset.test(scaler, search_model)

    record[(C_try, gamma_try)] = (training_acc, validation_acc)