In [1]:
from scipy import io
from scipy import stats
import numpy as np
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from cma import CMA
import pandas as pd

# Config

In [2]:
# Number of labeled rows used to fit each model
ns = 50
# Number of rows following the labeled window that are scored (test images)
testLen = 480
# Number of test rows handed to predict() per call
block_size = 2
# Row offsets at which the labeled window of each run starts
startIdxArray = [350, 400, 450, 500, 550]
# Forgetting parameter for online subspace learning (CMA `alpha` argument)
alpha = 1.5
# Number of dimensions (CMA `dim` argument)
dim = 10
# `C` argument given to LinearSVC
C = 1

def bind_args(config):
    """Expose the entries of a mapping as attributes of a plain object.

    Parameters
    ----------
    config : mapping
        Iterated for its keys; every key is looked up with ``config[key]``
        and stored under that name on the returned object.

    Returns
    -------
    ArgsObject
        A fresh instance carrying one attribute per key of ``config``.
    """
    class ArgsObject:
        pass

    bound = ArgsObject()
    for name in config:
        value = config[name]
        setattr(bound, name, value)
    return bound

# Load & Process data

In [3]:
# Read the feature and label/name tables from the two .mat files
X = io.loadmat("./data/caltran_gist.mat")
y = io.loadmat("./data/caltran_dataset_labels.mat")
features = np.array(X['data']['features'][0][0])
names = y['names']
labels = np.array(y['labels']).T

# Drop every row whose feature vector contains at least one NaN,
# applying the same row mask to features, labels and names
ids = ~np.isnan(features).any(axis=1)
features = features[ids]
labels = labels[ids]
names = names[ids]

# L1 scaling followed by z-scoring.
# Each column is divided by the sum of its absolute values over all rows
# (broadcast across rows), then every row is z-scored along axis=1.
# NOTE(review): the L1 sum runs over axis=0 while the z-score runs over
# axis=1 -- confirm these axes are the intended ones.
column_l1 = np.abs(features).sum(axis=0, keepdims=True)
features = features / column_l1
features = stats.zscore(features, axis=1)

# Experiment function

In [4]:
def experiment(startIdx, model):
    """Fit ``model`` on one labeled window and score it on the rows that follow.

    Rows ``[startIdx, startIdx + ns)`` of the module-level ``features`` and
    ``labels`` arrays are passed to ``model.fit``. The next ``testLen`` rows
    are then passed to ``model.predict`` in consecutive chunks of
    ``block_size`` rows, in their original order.

    Parameters
    ----------
    startIdx : int
        Row offset at which the labeled window begins.
    model : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``.

    Returns
    -------
    The value of ``accuracy_score`` computed between the test labels and the
    concatenated per-block predictions.
    """
    train_end = startIdx + ns
    test_end = train_end + testLen
    Xs, ys = features[startIdx:train_end, :], labels[startIdx:train_end, :]
    Xt, yt = features[train_end:test_end, :], labels[train_end:test_end, :]

    # Labels are stored as a column; fit() receives them flattened.
    model.fit(Xs, ys.ravel())

    # Feed the test rows to the model one block at a time, front to back.
    block_preds = [
        model.predict(Xt[offset:offset + block_size, :])
        for offset in range(0, Xt.shape[0], block_size)
    ]
    return accuracy_score(yt, np.concatenate(block_preds))

# Experiments

In [5]:
# Two plain baselines plus each base classifier wrapped in CMA, once per mode.
# Every entry gets its own freshly constructed estimator instance.
models = {
    "KNN": KNeighborsClassifier(weights="distance"),
    "SVM": LinearSVC(C=C),
    "KNN_cgfk": CMA(KNeighborsClassifier(weights="distance"), alpha=alpha, dim=dim, mode='cgfk'),
    "KNN_csa": CMA(KNeighborsClassifier(weights="distance"), alpha=alpha, dim=dim, mode='csa'),
    "SVM_cgfk": CMA(LinearSVC(C=C), alpha=alpha, dim=dim, mode='cgfk'),
    "SVM_csa": CMA(LinearSVC(C=C), alpha=alpha, dim=dim, mode='csa'),
}

# Run every model on every start offset and collect the accuracies in percent.
# The rows are gathered in a list and the table is built once with an explicit
# float dtype, rather than being grown from an empty, dtype-less DataFrame by
# repeated ``.loc`` insertions (where the resulting column dtype is left to
# pandas' enlargement inference).
acc_rows = []
for startIdx in startIdxArray:
    # The same model objects are reused for every offset; experiment() calls
    # fit() on them again each time. Models are visited in dict order.
    acc_rows.append([experiment(startIdx, models[key]) * 100 for key in models])

results = pd.DataFrame(acc_rows, index=startIdxArray, columns=list(models), dtype=float)

# Both summary rows are computed from the per-offset rows only, i.e. before
# either of them is appended to the table.
# NOTE: the row labeled "Std" holds std / sqrt(number of runs), which is the
# standard error of the mean rather than the raw standard deviation.
mean_acc = results.mean()
std_err = results.std() / np.sqrt(len(startIdxArray))
results.loc["Mean"] = mean_acc
results.loc["Std"] = std_err



In [6]:
# Show the accuracy table rounded to two decimals (``results`` itself is not modified)
results.round(decimals=2)

Unnamed: 0,KNN,SVM,KNN_cgfk,KNN_csa,SVM_cgfk,SVM_csa
350,63.96,77.5,66.46,69.17,84.79,84.79
400,65.21,72.08,64.17,64.17,73.96,74.17
450,56.46,69.58,56.67,56.88,72.5,72.71
500,56.04,72.08,52.92,53.54,66.25,67.71
550,55.0,71.67,55.0,53.96,76.25,79.38
Mean,59.33,72.58,59.04,59.54,74.75,75.75
Std,2.17,1.31,2.65,3.07,3.01,2.93
