In [34]:
import os
import pickle
import numpy as np
import pandas as pd
from PIL import Image
from sklearn import svm, preprocessing, model_selection, metrics, pipeline
from tqdm import tqdm

In [8]:
# Dataset dimensions: number of classes and repetitions per class.
num_ids, num_reps = 3, 100
num_samples = num_ids * num_reps

# Every sample index, plus the first index of each consecutive block of
# `num_reps` samples (0, num_reps, 2*num_reps, ...).
indAll = np.arange(num_samples)
indTest = np.arange(num_ids) * num_reps

# Zero-initialised array with one slot per repetition.
perf_fold = np.zeros(num_reps)

# Class ids 0..num_ids-1, and a label vector in which each class id appears
# num_reps - 1 times in a row.
x = np.arange(num_ids)
trainCat = x.repeat(num_reps - 1)

In [9]:
# Path of the pickled feature file, relative to the working directory.
fname = os.path.join('features', 'mnist_features.pkl')

# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open(fname, mode='rb') as handle:
    features = pickle.load(handle)

# Top-level keys of the loaded object, in their stored order.
ids = [key for key in features.keys()]

In [21]:
# Show which fields are stored in the first entry under the 'same_image' key.
features['same_image'][0].keys()

dict_keys(['stimulus', 'conv1', 'conv2', 'fc6', 'fc7'])

In [10]:
# Print the shape of the 'fc7' array held by the first entry of every id.
# `label` and `act` stay bound to the last id's values once the loop ends.
for label in ids:
    first_entry = features[label][0]
    act = first_entry['fc7']
    print(label, act.shape)

same_label (32448,)
same_image (32448,)
different_label (32448,)


In [None]:
num_reps = 100
num_ids = 3
num_samples = num_reps * num_ids

# Build the data matrix inside this cell, in class-major order: all
# repetitions of ids[0], then all of ids[1], and so on, one flattened 'fc7'
# vector per row.  The `act` left behind by the shape-printing loop is a
# single 1-D vector and cannot be indexed as act[rows, :].
actByClass = np.vstack([
    np.asarray(features[label][rep]['fc7']).ravel()
    for label in ids
    for rep in range(num_reps)
])
assert actByClass.shape[0] == num_samples, "`ids` must contain num_ids classes"

classIds = np.arange(num_ids)

# First row of each class block.  Adding the fold number selects repetition
# `iFold` of every class, so each fold holds out exactly num_ids rows and the
# largest index used is num_samples - 1.
indTest = np.arange(0, num_samples, num_reps)
indAll = np.arange(0, num_samples)

# Labels of the training rows.  np.setdiff1d returns sorted indices, so the
# training rows stay class-major with num_reps - 1 repetitions per class.
trainCatFold = np.repeat(classIds, num_reps - 1)

# One accuracy value per fold; there are num_reps folds.
perf_fold = np.zeros(shape=(num_reps,))

for iFold in tqdm(range(num_reps)):

    indTestFold = indTest + iFold
    indTrain = np.setdiff1d(indAll, indTestFold)

    dataTest = actByClass[indTestFold, :]
    dataTrain = actByClass[indTrain, :]

    clf = svm.LinearSVC(dual='auto')
    clf.fit(dataTrain, trainCatFold)

    dec = clf.predict(dataTest)

    # Held-out rows are ordered by class, so their true labels are classIds.
    perf = np.mean(dec == classIds)

    perf_fold[iFold] = perf

In [27]:
def stack_features(features, layer, class_names=None):
    """Stack one layer's activations into a sample matrix with labels and groups.

    Parameters
    ----------
    features : mapping
        ``features[class_name][rep][layer]`` must hold an array-like
        activation for every requested class and every repetition index.
    layer : str
        Key of the activation to pull out of each entry (e.g. ``"fc7"``).
    class_names : sequence, optional
        Classes to stack, in label order.  Defaults to the notebook-level
        ``ids`` list.

    Returns
    -------
    X : np.ndarray
        One flattened activation per row, ordered repetition by repetition
        and, within a repetition, in ``class_names`` order.
    y : np.ndarray
        Integer label of each row: the position of its class in
        ``class_names``.
    groups : np.ndarray
        Repetition index of each row, so rows from the same repetition share
        a group id.

    Notes
    -----
    The number of repetitions is taken from the first class only; every
    other class is indexed with the same repetition range.  All flattened
    activations must have the same length for the final ``np.vstack``.
    """
    if class_names is None:
        # Fall back to the notebook-level class list.
        class_names = ids

    num_reps = len(features[class_names[0]])
    X, y, groups = [], [], []
    for rep in range(num_reps):
        for cls_id, cls_name in enumerate(class_names):
            X.append(np.asarray(features[cls_name][rep][layer]).ravel())
            y.append(cls_id)
            groups.append(rep)  # group = repetition (triplet) id
    return np.vstack(X), np.array(y), np.array(groups)

In [28]:
# Layer whose activations are stacked below.
layer = "fc7"

# Sample matrix, integer labels, and per-row group ids for that layer.
act, trainCat, groups = stack_features(features, layer)

# Sizes read back from the stacked data and the class list.
num_samples = act.shape[0]
num_ids = len(ids)
num_reps = np.unique(groups).size

In [35]:
# Two-step classifier: feature standardisation followed by a linear SVM.
scaler_step = preprocessing.StandardScaler()
svc_step = svm.LinearSVC(
    dual='auto',
    class_weight='balanced',
    max_iter=5000,
    random_state=0,
)
clf = pipeline.make_pipeline(scaler_step, svc_step)

In [None]:
# Leave-one-group-out cross-validation: every fold holds out all rows that
# share one group id and trains on the rest.
logo = model_selection.LeaveOneGroupOut()

# Integer label i stands for ids[i], because stack_features numbers classes
# by their position in `ids`.  Defining the names here keeps the final report
# from depending on a variable set somewhere else.
class_names = list(ids)
label_ids = np.arange(len(class_names))

# One accuracy slot per fold; the splitter reports the fold count itself.
perf_fold = np.zeros(shape=(logo.get_n_splits(groups=groups),))
y_true_all, y_pred_all = [], []

for fold_idx, (indTrain, indTest) in enumerate(logo.split(act, trainCat, groups)):
    # Refit the whole pipeline on the training rows of this fold only.
    clf.fit(act[indTrain, :], trainCat[indTrain])
    dec = clf.predict(act[indTest, :])

    perf_fold[fold_idx] = metrics.accuracy_score(trainCat[indTest], dec)
    # Pool true and predicted labels across folds for the confusion matrix.
    y_true_all.extend(trainCat[indTest])
    y_pred_all.extend(dec)

# Mean of the per-fold accuracies.
overall_acc = perf_fold.mean()
cm = metrics.confusion_matrix(y_true_all, y_pred_all, labels=label_ids)

print(f"Overall accuracy: {overall_acc:.3f}")
print("Per-fold accuracy (one triplet per fold):", perf_fold.shape, "folds")
print("Confusion matrix (rows=true, cols=pred):")
print(cm)
print("Label order:", class_names)

