In [1]:
%matplotlib widget
import time

import ipdb
import numpy as np
import sklearn as skl
import spike_data_augmentation as sda
from sklearn.cluster import KMeans
from sklearn.cluster import MiniBatchKMeans
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from tqdm.auto import tqdm

from utils import plot_centers, create_histograms
# Log when the run started; time.gmtime() makes the stamp UTC rather than local time.
print(
    time.strftime("Started on %a, %d %b %Y %H:%M:%S", time.gmtime())
)

Started on Fri, 05 Jun 2020 14:38:42


### Parametrise notebook using papermill

In [2]:
# Papermill parameters cell: keep these as plain assignments so they can be overridden per run.

# Size of each time surface; passed to ToTimesurface and used to flatten surfaces for clustering.
surface_dimensions = [11,11]
# drop_probability handed to the DropEvents transform.
dropout_probability = 0
# refractory_period handed to the RefractoryPeriod transform.
refractory_period = 0
# tau handed to ToTimesurface (presumably in the dataset's timestamp units -- TODO confirm).
time_constant = 20e3
# Number of k-means clusters; also the size argument given to create_histograms.
n_of_centers = 1500
# Which dataset to load: 'IBMGesture', 'NCARS' or 'NMNIST'.
dataset = 'NCARS'
# Only forwarded to the NMNIST dataset constructor.
first_saccade_only = False
# Name of this notebook's file under ./milled_nbs/; the last cell renames that file.
file_name = 'placeholder'

### Choose training dataset and transforms

In [3]:
# Event-level augmentation pipeline (refractory period filter followed by random event dropping).
transform = sda.transforms.Compose([
    sda.transforms.RefractoryPeriod(refractory_period=refractory_period),
    sda.transforms.DropEvents(drop_probability=dropout_probability),
])

# Pipeline that only converts events into time surfaces (no augmentation).
surface_transform = sda.transforms.Compose([
    sda.transforms.ToTimesurface(surface_dimensions=surface_dimensions, tau=time_constant, merge_polarities=True),
])
# After this line `transform` is: RefractoryPeriod -> DropEvents -> ToTimesurface.
transform.transforms += surface_transform.transforms

# NOTE(review): the training set is built with `surface_transform`, not `transform`, so the
# `refractory_period` and `dropout_probability` parameters currently have no effect on the data.
# Left unchanged to preserve the existing results -- confirm whether `transform` was intended here.
args = dict(save_to='./data', train=True, transform=surface_transform, download=False)
if dataset == 'IBMGesture':
    trainset = sda.datasets.IBMGesture(**args)
elif dataset == 'NCARS':
    trainset = sda.datasets.NCARS(**args)
elif dataset == 'NMNIST':
    trainset = sda.datasets.NMNIST(**args, first_saccade_only=first_saccade_only)
else:
    # Fail here with a clear message instead of a NameError on `trainset` further down.
    raise ValueError(
        "Unsupported dataset {!r}; expected 'IBMGesture', 'NCARS' or 'NMNIST'.".format(dataset)
    )
trainloader = sda.datasets.dataloader.Dataloader(trainset, shuffle=True)

### Read timesurfaces and cluster

In [6]:
# Fresh loader/iterator so this cell can be re-run on its own.
trainloader = sda.datasets.dataloader.Dataloader(trainset, shuffle=True)
trainiterator = iter(trainloader)

# Number of values in one flattened time surface (e.g. 11 * 11 = 121).
surface_size = int(np.prod(surface_dimensions))

all_surfaces = []
all_labels = []
for surfaces, label in tqdm(trainiterator):
    # Flatten every surface to one row: KMeans.fit needs a 2-D (n_samples, n_features) array,
    # and the later kmeans.predict calls use the same flattened layout.
    surfaces = surfaces.reshape((-1, surface_size))
    all_surfaces.append(surfaces)
    all_labels.append(label)
# One row per time surface across all recordings.
stack = np.vstack(all_surfaces)
print('Read ' + str(stack.shape[0]) + ' surfaces.')

# Learn the time-surface prototypes (cluster centres) used as features downstream.
kmeans = KMeans(n_clusters=n_of_centers).fit(stack)

HBox(children=(FloatProgress(value=0.0, max=15422.0), HTML(value='')))




KeyboardInterrupt: 

### Train classifiers

In [7]:
# Second pass over the training set: assign every time surface to its nearest cluster centre.
trainloader = sda.datasets.dataloader.Dataloader(trainset, shuffle=True)
trainiterator = iter(trainloader)

# Length of one flattened surface; was previously referenced without ever being defined.
dims_prod = int(np.prod(surface_dimensions))

training_cluster_assignments = []
Y_train = []
for surfaces, label in tqdm(trainiterator):
    surfaces = surfaces.reshape(-1, dims_prod)
    surf_labels = kmeans.predict(surfaces)
    # One array of cluster indices per recording, kept in the same order as its label.
    training_cluster_assignments.append(surf_labels)
    Y_train.append(label)

# presumably one histogram of cluster occurrences per recording -- see utils.create_histograms
X_train = create_histograms(training_cluster_assignments, n_of_centers)
assert len(X_train) == len(Y_train)
# Standardise features; the same fitted scaler is applied to the test features later.
scaler = skl.preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)

logreg = LogisticRegression(solver='lbfgs', multi_class='multinomial', max_iter=1000)
logreg.fit(X_train, Y_train)

gnb = GaussianNB()
gnb.fit(X_train, Y_train)

knn = KNeighborsClassifier()
knn.fit(X_train, Y_train)

HBox(children=(FloatProgress(value=0.0, max=15422.0), HTML(value='')))




KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

### Build testing features and classify

In [8]:
# Test split uses the plain surface transform (no event-level augmentation).
# NOTE(review): download=True here while the training cell uses download=False -- confirm intended.
args_test = dict(save_to='./data', train=False, transform=surface_transform, download=True)
if dataset == 'IBMGesture':
    testset = sda.datasets.IBMGesture(**args_test)
elif dataset == 'NCARS':
    testset = sda.datasets.NCARS(**args_test)
elif dataset == 'NMNIST':
    testset = sda.datasets.NMNIST(**args_test, first_saccade_only=first_saccade_only)
else:
    # Fail here with a clear message instead of a NameError on `testset` below.
    raise ValueError(
        "Unsupported dataset {!r}; expected 'IBMGesture', 'NCARS' or 'NMNIST'.".format(dataset)
    )
testloader = sda.datasets.dataloader.Dataloader(testset, shuffle=True)
testiterator = iter(testloader)

# Length of one flattened surface, computed once instead of on every iteration.
flat_surface_size = int(np.prod(surface_dimensions))

testing_cluster_assignments = []
Y_test = []
for surfaces, label in tqdm(testiterator):
    surfaces = surfaces.reshape(-1, flat_surface_size)
    surf_labels = kmeans.predict(surfaces)
    # Assignments and labels are appended together, so shuffling does not misalign them.
    testing_cluster_assignments.append(surf_labels)
    Y_test.append(label)

Using downloaded and verified file: ./data/Prophesee_Dataset_n_cars.zip
Extracting ./data/Prophesee_Dataset_n_cars.zip to ./data


HBox(children=(FloatProgress(value=0.0, max=8607.0), HTML(value='')))




In [9]:
# Test histograms, standardised with the scaler that was fitted on the training features.
X_test = scaler.transform(create_histograms(testing_cluster_assignments, n_of_centers))
assert len(X_test) == len(Y_test)

# Accuracy of each fitted classifier on the test set, rounded to four decimals.
scores = {
    name: round(clf.score(X_test, Y_test), 4)
    for name, clf in (('logreg', logreg), ('gnb', gnb), ('knn', knn))
}
# Name of the best-scoring classifier (first one wins on ties).
winner_classifier = max(scores, key=scores.get)
print(scores)
#print(skl.metrics.classification_report(Y_test, logreg.predict(X_test)))
print(skl.metrics.confusion_matrix(Y_test, logreg.predict(X_test)))
print(
    time.strftime("Finished on %a, %d %b %Y %H:%M:%S", time.gmtime())
)

{'logreg': 0.7696, 'gnb': 0.5759, 'knn': 0.7169}
[[3071 1140]
 [ 843 3553]]
Finished on Thu, 04 Jun 2020 16:31:09


In [10]:
# Persist the scores dict; a dict is stored as a pickled object array, hence allow_pickle.
np.save(file="ncars_normalised_new.npy", arr=scores, allow_pickle=True)

In [None]:
# Read back the scores written by this notebook. allow_pickle=True unpickles the file,
# so only use it on files you produced yourself.
np.load(file="ncars_normalised_new.npy", allow_pickle=True)

In [12]:
# Rebuild the histograms without the StandardScaler step (X_train / X_test were overwritten
# with their scaled versions earlier) and dump features and labels to disk.
X_train_orig = create_histograms(training_cluster_assignments, n_of_centers)
X_test_orig = create_histograms(testing_cluster_assignments, n_of_centers)

# Features
np.save(file="X_train.npy", arr=X_train_orig)
np.save(file="X_test.npy", arr=X_test_orig)
# Labels
np.save(file="Y_train.npy", arr=Y_train)
np.save(file="Y_test.npy", arr=Y_test)


### Rename the papermill-generated notebook so its filename lists the best score (hacky, feel free to skip)

In [None]:
import os

# Prefix the executed notebook's filename with "<best score>_<classifier name>" so the
# results can be read straight from the directory listing.
milled_dir = './milled_nbs/'
score_tag = str(scores[winner_classifier]) + '_' + winner_classifier
new_file_name = milled_dir + score_tag + file_name
os.rename(milled_dir + file_name, new_file_name)