# Exploring the extreme value machine classifier

### importing dependencies and reading data

In [1]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.neighbors import KernelDensity
import numpy as np
import random
import matplotlib.pyplot as plt
import pandas as pd
import bayes_classifier
from classifiers import MEVM
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_curve

df = pd.read_csv('./dataset/data.csv')
data = df.drop(['person_ID', 'frame', 'stream', 'sequance'], axis=1)
target = df["person_ID"]



### preparing the data

For conducting all the experiments in this section the ChokePoint Dataset was used by first having the data processed by the facenet neural network to produce a data frame of the embedings of all the faces alongside some identifiable information like the person_ID, frame, stream and sequance

We have decided to use the protocol suggested by the authors of the dataset for the verification, in which the data is devided into two groups G1 and G2, each group will play the turn of training set and evaluation set in turn. for more information please refer to http://arma.sourceforge.net/chokepoint/

and we have decided to use case study 1 which is concerned with:
1. indoor scenes only
2. short time intervals

In [2]:
G1_streams = ["P1E_S1_C1", "P1E_S2_C2", "P1L_S1_C1", "P1L_S2_C2"]
G1_sequence = ["P1E_S1", "P1E_S2","P1L_S1", "P1L_S2"]
G2_streams = ["P1E_S3_C3", "P1L_S3_C3", "P1E_S4_C1", "P1L_S4_C1"]
G2_sequence = ["P1E_S3", "P1L_S3","P1E_S4", "P1L_S4"]

In [3]:
G1_data = df[df["stream"].isin(G1_streams)]
G1_indices = G1_data.index[G1_data["stream"].isin(G1_streams)]
G1_sequance_ind = [G1_data.index[G1_data["sequance"] == sequence] 
                   for sequence in G1_sequence]
G2_data = df[df["stream"].isin(G2_streams)]
G2_indices = G2_data.index[G2_data["stream"].isin(G2_streams)]
G2_sequance_ind = [G2_data.index[G2_data["sequance"] == sequence] 
                   for sequence in G2_sequence]

A list of indices is prepared for cross validation, it contains 32 pairs of sets of indices where each sequance of G1 will be used as training and tested against each sequance of G2 and vice versa

In [4]:
cv_G1 = [(G1_S, G2_S) for G1_S in G1_sequance_ind for G2_S in G2_sequance_ind]
cv_G2 = [(G2_S, G1_S) for G1_S in G1_sequance_ind for G2_S in G2_sequance_ind]
cv = cv_G1 + cv_G2

### Test 1: exploring the accuracy of the classifier when trained on all classes

In this test the classifier is tuned according to the protocol discussed above, to find the best accuracy it can reach

To better suite the data to the application that we are working on it was decided that the classifier should be trained on data only from one sequence at a time rather than the entire group 

For the EVM classifier we are searching to tune 3 parameters (tailsize, cover_threshold, distance_multiplier) and due to how computationally expensive it would be to search all possible combinations, the parameters will be tuned one at a time, for example parameter 1 will be searched while parameter 2 and 3 are set to the defaults and then parameter 2 will be searched with parameter 1 set to the previous found value while parameter 3 is set to the default and so on

In [5]:
param_grid = {"cover_threshold": 10 ** np.linspace(-2, 0.5, 40),
              "tailsize": np.linspace(0, 800, 40, dtype=int),              
              "distance_multiplier": 10 ** np.linspace(-2, 0.5, 40)}

In [6]:
candidates = {}
for parameter, values in param_grid.items():
    candidates[parameter] = values
    grid_g = GridSearchCV(MEVM(), candidates, 
                          cv=cv, refit=False, n_jobs=16)
    grid_g.fit(data, target)
    candidates[parameter] = [grid_g.best_params_[parameter]]

scores_g = grid_g.cv_results_.get('mean_test_score').tolist()

In [7]:
print(grid_g.best_params_)
print('accuracy =', grid_g.best_score_)

{'cover_threshold': 1.1253355826007645, 'distance_multiplier': 0.7227271320676177, 'tailsize': 800}
accuracy = 0.9856309396604963


### Test 2: exploring the accuracy of the classifier when a subset of the classes is unknown

For this test a third of the classes are replced by a choosen label as unknowns, within the classifier implementation any data with this labe is not used for training and the classifier produces this label when the probability of all the classes is less than a determined threshold.

The probability theshold is passed as a parameter to the classifier when it is initialized

In [46]:
IDs = df.person_ID.unique()
unknowns= IDs[:len(IDs)//3]

preparing the target list

In [47]:
df_novel = df.copy()
for unknown in unknowns:
    df_novel["person_ID"].replace({unknown : 2}, inplace=True)
targets_novel = df_novel["person_ID"]


In this test we are searching to find the optimal value for the novelty parameter given the parameters we have found in the previous test

In [48]:
param_grid = {}
for parameter, value in grid_g.best_params_.items():
    param_grid[parameter] = [value]
novelty = 10 ** np.linspace(-2, 0, 30)
param_grid["novelty"] = novelty

In [49]:
grid_novel = GridSearchCV(MEVM(), param_grid, 
                          cv=cv, refit=False, n_jobs=16)
grid_novel.fit(data, targets_novel)

scores_novel = grid_novel.cv_results_.get('mean_test_score').tolist()

In [50]:
print(grid_novel.best_params_)
print('accuracy =', grid_novel.best_score_)

{'cover_threshold': 1.1253355826007645, 'distance_multiplier': 0.7227271320676177, 'novelty': 0.8531678524172805, 'tailsize': 800}
accuracy = 0.930301391836054


##### Further more the classifier is tested again with the same parameters but with different subset of the classes as unknowns to check if it provides similar results 

In [51]:
unknowns_test= IDs[-len(IDs)//3:]

In [52]:
df_novel_test = df.copy()
for unknown in unknowns_test:
    df_novel_test["person_ID"].replace({unknown : 2}, inplace=True)
targets_novel_test = df_novel_test["person_ID"]

In [53]:
param_grid = {}
for parameter, value in grid_novel.best_params_.items():
    param_grid[parameter] = [value]

In [54]:
grid_novel_test = GridSearchCV(MEVM(), param_grid, 
                               cv=cv, refit=False, n_jobs=16)
grid_novel_test.fit(data, targets_novel_test)

scores_novel_test = grid_novel_test.cv_results_.get('mean_test_score').tolist()

In [55]:
print('accuracy =', grid_novel_test.best_score_)

accuracy = 0.9020545787314297


### test 3:exploring the accuracy of the classifier when a subset of the classes is unknown while tracking the person being recognized

In this test we combine all the predictions from a given sequence for an individual to estimate multiple predictions of a tracked subject

Furthermore we use the "n" parameter to limit the number of samples used for training and the "shuffle" parameter to shuffle the data for a given class before using the first n samples

"n" and "shuffle" parameters together simulate choosing n random samples of a given sequence

In [18]:
data_novel_tracks = df.copy()
data_novel_tracks['tracks'] = (data_novel_tracks['person_ID'].astype(str) + 
                               data_novel_tracks['sequance'])
for unknown in unknowns:
    data_novel_tracks["person_ID"].replace({unknown : 2}, inplace=True)
targets_novel_tracks = data_novel_tracks["person_ID"]
data_novel_tracks = data_novel_tracks.drop(['person_ID', 'frame', 
                                            'stream', 'sequance'], axis=1)

Since the number of training samples is limited to n samples it is important that all parameters are tuned again and thus for this test all three parameters will be tuned again alongside the novelty parameter

One of the most important parameter to retune is the tailsize (number of negative samples used to train the EVM), as it was observed that the number of negative samples needed is highly affected by the number of samples used tto train the EVM.

In [39]:
param_grid = {"cover_threshold": 10 ** np.linspace(-2, 0.5, 40),              
              "tailsize": np.linspace(0, 70, 40, dtype=int),              
              "distance_multiplier": 10 ** np.linspace(-2, 0.5, 40),
              "novelty": 10 ** np.linspace(-2, 0, 40)}

In [40]:
candidates = {"n":[35], "shuffle":[True], "novelty":[0.5],
              "cover_threshold":[grid_g.best_params_["cover_threshold"]]}
for parameter, values in param_grid.items():
    candidates[parameter] = values
    grid_novel_tracks = GridSearchCV(MEVM(), candidates, 
                          cv=cv, refit=False, n_jobs=16)
    grid_novel_tracks.fit(data_novel_tracks, targets_novel_tracks)
    candidates[parameter] = [grid_novel_tracks.best_params_[parameter]]

scores_novel_tracks = grid_novel_tracks.cv_results_.get('mean_test_score').tolist()

In [41]:
print(grid_novel_tracks.best_params_)
print('accuracy =', grid_novel_tracks.best_score_)

{'cover_threshold': 1.7522244804127838, 'distance_multiplier': 0.7227271320676177, 'n': 35, 'novelty': 0.5541020330009492, 'shuffle': True, 'tailsize': 70}
accuracy = 0.9908661750936569


##### The classifier is tested again with the same parameters but with different subset of the classes as unknowns to check if it provides similar results

In [42]:
data_novel_tracks_test = df.copy()
data_novel_tracks_test['tracks'] = (data_novel_tracks_test['person_ID'].astype(str) + 
                                    data_novel_tracks_test['sequance'])
for unknown in unknowns_test:
    data_novel_tracks_test["person_ID"].replace({unknown : 2}, inplace=True)
targets_novel_tracks_test = data_novel_tracks_test["person_ID"]

In [43]:
param_grid = {}
for parameter, value in grid_novel_tracks.best_params_.items():
    param_grid[parameter] = [value]

In [44]:
grid_novel_tracks_test = GridSearchCV(MEVM(), param_grid,
                                      cv=cv, refit=False, n_jobs=16)
grid_novel_tracks_test.fit(data_novel_tracks, targets_novel_tracks_test)

scores_novel_tracks_test = grid_novel_tracks_test.cv_results_.get('mean_test_score').tolist()

In [45]:
print('accuracy =', grid_novel_tracks_test.best_score_)

accuracy = 0.9601887604692256
