In [1]:
import numpy as np
import pickle

import multiprocessing
from joblib import Parallel, delayed

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from scipy.spatial.distance import cdist, pdist

from scipy.stats import mode

import time

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
# Global plot styling.  The seaborn context is applied first and the two
# matplotlib overrides afterwards, preserving the original statement order.
sns.set_context('talk')
matplotlib.rcParams.update({
    "axes.prop_cycle": plt.cycler("color", plt.cm.tab20.colors),  # 20-colour tab20 cycle
    'lines.linewidth': 5,
})

In [2]:
def bipolarize(Y):
    """Return a copy of ``Y`` with every entry mapped to +1 or -1 by sign.

    Strictly positive entries become 1, strictly negative entries become -1,
    and entries equal to zero are replaced by a random draw from {-1, +1}
    using numpy's global random state.  The input array is not modified and
    the result keeps the dtype of ``Y``.
    """
    signs = np.copy(Y)

    positive = signs > 0
    signs[positive] = 1.0

    negative = signs < 0
    signs[negative] = -1.0

    # Ties (exact zeros) are broken at random, one independent draw per entry.
    ties = signs == 0
    signs[ties] = np.random.choice([-1.0, 1.0], size=int(np.count_nonzero(ties)))
    return signs

def centroids(X,label=None):
    """Build bipolar prototype vectors by summing rows of ``X`` and bipolarizing.

    Parameters
    ----------
    X : 2-D array; rows are the vectors to bundle.
    label : optional array with one entry per row of ``X``.  When given, one
        prototype is produced per distinct label value that occurs more than
        once; labels that occur exactly once are skipped.  When omitted, a
        single prototype is built from all rows.

    Returns
    -------
    (labels, prototypes) : ``labels`` is the array of retained label values
        (or the list ``[0]`` when ``label`` is None); ``prototypes`` is an
        integer array with one row per retained label.
    """
    if label is None:
        # No grouping: bundle every row into a single 1 x D prototype.
        single = bipolarize(np.sum(X,axis=0)).reshape(1,-1)
        return [0], single.astype('int')

    values, counts = np.unique(label,return_counts=True)
    kept = values[counts > 1]  # singleton groups are not turned into prototypes

    protos = np.zeros((len(kept), X.shape[1]))
    for row, value in enumerate(kept):
        members = X[label==value]
        protos[row,:] = bipolarize(np.sum(members,axis=0))
    return kept, protos.astype('int')

def classify(v,am,metric):
    """Return, for each row of ``v``, the index of the nearest row of ``am``.

    Distances are computed with ``scipy.spatial.distance.cdist`` using the
    given ``metric``; ties go to the lowest index (``argmin`` semantics).
    """
    return cdist(v,am,metric).argmin(axis=1)

In [3]:
np.set_printoptions(precision=4)

### loading data
# select dataset and encoding type
dataName = 'allHV.npz'
emgHVType =  'hvRelAcc'

# np.load on an .npz archive returns an NpzFile that keeps the underlying
# file open and reads members on access.  Pull out the arrays we need inside
# a `with` block so the file handle is closed afterwards; after this cell
# `allHV` refers to the closed archive and must not be indexed again.
with np.load(dataName) as allHV:
    # extract data and labels based on gesture, trial, and position
    hv = allHV[emgHVType]
    gestLabel = allHV['gestLabel']
    posLabel = allHV['posLabel']
    trialLabel = allHV['trialLabel']

# Unique (gesture, position) and (gesture, position, trial) combinations,
# plus for every sample the index of the combination it belongs to.
combGP, groupGP = np.unique(np.column_stack((gestLabel,posLabel)),axis=0,return_inverse=True)
combGPT, groupGPT = np.unique(np.column_stack((gestLabel,posLabel,trialLabel)),axis=0,return_inverse=True)

# get list of unique values for each label
gestures = np.unique(gestLabel)
positions = np.unique(posLabel)
trials = np.unique(trialLabel)

numGestures = len(gestures)
numPositions = len(positions)
numTrials = len(trials)

# get data size info
D = hv.shape[1] # hypervector dimension
numHV = 80 # number of examples per trial

In [4]:
def _countPerClass(labels, classes):
    """Return, for each value in ``classes``, how many entries of ``labels`` equal it."""
    return [int(np.count_nonzero(labels == g)) for g in classes]


def test_clustering(X,y,grp,clust,numClust,numSplit=10):
    """Cross-validate greedy backward pruning of per-gesture cluster prototypes.

    For every stratified fold, one bipolar prototype is built (via
    ``centroids``) per cluster of each gesture in the training split.
    Prototypes are then removed one at a time: at each step the prototype
    whose removal leaves the highest *training* accuracy is dropped, and the
    resulting train/test accuracies are recorded.  Pruning stops when the
    number of remaining prototypes equals the number of distinct gestures
    in ``y``.  One progress line is printed per step.

    Parameters
    ----------
    X : 2-D array of samples (one row per sample).
    y : 1-D array of gesture labels, one per row of ``X``.  The label values
        are also used to index ``clust`` and ``numClust``.
    grp : 1-D array of group ids used only to stratify the folds.
    clust : indexable by gesture label; ``clust[l][k]`` holds the cluster
        assignment of every sample with ``y == l``, in sample order.
    numClust : indexable by gesture label; ``numClust[l]`` is the key ``k``
        selecting which clustering of gesture ``l`` to use.
    numSplit : number of stratified folds.

    Returns
    -------
    list with one ``(rp, accTrain, accTest)`` tuple per fold, where ``rp[i]``
    is the number of remaining prototypes per gesture after ``i`` removals
    and ``accTrain[i]`` / ``accTest[i]`` are the matching accuracies.
    """
    classes = np.unique(y)

    # Scatter each gesture's cluster assignment back onto that gesture's
    # sample positions.  (Concatenating the per-gesture arrays would only be
    # aligned with X/y if the samples happened to be sorted by gesture.)
    perClass = [np.asarray(clust[l][numClust[l]]) for l in classes]
    c = np.empty(len(y), dtype=np.result_type(*perClass))
    for l, assigned in zip(classes, perClass):
        c[y == l] = assigned

    skf = StratifiedKFold(n_splits=numSplit)
    res = []
    for trainIdx, testIdx in skf.split(X,grp):
        XTrain, XTest = X[trainIdx], X[testIdx]
        yTrain, yTest = y[trainIdx], y[testIdx]
        cTrain = c[trainIdx]

        # Associative memory: stacked prototypes plus the gesture each encodes.
        AM = []
        AMlabels = []
        for l in np.unique(yTrain):
            inClass = yTrain == l
            protos = centroids(XTrain[inClass],label=cTrain[inClass])[1]
            AM.append(protos)
            # centroids() skips clusters with a single sample, so label the
            # rows it actually returned rather than every distinct cluster id.
            AMlabels.append(l*np.ones(len(protos)))
        AM = np.vstack(AM)
        AMlabels = np.hstack(AMlabels)

        # Working distance matrices; a pruned prototype's column is set to
        # +inf so argmin can never select it.
        dTrain = cdist(XTrain,AM,'cosine')
        dTest = cdist(XTest,AM,'cosine')
        active = np.ones(len(AMlabels), dtype=bool)

        accTrain = [accuracy_score(yTrain, AMlabels[np.argmin(dTrain,axis=1)])]
        accTest = [accuracy_score(yTest, AMlabels[np.argmin(dTest,axis=1)])]
        rp = [_countPerClass(AMlabels[active], classes)]
        print(rp[-1], accTrain[-1], accTest[-1])

        while np.count_nonzero(active) > len(classes):
            # Start below any achievable accuracy so a candidate is always
            # selected, even if every candidate scores 0.
            bestAcc = -1.0
            bestDrop = -1
            for dropCandidate in np.flatnonzero(active):
                # Temporarily mask this column only; avoids copying the whole
                # matrix for every candidate.
                saved = dTrain[:,dropCandidate].copy()
                dTrain[:,dropCandidate] = np.inf
                acc = accuracy_score(yTrain, AMlabels[np.argmin(dTrain,axis=1)])
                dTrain[:,dropCandidate] = saved
                if acc > bestAcc:  # strict: first best candidate wins ties
                    bestAcc = acc
                    bestDrop = dropCandidate

            # Commit the removal chosen on the training split.
            active[bestDrop] = False
            dTrain[:,bestDrop] = np.inf
            dTest[:,bestDrop] = np.inf

            accTrain.append(bestAcc)
            accTest.append(accuracy_score(yTest, AMlabels[np.argmin(dTest,axis=1)]))
            rp.append(_countPerClass(AMlabels[active], classes))
            print(rp[-1], accTrain[-1], accTest[-1])

        res.append((rp, accTrain, accTest))

    return res

In [5]:
# One dict per gesture, keyed by a cluster count and holding the cluster id of
# each of that gesture's samples.  The only clustering registered here uses
# the position label as the cluster id, stored under the key `numPositions`.
clustering = [{numPositions: posLabel[gestLabel == g]} for g in gestures]

# Which clustering key to use for each gesture (float array, one entry per gesture).
numClust = np.full(numGestures, float(numPositions))

In [6]:
# Run the pruning experiment with folds stratified by (gesture, position) group.
r = test_clustering(
    X=hv,
    y=gestLabel,
    grp=groupGP,
    clust=clustering,
    numClust=numClust,
    numSplit=10,
)

[8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8] 0.9903400997150997 0.9735576923076923
[8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8] 0.9903400997150997 0.9735576923076923
[8, 8, 8, 8, 8, 8, 7, 8, 7, 8, 8, 8, 8] 0.9901620370370371 0.9735576923076923
[8, 8, 8, 8, 8, 7, 7, 8, 7, 8, 8, 8, 8] 0.9889155982905983 0.9667467948717948
[8, 8, 8, 8, 7, 7, 7, 8, 7, 8, 8, 8, 8] 0.9874910968660968 0.9667467948717948
[8, 8, 8, 7, 7, 7, 7, 8, 7, 8, 8, 8, 8] 0.9849537037037037 0.9659455128205128
[8, 8, 8, 7, 6, 7, 7, 8, 7, 8, 8, 8, 8] 0.9821047008547008 0.9563301282051282
[8, 8, 8, 7, 6, 7, 6, 8, 7, 8, 8, 8, 8] 0.9790776353276354 0.9515224358974359
[8, 8, 8, 6, 6, 7, 6, 8, 7, 8, 8, 8, 8] 0.9759170227920227 0.9507211538461539
[8, 8, 8, 6, 6, 6, 6, 8, 7, 8, 8, 8, 8] 0.973869301994302 0.9475160256410257
[8, 8, 8, 5, 6, 6, 6, 8, 7, 8, 8, 8, 8] 0.9706196581196581 0.9451121794871795
[8, 8, 8, 5, 6, 5, 6, 8, 7, 8, 8, 8, 8] 0.9721777065527065 0.9475160256410257
[8, 7, 8, 5, 6, 5, 6, 8, 7, 8, 8, 8, 8] 0.9673700142450142 0.9399

In [7]:
# Average the per-fold pruning curves returned by test_clustering.
# Each fold contributes (rp, accTrain, accTest) lists of equal length; folds
# are aligned at step 0.  Every step is averaged over the folds that actually
# reached it, instead of dividing by a hard-coded fold count.
iters = max(len(x[1]) for x in r)

clustSize = np.zeros((iters,numGestures))
accTrain = np.zeros(iters)
accTest = np.zeros(iters)
foldsPerIter = np.zeros(iters)  # number of folds contributing to each step

for foldSizes, foldAccTrain, foldAccTest in r:
    steps = len(foldAccTrain)
    clustSize[:steps] += np.asarray(foldSizes)
    accTrain[:steps] += np.asarray(foldAccTrain)
    accTest[:steps] += np.asarray(foldAccTest)
    foldsPerIter[:steps] += 1

# iters is the longest fold, so every step has at least one contributor.
clustSize /= foldsPerIter[:,None]
accTrain /= foldsPerIter
accTest /= foldsPerIter

In [8]:
# Persist the fold-averaged curves as a single (clustSize, accTrain, accTest) tuple.
with open('optimal_clustering_reverse_positions.pickle', 'wb') as outFile:
    pickle.dump(
        (clustSize,accTrain,accTest),
        outFile,
        protocol=pickle.HIGHEST_PROTOCOL,
    )

In [9]:
# Display the fold-averaged test accuracy after each pruning step
# (index 0 = no prototypes removed yet).
accTest

array([0.9839, 0.9839, 0.9837, 0.9819, 0.9798, 0.9785, 0.9773, 0.9744,
       0.9711, 0.9696, 0.967 , 0.9624, 0.9558, 0.9517, 0.9476, 0.9407,
       0.9352, 0.9293, 0.9228, 0.9173, 0.914 , 0.9071, 0.9028, 0.9023,
       0.8992, 0.8964, 0.8928, 0.8859, 0.881 , 0.8773, 0.8724, 0.8685,
       0.8665, 0.8629, 0.8607, 0.8535, 0.849 , 0.8437, 0.8391, 0.832 ,
       0.8248, 0.816 , 0.809 , 0.8005, 0.7918, 0.7845, 0.7758, 0.7665,
       0.759 , 0.7505, 0.7424, 0.7353, 0.7279, 0.7211, 0.7133, 0.7046,
       0.6972, 0.6919, 0.6827, 0.6744, 0.667 , 0.6609, 0.6514, 0.6421,
       0.6326, 0.6234, 0.6147, 0.6099, 0.6054, 0.5981, 0.5921, 0.5881,
       0.5828, 0.5758, 0.5732, 0.568 , 0.5624, 0.5569, 0.552 , 0.5514,
       0.5414, 0.5297, 0.5148, 0.5014, 0.4855, 0.4728, 0.4596, 0.445 ,
       0.4306, 0.4148, 0.4111, 0.3976])