In [1]:
import numpy as np
import pickle

import multiprocessing
from joblib import Parallel, delayed

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from scipy.spatial.distance import cdist, pdist

from scipy.stats import mode

import time

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
# Plot styling. The seaborn context is applied first; the rcParams overrides
# below are set afterwards so they are the values left in effect.
sns.set_context('talk')
matplotlib.rcParams.update({
    "axes.prop_cycle": plt.cycler("color", plt.cm.tab20.colors),
    'lines.linewidth': 5,
})

In [2]:
def bipolarize(Y):
    """Return a copy of ``Y`` with every entry mapped to +1 or -1.

    Positive entries become 1.0 and negative entries become -1.0. Entries
    equal to zero are replaced by a value drawn from ``{-1.0, 1.0}`` using
    NumPy's global random state, so results for inputs containing zeros are
    only reproducible if that state is seeded. ``Y`` itself is not modified.
    """
    out = np.copy(Y)

    # The three masks are disjoint, so computing them up front is equivalent
    # to re-testing the array after each assignment.
    isPositive = out > 0
    isNegative = out < 0
    isZero = out == 0

    out[isPositive] = 1.0
    out[isNegative] = -1.0

    # One random draw per zero entry to break ties.
    numZero = int(np.count_nonzero(isZero))
    out[isZero] = np.random.choice([-1.0, 1.0], size=numZero)
    return out

def centroids(X,label=None):
    """Build bipolar prototype vectors by summing rows of ``X``.

    Parameters
    ----------
    X : 2-D array; rows are summed along axis 0.
    label : optional per-row group labels. When given, one prototype is
        built per distinct label that occurs more than once; labels with a
        single row are skipped.

    Returns
    -------
    (labels, prototypes)
        ``prototypes`` is an integer array with one bipolarized row per
        returned label. Without ``label`` a single prototype over all rows is
        returned together with the placeholder label list ``[0]``.
    """
    if label is None:
        # Single prototype over every row, shaped as a 1 x D matrix.
        proto = bipolarize(np.sum(X,axis=0)).reshape(1,-1)
        return [0], proto.astype('int')

    names, counts = np.unique(label,return_counts=True)
    kept = names[counts > 1]  # only labels seen more than once get a prototype

    proto = np.zeros((len(kept), X.shape[1]))
    for row, name in enumerate(kept):
        members = X[label==name]
        proto[row,:] = bipolarize(np.sum(members,axis=0))
    return kept, proto.astype('int')

def classify(v,am,metric):
    """Return, for each row of ``v``, the index of the closest row of ``am``.

    Distances are computed with ``scipy.spatial.distance.cdist`` using the
    given ``metric``; ties resolve to the lowest index (``argmin`` behaviour).
    """
    return np.argmin(cdist(v,am,metric),axis=1)

In [3]:
np.set_printoptions(precision=4)

### loading data
# select dataset and encoding type
dataName = 'allHV.npz'
emgHVType =  'hvRel'

# np.load on an .npz archive returns a lazy NpzFile that keeps the file open.
# Read the arrays we need inside a context manager so the handle is closed
# afterwards; the extracted arrays stay valid, but `allHV` itself must not be
# indexed again after this block.
with np.load(dataName) as allHV:
    # extract data and labels based on gesture, trial, and position
    hv = allHV[emgHVType]
    gestLabel = allHV['gestLabel']
    posLabel = allHV['posLabel']
    trialLabel = allHV['trialLabel']

# Unique (gesture, position) and (gesture, position, trial) combinations, plus
# for every sample the index of the combination it belongs to. The inverse is
# flattened so it is 1-D regardless of the NumPy version in use.
combGP, groupGP = np.unique(np.column_stack((gestLabel,posLabel)),axis=0,return_inverse=True)
groupGP = groupGP.reshape(-1)
combGPT, groupGPT = np.unique(np.column_stack((gestLabel,posLabel,trialLabel)),axis=0,return_inverse=True)
groupGPT = groupGPT.reshape(-1)

# get list of unique values for each label
gestures = np.unique(gestLabel)
positions = np.unique(posLabel)
trials = np.unique(trialLabel)

numGestures = len(gestures)
numPositions = len(positions)
numTrials = len(trials)

# get data size info
D = hv.shape[1] # hypervector dimension
numHV = 80 # number of examples per trial

In [4]:
def test_clustering(X,y,grp,clust,numClust,numSplit=10):
    """Cross-validate a prototype classifier while greedily pruning prototypes.

    For every stratified fold, prototypes are built with ``centroids`` from
    the training rows of each (class, cluster) pair, and samples are assigned
    the class of the nearest prototype under cosine distance. Prototypes are
    then removed one at a time -- each step drops the prototype whose removal
    leaves the highest *training* accuracy (first one wins on ties) -- until
    as many prototypes remain as there are classes in ``y``.

    Parameters
    ----------
    X : array, samples along axis 0.
    y : class label of every sample.
    grp : stratification target handed to ``StratifiedKFold.split``.
    clust : indexable by class label; ``clust[l][numClust[l]]`` must hold the
        cluster id of every sample of class ``l``, in the order those samples
        appear in ``X``.
    numClust : indexable by class label; selects which clustering of
        ``clust[l]`` to use.
    numSplit : number of cross-validation folds.

    Returns
    -------
    list of ``(rp, accTrain, accTest)`` tuples, one per fold. ``rp[k]`` is the
    number of prototypes left per class after ``k`` removals, and
    ``accTrain[k]`` / ``accTest[k]`` are the matching accuracies.
    """
    y = np.asarray(y)
    classLabels = np.unique(y)
    numClasses = len(classLabels)

    # Scatter the per-class cluster ids back to sample positions by mask, so
    # they stay aligned with X even when samples are not sorted by class.
    perClass = [np.asarray(clust[l][numClust[l]]) for l in classLabels]
    c = np.empty(len(y), dtype=np.result_type(*perClass))
    for l, ids in zip(classLabels, perClass):
        c[y == l] = ids

    def prototypesLeft(labels, dropped):
        # Number of not-yet-dropped prototypes for each class.
        keep = np.ones(len(labels), dtype=bool)
        keep[dropped] = False
        return [int(np.sum(labels[keep] == g)) for g in classLabels]

    skf = StratifiedKFold(n_splits=numSplit)
    res = []
    for trainIdx, testIdx in skf.split(X,grp):
        XTrain, XTest = X[trainIdx], X[testIdx]
        yTrain, yTest = y[trainIdx], y[testIdx]
        cTrain = c[trainIdx]

        AM = []
        AMlabels = []
        for l in np.unique(yTrain):
            inClass = yTrain == l
            keptClusters, prototypes = centroids(XTrain[inClass],label=cTrain[inClass])
            AM.append(prototypes)
            # centroids() may skip clusters, so size the label vector from the
            # clusters it actually returned to keep rows and labels aligned.
            AMlabels.append(l*np.ones(len(keptClusters)))
        AM = np.vstack(AM)
        AMlabels = np.hstack(AMlabels)

        dTrain = cdist(XTrain,AM,'cosine')
        dTest = cdist(XTest,AM,'cosine')

        dropCols = []
        accTrain = [accuracy_score(AMlabels[np.argmin(dTrain,axis=1)], yTrain)]
        accTest = [accuracy_score(AMlabels[np.argmin(dTest,axis=1)], yTest)]

        remainingPrototypes = prototypesLeft(AMlabels, dropCols)
        print(remainingPrototypes, accTrain[-1], accTest[-1])
        rp = [remainingPrototypes]

        while len(dropCols) < len(AMlabels) - numClasses:
            # Start below any reachable accuracy so a candidate is always
            # chosen, even if every candidate scores 0.
            bestAcc = -1.0
            bestDrop = None
            for dropCandidate in range(len(AMlabels)):
                if dropCandidate in dropCols:
                    continue
                # Masking a column with inf removes that prototype from the
                # nearest-neighbour search.
                dTemp = np.copy(dTrain)
                dTemp[:,dropCols + [dropCandidate]] = np.inf
                acc = accuracy_score(AMlabels[np.argmin(dTemp,axis=1)], yTrain)
                if acc > bestAcc:
                    bestAcc = acc
                    bestDrop = dropCandidate

            accTrain.append(bestAcc)
            dropCols.append(bestDrop)

            dTemp = np.copy(dTest)
            dTemp[:,dropCols] = np.inf
            accTest.append(accuracy_score(AMlabels[np.argmin(dTemp,axis=1)], yTest))

            remainingPrototypes = prototypesLeft(AMlabels, dropCols)
            rp.append(remainingPrototypes)
            print(remainingPrototypes, accTrain[-1], accTest[-1])

        res.append((rp, accTrain, accTest))

    return res

In [5]:
# One entry per gesture: a dict keyed by the number of clusters, whose value
# is the cluster id of every sample of that gesture. Here the clusters are
# simply the position labels.
clustering = [{numPositions: posLabel[gestLabel == g]} for g in gestures]

# Number of clusters to use for each gesture. Kept as an integer array because
# the entries are used as keys into the dicts above.
numClust = np.full(numGestures, numPositions)

In [6]:
# Run the 10-fold pruning experiment, stratified by (gesture, position) group.
r = test_clustering(
    X=hv,
    y=gestLabel,
    grp=groupGP,
    clust=clustering,
    numClust=numClust,
    numSplit=10,
)

[8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8] 0.9893162393162394 0.9759615384615384
[8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8] 0.9893162393162394 0.9759615384615384
[8, 8, 8, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8] 0.9893162393162394 0.9759615384615384
[8, 8, 8, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8] 0.9893162393162394 0.9759615384615384
[8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8] 0.9893162393162394 0.9759615384615384
[8, 8, 8, 3, 8, 8, 8, 8, 8, 8, 8, 8, 8] 0.9893162393162394 0.9759615384615384
[8, 8, 8, 3, 7, 8, 8, 8, 8, 8, 8, 8, 8] 0.9893162393162394 0.9759615384615384
[8, 8, 8, 3, 6, 8, 8, 8, 8, 8, 8, 8, 8] 0.9893162393162394 0.9759615384615384
[8, 8, 8, 3, 5, 8, 8, 8, 8, 8, 8, 8, 8] 0.9893162393162394 0.9759615384615384
[8, 8, 8, 3, 5, 7, 8, 8, 8, 8, 8, 8, 8] 0.9893162393162394 0.9759615384615384
[8, 8, 8, 3, 5, 6, 8, 8, 8, 8, 8, 8, 8] 0.9893162393162394 0.9759615384615384
[8, 8, 8, 3, 5, 5, 8, 8, 8, 8, 8, 8, 8] 0.9893162393162394 0.9759615384615384
[8, 8, 8, 3, 5, 5, 7, 8, 8, 8, 8, 8, 8] 0.9893162393162394 0.975

In [7]:
# Average the per-fold curves returned by test_clustering. Folds may differ in
# length, so every iteration is normalised by the number of folds that
# actually reached it instead of a hard-coded fold count.
iters = max([len(x[1]) for x in r])

clustSize = np.zeros((iters,numGestures))
accTrain = np.zeros(iters)
accTest = np.zeros(iters)
foldsPerIter = np.zeros(iters)

for x in r:
    n = len(x[1])
    clustSize[:n] += np.array(x[0])
    accTrain[:n] += np.array(x[1])
    accTest[:n] += np.array(x[2])
    foldsPerIter[:n] += 1

# iters is the longest fold, so every entry of foldsPerIter is at least 1.
clustSize /= foldsPerIter[:,None]
accTrain /= foldsPerIter
accTest /= foldsPerIter

In [10]:
# Persist the averaged cluster sizes and accuracy curves to disk as one tuple.
averaged = (clustSize,accTrain,accTest)
with open('optimal_clustering_reverse_positions.pickle', 'wb') as outFile:
    pickle.dump(averaged, outFile, protocol=pickle.HIGHEST_PROTOCOL)

In [13]:
# Display the test accuracy averaged over the CV folds, one value per pruning step.
accTest

array([0.9844, 0.9841, 0.9841, 0.9841, 0.9841, 0.9841, 0.984 , 0.984 ,
       0.984 , 0.9839, 0.9839, 0.9839, 0.9839, 0.9839, 0.9839, 0.9839,
       0.9839, 0.9839, 0.9837, 0.9837, 0.9831, 0.9827, 0.9824, 0.9814,
       0.9808, 0.9801, 0.9797, 0.9794, 0.9784, 0.9774, 0.9759, 0.9749,
       0.9736, 0.9729, 0.9713, 0.9686, 0.9667, 0.9657, 0.9637, 0.9601,
       0.9565, 0.9545, 0.951 , 0.9487, 0.9447, 0.9416, 0.9403, 0.938 ,
       0.9341, 0.9309, 0.927 , 0.923 , 0.9171, 0.9154, 0.9094, 0.9045,
       0.9007, 0.8972, 0.8925, 0.8899, 0.8879, 0.8836, 0.8779, 0.8708,
       0.8625, 0.8552, 0.8486, 0.8423, 0.8352, 0.8278, 0.8191, 0.8115,
       0.8054, 0.7974, 0.79  , 0.7833, 0.7764, 0.769 , 0.7593, 0.7499,
       0.7403, 0.7302, 0.7226, 0.7117, 0.7029, 0.6955, 0.6882, 0.6784,
       0.6654, 0.6486, 0.6314, 0.6127])