In [1]:
import numpy as np

from sklearn.metrics import pairwise_distances, accuracy_score
from scipy.spatial.distance import cdist, pdist

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn_lvq import RslvqModel

import umap
import time

import pandas as pd

In [2]:
# dataset archive and the hypervector encoding inside it that is analysed
dataName = 'allHV.npz'
emgHVType =  'hvRel'
allHV = np.load(dataName)

# hypervectors plus the gesture / arm-position / trial label of every example
hv, gestLabel, posLabel, trialLabel = (
    allHV[key] for key in (emgHVType, 'gestLabel', 'posLabel', 'trialLabel'))

# distinct values of each label, and how many of them there are
gestures, positions, trials = (
    np.unique(values) for values in (gestLabel, posLabel, trialLabel))
numGestures, numPositions, numTrials = map(len, (gestures, positions, trials))

# data size info
D = hv.shape[1] # hypervector dimension (second axis of the loaded array)
numHV = 80 # number of examples per trial (hard-coded, not derived from the data)

# one 'tab20' color per gesture and per position for plotting
gPalette, pPalette = (
    sns.color_palette('tab20', count) for count in (numGestures, numPositions))

In [3]:
def bipolarize(Y, rng=None):
    """Return a copy of ``Y`` with every entry mapped to -1.0 or +1.0 by sign.

    Positive entries become 1.0, negative entries become -1.0, and entries
    that are exactly zero are assigned -1.0 or 1.0 at random.

    Parameters
    ----------
    Y : array_like
        Values to quantize. The input itself is not modified.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness for the zero entries. When omitted, NumPy's
        global ``np.random`` state is used, exactly as before, so
        ``np.random.seed`` still controls the result.

    Returns
    -------
    numpy.ndarray
        Array with the shape and dtype of ``np.copy(Y)``.
    """
    X = np.copy(Y)

    # Build every mask once, before any value is overwritten.
    positive = X > 0
    negative = X < 0
    zero = X == 0

    X[positive] = 1.0
    X[negative] = -1.0

    n_zero = np.count_nonzero(zero)
    if n_zero:
        # Ties carry no sign information, so they are broken at random.
        choose = np.random.choice if rng is None else rng.choice
        X[zero] = choose([-1.0, 1.0], size=n_zero)
    return X

In [4]:
def centroids(X,label=None):
    """Bipolarized element-wise sum of the rows of ``X``, overall or per class.

    Without ``label`` the result is a single row built from all of ``X``.
    With ``label`` there is one row per distinct label value, in the order
    returned by ``np.unique(label)``, each built from the rows of ``X`` that
    carry that label.
    """
    if label is None:
        return bipolarize(np.sum(X,axis=0)).reshape(1,-1)

    classes = np.unique(label)
    c = np.zeros((len(classes), X.shape[1]))
    for row,cls in enumerate(classes):
        members = X[label==cls]
        c[row,:] = bipolarize(np.sum(members,axis=0))
    return c

In [5]:
def mean_centroids(X,label=None):
    """Element-wise mean of the rows of ``X``, overall or per class.

    Without ``label`` the result is a single row averaged over all of ``X``.
    With ``label`` there is one row per distinct label value, in the order
    returned by ``np.unique(label)``, each averaged over the rows of ``X``
    that carry that label.
    """
    if label is None:
        return np.mean(X,axis=0).reshape(1,-1)

    classes = np.unique(label)
    c = np.zeros((len(classes), X.shape[1]))
    for row,cls in enumerate(classes):
        members = X[label==cls]
        c[row,:] = np.mean(members,axis=0)
    return c

In [6]:
def classify(v,am,metric):
    """Index of the nearest row of ``am`` for every row of ``v``.

    Distances are computed with ``scipy.spatial.distance.cdist`` using the
    given ``metric``; the result holds one row index into ``am`` per row of
    ``v``.
    """
    return cdist(v,am,metric).argmin(axis=1)

# Performing LVQ on class prototypes

In [7]:
# results store: experiment name -> dict with prototypes, labels and accuracies
c = dict()
# work on a copy of the loaded hypervectors
X = np.copy(hv)

In [8]:
# Baseline: one prototype per gesture, no LVQ refinement.
#   init_prototypes - bipolarized element-wise sum of each gesture's hypervectors
#   mean_prototypes - element-wise mean of each gesture's hypervectors
init_prototype_labels = np.unique(gestLabel).reshape(-1,1)
init_prototypes = centroids(X,gestLabel)
mean_prototypes = mean_centroids(X,gestLabel)

# prototype matrix with the gesture label appended as the last column;
# the LVQ cells pass it as `initial_prototypes`
ip = np.hstack((init_prototypes,init_prototype_labels))

# nearest-prototype accuracy on the same data, mean prototypes
accCos = accuracy_score(init_prototype_labels[classify(X,mean_prototypes,'cosine')],gestLabel)
accEuc = accuracy_score(init_prototype_labels[classify(X,mean_prototypes,'euclidean')],gestLabel)

# nearest-prototype accuracy on the same data, bipolarized prototypes
accCosBi = accuracy_score(init_prototype_labels[classify(X,init_prototypes,'cosine')],gestLabel)
pred = init_prototype_labels[classify(X,init_prototypes,'euclidean')]
accEucBi = accuracy_score(pred,gestLabel)

print('Single prototype per gesture, no LVQ:')
print('\tRaw: %f cosine, %f euclidean' % (accCos,accEuc))
print('\tBipolarized: %f cosine, %f euclidean' % (accCosBi,accEucBi))
c['singleCentroid'] = dict(
    centroids=mean_prototypes,
    bipolarizedCentroids=bipolarize(init_prototypes),
    labels=init_prototype_labels,
    accCos=accCos,
    accEuc=accEuc,
    accCosBi=accCosBi,
    accEucBi=accEucBi,
)

Single prototype per gesture, no LVQ:
	Raw: 0.754607 cosine, 0.748397 euclidean
	Bipolarized: 0.760176 cosine, 0.760176 euclidean


In [10]:
# Fit an RslvqModel with one prototype per gesture and no user-supplied
# starting prototypes (initial_prototypes=None leaves the start to the library).
# A fixed seed replaces random_state=None so repeated runs use the same seed.
glvq = RslvqModel(prototypes_per_class=1,
                  initial_prototypes=None,
                  sigma=0.01,
                  max_iter=10000,
                  gtol=1e-7,
                  display=False,
                  random_state=0).fit(X,gestLabel)

# learned prototypes, their bipolarized version, and the class of each prototype
cent = np.copy(glvq.w_)
centBi = bipolarize(cent)
lab = glvq.c_w_

# nearest-prototype accuracy on the fitting data, cosine distance
pred = lab[classify(X,cent,'cosine')]
accCos = accuracy_score(pred,gestLabel)

# nearest-prototype accuracy on the fitting data, euclidean distance
pred = lab[classify(X,cent,'euclidean')]
accEuc = accuracy_score(pred,gestLabel)

# same with the bipolarized prototypes, cosine distance
pred = lab[classify(X,centBi,'cosine')]
accCosBi = accuracy_score(pred,gestLabel)

# same with the bipolarized prototypes, euclidean distance
pred = lab[classify(X,centBi,'euclidean')]
accEucBi = accuracy_score(pred,gestLabel)

print('Single prototype per gesture, LVQ randomly initialized:')
print('\tRaw: %f cosine, %f euclidean' % (accCos,accEuc))
print('\tBipolarized: %f cosine, %f euclidean' % (accCosBi,accEucBi))
c['singleLVQRandom'] = {'centroids':cent, 'bipolarizedCentroids':centBi, 'labels':lab, 'accCos':accCos, 'accEuc':accEuc, 'accCosBi':accCosBi, 'accEucBi':accEucBi}

# Hamming distance between row i of the centroid prototypes and row i of the
# bipolarized LVQ prototypes (diagonal of the pairwise distance matrix)
d = np.diagonal(cdist(init_prototypes,centBi,'hamming'))
print('\tAverage distance between prototypes: %f' % (np.mean(d)))

Single prototype per gesture, LVQ randomly initialized:
	Raw: 0.974038 cosine, 0.999840 euclidean
	Bipolarized: 0.843029 cosine, 0.843029 euclidean
	Average distance between prototypes: 0.238269


In [11]:
# Fit an RslvqModel with one prototype per gesture, starting from the
# labelled centroid matrix `ip`.
glvq = RslvqModel(
    prototypes_per_class=1, initial_prototypes=ip, sigma=0.01,
    max_iter=10000, gtol=1e-7, display=False, random_state=None,
).fit(X,gestLabel)

# class of each learned prototype, the prototypes, and their bipolarized version
lab = glvq.c_w_
cent = np.copy(glvq.w_)
centBi = bipolarize(cent)

# nearest-prototype accuracy on the fitting data, raw prototypes
accCos = accuracy_score(lab[classify(X,cent,'cosine')],gestLabel)
accEuc = accuracy_score(lab[classify(X,cent,'euclidean')],gestLabel)

# nearest-prototype accuracy on the fitting data, bipolarized prototypes
accCosBi = accuracy_score(lab[classify(X,centBi,'cosine')],gestLabel)
pred = lab[classify(X,centBi,'euclidean')]
accEucBi = accuracy_score(pred,gestLabel)

print('Single prototype per gesture, LVQ centroid initialized:')
print('\tRaw: %f cosine, %f euclidean' % (accCos,accEuc))
print('\tBipolarized: %f cosine, %f euclidean' % (accCosBi,accEucBi))
c['singleLVQCentroid'] = dict(
    centroids=cent,
    bipolarizedCentroids=centBi,
    labels=lab,
    accCos=accCos,
    accEuc=accEuc,
    accCosBi=accCosBi,
    accEucBi=accEucBi,
)

# Hamming distance between each starting centroid and the bipolarized LVQ
# prototype stored in the same row
d = np.diagonal(cdist(init_prototypes,centBi,'hamming'))
print('\tAverage distance between prototypes: %f' % (np.mean(d)))

KeyboardInterrupt: 

In [None]:
# One combined class per (gesture, arm position) pair that occurs in the data:
# combs[i] is the (gesture, position) pair of combined class i and
# gestPosLabel holds the combined class index of every example.
combs, gestPosLabel = np.unique(np.column_stack((gestLabel,posLabel)),axis=0,return_inverse=True)
# Keep the index vector 1-D so the boolean row masks built from it stay 1-D
# (the shape of the inverse for `axis=` inputs changed in NumPy 2.0.0).
gestPosLabel = gestPosLabel.reshape(-1)

# initial prototypes for LVQ: one bipolarized centroid per combined class,
# labelled with the gesture (first column of combs) and stacked as [prototype | label]
init_prototypes = centroids(X,gestPosLabel)
init_prototype_labels = combs[:,[0]]
ip = np.concatenate((init_prototypes,init_prototype_labels),axis=1)
mean_prototypes = mean_centroids(X,gestPosLabel)

# nearest-prototype gesture accuracy, mean prototypes, cosine distance
pred = init_prototype_labels[classify(X,mean_prototypes,'cosine')]
accCos = accuracy_score(pred,gestLabel)

# nearest-prototype gesture accuracy, mean prototypes, euclidean distance
pred = init_prototype_labels[classify(X,mean_prototypes,'euclidean')]
accEuc = accuracy_score(pred,gestLabel)

# nearest-prototype gesture accuracy, bipolarized prototypes, cosine distance
pred = init_prototype_labels[classify(X,init_prototypes,'cosine')]
accCosBi = accuracy_score(pred,gestLabel)

# nearest-prototype gesture accuracy, bipolarized prototypes, euclidean distance
pred = init_prototype_labels[classify(X,init_prototypes,'euclidean')]
accEucBi = accuracy_score(pred,gestLabel)

In [None]:
# Report and store the multi-prototype baseline (one prototype per gesture/position pair).
print('Multi prototype per gesture, no LVQ:')
print('\tRaw: %f cosine, %f euclidean' % (accCos,accEuc))
print('\tBipolarized: %f cosine, %f euclidean' % (accCosBi,accEucBi))
# 'centroids' holds the mean prototypes that accCos/accEuc were computed with,
# matching the layout of the 'singleCentroid' entry.
c['multiCentroid'] = {'centroids':mean_prototypes, 'bipolarizedCentroids':bipolarize(init_prototypes), 'labels':init_prototype_labels, 'accCos':accCos, 'accEuc':accEuc, 'accCosBi':accCosBi, 'accEucBi':accEucBi}

In [None]:
# Fit an RslvqModel with numPositions prototypes per gesture and no
# user-supplied starting prototypes (initial_prototypes=None leaves the start
# to the library). A fixed seed replaces random_state=None so repeated runs
# use the same seed.
glvq = RslvqModel(prototypes_per_class=numPositions,
                  initial_prototypes=None,
                  sigma=0.01,
                  max_iter=10000,
                  gtol=1e-7,
                  display=False,
                  random_state=0).fit(X,gestLabel)

# learned prototypes, their bipolarized version, and the class of each prototype
cent = np.copy(glvq.w_)
centBi = bipolarize(cent)
lab = glvq.c_w_

# nearest-prototype accuracy on the fitting data, cosine distance
pred = lab[classify(X,cent,'cosine')]
accCos = accuracy_score(pred,gestLabel)

# nearest-prototype accuracy on the fitting data, euclidean distance
pred = lab[classify(X,cent,'euclidean')]
accEuc = accuracy_score(pred,gestLabel)

# same with the bipolarized prototypes, cosine distance
pred = lab[classify(X,centBi,'cosine')]
accCosBi = accuracy_score(pred,gestLabel)

# same with the bipolarized prototypes, euclidean distance
pred = lab[classify(X,centBi,'euclidean')]
accEucBi = accuracy_score(pred,gestLabel)

print('Multi prototype per gesture, LVQ random initialized:')
print('\tRaw: %f cosine, %f euclidean' % (accCos,accEuc))
print('\tBipolarized: %f cosine, %f euclidean' % (accCosBi,accEucBi))
c['multiLVQRandom'] = {'centroids':cent, 'bipolarizedCentroids':centBi, 'labels':lab, 'accCos':accCos, 'accEuc':accEuc, 'accCosBi':accCosBi, 'accEucBi':accEucBi}

# Hamming distance between row i of the centroid prototypes and row i of the
# bipolarized LVQ prototypes.
# NOTE(review): with library-chosen starting prototypes nothing here ties
# prototype i to the (gesture, position) pair of init_prototypes[i] - confirm
# that this row-wise pairing is meaningful before interpreting the average.
d = np.diagonal(cdist(init_prototypes,centBi,'hamming'))
print('\tAverage distance between prototypes: %f' % (np.mean(d)))

In [None]:
# Fit an RslvqModel with numPositions prototypes per gesture, starting from
# the labelled gesture/position centroid matrix `ip`.
glvq = RslvqModel(
    prototypes_per_class=numPositions, initial_prototypes=ip, sigma=0.01,
    max_iter=10000, gtol=1e-7, display=False, random_state=None,
).fit(X,gestLabel)

# class of each learned prototype, the prototypes, and their bipolarized version
lab = glvq.c_w_
cent = np.copy(glvq.w_)
centBi = bipolarize(cent)

# nearest-prototype accuracy on the fitting data, raw prototypes
accCos = accuracy_score(lab[classify(X,cent,'cosine')],gestLabel)
accEuc = accuracy_score(lab[classify(X,cent,'euclidean')],gestLabel)

# nearest-prototype accuracy on the fitting data, bipolarized prototypes
accCosBi = accuracy_score(lab[classify(X,centBi,'cosine')],gestLabel)
pred = lab[classify(X,centBi,'euclidean')]
accEucBi = accuracy_score(pred,gestLabel)

print('Multi prototype per gesture, LVQ centroid initialized:')
print('\tRaw: %f cosine, %f euclidean' % (accCos,accEuc))
print('\tBipolarized: %f cosine, %f euclidean' % (accCosBi,accEucBi))
c['multiLVQCentroid'] = dict(
    centroids=cent,
    bipolarizedCentroids=centBi,
    labels=lab,
    accCos=accCos,
    accEuc=accEuc,
    accCosBi=accCosBi,
    accEucBi=accEucBi,
)

# Hamming distance between each starting centroid and the bipolarized LVQ
# prototype stored in the same row
d = np.diagonal(cdist(init_prototypes,centBi,'hamming'))
print('\tAverage distance between prototypes: %f' % (np.mean(d)))

In [None]:
# Fit a UMAP embedding (Hamming metric) on all hypervectors and time the fit.
X = np.copy(hv)
reducer = umap.UMAP(n_neighbors=40,min_dist=0.99,metric='hamming')
print('Running UMAP on all data...')
start = time.time()
# fit the estimator configured above rather than constructing a second, identical one
trans = reducer.fit(X)
stop = time.time()
print('\tTook %f seconds' % (stop - start))
# embedded coordinates of the fitted data
hvRed = trans.embedding_

In [None]:
# embedded coordinates of the data UMAP was fitted on; the plotting cells read them as `proj`
proj = trans.embedding_

In [None]:
# For every stored experiment, overlay its bipolarized prototypes on the UMAP
# embedding. For experiments with more prototypes than gestures, also draw one
# panel per (gesture, position) group showing, for each sample in the group,
# the gesture and the position attached to its nearest prototypes.
for k in c.keys():
    centBi = c[k]['bipolarizedCentroids']
    centProj = trans.transform(centBi)
    lab = c[k]['labels']

    f,ax = plt.subplots(figsize=(12,12))
    # x/y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally
    sns.scatterplot(x=proj[:,0],y=proj[:,1],hue=gestLabel,palette=gPalette,alpha=0.5,legend=False,ax=ax)
    sns.scatterplot(x=centProj[:,0],y=centProj[:,1],hue=lab.flatten(),palette=gPalette,marker='P',edgecolor='k',s=500,ax=ax)
    ax.set(title=k,xticklabels=[],yticklabels=[])

    if len(lab) > numGestures:
        numTop = 20  # number of nearest prototypes shown per sample

        # squeeze=False keeps the axes arrays 2-D even with a single gesture or position
        f1,ax1 = plt.subplots(numGestures,numPositions,figsize=(30,30),squeeze=False)
        f2,ax2 = plt.subplots(numGestures,numPositions,figsize=(30,30),squeeze=False)
        f1.suptitle(k + ' top 20 hits - gesture')
        f2.suptitle(k + ' top 20 hits - position')

        # Panels are addressed by enumeration index, so label values do not
        # have to be the integers 0..n-1.
        for gi,g in enumerate(gestures):
            for pi,p in enumerate(positions):
                # local name: the notebook-level X used by the training cells is left untouched
                samples = hv[(gestLabel == g) & (posLabel == p)]
                d = cdist(samples,centBi,'hamming')
                # prototype indices from nearest to farthest, truncated to the
                # top numTop (fewer columns if there are fewer prototypes)
                labIdx = np.argsort(d,axis=1)[:,:numTop]
                # NOTE(review): looking prototypes up in `combs` assumes that
                # prototype row i belongs to the pair combs[i] - confirm this
                # holds for the LVQ run without supplied starting prototypes.
                gestRanks = combs[labIdx,0]
                posRanks = combs[labIdx,1]

                ax1[gi,pi].imshow(gestRanks,aspect='auto',cmap='tab20',vmin=0,vmax=20)
                ax1[gi,pi].set(xticks=[],yticks=[])

                ax2[gi,pi].imshow(posRanks,aspect='auto',cmap='tab20',vmin=0,vmax=20)
                ax2[gi,pi].set(xticks=[],yticks=[])
        plt.show()
            

In [None]:
# UMAP embedding of all samples, colored by gesture label
f,ax = plt.subplots(figsize=(12,12))
# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally
sns.scatterplot(x=proj[:,0],y=proj[:,1],hue=gestLabel,palette=gPalette,edgecolor=None,ax=ax)
ax.set(title='Labeled by gesture',xticklabels=[],yticklabels=[])

In [None]:
# UMAP embedding of all samples, colored by arm-position label
f,ax = plt.subplots(figsize=(12,12))
# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally
sns.scatterplot(x=proj[:,0],y=proj[:,1],hue=posLabel,palette=pPalette,ax=ax)
ax.set(title='Labeled by arm position',xticklabels=[],yticklabels=[])

In [None]:
# For every multi-prototype experiment, draw one histogram per
# (gesture, position) group: for each sample, prototypes are ranked by distance
# and the positions of the prototypes at ranks 1 .. firstMiss-1 are collected,
# where firstMiss is the rank of the first prototype of a different gesture.
for k in c.keys():
    centBi = c[k]['bipolarizedCentroids']
    lab = c[k]['labels']
    if len(lab) > numGestures:
        # squeeze=False keeps the axes array 2-D even with a single gesture or position
        f,ax = plt.subplots(numGestures,numPositions,figsize=(20,20),squeeze=False)
        # Panels are addressed by enumeration index, so label values do not
        # have to be the integers 0..n-1.
        for gi,g in enumerate(gestures):
            for pi,p in enumerate(positions):
                # local name: the notebook-level X used by the training cells is left untouched
                samples = hv[(gestLabel == g) & (posLabel == p)]
                d = cdist(samples,centBi)  # cdist's default metric (euclidean)
                labIdx = np.argsort(d,axis=1)
                # NOTE(review): looking prototypes up in `combs` assumes that
                # prototype row i belongs to the pair combs[i] - confirm this
                # holds for the LVQ run without supplied starting prototypes.
                gestRanks = combs[labIdx,0]
                posRanks = combs[labIdx,1]
                # rank of the first prototype with a different gesture; argmax
                # yields 0 for a row without any such prototype, which the
                # `> 1` filter below then excludes
                firstMiss = np.argmax(gestRanks != g,axis=1)
                # flat integer indices, so no 1-element array has to be converted to a scalar
                coveredSamples = np.flatnonzero(firstMiss > 1)
                barrier = [posRanks[t,1:firstMiss[t]] for t in coveredSamples]
                if barrier:
                    ax[gi,pi].hist(np.hstack(barrier))
                    ax[gi,pi].set(ylim=(0,240))
    plt.show()