In [1]:
import numpy as np
import pandas as pd
from scipy.spatial.distance import cdist, pdist

import matplotlib.pyplot as plt
import seaborn as sns
import time

In [2]:
# select dataset and encoding type
dataName = 'allHV.npz'
emgHVType =  'hvRel'

allHV = np.load(dataName)

# extract data and labels based on gesture, trial, and position
hv = allHV[emgHVType]
gestLabel = allHV['gestLabel']
posLabel = allHV['posLabel']
trialLabel = allHV['trialLabel']

combGP, groupGP = np.unique(np.column_stack((gestLabel,posLabel)),axis=0,return_inverse=True)
combGPT, groupGPT = np.unique(np.column_stack((gestLabel,posLabel,trialLabel)),axis=0,return_inverse=True)

# get list of unique values for each label
gestures = np.unique(gestLabel)
positions = np.unique(posLabel)
trials = np.unique(trialLabel)

numGestures = len(gestures)
numPositions = len(positions)
numTrials = len(trials)

# get data size info
D = hv.shape[1] # hypervector dimension
numHV = 80 # number of examples per trial

# color palettes for plotting
gPalette = sns.color_palette('tab20', numGestures)
pPalette = sns.color_palette('tab20', numPositions)

In [3]:
def bipolarize(Y):
    X = np.copy(Y)
    X[X > 0] = 1.0
    X[X < 0] = -1.0
    X[X == 0] = np.random.choice([-1.0, 1.0], size=len(X[X == 0]))
    return X

In [4]:
def centroids(X,label=None):
    if label is not None:
        cLabel = np.unique(label)
        c = np.zeros((len(cLabel), X.shape[1]))
        for i,l in enumerate(cLabel):
            c[i,:] = bipolarize(np.sum(X[label==l],axis=0))
    else:
        c = bipolarize(np.sum(X,axis=0)).reshape(1,-1)
        cLabel = [0]
    return cLabel, c.astype('int')

In [5]:
def hdc_agglom(X):
    N = len(X)
    clusterLabels = np.zeros((N,N)).astype('int')
    clusterLabels[0,:] = np.arange(N)
    
    calcs = {}
    for n in range(1,N):
        # get cluster labels from previous iteration
        prevClust = clusterLabels[n-1]
        numClust = len(np.unique(prevClust))
        print(numClust, prevClust)
        
        # calculate the minimum distances 
        bestChange = np.inf
        for i in range(numClust):
            for j in range(i+1,numClust):
                idxA = np.where((prevClust == i))[0]
                if tuple(idxA) not in calcs.keys():
                    centA = centroids(X[idxA])[1]
                    errA = np.mean(cdist(centA,X[idxA],'hamming'))
                    calcs[tuple(idxA)] = errA
                else:
                    errA = calcs[tuple(idxA)]
                
                idxB = np.where((prevClust == j))[0]
                if tuple(idxB) not in calcs.keys():
                    centB = centroids(X[idxB])[1]
                    errB = np.mean(cdist(centB,X[idxB],'hamming'))
                    calcs[tuple(idxB)] = errB
                else:
                    errB = calcs[tuple(idxB)]
                
                idxC = np.where((prevClust == i) | (prevClust == j))[0]
                if tuple(idxC) not in calcs.keys():
                    centC = centroids(X[idxC])[1]
                    errC = np.mean(cdist(centC,X[idxC],'hamming'))
                    calcs[tuple(idxC)] = errC
                else:
                    errC = calcs[tuple(idxC)]
                                
                totalChange = errC - errA - errB
                
                if totalChange < bestChange:
                    bestChange = totalChange
                    bestIdx = idxC
        
        newClust = prevClust
        newClust[bestIdx] = max(newClust) + 1
        
        _, newClust = np.unique(newClust,return_inverse=True)
        clusterLabels[n] = newClust
    
    return clusterLabels

In [6]:
g = 0
X = hv[gestLabel == g]

prevTime = time.time()

N = len(X)
clusterLabels = np.zeros((N,N)).astype('int')
clusterLabels[0,:] = np.arange(N)

calcs = {}
for n in range(1,N):
    # get cluster labels from previous iteration
    prevClust = np.copy(clusterLabels[n-1])
    numClust = len(np.unique(prevClust))
    newTime = time.time()
    print(numClust, prevClust, (newTime - prevTime))
    prevTime = newTime

    # calculate the minimum distances 
    bestChange = np.inf
    for i in range(numClust):
        for j in range(i+1,numClust):
            idxA = np.where((prevClust == i))[0]
            if tuple(idxA) not in calcs.keys():
                centA = centroids(X[idxA])[1]
                errA = np.mean(cdist(centA,X[idxA],'hamming'))
                calcs[tuple(idxA)] = errA
            else:
                errA = calcs[tuple(idxA)]

            idxB = np.where((prevClust == j))[0]
            if tuple(idxB) not in calcs.keys():
                centB = centroids(X[idxB])[1]
                errB = np.mean(cdist(centB,X[idxB],'hamming'))
                calcs[tuple(idxB)] = errB
            else:
                errB = calcs[tuple(idxB)]

            idxC = np.where((prevClust == i) | (prevClust == j))[0]
            if tuple(idxC) not in calcs.keys():
                centC = centroids(X[idxC])[1]
                errC = np.mean(cdist(centC,X[idxC],'hamming'))
                calcs[tuple(idxC)] = errC
            else:
                errC = calcs[tuple(idxC)]

            totalChange = errC - errA - errB

            if totalChange < bestChange:
                bestChange = totalChange
                bestIdx = idxC

    newClust = prevClust
    newClust[bestIdx] = max(newClust[bestIdx])

    _, temp = np.unique(newClust,return_inverse=True)
    clusterLabels[n] = np.copy(temp)

1920 [   0    1    2 ... 1917 1918 1919] 0.02367401123046875
1919 [   0    1    2 ... 1916 1917 1918] 821.8294219970703
1918 [   0    1    2 ... 1915 1916 1917] 31.563873052597046
1917 [   0    1    2 ... 1914 1915 1916] 32.1731960773468
1916 [   0    1    2 ... 1913 1914 1915] 34.086734771728516
1915 [   0    1    2 ... 1912 1913 1914] 35.91700720787048
1914 [   0    1    2 ... 1911 1912 1913] 34.66499996185303
1913 [   0    1    2 ... 1910 1911 1912] 35.403817892074585
1912 [   0    1    2 ... 1909 1910 1911] 35.72616505622864
1911 [   0    1    2 ... 1908 1909 1910] 35.862147092819214
1910 [   0    1    2 ... 1907 1908 1909] 35.63367486000061
1909 [   0    1    2 ... 1906 1907 1908] 36.42501616477966
1908 [   0    1    2 ... 1905 1906 1907] 36.1197566986084
1907 [   0    1    2 ... 1904 1905 1906] 35.97235298156738
1906 [   0    1    2 ... 1903 1904 1905] 36.053741216659546
1905 [   0    1    2 ... 1902 1903 1904] 36.157697916030884
1904 [   0    1    2 ... 1901 1902 1903] 37.641535

KeyboardInterrupt: 