# VSA and SDM

In [1]:
import torchhd as thd
from hashlib import sha256
import torch

In [2]:
# Seed torch's global RNG before anything random is created, so that the
# hypervectors drawn through torch/torchhd are the same on every
# "Restart Kernel -> Run All".
SEED = 0
torch.manual_seed(SEED)

dim = 2000 # Vector dimension. TODO: justify why it was chosen this high; cite papers showing that confusion is not possible above a certain dimensionality.
n = 100000 # The no. of chunks found in the initial training set. Passed as the first argument of the memory below.

cleanup = thd.memory.SparseDistributed(n, dim, dim) # cleanup/chunk memory. Use for querying and retrieval.

# We also need a memory that keeps track of whether we have encountered a chunk (can also be understood as token) so far or not.
# Maps a token (or, for composite chunks, a hash of the chunk's hypervector) to a dict holding its 'val', 'key' and occurrence count 'cnt'.
encountered = {}

In [3]:
tokens = ["I", "go", "I", "go"]


def chunk_id(hv):
    """Return the SHA-256 hex digest used as the `encountered` key of a composite chunk.

    The digest is computed over the concatenated string forms of the
    elements in the first row of `hv` (i.e. `hv.tolist()[0]`).
    """
    return sha256(''.join(str(elem) for elem in hv.tolist()[0]).encode('utf-8')).hexdigest()


# Generate 1-dim chunks.
for token in tokens:
    if token in encountered:
        # The chunk has been encountered before: only increase its count.
        encountered[token]['cnt'] += 1
        continue
    # First occurrence: generate a random HV for the value and one for the key.
    val = thd.MAPTensor.random(1, dim)
    key = thd.MAPTensor.random(1, dim)
    # Save val, key, and count in the encountered memory.
    encountered[token] = {'val': val, 'key': key, 'cnt': 1}
    # Store the key -> value association in the cleanup memory.
    cleanup.write(keys=key, values=val)

# Generate 2-dim chunks.
# At this point we know for sure that we've encountered all the one-dimensional tokens.
for first, second in zip(tokens, tokens[1:]):
    first_entry, second_entry = encountered[first], encountered[second]
    # Value HV of the pair: sum of the two "key * val" products.
    # NOTE(review): `+` is commutative, so (first, second) and (second, first)
    # yield the same `val` and therefore the same id -- ("I", "go") and
    # ("go", "I") end up in one entry. Confirm that ignoring order is intended.
    val = first_entry['key'] * first_entry['val'] + second_entry['key'] * second_entry['val']
    encountered_key = chunk_id(val)
    if encountered_key in encountered:
        encountered[encountered_key]['cnt'] += 1
        continue
    # Only draw a random key HV when the chunk is actually new.
    key = thd.MAPTensor.random(1, dim)
    encountered[encountered_key] = {'val': val, 'key': key, 'cnt': 1}
    # Store the key -> value association in the cleanup memory.
    cleanup.write(keys=key, values=val)

In [4]:
# Display the bookkeeping dict: one entry per distinct chunk with its 'val', 'key' and 'cnt'.
encountered

{'I': {'val': MAPTensor([[ 1.,  1., -1.,  ..., -1.,  1.,  1.]]),
  'key': MAPTensor([[ 1.,  1.,  1.,  ...,  1., -1., -1.]]),
  'cnt': 2},
 'go': {'val': MAPTensor([[-1., -1., -1.,  ..., -1.,  1., -1.]]),
  'key': MAPTensor([[-1.,  1.,  1.,  ...,  1., -1.,  1.]]),
  'cnt': 2},
 'e32c988a4d64ca8cd7eb03b5b9f92ae79ecac51df1a2621af558b46c0be25bb9': {'val': MAPTensor([[ 2.,  0., -2.,  ..., -2., -2., -2.]]),
  'key': MAPTensor([[ 1., -1.,  1.,  ..., -1.,  1., -1.]]),
  'cnt': 3}}

In [5]:
# Query the cleanup memory with the key stored for "I" and take the element-wise sign of what comes back.
cleanup.read(encountered['I']['key']).sign()

MAPTensor([[ 1.,  1., -1.,  ..., -1.,  1.,  1.]])

In [6]:
# Five random value hypervectors, also unpacked into individual names.
vals = thd.MAPTensor.random(5, dim)
a, b, c, e, f = vals

# Five random key hypervectors, one per value.
keys = thd.MAPTensor.random(5, dim)
arg1, arg2, arg3, arg4, arg5 = keys

# 2nd hierarchy level: two frame hypervectors (only frames[0] is used in the cells shown here).
frames = thd.random(2, dim)

# Multiply every key with its value, add up the five products in order,
# then multiply the sum with the first frame.
record_a = frames[0] * sum(role * filler for role, filler in zip(keys, vals))

In [7]:
# Retrieve the value corresponding to key arg1: multiply the record once more by
# frames[0] and arg1, the same two factors a's term was multiplied with when record_a was built.
arg1_dec = record_a * frames[0] * arg1

In [8]:
# Cosine similarity between the decoded vector and each of the five stored values.
sims = thd.cosine_similarity(arg1_dec, vals)

In [9]:
# Show the similarities; in the recorded output the first entry (value `a`) is far larger than the other four.
sims

MAPTensor([ 4.4692e-01,  1.4516e-02, -2.4194e-02, -3.0792e-03, -4.3988e-04])

In [10]:
# A second, much smaller memory: 5 as the first argument instead of n.
# NOTE(review): every read from this memory recorded in the cells below is all zeros.
# Presumably 5 locations are too few for any of them to be activated by a query --
# TODO confirm against the torchhd SparseDistributed documentation.
sdm = thd.memory.SparseDistributed(5, dim, dim)

In [11]:
# Write all five key/value pairs from above into the small memory.
sdm.write(keys=keys, values=vals)

In [12]:
# Read the small memory back at all five keys in one call.
read = sdm.read(keys)

In [13]:
# Compare every read-back row with every stored value; the recorded output is a 5x5 matrix of zeros.
thd.cosine_similarity(read, vals)

MAPTensor([[0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.]])

In [14]:
# Read at the single key arg1; the recorded (truncated) output shows only zeros.
sdm.read(arg1)

MAPTensor([0., 0., 0.,  ..., 0., 0., 0.])

In [15]:
# Write the single pair (arg1, a) once more, this time with positional arguments.
sdm.write(arg1, a)

In [16]:
# Inspect the key hypervector arg1 itself.
arg1

MAPTensor([1., 1., 1.,  ..., 1., 1., 1.])