# Neural Memory System - MinHashConcatenationRouter demo

## Environment setup

In [1]:
import os
from pathlib import Path

In [2]:
CURRENT_FOLDER = Path(os.getcwd())

In [3]:
CD_KEY = "--MINHASH_CONCATENATION_ROUTER_DEMO_IN_ROOT"

if (
    CD_KEY not in os.environ
    or os.environ[CD_KEY] is None
    or len(os.environ[CD_KEY]) == 0
    or os.environ[CD_KEY] == "false"
):
    %cd -q ../../..
    
    ROOT_FOLDER = Path(os.getcwd()).relative_to(os.getcwd())
    CURRENT_FOLDER = CURRENT_FOLDER.relative_to(ROOT_FOLDER.absolute())
    
os.environ[CD_KEY] = "true"

In [4]:
print(f"Root folder:    {ROOT_FOLDER}")
print(f"Current folder: {CURRENT_FOLDER}")

Root folder:    .
Current folder: demo/components/routers


## Modules

In [5]:
import struct

import numpy as np
import torch
import torch.nn

from nemesys.hashing.minhashing.numpy_minhash import NumPyMinHash
from nemesys.modelling.routers.concatenation.minhash.minhash_concatenation_router import (
    MinHashConcatenationRouter
)

In [6]:
torch.set_printoptions(sci_mode=False)

## MinHash setup

In [7]:
def tensor_to_numpy(x: torch.Tensor):
    x = x.reshape((x.shape[0], -1)) # Preserve batches
    x = np.array(x, dtype=np.float32)
    
    return x


def preprocess_function(element):
    element_as_bytes = struct.pack("<f", float(element))
    element_as_int = np.fromstring(
        element_as_bytes, dtype=np.uint32
    ).astype(np.uint64)[0]

    return element_as_int

In [8]:
minhash = NumPyMinHash(
    n_permutations=4,
    seed=0,
    preprocess_function=preprocess_function,
)

## Router setup

In [9]:
router = MinHashConcatenationRouter(minhash_instance=minhash)

## Data setup

In [10]:
n_components = 3

batch_size = 4
input_size = 5

In [11]:
input_list = [
    torch.normal(mean=0, std=1, size=(batch_size, input_size))
    for _ in range(n_components)
]
transformed_input_list = [
    tensor_to_numpy(x)
    for x in input_list
]

for x in transformed_input_list:
    print(x)

[[ 0.3046537   0.44374186 -0.3322699  -0.5119507  -0.6314258 ]
 [-0.16624093 -0.7653986   0.37151664  1.1652074  -0.8043268 ]
 [-1.5527984  -0.9281696   1.0720382   0.01733972  0.98142576]
 [-0.0818289   0.25497547  0.62854296  0.06091348 -1.0241609 ]]
[[ 0.35646385 -1.4483922   1.5252744   1.3462796   0.57404625]
 [-0.5017233  -0.23838635 -0.30878955  0.43304852 -0.6522648 ]
 [-1.6178601   1.7178988  -0.38555825  1.9820848  -1.4452257 ]
 [ 1.3866167   1.0575365   1.0813372   0.10766499 -1.1934907 ]]
[[-1.2594994   1.1616645   0.14051685 -1.3803155  -1.3005396 ]
 [-0.08484103 -0.11379652  1.1112069  -1.2638999   0.46519783]
 [ 0.5398138   0.2742598  -1.6134946  -0.19242162 -1.1512389 ]
 [-1.6646423  -0.8908239   0.20249332  0.03411724 -0.2655127 ]]


## Results

In [12]:
results = router(transformed_input_list)

  element_as_int = np.fromstring(


In [13]:
t = torch.tensor([batch.astype(np.int64) for batch in results])
print(t.shape)
print(t)

torch.Size([4, 12])
tensor([[ 811112042, 2044578876,  986428128,  313898414,  918139401,  465593181,
          557243040,   49090601,   81840628, 1043316506,  687696800, 1395297723],
        [  78445237,  958941415,  348683803,  426916073, 1182372534,   51443775,
           92504870,  374942956,  243289224,  692504985, 1512657859,  131980672],
        [ 783823861,  200673411, 1475591209, 1624117407, 2184364439,    5419753,
          470582986,  189207870,   40406291,  351238330,  628843326,  330343608],
        [ 353610889,  693314985, 3400939406, 2688117124, 1280997748, 2821960091,
          501761516, 1082071508,  670076042,  896605754,  380623797,  106657242]])
