# Neural Memory System - Centre building

## Environment setup

In [1]:
import os
from pathlib import Path

In [2]:
CURRENT_FOLDER = Path(os.getcwd())

In [3]:
CD_KEY = "--CENTRE_BUILDING_DEMO_IN_ROOT"

if (
    CD_KEY not in os.environ
    or os.environ[CD_KEY] is None
    or len(os.environ[CD_KEY]) == 0
    or os.environ[CD_KEY] == "false"
):
    %cd -q ../../..
    
    ROOT_FOLDER = Path(os.getcwd()).relative_to(os.getcwd())
    CURRENT_FOLDER = CURRENT_FOLDER.relative_to(ROOT_FOLDER.absolute())
    
os.environ[CD_KEY] = "true"

In [4]:
print(f"Root folder:    {ROOT_FOLDER}")
print(f"Current folder: {CURRENT_FOLDER}")

Root folder:    .
Current folder: nemesys/demo/tentative


## Modules

In [5]:
from itertools import product
import math
import struct

import numpy as np
import torch
import torch.nn

from nemesys.hashing.minhashing.numpy_minhash import NumPyMinHash
from nemesys.modelling.analysers.modules.pytorch_analyser_lstm import PyTorchAnalyserLSTM
from nemesys.modelling.decoders.modules.pytorch_decoder_conv2d import PyTorchDecoderConv2D
from nemesys.modelling.encoders.modules.pytorch_encoder_linear import PyTorchEncoderLinear
from nemesys.modelling.routers.concatenation.minhash.minhash_concatenation_router import (
    MinHashConcatenationRouter
)
from nemesys.modelling.stores.pytorch_list_store import PyTorchListStore
from nemesys.modelling.synthesisers.modules.pytorch_synthesiser_linear import PyTorchSynthesiserLinear

In [6]:
torch.set_printoptions(sci_mode=False)

## Components setup

### Sizes

In [7]:
EMBEDDING_SIZE = 4

In [8]:
ANALYSER_CLASS_NAMES = ("statement",)
ANALYSER_OUTPUT_SIZE = EMBEDDING_SIZE

In [9]:
ENCODER_OUTPUT_SIZE = 3

In [10]:
DECODER_IN_CHANNELS = 1
DECODER_OUT_CHANNELS = 3
DECODER_KERNEL_SIZE = (1, ENCODER_OUTPUT_SIZE)

In [11]:
MINHASH_N_PERMUTATIONS = 4
MINHASH_SEED = 0

### Embedding setup

In [12]:
allowed_letters = [chr(x) for x in range(ord("A"), ord("Z") + 1)]
vocabulary = ["".join(x) for x in product(*([allowed_letters] * 3))]
word_to_index = {word: i for i, word in enumerate(vocabulary)}

In [13]:
embedding = torch.nn.Embedding(
    num_embeddings=len(word_to_index),
    embedding_dim=EMBEDDING_SIZE,
    max_norm=math.sqrt(EMBEDDING_SIZE),
)

### Analyser setup

In [14]:
analyser = PyTorchAnalyserLSTM(
    class_names=ANALYSER_CLASS_NAMES,
    input_size=EMBEDDING_SIZE,
    hidden_size=ANALYSER_OUTPUT_SIZE,
    batch_first=True,
)

### Encoder setup

In [15]:
encoder = PyTorchEncoderLinear(
    in_features=ANALYSER_OUTPUT_SIZE,
    out_features=ENCODER_OUTPUT_SIZE,
    content_key="content",
)

### Store setup

In [16]:
store = PyTorchListStore()

### Decoder setup

In [17]:
decoder = PyTorchDecoderConv2D(
    in_channels = DECODER_IN_CHANNELS,
    out_channels = DECODER_OUT_CHANNELS,
    kernel_size = DECODER_KERNEL_SIZE,
)

### Router setup

#### MinHash setup

In [35]:
def tensor_to_numpy(x: torch.Tensor):
    x = x.reshape((x.shape[0], -1)) # Preserve batches
    x = np.array(x, dtype=np.float32)
    
    return x


def preprocess_function(element):
    element_as_bytes = struct.pack("<f", float(element))
    element_as_int = np.fromstring(
        element_as_bytes, dtype=np.uint32
    ).astype(np.uint64)[0]

    return element_as_int

def numpy_to_tensor(x: np.ndarray):
    x_floats = np.vectorize(lambda x: x / ((2 ** 32) - 1))(x)
    
    return torch.tensor(x_floats, dtype=torch.float32)

In [19]:
minhash = NumPyMinHash(
    n_permutations=MINHASH_N_PERMUTATIONS,
    seed=MINHASH_SEED,
    preprocess_function=preprocess_function,
)

#### Continuing router setup

In [20]:
router = MinHashConcatenationRouter(minhash_instance=minhash)

### Synthesiser setup

## Runs

### Data preparation

In [21]:
inputs = ["AAA", "ABA", "BDC"]
has_a = [1 if "A" in x else 0 for x in inputs]

In [22]:
input_indices = [word_to_index[word] for word in inputs]
input_indices = torch.tensor(input_indices)
print(input_indices)

tensor([  0,  26, 756])


In [23]:
output_tensor = torch.tensor(has_a)
print(output_tensor)

tensor([1, 1, 0])


### Embedding run

In [24]:
embeddings = embedding(input_indices)
print(embeddings)

tensor([[ 0.0733,  1.6564,  0.0727,  1.0186],
        [ 0.3968, -0.2372, -1.5747, -0.4124],
        [ 0.2069,  0.6105, -0.5933, -0.8433]], grad_fn=<EmbeddingBackward>)


### Analyser run

In [25]:
analyser_output = analyser(embeddings.reshape(len(inputs), 1, -1))

for class_name in ANALYSER_CLASS_NAMES:
    print(f"{class_name}:")
    print(analyser_output[class_name]["content"])

statement:
tensor([[ 0.0044,  0.1220, -0.0175, -0.2743],
        [-0.0601, -0.0471, -0.1271,  0.2379],
        [-0.1024,  0.0011, -0.0486,  0.1101]], grad_fn=<IndexBackward>)


### Encoder run

In [26]:
encoder_output = encoder(analyser_output["statement"])
print(encoder_output)

{'content': tensor([[ 0.1218, -0.0339,  0.0212],
        [-0.0465,  0.0300,  0.0055],
        [-0.0192, -0.0080,  0.0010]], grad_fn=<MmBackward>)}


### Store run

In [27]:
store.append(encoder_output["content"])
print(store)

[tensor([[ 0.1218, -0.0339,  0.0212],
        [-0.0465,  0.0300,  0.0055],
        [-0.0192, -0.0080,  0.0010]])]


### Decoder run

In [28]:
decoder_output = decoder(store)
print(decoder_output)

{'content': tensor([[[[ 0.4530],
          [ 0.5088],
          [ 0.4872]],

         [[ 0.4030],
          [ 0.4953],
          [ 0.4768]],

         [[-0.1114],
          [-0.0418],
          [-0.0639]]]], grad_fn=<ThnnConv2DBackward>)}


### Router run

In [36]:
router_input = decoder_output["content"].squeeze(dim=0)

In [37]:
router_output = router(router_input)

  element_as_int = np.fromstring(


In [38]:
router_output = numpy_to_tensor(router_output)
print(router_output)

tensor([[0.4881, 0.8373, 0.6695, 0.0170, 0.7944, 0.6838, 0.8554, 0.2267, 0.2587,
         0.0597, 0.3019, 0.7954],
        [0.8945, 0.7646, 0.3306, 0.5174, 0.5584, 0.1036, 0.1118, 0.1061, 0.2825,
         0.2752, 0.6448, 0.2994],
        [0.9799, 0.3582, 0.8451, 0.7665, 0.2921, 0.9583, 0.6857, 0.8045, 0.6184,
         0.1507, 0.1753, 0.6196]])


### Synthesiser run