# Robust Lookup Table: AI Model

The goal of this notebook is to explore and experiment with a deep learning approach to create the Robust Lookup Table.

### Summary

* #### I. Generate fake scenarios & backends
* #### II. Design Deep learning model
* #### II. Train model


## I. Generate fake scenarios & backends

### I.1. Generate fake Scenarios


In [1]:
import random
import uuid
import hashlib
import numpy as np

In [2]:
# -- Fake scenarios

# ScenarioGeneratorConfig
# - size: the fixed size of the lookup table.
# - nBeforeBounds(x, y): nBefore ∈ [x, y].
# - nAfterBounds(x, y): nAfter ∈ [x, y].
# - variance(x, y): x < min(nBefore,nAfter)/max(nBefore,nAfter); y < max(nBefore,nAfter) - min(nBefore,nAfter)
# # - sizeBounds(x, y): lookup table size ∈ [x, y].
class ScenarioGeneratorConfig:
    size: int
    nBeforeBounds: (int, int)
    nAfterBounds: (int, int)
    variance: (float, int)
    # sizeBounds: (int, int)

    def __init__(
        self,
        size: int,
        nBeforeBounds: (int, int),
        nAfterBounds: (int, int),
        variance: (float, int),
        # sizeBounds: (int, int),
    ):
        if nBeforeBounds[1] > size or nAfterBounds[1] > size:
            raise Exception("nBeforeBounds and nAfterBounds cannot exceed size")

        self.size = size
        self.nBeforeBounds = nBeforeBounds
        self.nAfterBounds = nAfterBounds
        self.variance = variance
        # self.sizeBounds = sizeBounds

class Scenario:
    nBefore: int
    nAfter: int
    size: int

def validate_scenario(cfg: ScenarioGeneratorConfig, scenario: Scenario) -> bool:
    var = cfg.variance[0]
    delta = cfg.variance[1]

    _min = min([scenario.nBefore, scenario.nAfter])
    _max = max([scenario.nBefore, scenario.nAfter])
    _var = _min/_max
    _delta = _max - _min
    _sz = scenario.size

    return _var <= var and _delta <= delta and _max <= _sz and _min != _max

# creates a new scenario generator.
def new_scenario_generator(cfg):
    while True:
        scenario = Scenario()
        scenario.nBefore = random.randint(cfg.nBeforeBounds[0], cfg.nBeforeBounds[1])
        scenario.nAfter = random.randint(cfg.nAfterBounds[0], cfg.nAfterBounds[1])
        scenario.size = cfg.size
        # scenario.size = random.randint(cfg.sizeBounds[0], cfg.sizeBounds[1])

        if validate_scenario(cfg, scenario):
            yield scenario

In [3]:
nBeforeBounds = (3, 47)
nAfterBounds = (1, 47)
variance = (1.0, 10)
size = 47

cfg = ScenarioGeneratorConfig(size, nBeforeBounds, nAfterBounds, variance)
sc = new_scenario_generator(cfg)

for i in range(3):
    s = next(sc)
    print(s.__dict__)

{'nBefore': 11, 'nAfter': 13, 'size': 47}
{'nBefore': 10, 'nAfter': 7, 'size': 47}
{'nBefore': 13, 'nAfter': 16, 'size': 47}


### I.2. Generate fake Backends

In [4]:
class Backend:
    id: str
    h0: int
    h1: int
    h2: int
    h3: int

    def __init__(self):
        self.id = uuid.uuid4()
        
        _h = hashlib.sha256()
        _h.update(self.id.bytes_le)
        _b = _h.digest()
        self.h0 = int.from_bytes(_b[0:8], "little")
        self.h1 = int.from_bytes(_b[8:16], "little")
        self.h2 = int.from_bytes(_b[16:24], "little")
        self.h3 = int.from_bytes(_b[24:32], "little")

def list_permutation(l: list, size: int) -> list:
    _p = np.random.permutation(l)
    return _p[0:size]

# new_backend_generator takes a list of Scenario and yields a tuple of 2 Backends.
# The "before" list and the "after" list.
def new_backend_generator(scenarioGenerator):
    while True:
        sc = next(scenarioGenerator)
        _min = min([sc.nBefore, sc.nAfter])
        _max = max([sc.nBefore, sc.nAfter])
        l_min = []
        l_max = []

        # create the l_max backend array.
        l_max = [ Backend() for _ in range(_max) ]
        # for _ in range(_max):
        #     l_max.append(Backend())

        # create l_min array by randomly choosing _min elements of l_max.
        l_min = list_permutation(l_max, _min)

        # sort both arrays.
        l_max = sorted(l_max, key=lambda x: str(x.id))
        l_min = sorted(l_min, key=lambda x: str(x.id))
        
        if sc.nBefore < sc.nAfter:
            yield (l_min, l_max)
        else:
            # print([x.__dict__ for x in l_min])
            yield (l_max, l_min)

In [5]:
generator = new_backend_generator(new_scenario_generator(cfg))

for i in range(2):
    t = next(generator)
    print(f"- Before: len={len(t[0])} example_value={t[0][0].__dict__}")
    print(f"- After: len={len(t[1])} example_value={t[1][0].__dict__}")

- Before: len=46 example_value={'id': UUID('081c6938-5cf1-457e-bb1a-53d9c19d4c33'), 'h0': 12177907672886707155, 'h1': 4090156810695666339, 'h2': 10037499313374949002, 'h3': 15345484821649218001}
- After: len=41 example_value={'id': UUID('081c6938-5cf1-457e-bb1a-53d9c19d4c33'), 'h0': 12177907672886707155, 'h1': 4090156810695666339, 'h2': 10037499313374949002, 'h3': 15345484821649218001}
- Before: len=14 example_value={'id': UUID('0ed6cee3-3ac6-405b-8718-749764132de2'), 'h0': 9265070034081372038, 'h1': 11708355870753080384, 'h2': 5420950698529392131, 'h3': 5135519590266678383}
- After: len=12 example_value={'id': UUID('0ed6cee3-3ac6-405b-8718-749764132de2'), 'h0': 9265070034081372038, 'h1': 11708355870753080384, 'h2': 5420950698529392131, 'h3': 5135519590266678383}


## II. Design Deep learning model

### Definitions

- Let `m` equal to the length of the lookup table: `m=len(lookup_table)`.
- Let `input` an array of length equal to m: `len(input)=m`. 
  - Each entry in `input` represents a backend.
- Let `h(i)` the hash of the i-th backend in `input`.
- Let `input[i]=h(i) % m`.
- Let `n` the number of backend actually represented in `input`.
  - Because: `nAfter != nBefore` and `max(nAfter,nBefore) <= m`.
- Let `output` a matrix of size `m*m`.
- Let `output[j]` the j-th row in the `output` matrix.
  - The j-th row of the `output` matrix represents the j-th entry of
     the lookup table.
- Let `o(i,j)` the i-th entry in `output[j]`.
  - `o(i,j)` is the probability of the i-th backend being mapped to the
     j-th entry of the lookup table.

### Input data

Problem 1: How to represent data in the inputs that is unmapped? 
- E.g. if the modulo of a hash is equal to 0, how should we represent entries
  that are out of bound.
- In other words, if `n=13` and `m=47`, how do we represent the entries with
  index in the range of [13:47]?

Definitions:
- Let `in-bound` entries the name of entries in the range [0:13].
- Let `out-of-bound` entries the name of entries in the range [13:47].

Solution:
- If we normalize `in-bound` entries as real numbers in [0,1], then we can
set `out-of-bound` entries to `-1`.
- Another solution would be to represent the input as a `m*m` matrix. 
  - The i-th row representing the i-th backend.
  - The j-th entry in i-th row representing the modulo of the hash of the i-th backend
  - If the j-th entry of the i-th row is equal to 1, it means 
  - If all entries of the i-th row are equal to 0, then it means there are no backends 
    there.

Problem 2: what if multiple backend have the same modulo?
- This is particularly problematic if 2 subsequent backends resolves to the same modulo
  and 1 of the backend becomes down. 
- In that case, there is not way to identify which backend was dropped from the model's
  point of view. 
- Hence there is a 50% chance to reaffect packets away from a healthy backend.

Solution:
- Compute multiple hash for each backend. Or split the 256-bit hash into 4 int64 and 
  compute 4 modulo. The probability of encountering 4 collisions in the same order
  would be significantly lower (the actual improvement has not be calculated).

### Model training:

- Pass the "before" training data through the model.
- Pass the "after" training data through the model.
- Compute "even distribution" score: to ensure the backends are evenly
  distributed in the output.
- Compute "validity" score: 
  - to ensure the model does not make inference `out-of-bound`.
- Optional: compute a "confidence score", by calculating how likely the top inference
  is compared to other o(i,j) value in i-th row.
- Compute the % of unchanged entries between "Before" and "After".
- Compute the stability score.
- Compute loss function from "validity", "even distribution" and "stability" score.


In [6]:
import os
import math
import datetime
import time

import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cpu device


In [25]:
class Output:
    # -- size m of the lookup table.
    size: int
    # -- the raw backend input tensor
    raw_input: list
    # -- the length of the input (also named n)
    input_len: int
    # -- the raw tensor output. Size=m*m.
    raw_output: list
    # -- the cleaned list of Union[uuid,None] output. Size=n (n=len(backends)
    output: list[str]
    
    def __init__(self, size, raw_input, raw_output, output):
        self.size = size
        self.raw_input = raw_input
        self.input_len = len(raw_input)
        self.raw_output = raw_output
        self.output = output

In [78]:
# NN takes `m` (int) as a paremeter. 
# `m` is the length of the lookup table.
# input dimensions is a tensor of size `m` and dimension 1.
# output dimensions are `m*m` matrices.
class NN(nn.Module):
    size: int

    def __init__(self, m: int):
        super().__init__()

        self.size = m
        self.flatten = nn.Flatten(start_dim=0)
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(m*4, m),
            nn.ReLU(),
            nn.Linear(m, m),
            nn.ReLU(),
            nn.Linear(m, m*m),
        )
        self.softmax = nn.Softmax(dim=1)

    # The input is a tuple of ([]Backend, []Backend):
    #  - input[0] named `__raw_b_in`, is a list of size `b_len`.
    #  - input[1] named `__raw_a_in`, is a list of size `a_len`.
    # The output is a tuple of ([]Union(str,None), Union(str,None)).
    #  - output[0] is a list of size `b_len`(=len(__raw_b_in)).
    #  - output[1] is a list of size `a_len`(=len(__raw_a_in)).
    def forward(self, x) -> (list, list):
        __raw_b_in, __raw_a_in = x

        # -- prepare
        __b_in = self.__clean_input(__raw_b_in)
        __a_in = self.__clean_input(__raw_a_in)

        # -- infer
        __b_out = self.__forward_once(__b_in)
        __a_out = self.__forward_once(__a_in)

        # -- post-process
        cleaned_b_out = self.__clean_output(__raw_b_in, __b_out)
        cleaned_a_out = self.__clean_output(__raw_a_in, __a_out)

        b_out = Output(size, __raw_b_in, __b_out, cleaned_b_out)
        a_out = Output(size, __raw_a_in, __a_out, cleaned_a_out)

        return b_out, a_out

    def __forward_once(self, x):
        x = self.flatten(x)
        x = self.linear_relu_stack(x)
        x = torch.reshape(x, [self.size, self.size]) # make it a 2-dimensional array.
        return self.softmax(x)

    def __clean_input(self, x):
        out = np.zeros((size,4))
        for i, backend in enumerate(x):
            # backend[1] is the hash.
            out[i][0] = backend.h0 % self.size
            out[i][1] = backend.h1 % self.size
            out[i][2] = backend.h2 % self.size
            out[i][3] = backend.h3 % self.size
        return torch.tensor(out, requires_grad=True, dtype=torch.float32)

    def __clean_output(self, x_in, x_out):
        # We want to return a list of length `m` that
        # associate each i-th entry with a backend uuid.
        # 
        # An entry at index `i` is obtained by fetching the
        # uuid of the backend at index `j` of `x_in`.
        # `j` is the index of the highest value of the j-th
        # row of `x_out`.
        # 
        # - x_in is the raw input.
        # - x_out are matrices of size m*m.
        out = []
        for row in x_out:
            # the first element is the max value.
            # we may want to output it in order to calculate the loss.
            # this would measure the confidence of the algorithm in the inference. 
            _, j = torch.max(row, 0) 
            if j < len(x_in):
                out.append(str(x_in[j].id))
            else:
                out.append(None)
        return out

model = NN(47).to(device)
model(next(generator))[0].__dict__

{'size': 47,
 'raw_input': [<__main__.Backend at 0x7f1e1a536ed0>,
  <__main__.Backend at 0x7f1e1a539c70>,
  <__main__.Backend at 0x7f1e1a537c80>,
  <__main__.Backend at 0x7f1e1a535c70>,
  <__main__.Backend at 0x7f1e1a535af0>,
  <__main__.Backend at 0x7f1e1a537140>,
  <__main__.Backend at 0x7f1e1a5390a0>,
  <__main__.Backend at 0x7f1e1a538650>,
  <__main__.Backend at 0x7f1e1a538d70>,
  <__main__.Backend at 0x7f1e1a535b20>,
  <__main__.Backend at 0x7f1e1a537080>,
  <__main__.Backend at 0x7f1e1a538890>,
  <__main__.Backend at 0x7f1e1a538380>,
  <__main__.Backend at 0x7f1e1a535940>,
  <__main__.Backend at 0x7f1e1a535df0>,
  <__main__.Backend at 0x7f1e1a536e70>,
  <__main__.Backend at 0x7f1e1a5351c0>,
  <__main__.Backend at 0x7f1e1a537d70>,
  <__main__.Backend at 0x7f1e1a538230>,
  <__main__.Backend at 0x7f1e1a5379b0>,
  <__main__.Backend at 0x7f1e1a537b60>,
  <__main__.Backend at 0x7f1e1a536c30>,
  <__main__.Backend at 0x7f1e1a537fe0>,
  <__main__.Backend at 0x7f1e1a5358b0>,
  <__main__.Ba

In [79]:
x = torch.rand(5)
print(x)
aw = torch.argwhere(x > 0.5)
print(aw)
print("n:", len(aw))

tensor([0.9600, 0.2058, 0.4792, 0.6136, 0.0961])
tensor([[0],
        [3]])
n: 2


In [86]:
# ValidityLoss is computed as the square of the sum of `out-of-bound` results.
class ValidityLoss(nn.Module):
    def __init__(self):
        super(ValidityLoss, self).__init__()

    # raw_output{before,after} are passed as arguments to brain autograd. 
    def forward(
        self, 
        outputs: (list[Output], list[Output]), 
        raw_output_before,
        raw_output_after,
    ):
        bef, aft = outputs
        
        loss = self.__compute_loss(bef.output)
        loss += self.__compute_loss(aft.output)

        return torch.tensor(float(loss), requires_grad=True)

    def __compute_loss(self, x) -> int:
        loss = 0
        for item in x:
            if item is None:
               loss +=1
        return loss

class DistributionLoss(nn.Module):
    pass

class StabilityLoss(nn.Module):
    # raw_output{before,after} are passed as arguments to brain autograd. 
    def forward(
        self, 
        outputs: (list[Output], list[Output]), 
        raw_output_before,
        raw_output_after,
    ):
        bef, aft = outputs
        
        loss = self.__compute_loss(bef.output)
        loss += self.__compute_loss(aft.output)

        return torch.tensor(float(loss), requires_grad=True)
    pass

inputs = ([0,1], [2,3])
outputs = (
    # size, raw_input, raw_output, output
    Output(2, ["noused","notused"], torch.tensor([[0.,1.],[2.,3.]], requires_grad=True), ["yolo", None] ),
    Output(3, ["noused","notused","notused"], torch.tensor([[0.,1.,2.],[3.,4.,5.]],requires_grad=True), ["yolo", None, None] ),
)

valLoss = ValidityLoss().forward(outputs, None, None)
distLoss = 0 # DistributionLoss().forward(outputs)
stabLoss = 0 # StabilityLoss().forward(outputs)

print("valLoss:", valLoss,", distLoss:", distLoss, ", stabLoss:", stabLoss)

valLoss: tensor(3., requires_grad=True) , distLoss: 0 , stabLoss: 0


## III. Train the model

In [81]:
# Algorithm parameters
m = 47
nBeforeBounds = (3, m)
nAfterBounds = (1, m)
variance = (1.0, 10)

# Hyperparameters
learning_rate = 1e-4
epochs = 4000
batch_size = 8

In [87]:
cfg = ScenarioGeneratorConfig(m, nBeforeBounds, nAfterBounds, variance)
backend_generator = new_backend_generator(new_scenario_generator(cfg))

model = NN(m).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
loss_fn = ValidityLoss()

total_params = sum(p.numel() for p in model.parameters())
print(f"model has {total_params} parameters")

for name, param in model.named_parameters():
    print(f"Parameter name: {name}, Requires grad: {param.requires_grad}")

start_time = time.time()
print("training model...")
for epoch in range(epochs):
    # -- reset optimizer
    optimizer.zero_grad()

    # -- init loss
    loss = 0.

    # -- run batch
    for i in range(batch_size):
        # -- generate inputs
        inputs = next(backend_generator)
        # -- run model
        outputs = model(inputs)
        # -- compute loss
        loss += loss_fn(outputs, outputs[0].raw_output, outputs[1].raw_output)

    loss = loss / batch_size
    loss.backward()

    # -- step
    optimizer.step()

    # -- log
    if epoch % 100 == 0:
        elapsed = datetime.timedelta(seconds=(time.time() - start_time))
        print(f"epoch {epoch+1}/{epochs}: loss={loss}, elapsed_time={elapsed}")
        for name, param in model.named_parameters():
            if name == "linear_relu_stack.4.bias":
                print(f"Parameter name: {name}, data_sample: {param.data[0]}")

print(f"Training done, elapsed_time={elapsed}")

model has 117171 parameters
Parameter name: linear_relu_stack.0.weight, Requires grad: True
Parameter name: linear_relu_stack.0.bias, Requires grad: True
Parameter name: linear_relu_stack.2.weight, Requires grad: True
Parameter name: linear_relu_stack.2.bias, Requires grad: True
Parameter name: linear_relu_stack.4.weight, Requires grad: True
Parameter name: linear_relu_stack.4.bias, Requires grad: True
training model...
epoch 1/4000: loss=40.0, elapsed_time=0:00:00.019176
Parameter name: linear_relu_stack.4.bias, data_sample: -0.11141757667064667
epoch 101/4000: loss=32.625, elapsed_time=0:00:01.593771
Parameter name: linear_relu_stack.4.bias, data_sample: -0.11141757667064667
epoch 201/4000: loss=44.625, elapsed_time=0:00:02.916196
Parameter name: linear_relu_stack.4.bias, data_sample: -0.11141757667064667
epoch 301/4000: loss=40.625, elapsed_time=0:00:04.328678
Parameter name: linear_relu_stack.4.bias, data_sample: -0.11141757667064667
epoch 401/4000: loss=34.75, elapsed_time=0:00:05