## Setup

In [2]:
import sys
sys.path.append('../../pytei')

In [9]:
import torch
import torch.nn as nn

from torch.utils.data import Dataset, DataLoader, TensorDataset

import matplotlib.pyplot as plt
from pytei import Injector

# All tensors and models live on the CPU; use torch.device('cuda:0') to run on a GPU instead.
device = torch.device('cpu')

# Fixed seed so the synthetic data and the model initialisation are reproducible.
SEED = 0
torch.manual_seed(SEED)

# torch.set_default_tensor_type('torch.FloatTensor') is deprecated (PyTorch >= 2.1);
# the two calls below are its documented replacement: float32 tensors created on the CPU.
torch.set_default_dtype(torch.float32)
torch.set_default_device('cpu')
torch.set_printoptions(sci_mode = False)

  _C._set_default_tensor_type(t)


### Configure Dummy Model and Synthetic Dataset

In [4]:
class DatasetGenerator():
    """Generates a synthetic feature matrix and wraps it in a Dataset / DataLoader.

    Every sample is the concatenation of ``sparse_dim`` multi-hot entries
    (each exactly 0.0 or 1.0) followed by ``dense_dim`` values drawn with
    ``torch.rand``.

    Args:
        num_sample: number of samples (rows) to generate.
        dense_dim: number of dense feature columns.
        sparse_dim: number of multi-hot (sparse) feature columns.
        device: device the generated feature tensor is moved to.
    """

    def __init__(self, num_sample, dense_dim, sparse_dim, device) -> None:
        self.num_sample = num_sample
        self.dense_dim = dense_dim
        self.sparse_dim = sparse_dim
        self.device = device
        # Filled in by generate_dataset() / generate_dataloader().
        self.dataset = None
        self.dataloader = None

    def generate_dataset(self, input_sparsity):
        """Create a fresh dataset whose sparse columns are 1 with probability ``input_sparsity``.

        Raises:
            ValueError: if ``input_sparsity`` is outside ``[0, 1]``.
        """
        if not 0 <= input_sparsity <= 1:
            raise ValueError(f"input_sparsity must be in [0, 1], got {input_sparsity!r}")

        # Draw the multi-hot mask directly so every entry is exactly 0.0 or 1.0.
        # (Dropout followed by a rescale only gives approximately 1.0 in float32,
        # which is fragile if the values are later truncated to integers.)
        active_prob = torch.full((self.num_sample, self.sparse_dim), float(input_sparsity))
        self.sparse_features = torch.bernoulli(active_prob)
        self.dense_features = torch.rand(size = (self.num_sample, self.dense_dim, ))
        self.features = torch.concat((self.sparse_features, self.dense_features), dim = -1)
        self.features = self.features.to(self.device)
        self.dataset = TensorDataset(self.features)
        # Any previously built loader still points at the old dataset; invalidate it.
        self.dataloader = None

    def generate_dataloader(self, batch_size = 2048):
        """Build an unshuffled DataLoader over the current dataset.

        Raises:
            RuntimeError: if generate_dataset() has not been called yet.
        """
        if self.dataset is None:
            raise RuntimeError("call generate_dataset() before generate_dataloader()")
        self.dataloader = DataLoader(self.dataset, batch_size = batch_size, shuffle = False)

    def get_data_n_loader(self):
        """Return the ``(dataset, dataloader)`` pair built by the two generate_* methods.

        Raises:
            RuntimeError: if the dataset or its dataloader has not been generated.
        """
        if self.dataset is None or self.dataloader is None:
            raise RuntimeError(
                "call generate_dataset() and generate_dataloader() before get_data_n_loader()"
            )
        return (self.dataset, self.dataloader)

In [5]:
def mlp_layer_config(mlp_layer, input_dim, hidden_dim, dense_output_dim, pred_input_dim):
    """Build the dense feature extractor and the predictor MLPs.

    Both networks start with ``Linear(in, hidden_dim) + ReLU`` followed by
    ``mlp_layer - 1`` additional ``Linear(hidden_dim, hidden_dim) + ReLU`` pairs.
    The dense extractor then ends with ``Linear(hidden_dim, dense_output_dim) + ReLU``;
    the predictor ends with a bare ``Linear(hidden_dim, 1)``.

    Args:
        mlp_layer: depth setting, a positive integer (1 and 2 reproduce the
            original hand-written configurations).
        input_dim: input width of the dense extractor.
        hidden_dim: width of every hidden layer.
        dense_output_dim: output width of the dense extractor.
        pred_input_dim: input width of the predictor.

    Returns:
        ``(dense_extractor, predictor)`` as two ``nn.Sequential`` modules.

    Raises:
        ValueError: if ``mlp_layer`` is not a positive integer.
    """
    if mlp_layer != int(mlp_layer) or mlp_layer < 1:
        raise ValueError(f"mlp_layer must be a positive integer, got {mlp_layer!r}")
    extra_hidden = int(mlp_layer) - 1

    def hidden_stack(in_features):
        # Input projection plus the additional hidden-to-hidden layers.
        layers = [nn.Linear(in_features, hidden_dim), nn.ReLU()]
        for _ in range(extra_hidden):
            layers += [nn.Linear(hidden_dim, hidden_dim), nn.ReLU()]
        return layers

    # Layers are created in the same order as before (extractor first, then
    # predictor) so a seeded run consumes the RNG identically.
    dense_extractor = nn.Sequential(
        *hidden_stack(input_dim),
        nn.Linear(hidden_dim, dense_output_dim),
        nn.ReLU(),
    )
    predictor = nn.Sequential(
        *hidden_stack(pred_input_dim),
        nn.Linear(hidden_dim, 1),
    )
    return dense_extractor, predictor

class TestDRS(nn.Module):
    """Dummy recommendation model: an embedding bag for sparse features plus two MLPs.

    The input's last dimension is laid out as ``[sparse_dim multi-hot entries |
    dense features]``. The multi-hot part selects rows of an ``nn.EmbeddingBag``
    (summed per sample), the dense part goes through ``dense_extractor``, and the
    concatenation ``[dense output | embedding sum]`` is fed to ``predictor``.

    Args:
        sparse_dim: number of multi-hot columns, also the number of embedding rows.
        dense_dim: number of dense columns (stored for reference only).
        dense_extractor: module applied to the dense columns.
        predictor: module applied to the concatenated features.
        embed_dim: width of each embedding row.
    """

    def __init__(self, sparse_dim, dense_dim, dense_extractor, predictor, embed_dim) -> None:
        super().__init__()

        self.sparse_dim = sparse_dim
        self.dense_dim = dense_dim

        self.sparse_extractor = nn.EmbeddingBag(num_embeddings = sparse_dim, embedding_dim = embed_dim, mode = 'sum')
        self.dense_extractor = dense_extractor
        self.predictor = predictor

    def forward(self, x):
        """Return ``predictor(concat(dense_extractor(dense part), embedding sum))``."""
        dense_features = self.dense_extractor(x[..., self.sparse_dim:])

        # Turn the multi-hot block into real index bags. Casting the 0/1 values to
        # int and using them as indices would only ever read embedding rows 0 and 1.
        multi_hot = x[..., :self.sparse_dim]
        leading_shape = multi_hot.shape[:-1]
        active = multi_hot.reshape(-1, self.sparse_dim) != 0
        # nonzero() is row-major, so the column ids come out grouped by sample.
        _, row_ids = active.nonzero(as_tuple = True)
        bag_sizes = active.sum(dim = -1)
        offsets = bag_sizes.cumsum(dim = 0) - bag_sizes  # start of each sample's bag
        sparse_features = self.sparse_extractor(row_ids, offsets)
        sparse_features = sparse_features.reshape(*leading_shape, self.sparse_extractor.embedding_dim)

        x = torch.concat((dense_features, sparse_features), dim = -1)
        x = self.predictor(x)
        return x
    

### Obtain Golden Results (error-free)

In [10]:
# Generate one synthetic dataset (and its DataLoader) per input sparsity level.
input_sparsities = [.001, .01, .1]
dense_dim = 128
sparse_dim = 8192
num_sample = 10000

dataset_generator = DatasetGenerator(num_sample, dense_dim, sparse_dim, device)

# Maps input sparsity -> (dataset, dataloader).
example_data = {}
for input_sparsity in input_sparsities:
    dataset_generator.generate_dataset(input_sparsity)
    dataset_generator.generate_dataloader()
    example_data[input_sparsity] = dataset_generator.get_data_n_loader()

# Persist the inputs so later cells / sessions evaluate on exactly the same data.
torch.save(example_data, './example_input.pth')
# The file holds pickled Dataset/DataLoader objects rather than plain tensors, so it
# needs full unpickling; newer PyTorch releases default to weights_only=True and would
# reject it. Only load files produced by this notebook this way.
example_data = torch.load('./example_input.pth', weights_only = False)

In [11]:
# Sweep configuration for the error-free ("golden") reference run.
embed_dims = [64, 128, 256, 512]
mlp_dims = [64, 128, 256, 512]
mlp_layers = [1, 2]
input_sparsities = [.001, .01, .1]
dense_dim = 128
sparse_dim = 8192
num_sample = 10000

# Indexed as [mlp_layer, mlp_dim, embed_dim, input_sparsity, sample].
results_golden = torch.zeros(size = (
    len(mlp_layers),
    len(mlp_dims),
    len(embed_dims),
    len(input_sparsities),
    num_sample
))
# Maps '<mlp_layer>_<mlp_dim>_<embed_dim>' -> model, reused by the error-injection cells.
example_models = {}

dense_output_dim = 32

for mlp_layer_i, mlp_layer in enumerate(mlp_layers):
    for mlp_dim_i, mlp_dim in enumerate(mlp_dims):
        for embed_dim_i, embed_dim in enumerate(embed_dims):
            # The predictor consumes the dense extractor output concatenated with the embedding sum.
            pred_input_dim = dense_output_dim + embed_dim
            dense_extractor, predictor = mlp_layer_config(mlp_layer, dense_dim, mlp_dim, dense_output_dim, pred_input_dim)
            test_drs = TestDRS(sparse_dim, dense_dim, dense_extractor, predictor, embed_dim)
            # Re-initialise every weight matrix (biases are 1-D and keep their default init).
            for param in test_drs.parameters():
                if param.dim() > 1:
                    nn.init.kaiming_uniform_(param)
            test_drs = test_drs.eval().to(device)
            example_models[f'{mlp_layer}_{mlp_dim}_{embed_dim}'] = test_drs

            for input_sparsity_i, input_sparsity in enumerate(input_sparsities):
                _, example_loader = example_data[input_sparsity]
                # Inference only: skip autograd bookkeeping and keep predictions as tensors.
                with torch.no_grad():
                    preds = torch.cat([test_drs(*item) for item in example_loader])
                results_golden[mlp_layer_i, mlp_dim_i, embed_dim_i, input_sparsity_i] = preds.squeeze(-1).cpu()

torch.save(example_models, './example_models.pth')
torch.save(results_golden, './results_golden.pth')

### Obtain Results (with error)

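- Inject errors into all components, into the MLPs only, and into the embedding table only, respectively (one cell per case). Parameters such as `mlp_dim`, `mlp_layer`, and `dense_dim` can be varied to explore other configurations.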

In [15]:

# Sweep configuration; must match the golden run so results can be compared element-wise.
embed_dims = [64, 128, 256, 512]
mlp_dims = [64, 128, 256, 512]
mlp_layers = [1, 2]
input_sparsities = [.001, .01, .1]
dense_dim = 128
sparse_dim = 8192
num_sample = 10000

# Bit error rates to sweep.
bers = [1e-8, 5e-8, 1e-7, 5e-7, 1e-6]
# Indexed as [ber, mlp_layer, mlp_dim, embed_dim, input_sparsity, sample].
results_error_injected_all = torch.zeros(size = (
    len(bers),
    len(mlp_layers),
    len(mlp_dims),
    len(embed_dims),
    len(input_sparsities),
    num_sample
))

dense_output_dim = 32

# The file holds whole pickled modules, which needs weights_only=False on newer PyTorch.
# Only load files produced by this notebook this way.
example_models = torch.load('./example_models.pth', weights_only = False)

for ber_i, ber in enumerate(bers):
    print(ber)
    for mlp_layer_i, mlp_layer in enumerate(mlp_layers):
        for mlp_dim_i, mlp_dim in enumerate(mlp_dims):
            for embed_dim_i, embed_dim in enumerate(embed_dims):
                test_drs = example_models[f'{mlp_layer}_{mlp_dim}_{embed_dim}']
                test_drs = test_drs.eval().to(device)
                # Snapshot of the error-free weights. Injector.inject() appears to corrupt the
                # module in place (its return value is unused) — TODO confirm. Without restoring,
                # errors would pile up across batches, sparsities and BER levels.
                golden_state = {name: tensor.detach().clone() for name, tensor in test_drs.state_dict().items()}

                for input_sparsity_i, input_sparsity in enumerate(input_sparsities):
                    _, example_loader = example_data[input_sparsity]
                    preds = []
                    for item in example_loader:
                        # Start every batch from clean weights so it sees errors at the nominal BER.
                        test_drs.load_state_dict(golden_state)
                        injector = Injector("./targets", p = ber, device = device, verbose = False)
                        injector.inject(test_drs)
                        del injector
                        with torch.no_grad():
                            preds.append(test_drs(*item))
                    preds = torch.cat(preds)
                    results_error_injected_all[ber_i, mlp_layer_i, mlp_dim_i, embed_dim_i, input_sparsity_i] = preds.squeeze(-1).cpu()

                # Leave the shared model error-free for the next BER level.
                test_drs.load_state_dict(golden_state)

torch.save(results_error_injected_all, './results_error_injected_all.pth')

1e-08
5e-08
1e-07
5e-07
1e-06


In [16]:
# Sweep configuration; must match the golden run so results can be compared element-wise.
embed_dims = [64, 128, 256, 512]
mlp_dims = [64, 128, 256, 512]
mlp_layers = [1, 2]
input_sparsities = [.001, .01, .1]
dense_dim = 128
sparse_dim = 8192
num_sample = 10000

# Bit error rates to sweep.
bers = [1e-8, 5e-8, 1e-7, 5e-7, 1e-6]
# Indexed as [ber, mlp_layer, mlp_dim, embed_dim, input_sparsity, sample].
results_error_injected_mlp = torch.zeros(size = (
    len(bers),
    len(mlp_layers),
    len(mlp_dims),
    len(embed_dims),
    len(input_sparsities),
    num_sample
))

dense_output_dim = 32

# The file holds whole pickled modules, which needs weights_only=False on newer PyTorch.
# Only load files produced by this notebook this way.
example_models = torch.load('./example_models.pth', weights_only = False)

for ber_i, ber in enumerate(bers):
    print(ber)
    for mlp_layer_i, mlp_layer in enumerate(mlp_layers):
        for mlp_dim_i, mlp_dim in enumerate(mlp_dims):
            for embed_dim_i, embed_dim in enumerate(embed_dims):
                test_drs = example_models[f'{mlp_layer}_{mlp_dim}_{embed_dim}']
                test_drs = test_drs.eval().to(device)
                # Snapshot of the error-free weights. Injector.inject() appears to corrupt the
                # module in place (its return value is unused) — TODO confirm. Without restoring,
                # errors would pile up across batches, sparsities and BER levels.
                golden_state = {name: tensor.detach().clone() for name, tensor in test_drs.state_dict().items()}

                for input_sparsity_i, input_sparsity in enumerate(input_sparsities):
                    _, example_loader = example_data[input_sparsity]
                    preds = []
                    for item in example_loader:
                        # Start every batch from clean weights so it sees errors at the nominal BER.
                        test_drs.load_state_dict(golden_state)
                        # NOTE(review): this Injector is configured the same way as in the
                        # "all components" run; nothing visible here restricts injection to the
                        # MLP parameters — confirm how the target subset is selected.
                        injector = Injector("./targets", ber, device = device, verbose = False)
                        injector.inject(test_drs)
                        del injector
                        with torch.no_grad():
                            preds.append(test_drs(*item))
                    preds = torch.cat(preds)
                    results_error_injected_mlp[ber_i, mlp_layer_i, mlp_dim_i, embed_dim_i, input_sparsity_i] = preds.squeeze(-1).cpu()

                # Leave the shared model error-free for the next BER level.
                test_drs.load_state_dict(golden_state)

torch.save(results_error_injected_mlp, './results_error_injected_mlp.pth')

1e-08
5e-08
1e-07
5e-07
1e-06


In [17]:
# Sweep configuration; must match the golden run so results can be compared element-wise.
embed_dims = [64, 128, 256, 512]
mlp_dims = [64, 128, 256, 512]
mlp_layers = [1, 2]
input_sparsities = [.001, .01, .1]
dense_dim = 128
sparse_dim = 8192
num_sample = 10000

# Bit error rates to sweep.
bers = [1e-8, 5e-8, 1e-7, 5e-7, 1e-6]
# Indexed as [ber, mlp_layer, mlp_dim, embed_dim, input_sparsity, sample].
results_error_injected_embed = torch.zeros(size = (
    len(bers),
    len(mlp_layers),
    len(mlp_dims),
    len(embed_dims),
    len(input_sparsities),
    num_sample
))

dense_output_dim = 32

# The file holds whole pickled modules, which needs weights_only=False on newer PyTorch.
# Only load files produced by this notebook this way.
example_models = torch.load('./example_models.pth', weights_only = False)

for ber_i, ber in enumerate(bers):
    print(ber)
    for mlp_layer_i, mlp_layer in enumerate(mlp_layers):
        for mlp_dim_i, mlp_dim in enumerate(mlp_dims):
            for embed_dim_i, embed_dim in enumerate(embed_dims):
                test_drs = example_models[f'{mlp_layer}_{mlp_dim}_{embed_dim}']
                test_drs = test_drs.eval().to(device)
                # Snapshot of the error-free weights. Injector.inject() appears to corrupt the
                # module in place (its return value is unused) — TODO confirm. Without restoring,
                # errors would pile up across batches, sparsities and BER levels.
                golden_state = {name: tensor.detach().clone() for name, tensor in test_drs.state_dict().items()}

                for input_sparsity_i, input_sparsity in enumerate(input_sparsities):
                    _, example_loader = example_data[input_sparsity]
                    preds = []
                    for item in example_loader:
                        # Start every batch from clean weights so it sees errors at the nominal BER.
                        test_drs.load_state_dict(golden_state)
                        # NOTE(review): this Injector is configured the same way as in the
                        # "all components" run; nothing visible here restricts injection to the
                        # embedding parameters — confirm how the target subset is selected.
                        injector = Injector("./targets", ber, device = device, verbose = False)
                        injector.inject(test_drs)
                        del injector
                        with torch.no_grad():
                            preds.append(test_drs(*item))
                    preds = torch.cat(preds)
                    results_error_injected_embed[ber_i, mlp_layer_i, mlp_dim_i, embed_dim_i, input_sparsity_i] = preds.squeeze(-1).cpu()

                # Leave the shared model error-free for the next BER level.
                test_drs.load_state_dict(golden_state)

torch.save(results_error_injected_embed, './results_error_injected_embed.pth')

1e-08
5e-08
1e-07
5e-07
1e-06
