## Utility functions

In [1]:
from torch import nn

# this prints out the named parameters of a model
def print_named_params(model: nn.Module) -> None:
    for name, param in model.named_parameters():
        print(f"{name}: {param.shape}")

In [5]:
import torch
from copy import deepcopy
# import timm
torch.set_printoptions(precision = 6, sci_mode = False)

if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [6]:
import sys
sys.path.append('../../pytei')

In [7]:
def get_num_parameters(model: nn.Module) -> int:
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

## GPT 2

In [8]:
from transformers import GPT2Tokenizer, GPT2Model

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
test_model = GPT2Model.from_pretrained('gpt2')

In [9]:
with open("targets", "w") as f:
    for name, param in model.named_parameters():
        print(f"{name}: {param.shape}")
        f.write(f";{name}\n")

NameError: name 'model' is not defined

## Mamba

In [10]:
tokenizer = AutoTokenizer.from_pretrained("state-spaces/mamba-130m-hf")
model = MambaForCausalLM.from_pretrained("state-spaces/mamba-130m-hf")
input_ids = tokenizer("Hey how are you doing?", return_tensors= "pt")["input_ids"]

out = model.generate(input_ids, max_new_tokens=10)
print(tokenizer.batch_decode(out))

NameError: name 'AutoTokenizer' is not defined

In [44]:
from transformers import MambaConfig, MambaForCausalLM, AutoTokenizer, MambaModel
import torch

In [50]:
tokenizer = AutoTokenizer.from_pretrained("state-spaces/mamba-130m-hf")
# test_model = MambaForCausalLM.from_pretrained("state-spaces/mamba-130m-hf")
test_model = MambaModel(MambaConfig()) # not sure whether this is the pretrained version or not

In [51]:
print_named_params(test_model)

embeddings.weight: torch.Size([50280, 768])
layers.0.norm.weight: torch.Size([768])
layers.0.mixer.A_log: torch.Size([1536, 16])
layers.0.mixer.D: torch.Size([1536])
layers.0.mixer.conv1d.weight: torch.Size([1536, 1, 4])
layers.0.mixer.conv1d.bias: torch.Size([1536])
layers.0.mixer.in_proj.weight: torch.Size([3072, 768])
layers.0.mixer.x_proj.weight: torch.Size([80, 1536])
layers.0.mixer.dt_proj.weight: torch.Size([1536, 48])
layers.0.mixer.dt_proj.bias: torch.Size([1536])
layers.0.mixer.out_proj.weight: torch.Size([768, 1536])
layers.1.norm.weight: torch.Size([768])
layers.1.mixer.A_log: torch.Size([1536, 16])
layers.1.mixer.D: torch.Size([1536])
layers.1.mixer.conv1d.weight: torch.Size([1536, 1, 4])
layers.1.mixer.conv1d.bias: torch.Size([1536])
layers.1.mixer.in_proj.weight: torch.Size([3072, 768])
layers.1.mixer.x_proj.weight: torch.Size([80, 1536])
layers.1.mixer.dt_proj.weight: torch.Size([1536, 48])
layers.1.mixer.dt_proj.bias: torch.Size([1536])
layers.1.mixer.out_proj.weight: 

In [52]:
# import re
# pattern = r".*weight$"

# with open("targets_mamba", "w") as f:
#     for param_name, _ in test_model.named_parameters():
#         if re.match(pattern, param_name):
#             f.write(f"{param_name}\n")

## Test out Pytei on model

In [11]:
from pytei import Injector

model = deepcopy(test_model).to(device)
model.eval()
text = "blahblahblah"
test_input = tokenizer(text, return_tensors='pt').to(device)

with torch.no_grad():
    # error_map_file = "./targets"
    error_map_file = "./targets_mamba"
    injector = Injector(error_map_file, p = 1e-7, device = device, verbose = True, mitigation = 'clip')
    print('----------Error free----------')
    error_free_out = model(**test_input) # gpt
    # error_free_out = model.generate(test_input["input_ids"]) # mamba
    print('Outputs (error-free):', error_free_out)
    
    print('----------Error Injected----------')
    injector.inject(model)
    error_out = model(**test_input) # gpt
    # error_out = model.generate(test_input["input_ids"]) # mamba
    print('Outputs (error-injected):', error_out)

    # print('----------Error Mitigated----------')
    # model = deepcopy(model).to(device)
    # model.eval()
    # injector.inject(model, use_mitigation = True)
    # error_mitig_out = model(**test_input) # gpt
    # error_mitig_out = model.generate(test_input["input_ids"]) # mamba
    # print('Outputs (error-mitigated):', error_mitig_out)

    # injector.save_error_map('../../temp/testmap.pt', sparse = True)
    # injector.load_error_map('../../temp/testmap.pt', sparse = True)

Injector initialized.
Error probability: 1e-07
Data type: torch.float32
Error model: bit
----------Error free----------
Outputs (error-free): BaseModelOutputWithPastAndCrossAttentions(last_hidden_state=tensor([[[-0.103412, -0.091130, -0.307225,  ..., -0.164230,  0.012637,
          -0.080722],
         [-0.847831, -0.317081, -1.190969,  ...,  0.154238,  0.025244,
           0.295463],
         [-0.884541, -0.686861, -0.170217,  ...,  0.042957, -0.269914,
           0.235113],
         [-0.260378,  0.154410, -0.386218,  ...,  0.239388, -0.052664,
          -0.076842],
         [-0.428518, -0.215767, -0.580418,  ...,  0.245511, -0.083306,
           0.392479],
         [-0.158654,  0.402145, -0.372327,  ...,  0.442274, -0.229969,
          -0.113622]]]), past_key_values=((tensor([[[[    -1.206789,      2.134209,      0.389755,  ...,
               -1.278964,     -0.253597,      1.787401],
          [    -2.200362,      2.386499,      2.112720,  ...,
               -1.103022,     -2.01573

In [12]:
print('----------Difference (RMSE)----------')
rmse = torch.sqrt(torch.mean((error_out.last_hidden_state - error_free_out.last_hidden_state) ** 2)).item() # gpt
print(f"inject & error-free: ", rmse)
rmse = torch.sqrt(torch.mean((error_mitig_out.last_hidden_state - error_free_out.last_hidden_state) ** 2)).item() # gpt
print(f"mitigated & error-free: ", rmse)

----------Difference (RMSE)----------
inject & error-free:  0.0


NameError: name 'error_mitig_out' is not defined

In [None]:
tokenizer = AutoTokenizer.from_pretrained("state-spaces/mamba-130m-hf")
test_model = MambaForCausalLM.from_pretrained("state-spaces/mamba-130m-hf")

from pytei import Injector

model = deepcopy(test_model).to(device)
model.eval()
text = "blahblahblah"
test_input = tokenizer(text, return_tensors='pt').to(device)

print("model: ", mode.__name__)

with torch.no_grad():
    injector = Injector('./targets', p = 1e-7, device = device, verbose = True, mitigation = 'clip')
    print('----------Error free----------')
    error_free_out = model(**test_input)
    print('Outputs (error-free):', error_free_out)
    
    print('----------Error Injected----------')
    injector.inject(model)
    error_out = model(**test_input)
    print('Outputs (error-injected):', error_out)

    print('----------Error Mitigated----------')
    model = deepcopy(model).to(device)
    model.eval()
    injector.inject(model, use_mitigation = True)
    error_mitig_out = model(**test_input)
    print('Outputs (error-mitigated):', error_mitig_out)