In [2]:
from torch import nn

# this prints out the named parameters of a model
def print_named_params(model: nn.Module) -> None:
    for name, param in model.named_parameters():
        print(f"{name}: {param.shape}")

def output_targets(model: nn.Module, file: str) -> None:
    with open(file, 'w') as f:
        for name, param in model.named_parameters():
            f.write(f"{name}\n")

In [3]:
import torch
from copy import deepcopy
#import timm
torch.set_printoptions(precision = 6, sci_mode = False)

if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cpu


In [4]:
import sys
sys.path.append('../../pytei')

In [5]:
def get_num_parameters(model: nn.Module) -> int:
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [6]:
from pytei import Injector
def inject_error(model: nn.Module, error_map_file: str, prob) -> nn.Module:
    model_error = deepcopy(model).to(device)
    injector = Injector(error_map_file, p = prob, device = device, verbose = True)
    injector.inject(model_error)
    return model_error

## GPT 2

In [9]:
pip install sentencepiece

Collecting sentencepiece
  Downloading sentencepiece-0.2.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (7.7 kB)
Downloading sentencepiece-0.2.0-cp312-cp312-macosx_11_0_arm64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.2.0
Note: you may need to restart the kernel to use updated packages.


In [None]:
pip install transformers


In [7]:
from transformers import GPT2Tokenizer, GPT2Model
from transformers import T5Tokenizer, T5Model
from collections import OrderedDict

tokenizer = T5Tokenizer.from_pretrained('t5-base')
gpt2 = T5Model.from_pretrained('t5-base')

def get_modified_state_dictGPT2(model: nn.Module):
    new_state_dict = deepcopy(model.state_dict())
    for key in list(new_state_dict.keys()):
        new_state_dict[f"transformer.{key}"] = new_state_dict.pop(key)
    return new_state_dict

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [7]:
print_named_params(gpt2)
output_targets(gpt2, "gpt2_targets")

shared.weight: torch.Size([32128, 768])
encoder.block.0.layer.0.SelfAttention.q.weight: torch.Size([768, 768])
encoder.block.0.layer.0.SelfAttention.k.weight: torch.Size([768, 768])
encoder.block.0.layer.0.SelfAttention.v.weight: torch.Size([768, 768])
encoder.block.0.layer.0.SelfAttention.o.weight: torch.Size([768, 768])
encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight: torch.Size([32, 12])
encoder.block.0.layer.0.layer_norm.weight: torch.Size([768])
encoder.block.0.layer.1.DenseReluDense.wi.weight: torch.Size([3072, 768])
encoder.block.0.layer.1.DenseReluDense.wo.weight: torch.Size([768, 3072])
encoder.block.0.layer.1.layer_norm.weight: torch.Size([768])
encoder.block.1.layer.0.SelfAttention.q.weight: torch.Size([768, 768])
encoder.block.1.layer.0.SelfAttention.k.weight: torch.Size([768, 768])
encoder.block.1.layer.0.SelfAttention.v.weight: torch.Size([768, 768])
encoder.block.1.layer.0.SelfAttention.o.weight: torch.Size([768, 768])
encoder.block.1.layer.0.layer_n

In [11]:
gpt2_error = inject_error(gpt2, "gpt2_targets")
gpt2_error.eval()
text = "blahblahblah"
test_input = tokenizer(text, return_tensors='pt').to(device)


with torch.no_grad():
    error_out = gpt2_error(**test_input) # gpt

TypeError: inject_error() missing 1 required positional argument: 'prob'

## MAMBA

## Evaluate

In [14]:
pip install deepeval

Collecting deepeval
  Downloading deepeval-1.5.9-py3-none-any.whl.metadata (977 bytes)
Collecting typer (from deepeval)
  Downloading typer-0.13.1-py3-none-any.whl.metadata (15 kB)
Collecting sentry-sdk (from deepeval)
  Downloading sentry_sdk-2.19.0-py2.py3-none-any.whl.metadata (9.9 kB)
Collecting pytest-repeat (from deepeval)
  Using cached pytest_repeat-0.9.3-py3-none-any.whl.metadata (4.9 kB)
Collecting pytest-xdist (from deepeval)
  Using cached pytest_xdist-3.6.1-py3-none-any.whl.metadata (4.3 kB)
Collecting portalocker (from deepeval)
  Downloading portalocker-3.0.0-py3-none-any.whl.metadata (8.5 kB)
Collecting langchain (from deepeval)
  Using cached langchain-0.3.7-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core (from deepeval)
  Downloading langchain_core-0.3.20-py3-none-any.whl.metadata (6.3 kB)
Collecting langchain-openai (from deepeval)
  Downloading langchain_openai-0.2.9-py3-none-any.whl.metadata (2.6 kB)
Collecting ragas (from deepeval)
  Downloading ragas

In [2]:
pip install -q -U transformers accelerate bitsandbytes

Note: you may need to restart the kernel to use updated packages.


In [13]:
pip install -U bitsandbytes

Note: you may need to restart the kernel to use updated packages.


In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from deepeval.models.base_model import DeepEvalBaseLLM
from typing import List
from bitsandbytes import BitsAndBytesConfig

class Mistral7B(DeepEvalBaseLLM):
    def __init__(
        self,
        model,
        tokenizer
    ):
        self.model = model
        self.tokenizer = tokenizer

    def load_model(self):
        return self.model

    def generate(self, prompt: str) -> str:
        model = self.load_model()

        model_inputs = self.tokenizer([prompt], return_tensors="pt").to(device)
        model.to(device)

        generated_ids = model.generate(**model_inputs, max_new_tokens=100, do_sample=True)
        return self.tokenizer.batch_decode(generated_ids)[0]

    async def a_generate(self, prompt: str) -> str:
        return self.generate(prompt)

    # This is optional.
    def batch_generate(self, promtps: List[str]) -> List[str]:
        model = self.load_model()
        device = "cuda" # the device to load the model onto

        model_inputs = self.tokenizer(promtps, return_tensors="pt").to(device)
        model.to(device)

        generated_ids = model.generate(**model_inputs, max_new_tokens=100, do_sample=True)
        return self.tokenizer.batch_decode(generated_ids)

    def get_model_name(self):
        return "Mistral 7B"

from transformers import AutoTokenizer, AutoModelForCausalLM 
import torch

from transformers import BitsAndBytesConfig

model_name = "mistralai/Mistral-7B-Instruct-v0.2"
token = "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"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)
model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
        token=token
    )
tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)


#model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1", token="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")
#tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

#mistral_7b = Mistral7B(model=model, tokenizer=tokenizer)
#print(mistral_7b("Write me a joke"))

'NoneType' object has no attribute 'cadam32bit_grad_fp32'


  warn("The installed version of bitsandbytes was compiled without GPU support. "


ImportError: cannot import name 'BitsAndBytesConfig' from 'bitsandbytes' (/opt/anaconda3/lib/python3.12/site-packages/bitsandbytes/__init__.py)

In [None]:
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2", token="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")
mistral_7b = Mistral7B(model=model, tokenizer=tokenizer)
evaluate_model_MMLU(mistral_7b)

In [1]:
from deepeval.benchmarks import MMLU
from deepeval.benchmarks.tasks import MMLUTask

def evaluate_model_MMLU(model):

    benchmark = MMLU(
        tasks=[MMLUTask.HIGH_SCHOOL_COMPUTER_SCIENCE, MMLUTask.ASTRONOMY],
        n_shots=3
    )

    benchmark.evaluate(model=model)
    return benchmark.task_scores

from transformers import AutoTokenizer, AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("t5-base")
tokenizer = AutoTokenizer.from_pretrained("t5-base")

hf_model = T5(model, tokenizer)
evaluate_model_MMLU(hf_model)

ValueError: Unrecognized configuration class <class 'transformers.models.t5.configuration_t5.T5Config'> for this kind of AutoModel: AutoModelForCausalLM.
Model type should be one of BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, CpmAntConfig, CTRLConfig, Data2VecTextConfig, DbrxConfig, ElectraConfig, ErnieConfig, FalconConfig, FalconMambaConfig, FuyuConfig, GemmaConfig, Gemma2Config, GitConfig, GlmConfig, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, JambaConfig, JetMoeConfig, LlamaConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegaConfig, MegatronBertConfig, MistralConfig, MixtralConfig, MllamaConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NemotronConfig, OlmoConfig, OlmoeConfig, OpenLlamaConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, PhimoeConfig, PLBartConfig, ProphetNetConfig, QDQBertConfig, Qwen2Config, Qwen2MoeConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, Speech2Text2Config, StableLmConfig, Starcoder2Config, TransfoXLConfig, TrOCRConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMProphetNetConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, XmodConfig, ZambaConfig.

In [11]:
config = AutoConfig.from_pretrained("t5-base") # Change to whichever model architecture being used
hf_model = AutoModelForSeq2SeqLM.from_config(config) 

shared.weight: torch.Size([32128, 768])
encoder.block.0.layer.0.SelfAttention.q.weight: torch.Size([768, 768])
encoder.block.0.layer.0.SelfAttention.k.weight: torch.Size([768, 768])
encoder.block.0.layer.0.SelfAttention.v.weight: torch.Size([768, 768])
encoder.block.0.layer.0.SelfAttention.o.weight: torch.Size([768, 768])
encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight: torch.Size([32, 12])
encoder.block.0.layer.0.layer_norm.weight: torch.Size([768])
encoder.block.0.layer.1.DenseReluDense.wi.weight: torch.Size([3072, 768])
encoder.block.0.layer.1.DenseReluDense.wo.weight: torch.Size([768, 3072])
encoder.block.0.layer.1.layer_norm.weight: torch.Size([768])
encoder.block.1.layer.0.SelfAttention.q.weight: torch.Size([768, 768])
encoder.block.1.layer.0.SelfAttention.k.weight: torch.Size([768, 768])
encoder.block.1.layer.0.SelfAttention.v.weight: torch.Size([768, 768])
encoder.block.1.layer.0.SelfAttention.o.weight: torch.Size([768, 768])
encoder.block.1.layer.0.layer_n

In [30]:
probability = [2e-9, 4e-9, 6e-9, 8e-9]
for prob in probability:
    while True:
        #test_model = inject_error(gpt2, "gpt2_targets", prob)
        test_model = hf_model
        result = evaluate_model_MMLU_GPT2(test_model)
        for i in result.index:
            task = result.loc[i, "Task"]
            score = result.loc[i, "Score"]
            with open(f"results/gpt2_{str(prob)}_{task}", "w") as f:
                f.write(f"{str(score)}\n") 
        print("Succeeded no nan")
        break

Processing high_school_computer_science:   0%|                                                                                                                                                                               | 0/100 [00:00<?, ?it/s]Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


The following are multiple choice questions (with answers) about high school computer science.

Which of the following is an example of the use of a device on the Internet of Things (IoT) ?
A. A car alerts a driver that it is about to hit an object.
B. A hiker uses a G P S watch to keep track of her position.
C. A refrigerator orders milk from an online delivery service when the milk in the refrigerator is almost gone.
D. A runner uses a watch with optical sensors to monitor his heart rate.
Answer: C

Many Web browsers allow users to open anonymous windows. During a browsing session in an anonymous window, the browser does not record a browsing history or a list of downloaded files. When the anonymous window is exited, cookies created during the session are deleted. Which of the following statements about browsing sessions in an anonymous window is true?
A. The activities of a user browsing in an anonymous window will not be visible to people who monitor the user's network, such as the

Processing high_school_computer_science:   0%|                                                                                                                                                                               | 0/100 [00:05<?, ?it/s]

[{'generated_text': 'jeu jeu jeu jeu jeu jeu jeu jeu jeu jeu jeu jeu jeu jeu jeu jeu jeu jeu jeu jeu jeu jeuPCRPCRPCRPCRPCRPCRPCRPCRPCRPCRPCRPCRPCRPCRPCRPCRPCRPCRPCRPCR graffiti graffiti graffiti graffiti graffiti graffiti graffiti graffiti produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits produits'}]





JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [68]:
test_model = gpt2
result = evaluate_model_MMLU_GPT2(test_model)
for i in result.index:
    task = result.loc[i, "Task"]
    score = result.loc[i, "Score"]
    with open(f"results/gpt2_{str(0)}_{task}", "w") as f:
        f.write(f"{str(score)}\n")        

Processing high_school_computer_science:   0%|                                                                                                                                                                               | 0/100 [00:00<?, ?it/s]Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Processing high_school_computer_science:   1%|█▋                                                                                                                                                                     | 1/100 [00:02<03:49,  2.32s/it]Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Processing high_school_computer_science:   2%|███▎                                                                                                                                                                   | 2/1

MMLU Task Accuracy (task=high_school_computer_science): 0.23


Processing astronomy:   0%|                                                                                                                                                                                                  | 0/152 [00:00<?, ?it/s]Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Processing astronomy:   1%|█▏                                                                                                                                                                                        | 1/152 [00:02<05:36,  2.23s/it]Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Processing astronomy:   1%|██▍                                                                                                                                                                                       | 2/1

MMLU Task Accuracy (task=astronomy): 0.20394736842105263
Overall MMLU Accuracy: 0.21428571428571427





In [35]:
result = evaluate_model_MMLU_GPT2(gpt2_error)

Processing high_school_computer_science:   0%|                                                                                                                                                                               | 0/100 [00:00<?, ?it/s]Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Processing high_school_computer_science:   1%|█▋                                                                                                                                                                     | 1/100 [00:02<04:29,  2.72s/it]Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Processing high_school_computer_science:   2%|███▎                                                                                                                                                                   | 2/1

MMLU Task Accuracy (task=high_school_computer_science): 0.19


Processing astronomy:   0%|                                                                                                                                                                                                  | 0/152 [00:00<?, ?it/s]Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Processing astronomy:   1%|█▏                                                                                                                                                                                        | 1/152 [00:02<06:26,  2.56s/it]Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Processing astronomy:   1%|██▍                                                                                                                                                                                       | 2/1

MMLU Task Accuracy (task=astronomy): 0.15789473684210525
Overall MMLU Accuracy: 0.17063492063492064





In [40]:
result

Unnamed: 0,Task,Score
0,high_school_computer_science,0.19
1,astronomy,0.157895


In [41]:
result.loc[result["Task"] == "high_school_computer_science"]

NameError: name 'high_school_computer_science' is not defined

In [49]:
len(result)

2

In [63]:
for i in result.index:
    task = result.loc[i, "Task"]
    score = result.loc[i, "Score"]
    with open(f"results/gpt2_{str(prob)}_{task}", "w") as f:
        f.write(f"{str(score)}\n") 
    print("Succeeded no nan")

Succeeded no nan
Succeeded no nan


In [69]:
import matplotlib as plt

ModuleNotFoundError: No module named 'matplotlib'