In [None]:
!pip install -q -U transformers
!pip install -q -U accelerate
!pip install -q -U bitsandbytes

In [None]:
!pip install -q -U deepeval

In [None]:
!pip install -q -U pydantic
!pip install -q -U lm-format-enforcer

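Create a Kaggle dataset named `pytei-files` that contains the pytei source files (`__init__.py`, `pytei.py`, `dpytei.py`) and attach it to this notebook, so that the files are available under `/kaggle/input/pytei-files`.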

In [None]:
import sys
sys.path.append( "/kaggle/input/pytei-files" )
from pytei import Injector

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

Put your Hugging Face access token in the cell below.

In [None]:
import os

# Hugging Face access token used when downloading the tokenizer and model.
# It is read from the HF_TOKEN environment variable so the secret is never
# saved inside the notebook; when the variable is unset this falls back to the
# empty string. To supply the token by hand, replace the "" default below, and
# remember to clear it before sharing the notebook.
token = os.environ.get("HF_TOKEN", "")

In [None]:
# Device index handed to the fault injector further down: 0 when CUDA is
# available, otherwise -1.
if torch.cuda.is_available():
    device = 0
else:
    device = -1
print(device)

In [None]:
from deepeval.models.base_model import DeepEvalBaseLLM
from typing import List
from pydantic import BaseModel
from lmformatenforcer import JsonSchemaParser
from lmformatenforcer.integrations.transformers import (
    build_transformers_prefix_allowed_tokens_fn,
)
import transformers
from transformers import pipeline
import json

class GPT2(DeepEvalBaseLLM):
    """DeepEval adapter around an already-loaded causal LM and its tokenizer.

    ``generate`` produces schema-constrained JSON through a ``transformers``
    text-generation pipeline combined with ``lmformatenforcer``;
    ``batch_generate`` produces free-form text for a list of prompts.

    Despite the class name (and the value returned by ``get_model_name``) the
    wrapper works with whatever model/tokenizer pair it is given.
    """

    def __init__(
        self,
        model,
        tokenizer
    ):
        """Store the model and tokenizer that every generation call will use."""
        self.model = model
        self.tokenizer = tokenizer
        # Text-generation pipeline, built lazily on the first generate() call
        # and reused afterwards instead of being rebuilt for every prompt.
        self._text_generator = None

    def load_model(self):
        """Return the wrapped model object."""
        return self.model

    def _get_text_generator(self):
        """Return the cached text-generation pipeline, building it when needed."""
        model = self.load_model()
        cached = getattr(self, "_text_generator", None)
        # Rebuild when nothing is cached yet, or when the wrapped model or
        # tokenizer has been replaced since the pipeline was created.
        if (
            cached is None
            or cached.model is not model
            or cached.tokenizer is not self.tokenizer
        ):
            cached = transformers.pipeline(
                "text-generation",
                model=model,
                tokenizer=self.tokenizer,
                use_cache=True,
                device_map="auto",
                max_new_tokens=100,
                do_sample=True,
                top_k=5,
                num_return_sequences=1,
                eos_token_id=self.tokenizer.eos_token_id,
                pad_token_id=self.tokenizer.eos_token_id,
            )
            self._text_generator = cached
        return cached

    def generate(self, prompt: str, schema: BaseModel) -> BaseModel:
        """Generate a completion for ``prompt`` that is valid JSON for ``schema``.

        Args:
            prompt: Text fed to the model.
            schema: The pydantic model *class* that the output must conform to.

        Returns:
            An instance of ``schema`` built from the generated JSON.

        Raises:
            json.JSONDecodeError: If the generated text is not complete JSON
                (for example when generation stops at the 100-new-token limit).
        """
        text_generator = self._get_text_generator()

        # Prefer the pydantic v2 API; `.schema()` is the v1 spelling and is
        # deprecated under v2.
        if hasattr(schema, "model_json_schema"):
            json_schema = schema.model_json_schema()
        else:
            json_schema = schema.schema()

        # Create parser required for JSON confinement using lmformatenforcer
        parser = JsonSchemaParser(json_schema)
        prefix_function = build_transformers_prefix_allowed_tokens_fn(
            text_generator.tokenizer, parser
        )

        # Output and load valid JSON; the pipeline returns prompt + completion,
        # so the prompt is sliced off before parsing.
        output_dict = text_generator(prompt, prefix_allowed_tokens_fn=prefix_function)
        output = output_dict[0]["generated_text"][len(prompt) :]
        json_result = json.loads(output)

        # Return valid JSON object according to the schema DeepEval supplied
        return schema(**json_result)

    async def a_generate(self, prompt: str, schema) -> BaseModel:
        """Async entry point; delegates to the synchronous ``generate``."""
        return self.generate(prompt, schema)

    # This is optional.
    def batch_generate(self, promtps: List[str]) -> List[str]:
        """Generate free-form completions for several prompts in one batch.

        Args:
            promtps: Prompts to complete (the misspelled parameter name is kept
                so existing keyword callers keep working).

        Returns:
            One decoded string per prompt. Each string is the full decoded
            sequence, i.e. it contains the prompt and any special tokens.
        """
        if not promtps:
            return []

        model = self.load_model()
        tokenizer = self.tokenizer

        # Batching prompts of different lengths requires padding. Fall back to
        # EOS as the pad token, the same choice generate() makes.
        if tokenizer.pad_token_id is None:
            tokenizer.pad_token = tokenizer.eos_token

        # Pad on the left so generation continues directly after each prompt;
        # the tokenizer's own setting is restored afterwards.
        previous_side = tokenizer.padding_side
        tokenizer.padding_side = "left"
        try:
            # Inputs follow the model to whatever device it already lives on.
            # The model itself is not moved: a hard-coded "cuda" fails on CPU
            # hosts, and a model loaded with device_map="auto" must not be
            # relocated with .to().
            model_inputs = tokenizer(
                promtps, return_tensors="pt", padding=True
            ).to(model.device)
        finally:
            tokenizer.padding_side = previous_side

        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=100,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id,
        )
        return tokenizer.batch_decode(generated_ids)

    def get_model_name(self):
        """Return the display name reported to DeepEval."""
        return "GPT2"

In [None]:
import re
from torch import nn
def print_named_params(model: nn.Module) -> None:
    """Print one ``<name>: <shape>`` line for every named parameter of ``model``."""
    named = model.named_parameters()
    for name, param in named:
        print(f"{name}: {param.shape}")
def output_targets(model: nn.Module, file: str, regex: str) -> None:
    """Write the names of the parameters of ``model`` that match ``regex``.

    The file at ``file`` is overwritten; every parameter name for which
    ``re.match(regex, name)`` succeeds is written on its own line, in the
    order ``model.named_parameters()`` yields them.
    """
    with open(f'{file}', 'w') as f:
        f.writelines(
            f"{name}\n"
            for name, _ in model.named_parameters()
            if re.match(regex, name)
        )

You will get an error 1–2 times when running this. Whenever this happens, go to "Run" at the top and restart the kernel (NOT the session).

The first iteration takes some extra time because the model has to be downloaded, but everything after that should be fast.

The code will throw an error every time the layer group switches, so you need to be at the computer to switch layers. Each computation for a fixed layer group and probability should take about 15 minutes. Before you restart the session, save the output files to your computer. It would be best if you could save the output files to your local computer and just send them. They are named `mistral_output_{x}_{???}`, where `{x}` is the probability exponent and `{???}` is the list of layer indices joined by underscores (for example `mistral_output_9_4_5_6_7`).

In [None]:
from deepeval.benchmarks import MMLU
from deepeval.benchmarks.tasks import MMLUTask
import time
import gc    

# Sweep definition.
# - `probabilities` holds exponents: each run injects with p = 10 ** -prob.
# - `layers` groups the 32 decoder layer indices into 8 runs of 4 consecutive
#   layers, stored as strings so they can be spliced into regexes and filenames.
probabilities = [9, 8, 7, 6, 5, 4, 3, 2, 1]
layers = [[str(i+j) for j in range(0, 4)] for i in range(0, 32, 4)]
# Skip the first group (layers 0-3).
# NOTE(review): presumably because it was already evaluated - adjust this slice
# when resuming from a different group.
layers = layers[1:]

# Loop-invariant setup: the tokenizer does not depend on the layer group, so it
# is loaded once instead of once per group.
model_name = 'mistralai/Mistral-7B-Instruct-v0.3'
tokenizer = AutoTokenizer.from_pretrained(model_name, token=token, trust_remote_code=True)

for layer in layers:
    # A fresh copy of the model is loaded for every layer group.
    model = AutoModelForCausalLM.from_pretrained(
            model_name,
            #quantization_config=bnb_config,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            trust_remote_code=True,
            token=token,
        )

    # Matches every parameter that belongs to one of the layers in this group.
    # Raw f-string: the pattern is unchanged, but "\." is no longer an invalid
    # string escape.
    reg = '|'.join([rf"model\.layers\.{i}\..*" for i in layer])
    layer_tag = '_'.join(layer)

    try:
        # NOTE(review): the same model object is reused for every probability,
        # so unless Injector restores the weights itself, each injection is
        # applied on top of the previous ones - confirm this is intended.
        for prob in probabilities:
            target_file = f"mistral_target_{prob}_{layer_tag}"
            output_file = f"mistral_output_{prob}_{layer_tag}"
            output_targets(model, target_file, reg)

            benchmark = MMLU(
                tasks=[MMLUTask.HIGH_SCHOOL_COMPUTER_SCIENCE, MMLUTask.ASTRONOMY],
                n_shots=1
            )
            # NOTE(review): the model is loaded as bfloat16 while the injector
            # is told float16 - confirm this is what Injector expects.
            injector = Injector(target_file, p = pow(10, -prob), device = device, dtype = torch.float16, verbose = True)
            injector.inject_values(model)
            mistral = GPT2(model, tokenizer)
            benchmark.evaluate(model=mistral)

            # Append, so repeated runs for the same configuration accumulate
            # one score per line.
            with open(output_file, 'a') as f:
                f.write(f'{benchmark.overall_score}\n')
    finally:
        # Drop every reference to this group's model and hand the memory back
        # before the next group loads its own copy; otherwise two full models
        # are resident at once when the layer group switches.
        model = injector = mistral = benchmark = None
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()