# Selective lesioning
Now, our aim is to erode a model in a **controlled manner**. Michael Levin's "multiple levels of competency" (planaria; evaluate biology by how well it reacts to errors) is a main inspiration. How can higher-level layers of "agents" resolve errors from lower levels? Why are these systems (biological agents + LLMs) so robust? How robust is our model to perturbations?

Model performance is tracked by benchmarking over a question set (generated w/ an assist from gpt-4 :)). 

To do: 
+ Load base model + replace mps code x
+ Pull in question set x
+ Write some code to test/track tokens per second
+ Establish basic eval framework - need to (1) feed questions (async?); (2) randomly shut off weights from non-embed layers; (3) track perf. changes as culling increases
+ (extra) think abt ways to selectively kill off weights :) 

Notes: 
+ Keep an eye on mem. use - disk space can be monitored via `du -hs $HOME /workspace/*` - we have 100GB avail. 

In [4]:
# Load libraries
# import flash_attn
from dotenv import main
import torch
import json
import jinja2
import os
import sys
import re
import pandas as pd
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig # for quantization
from torch.nn import Softmax
import plotly
from transformers import pipeline, set_seed

# auth for gated repos (like llama) - gen token here: https://huggingface.co/settings/tokens
from huggingface_hub import login, notebook_login

# `notebook_login` takes no token argument: a positional value lands on its
# `new_session` flag, so the token was never used for authentication.
# Log in non-interactively when HF_TOKEN is set; otherwise fall back to the
# notebook widget and reuse an existing session if there is one.
hf_token = os.getenv('HF_TOKEN')
if hf_token:
    login(token = hf_token)
else:
    notebook_login(new_session = False)

# model ids
model_id = ["microsoft/Phi-3-mini-4k-instruct"]

# Set seed for reproducibility
torch.random.manual_seed(0)

# Increase max width of pd df columns (full option name, so the lookup can never become ambiguous)
pd.set_option('display.max_colwidth', 300)

# Instantiate jinja environment - used later for icl prompting
environment = jinja2.Environment()

# requirements.txt
# !pip3 freeze > requirements.txt

User is already logged in.


In [74]:
# Define utility functions 
# mem. monitoring! 
def check_memory():
    """Print the allocated, reserved, and total memory of CUDA device 0, in GB (bytes / 1024**3)."""
    bytes_per_gb = 1024 ** 3
    # (message template, reader) pairs; each reader is called right before its line is printed
    readings = (
        ("Allocated: %fGB", lambda: torch.cuda.memory_allocated(0)),
        ("Reserved: %fGB", lambda: torch.cuda.memory_reserved(0)),
        ("Total: %fGB", lambda: torch.cuda.get_device_properties(0).total_memory),
    )
    for template, read_bytes in readings:
        print(template % (read_bytes() / bytes_per_gb))

# notification/text-to-speech
def text_to_speech(text):
    """
    Speak `text` aloud with the platform's text-to-speech command.

    Uses `say` on macOS and `espeak` on Linux; on any other platform a notice
    is printed instead. This is a best-effort notification: if the command
    cannot be started, the problem is printed rather than raised.

    Params:
        text: the message to speak (converted to str).
    """
    import subprocess  # local import: only this helper needs it

    if sys.platform == 'darwin':
        command = 'say'
    elif sys.platform.startswith('linux'):
        command = 'espeak'
    else:
        print("Text-to-speech is not supported on this platform.")
        return

    # Pass the text as its own argv entry instead of splicing it into a shell
    # string, so quotes, `$` or backticks in `text` are spoken, not interpreted.
    try:
        subprocess.run([command, str(text)], check = False)
    except OSError as err:
        print(f"Text-to-speech command '{command}' could not be run: {err}")

# parse + template phi inputs
def parse_phi(messages: list[dict], append_response_start = True) -> str:
    """
    Converts a multi-turn conversation into a Phi-3 chat-formatted prompt string.

    Params:
        messages: conversation turns in order; each dict is read for its 'role' and 'content' keys.
        append_response_start: when True, the prompt ends with an open `<|assistant|>` tag.

    Returns:
        The templated prompt, starting with `<s>`, with turns separated by newlines.

    Output format:
    # <s><|system|>
    # You are a helpful AI assistant.<|end|>
    # <|user|>
    # Guess my dog's name!<|end|>
    # <|assistant|>
    """
    # one "<|role|>\ncontent<|end|>" chunk per turn
    turns = [f"<|{m['role']}|>\n{m['content']}<|end|>" for m in messages]
    prompt = '<s>' + '\n'.join(turns)

    if append_response_start:
        prompt += "\n<|assistant|>"

    return prompt

# print(parse_phi([
#     {'role': 'system', 'content': 'Hello'}, {'role': 'user', 'content': '1+1?'}, {'role': 'assistant', 'content': '2'}
# ], False))

# model eval
def eval_model(model, tokenizer, prompt):
    """
    Generate one sampled completion for `prompt` and return it as decoded text.

    Params:
        model: causal LM exposing `.device`, `.eval()` and `.generate()`.
        tokenizer: tokenizer matching `model`.
        prompt: fully templated prompt string (e.g. the output of `parse_phi`).

    Returns:
        The first decoded sequence returned by `model.generate`; special tokens are not stripped.
    """
    # Put the inputs wherever the model lives rather than relying on a global `device`.
    tokens = tokenizer(prompt, return_tensors = 'pt').to(model.device)

    # Stop on the tokenizer's EOS and on the `<|end|>` tag that closes every turn in
    # the Phi-3 template; without it the model runs on past the end of its answer.
    stop_ids = [tokenizer.eos_token_id]
    end_of_turn_id = tokenizer.convert_tokens_to_ids('<|end|>')
    unknown_id = getattr(tokenizer, 'unk_token_id', None)
    # Skip the tag if the vocabulary does not know it (lookup yields None or the unk id).
    if end_of_turn_id is not None and end_of_turn_id != unknown_id and end_of_turn_id not in stop_ids:
        stop_ids.append(end_of_turn_id)

    model.eval()
    with torch.no_grad():
        res = model.generate(
            **tokens,
            max_new_tokens = 128,
            do_sample = True,
            temperature = 0.6,
            top_p = 0.9,
            eos_token_id = stop_ids
        )
    return tokenizer.batch_decode(res)[0]

In [55]:
# Quantization config, base model, and tokenizer
# 4-bit nf4 weights, double quantization on, bfloat16 compute dtype
bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type = 'nf4',
    bnb_4bit_compute_dtype = torch.bfloat16,
    bnb_4bit_use_double_quant = True
)

# device_map = 'auto' is deliberately not passed - not sure what's up with device_map, but this is what causes errors
base_model = AutoModelForCausalLM.from_pretrained(
    model_id[0],
    trust_remote_code = True,
    quantization_config = bnb_config
)

# Tokenizer adds neither bos nor eos on its own - parse_phi already pre-pends the bos token
tokenizer = AutoTokenizer.from_pretrained(
    model_id[0],
    padding_side = 'left',
    add_bos_token = False,
    add_eos_token = False
)

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


# Initial eval. setup
Here, we template our questions and run an initial evaluation of phi-3's performance before modification.

In [6]:
# Few-shot base prompt: one system instruction followed by two worked
# user/assistant exchanges that demonstrate the expected JSON answer format.
base_prompt = [
    {"role": speaker, "content": text}
    for speaker, text in (
        (
            "system",
            "You are a helpful, honest, and intelligent AI assistant who can only respond with a single JSON object. Solve each of the following questions. Return a JSON object containing two keys, `rationale` and `answer`.",
        ),
        (
            "user",
            "What's the integer ceiling of 5/3?\nA. 3\nB. 4.25\nC. dog\nD. 2",
        ),
        (
            "assistant",
            '{"rationale": "5/3 is between 1 (3/3) and 2 (6/3), so the integer ceiling is 2.", "answer": "D"}',
        ),
        (
            "user",
            "What's the capital of the U.S. state of Georgia?\nA. Tblisi\nB. Atlanta\nC. Nashville\nD. Toronto",
        ),
        (
            "assistant",
            '{"rationale": "The capital of the U.S. state of Georgia is Atlanta, located in the Northwest of the state.", "answer": "B"}',
        ),
    )
]

In [72]:
# create eval/questions df 
# GPT-4 generation prompt
# I am benchmarking an LLM. I want you to create 100 MMLU-style questions. Return them in a JSON array of the format specified below. The questions should be a mix of easy/medium/hard difficulty. 
# The types should be "math", "extraction", "reasoning", "facts". 
# - "Math" questions should be related to arithmetic, calculus, or statistics. 
# - "Extraction" questions should focus on NLP-style NER tasks.
# - "Reasoning" should focus on logic. 
# - "Facts" should be focused on facts related to science or nature.
# Here is an example of a question (do not use this question).
# ```
# [
# {"question": "Suppose you have a data source that generates binary messages. Each message can either be 0 or 1. If both outcomes are equally likely, what is the entropy of this data source?", "options": [{"code": "A", "text": "0 bits"}, {"code": "B", "text": "0.5 bits"}, {"code": "C", "text": "1 bit"}, {"code": "D", "text": "2 bits"}], "solution": "C", "difficulty": "hard", "type": "math"},
# {"question": "What element is represented by the symbol 'Na' on the periodic table?", "options": [{"code": "A", "text": "Nitrogen"}, {"code": "B", "text": "Nickel"}, {"code": "C", "text": "Neon"}, {"code": "D", "text": "Sodium"}], "solution": "D", "difficulty": "easy", "type": "facts"},
# ]
# ```

# Load the question set from data/question.json under the working directory
q_file_path = os.path.join(os.getcwd(), 'data', 'question.json')
with open(q_file_path, encoding = 'utf-8') as q_file: # context manager closes the handle once parsed
    q_list = json.load(q_file) # yields list of dicts


# create eval df, with addtl. columns allocated for full question + llm input
eval_df = pd.DataFrame(q_list).assign(
    # question text followed by one "<code>. <text>" line per answer option
    full_question = lambda df: df.apply(
        lambda row: row['question'] + '\n' + '\n'.join(f"{o['code']}. {o['text']}" for o in row['options']),
        axis = 1
    ),
    # few-shot prompt + the question as the next *user* turn; parse_phi then opens
    # the assistant turn for the model to complete
    llm_input = lambda df: df.apply(
        lambda row: parse_phi(base_prompt + [{'role': 'user', 'content': row['full_question']}]),
        axis = 1
    )
)

# print(len(eval_df)) 
# print(eval_df['llm_input'][0]) # check on single input to ensure correct structure :) 
# eval_df.groupby('difficulty').count() # overall eval metrics

In [75]:
# Display the evaluation DataFrame (one row per loaded question)
eval_df

Unnamed: 0,question,options,solution,difficulty,type,full_question,llm_input
0,"Suppose you have a data source that generates binary messages. Each message can either be 0 or 1. If both outcomes are equally likely, what is the entropy of this data source?","[{'code': 'A', 'text': '0 bits'}, {'code': 'B', 'text': '0.5 bits'}, {'code': 'C', 'text': '1 bit'}, {'code': 'D', 'text': '2 bits'}]",C,hard,math,"Suppose you have a data source that generates binary messages. Each message can either be 0 or 1. If both outcomes are equally likely, what is the entropy of this data source?\nA. 0 bits\nB. 0.5 bits\nC. 1 bit\nD. 2 bits","<s><|system|>\nYou are a helpful, honest, and intelligent AI assistant who can only respond with a single JSON object. Solve each of the following questions. Return a JSON object containing two keys, `rationale` and `answer`.<|end|>\n<|user|>\nWhat's the integer ceiling of 5/3?\nA. 3\nB. 4.25\nC..."
1,What element is represented by the symbol 'Na' on the periodic table?,"[{'code': 'A', 'text': 'Nitrogen'}, {'code': 'B', 'text': 'Nickel'}, {'code': 'C', 'text': 'Neon'}, {'code': 'D', 'text': 'Sodium'}]",D,easy,facts,What element is represented by the symbol 'Na' on the periodic table?\nA. Nitrogen\nB. Nickel\nC. Neon\nD. Sodium,"<s><|system|>\nYou are a helpful, honest, and intelligent AI assistant who can only respond with a single JSON object. Solve each of the following questions. Return a JSON object containing two keys, `rationale` and `answer`.<|end|>\n<|user|>\nWhat's the integer ceiling of 5/3?\nA. 3\nB. 4.25\nC..."
2,A rectangle has a length of 10 meters and a width of 5 meters. What is its area?,"[{'code': 'A', 'text': '15 square meters'}, {'code': 'B', 'text': '50 square meters'}, {'code': 'C', 'text': '25 square meters'}, {'code': 'D', 'text': '100 square meters'}]",B,easy,math,A rectangle has a length of 10 meters and a width of 5 meters. What is its area?\nA. 15 square meters\nB. 50 square meters\nC. 25 square meters\nD. 100 square meters,"<s><|system|>\nYou are a helpful, honest, and intelligent AI assistant who can only respond with a single JSON object. Solve each of the following questions. Return a JSON object containing two keys, `rationale` and `answer`.<|end|>\n<|user|>\nWhat's the integer ceiling of 5/3?\nA. 3\nB. 4.25\nC..."
3,"From the text: 'The cat, which was grey, jumped over the sleepy dog.' Identify the color of the cat.","[{'code': 'A', 'text': 'Brown'}, {'code': 'B', 'text': 'Black'}, {'code': 'C', 'text': 'Grey'}, {'code': 'D', 'text': 'White'}]",C,easy,extraction,"From the text: 'The cat, which was grey, jumped over the sleepy dog.' Identify the color of the cat.\nA. Brown\nB. Black\nC. Grey\nD. White","<s><|system|>\nYou are a helpful, honest, and intelligent AI assistant who can only respond with a single JSON object. Solve each of the following questions. Return a JSON object containing two keys, `rationale` and `answer`.<|end|>\n<|user|>\nWhat's the integer ceiling of 5/3?\nA. 3\nB. 4.25\nC..."
4,"If it is true that all roses are flowers and some flowers fade quickly, which statement must be true?","[{'code': 'A', 'text': 'All roses fade quickly'}, {'code': 'B', 'text': 'No roses fade'}, {'code': 'C', 'text': 'Some roses may fade quickly'}, {'code': 'D', 'text': 'Roses are not flowers'}]",C,medium,reasoning,"If it is true that all roses are flowers and some flowers fade quickly, which statement must be true?\nA. All roses fade quickly\nB. No roses fade\nC. Some roses may fade quickly\nD. Roses are not flowers","<s><|system|>\nYou are a helpful, honest, and intelligent AI assistant who can only respond with a single JSON object. Solve each of the following questions. Return a JSON object containing two keys, `rationale` and `answer`.<|end|>\n<|user|>\nWhat's the integer ceiling of 5/3?\nA. 3\nB. 4.25\nC..."
...,...,...,...,...,...,...,...
245,What is the primary gas in Earth's atmosphere?,"[{'code': 'A', 'text': 'Oxygen'}, {'code': 'B', 'text': 'Carbon Dioxide'}, {'code': 'C', 'text': 'Nitrogen'}, {'code': 'D', 'text': 'Hydrogen'}]",C,medium,facts,What is the primary gas in Earth's atmosphere?\nA. Oxygen\nB. Carbon Dioxide\nC. Nitrogen\nD. Hydrogen,"<s><|system|>\nYou are a helpful, honest, and intelligent AI assistant who can only respond with a single JSON object. Solve each of the following questions. Return a JSON object containing two keys, `rationale` and `answer`.<|end|>\n<|user|>\nWhat's the integer ceiling of 5/3?\nA. 3\nB. 4.25\nC..."
246,Extract the main topic discussed in 'Modern Architectural Trends': 'Focus areas include sustainability and use of renewable resources.',"[{'code': 'A', 'text': 'Sustainability, Renewable Resources'}, {'code': 'B', 'text': 'Modern, Architectural'}, {'code': 'C', 'text': 'Trends, Focus'}, {'code': 'D', 'text': 'Areas, Use'}]",A,medium,extraction,"Extract the main topic discussed in 'Modern Architectural Trends': 'Focus areas include sustainability and use of renewable resources.'\nA. Sustainability, Renewable Resources\nB. Modern, Architectural\nC. Trends, Focus\nD. Areas, Use","<s><|system|>\nYou are a helpful, honest, and intelligent AI assistant who can only respond with a single JSON object. Solve each of the following questions. Return a JSON object containing two keys, `rationale` and `answer`.<|end|>\n<|user|>\nWhat's the integer ceiling of 5/3?\nA. 3\nB. 4.25\nC..."
247,"If a hypothesis states that 'Adding fertilizer increases plant growth' and plants with fertilizer show no growth difference, what is the implication?","[{'code': 'A', 'text': 'The hypothesis is confirmed'}, {'code': 'B', 'text': 'The hypothesis is refuted'}, {'code': 'C', 'text': 'More data is needed'}, {'code': 'D', 'text': 'The plants are not suitable'}]",B,medium,reasoning,"If a hypothesis states that 'Adding fertilizer increases plant growth' and plants with fertilizer show no growth difference, what is the implication?\nA. The hypothesis is confirmed\nB. The hypothesis is refuted\nC. More data is needed\nD. The plants are not suitable","<s><|system|>\nYou are a helpful, honest, and intelligent AI assistant who can only respond with a single JSON object. Solve each of the following questions. Return a JSON object containing two keys, `rationale` and `answer`.<|end|>\n<|user|>\nWhat's the integer ceiling of 5/3?\nA. 3\nB. 4.25\nC..."
248,Which element is essential for the construction of nuclear reactors due to its ability to absorb neutrons?,"[{'code': 'A', 'text': 'Uranium'}, {'code': 'B', 'text': 'Plutonium'}, {'code': 'C', 'text': 'Boron'}, {'code': 'D', 'text': 'Lead'}]",C,medium,facts,Which element is essential for the construction of nuclear reactors due to its ability to absorb neutrons?\nA. Uranium\nB. Plutonium\nC. Boron\nD. Lead,"<s><|system|>\nYou are a helpful, honest, and intelligent AI assistant who can only respond with a single JSON object. Solve each of the following questions. Return a JSON object containing two keys, `rationale` and `answer`.<|end|>\n<|user|>\nWhat's the integer ceiling of 5/3?\nA. 3\nB. 4.25\nC..."


In [None]:
# eval. base phi-3
# gen + store results
# NOTE(review): `mcq` and `icl_prompt_head` are not defined in the visible part of this
# notebook - presumably left over from an earlier version; `eval_df` (`llm_input`,
# `solution`) looks like the intended replacement. Confirm before running.
MAX_ATTEMPTS = 5 # cap retries so a model that never emits valid json cannot hang the run

res = []
for idx, question in enumerate(mcq['question'][0:2]):
    print(f"Now processing question {idx}")
    icl_prompt = f"{icl_prompt_head}\n{question}\n"
    icl_prompt += mcq['choices'][idx] + '\n' # mod. prompt to add in choices

    # pull out correct answer
    correct_answer = mcq['correct_answer'][idx]

    # a usable response contains a {...} span that includes the question text;
    # compiled once per question instead of once per retry
    pattern = re.compile(r'({.*?' + re.escape(question) + r'.*?})')

    for attempt in range(1, MAX_ATTEMPTS + 1):
        # generate response
        response = eval_model(model = base_model, tokenizer = tokenizer, prompt = icl_prompt)

        # (try to) extract jsons that meet criteria from the generated text
        response_json = pattern.findall(response)
        if response_json:
            print('Valid json found - continue')
            break
        print(f'Response json empty/malformed (attempt {attempt}/{MAX_ATTEMPTS})')
    else:
        # every attempt failed: keep the last raw response so the run can proceed
        print(f'No valid json after {MAX_ATTEMPTS} attempts - storing last response')

    # store prompt, response, and correct answer
    res.append({'question': question, 'response': response, 'correct_answer': correct_answer})

# notify when execution finishes
text_to_speech("Hello, responses are done generating!")

# Performance under perturbation