In [136]:
import os
import yaml
import time
from src.utils import batch_generate, tokens_generate, run_inference
from src.mem import check_memory
import importlib
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
from dotenv import load_dotenv
import plotly.express as px
import torch
import torch.nn.functional as F
import huggingface
from datetime import datetime
load_dotenv('secrets.env')

with open("config/config.yaml", "r") as f: 
    config = yaml.safe_load(f)

model_str = 'deepseek_r1_qwendistill_1.5'
eval_df = 'big_bench'

ds = config['datasets'][eval_df]
model_path = config['models'][model]['path']

if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

In [139]:
dataset = load_dataset(ds['source'], ds['subset']).shuffle(config['seed'])
tokenizer = AutoTokenizer.from_pretrained(model_path) 
model = AutoModelForCausalLM.from_pretrained(model_path).to(device)

# Experiment
Goal: test model performance on 100 BBH questions w/ chain of thought reasoning =D

In [8]:
df = pd.DataFrame(dataset['train'][0:10])

batches = batch_generate(df, ds['input_column'], ds['target_column'])
tokens = tokens_generate(batches, tokenizer, device = 'mps')
res = run_inference(model, tokens, tokenizer, time_tracking = True)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


In [146]:
importlib.reload(src.instruct)
from src.instruct import apply_instruct_format

test = apply_instruct_format(df['input'], model = model_str, is_math = True)

In [129]:
print(test[3])

"<im_start|>system
You are an AI assistant specialized in logical and causal reasoning.<|im_end|>
<|im_start|>user
please reason step by step, and put your final answer within \boxed{}. How would a typical person answer each of the following questions about causation?
Joe was about to go pick up his child from a school in the suburbs. In the parking lot, he stopped to talk with friends. Because of this delay in picking up his son, Joe's neighbor, who waited 15 min for Joe to show up, took the child home in his car. On the way home, the neighbor's car was struck by a drunk driver. Joe's son, the only seriously hurt victim, received severe leg injuries. Joe's behavior stems from his talkativeness. He is the type of person who likes to talk to anybody, at any time. Sometimes his friends are bothered by this seemingly excessive talkativeness and they even complain about it occasionally. But, after they got to know him better, they realized that Joe's talkativeness was a basic trait of his 

In [147]:
print(tokenizer.decode(response[0], skip_special_tokens = True))

"<im_start|>system
You are an AI assistant specialized in logical and causal reasoning.<|im_end|>
<|im_start|>user
please reason step by step, and put your final answer within \boxed{}. How would a typical person answer each of the following questions about causation?
Joe was about to go pick up his child from a school in the suburbs. In the parking lot, he stopped to talk with friends. Because of this delay in picking up his son, Joe's neighbor, who waited 15 min for Joe to show up, took the child home in his car. On the way home, the neighbor's car was struck by a drunk driver. Joe's son, the only seriously hurt victim, received severe leg injuries. Joe's behavior stems from his talkativeness. He is the type of person who likes to talk to anybody, at any time. Sometimes his friends are bothered by this seemingly excessive talkativeness and they even complain about it occasionally. But, after they got to know him better, they realized that Joe's talkativeness was a basic trait of his 

In [151]:
tokens = tokenizer(test[3], padding = True, truncation = True, return_tensors = 'pt').to(device)
response = model.generate(**tokens, temperature = 0.6)
full_response = tokenizer.decode(response[0], skip_special_tokens = True)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


In [152]:
full_response.replace(test[3], '').strip() # nice! 

"Okay, so I need to figure out whether the drunk driver caused the injury to Joe's son based"