In [57]:
import os
import yaml
import time
from src.utils import batch_generate, tokens_generate, run_inference
from src.mem import check_memory
import importlib
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
from dotenv import load_dotenv
import plotly.express as px
import torch
import torch.nn.functional as F
import huggingface
from datetime import datetime
load_dotenv('secrets.env')

# Read the experiment configuration from the project's YAML file.
with open("config/config.yaml", "r") as f:
    config = yaml.safe_load(f)

# Dataset settings and model path used by the rest of the notebook.
ds = config['datasets']['big_bench']
model_name = config['models']['deepseek_r1_qwendistill_1.5']['path']

# Pick the compute device: CUDA if available, otherwise Apple MPS,
# otherwise fall back to the CPU.
device = (
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)

In [5]:
# Load the configured dataset subset and shuffle it with the seed from the
# config file so the example order is reproducible.
dataset = (
    load_dataset(ds['source'], ds['subset'])
    .shuffle(config['seed'])
)

# Load the tokenizer and the causal LM, then move the model to the device
# selected earlier in the notebook.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model = model.to(device)

# Experiment
Goal: test model performance on 100 BBH questions with chain-of-thought reasoning =D

In [8]:
# Build the evaluation frame from the first 10 rows of the shuffled train split.
# NOTE(review): the stated experiment goal is 100 questions, but only 10 rows
# are taken here — confirm the intended sample size before reporting results.
df = pd.DataFrame(dataset['train'][0:10])

# Group the prompts/targets into batches using the column names from the config.
batches = batch_generate(df, ds['input_column'], ds['target_column'])

# Pass the auto-detected `device` instead of a hard-coded 'mps': the model was
# moved to `device`, so requesting a different device here would fail or
# mismatch on CUDA / CPU-only machines.
# (presumably tokens_generate places the tokenized inputs on this device — verify in src.utils)
tokens = tokens_generate(batches, tokenizer, device = device)

# Run generation over the batches; time_tracking=True is forwarded to the helper.
res = run_inference(model, tokens, tokenizer, time_tracking = True)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


In [51]:
# Show the prompt text of the first example (row label 0, column 'input').
df.loc[0, 'input']

"How would a typical person answer each of the following questions about causation?\nJanet is an employee in a factory. Since she works in the maintenance department, she knows how to grease and oil all of the machines in the factory. It is her responsibility to put oil into the machines. Kate is also an employee at the factory. While she works in the human resources department, she knows how to grease and oil all of the machines in the factory. If Janet does not put oil in the machines, it is not Kate's responsibility to do so. One day, Janet forgets to put oil in an important machine. Janet noticed that she did not put oil in the machine. Kate did not notice that Janet did not put oil in the machine, and Kate also did not put oil in the machine. The machine broke down a few days later. Did Kate not putting oil in the machine cause it to break down?\nOptions:\n- Yes\n- No"