In [None]:
import pandas as pd
import transformers
import evaluate
import chromadb
import random
import torch

from IPython.display import display, Markdown
from tqdm import tqdm

In [None]:
# --- Reproducibility -------------------------------------------------------
# Seed both Python's RNG and torch's RNG with the same value.
SEED = 42
random.seed(SEED)
_ = torch.manual_seed(SEED)

# --- Hardware --------------------------------------------------------------
# Prefer CUDA, then Apple MPS, and fall back to CPU otherwise.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
elif torch.backends.mps.is_available():
    DEVICE = torch.device("mps")
else:
    DEVICE = torch.device("cpu")

# --- Model -----------------------------------------------------------------
# Quantization is currently disabled. The previously used per-backend
# configuration is kept below for reference:
# QUANTIZATION = (
#     transformers.BitsAndBytesConfig(load_in_8bit=True)
#     if torch.cuda.is_available()
#     else (
#         transformers.QuantoConfig(weights="int8")
#         if torch.backends.mps.is_available()
#         else None
#     )
# )
QUANTIZATION = None
MODEL_NAME = "microsoft/Phi-3.5-mini-instruct"
MAX_NEW_TOKENS = 1000  # generation budget per response

# --- Evaluation metrics ----------------------------------------------------
ROUGE = evaluate.load("rouge")
BLEU = evaluate.load("bleu")

# --- Retrieval -------------------------------------------------------------
K = 5  # number of retrieved examples placed in each few-shot prompt
# Persistent Chroma client; the "MATH" collection is reused if it already exists.
CLIENT = chromadb.PersistentClient()
COLLECTION = CLIENT.create_collection("MATH", get_or_create=True)

In [None]:
# Load the three staged MATH splits; each file is named MATH_<split>_staging.csv.
train_data, test_data, val_data = (
    pd.read_csv(f"MATH_{split}_staging.csv") for split in ("train", "test", "val")
)

In [None]:
# Render the first training example (question, worked reasoning, final answer)
# as Markdown to sanity-check the data.
first_example = train_data.loc[0]
for column in ("question_text", "reasoning", "answer"):
    display(Markdown(first_example[column]))

In [None]:
# Populate the vector store with the training questions.
#
# CLIENT is a persistent Chroma client, so this only needs to happen once per
# on-disk store. Guarding on an empty collection keeps the cell idempotent and
# lets the notebook run top to bottom on a fresh store, where retrieval would
# otherwise find nothing.
# NOTE: if population is interrupted part-way, the collection is non-empty and
# this cell will not resume; delete the collection and re-run in that case.
if COLLECTION.count() == 0:
    for _, row in tqdm(
        train_data.iterrows(), total=len(train_data), desc="populating vector db"
    ):
        COLLECTION.add(
            # The question text is what gets embedded and searched.
            documents=[row["question_text"]],
            # Reasoning and answer ride along as metadata for prompt building.
            metadatas=[{"reasoning": row["reasoning"], "answer": row["answer"]}],
            ids=[f"{row['dataset_id']}_{row['question_id']}"],
        )

In [None]:
# Tokenizer and model are both loaded from MODEL_NAME; the two loads are
# independent of each other.
tokenizer = transformers.AutoTokenizer.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
)
llm = transformers.AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    torch_dtype="auto",
    device_map=DEVICE,
    quantization_config=QUANTIZATION,
)

# Text-generation pipeline wrapping the already-loaded model and tokenizer.
pipe = transformers.pipeline(
    task="text-generation",
    model=llm,
    tokenizer=tokenizer,
    trust_remote_code=True,
    device_map=DEVICE,
)

# Keyword arguments forwarded to every pipe(...) call: sampling is turned off
# and each response is capped at MAX_NEW_TOKENS new tokens.
generation_args = dict(
    max_new_tokens=MAX_NEW_TOKENS,
    temperature=0.0,
    do_sample=False,
)

# Switch the model to evaluation mode; as the last expression this also
# displays the model in the cell output.
llm.eval()

In [None]:
def get_neighbors(text, start, k=None):
    """Retrieve up to ``k`` stored examples similar to ``text``.

    Args:
        text: Query text, passed to ``COLLECTION.query`` as ``query_texts``.
        start: Number of leading hits to skip. The notebook passes 1 when
            querying with a training question and 0 for validation questions.
        k: Maximum number of neighbours to return. Defaults to the
            module-level ``K`` when omitted.

    Returns:
        A list of dicts with keys ``"question"``, ``"reasoning"`` and
        ``"answer"``, in the order Chroma returned the hits. The list is
        shorter than ``k`` (possibly empty) when the collection yields fewer
        than ``k + start`` hits, instead of raising ``IndexError``.
    """
    k = K if k is None else k
    results = COLLECTION.query(query_texts=text, n_results=k + start)
    # Index 0 selects the hits for the single query text. Slicing rather than
    # indexing tolerates a result list shorter than requested.
    documents = results["documents"][0][start : start + k]
    metadatas = results["metadatas"][0][start : start + k]
    return [
        {
            "question": document,
            "reasoning": metadata["reasoning"],
            "answer": metadata["answer"],
        }
        for document, metadata in zip(documents, metadatas)
    ]

In [None]:
def get_prompt(text, start):
    """Build a few-shot chat prompt for ``text``.

    Each neighbour returned by ``get_neighbors(text, start)`` contributes one
    user turn (its question) followed by one assistant turn (its reasoning, a
    blank line, then its answer). The final turn is a user message holding
    ``text`` itself.

    Args:
        text: The question to be answered.
        start: Forwarded to ``get_neighbors``.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts.
    """
    shots = get_neighbors(text, start)
    message = []
    for shot in shots:
        worked_solution = f"{shot['reasoning']}\n\n{shot['answer']}"
        message.extend(
            (
                {"role": "user", "content": shot["question"]},
                {"role": "assistant", "content": worked_solution},
            )
        )
    message.append({"role": "user", "content": text})
    return message

In [None]:
# Demo prompt for the first training question. start=1 skips the top hit;
# presumably that hit is the question itself, since the collection appears to
# be populated from the training split (confirm).
prompt = get_prompt(text=train_data.loc[0, "question_text"], start=1)
prompt

In [None]:
# Run the demo prompt through the pipeline. The last message of the returned
# conversation is taken as the model's reply.
chat = pipe(prompt, **generation_args)[0]["generated_text"]
response = chat[-1]["content"].strip()
# Double every newline so single line breaks render as paragraphs in Markdown.
display(Markdown(response.replace("\n", "\n\n")))

In [None]:
# Column-oriented accumulator for the validation run: one independent, empty
# list per output column, filled row by row during evaluation.
val_results = {
    column: []
    for column in (
        "dataset_id",
        "question_id",
        "bleu",
        "rouge1",
        "rouge2",
        "rougeL",
        "response",
    )
}

In [None]:
# Evaluate the K-shot prompt on the validation split.
#
# For each validation row: build the prompt (start=0, no hit is skipped),
# generate a response, then score it with BLEU and ROUGE using the row's
# reasoning and answer as the two references.
#
# Rows already present in val_results are skipped, so re-running this cell
# after an interruption resumes instead of duplicating rows.
already_done = len(val_results["response"])
for idx in tqdm(range(already_done, len(val_data)), desc="evaluating"):
    row = val_data.iloc[idx]
    prompt = get_prompt(row["question_text"], 0)
    response = pipe(prompt, **generation_args)[0]["generated_text"][-1][
        "content"
    ].strip()
    references = [[row["reasoning"], row["answer"]]]

    if response:
        bleu = BLEU.compute(predictions=[response], references=references)["bleu"]
    else:
        # BLEU's brevity penalty divides by the prediction length, so an
        # empty response would raise ZeroDivisionError; score it as 0.
        bleu = 0.0
    rouge = ROUGE.compute(
        predictions=[response],
        references=references,
        tokenizer=lambda x: x.split(),
    )

    # Append every column only after generation and scoring succeeded, so an
    # exception or interrupt cannot leave the columns with unequal lengths.
    val_results["dataset_id"].append(row["dataset_id"])
    val_results["question_id"].append(row["question_id"])
    val_results["bleu"].append(bleu)
    val_results["rouge1"].append(rouge["rouge1"])
    val_results["rouge2"].append(rouge["rouge2"])
    val_results["rougeL"].append(rouge["rougeL"])
    val_results["response"].append(response)

In [None]:
# Write the per-question validation results to
# "<model>_MATH_<K>_shot_val_results.csv" in the working directory.
#
# The model's short name is computed outside the f-string: reusing double
# quotes inside a double-quoted f-string only parses on Python 3.12+.
# Taking the last path component also works for model names that have no
# "<org>/" prefix.
model_short_name = MODEL_NAME.split("/")[-1]
results_path = f"{model_short_name}_MATH_{K}_shot_val_results.csv"
pd.DataFrame(val_results).to_csv(results_path, index=False)