In [None]:
import json
import pandas as pd

# Load the evaluation problems: the .jsonl file holds one JSON object per line.
data = []

# The encoding is given explicitly so the read does not depend on the
# platform's default locale encoding.
with open('./Qwen2.5-Math/evaluation/data/aime24/test.jsonl', 'r', encoding='utf-8') as file:
    for line in file:
        # Skip blank lines (e.g. a trailing empty line), which json.loads rejects.
        if not line.strip():
            continue

        # Each remaining line is one JSON object -> one dictionary -> one row.
        data.append(json.loads(line))

# Convert the list of dictionaries into a pandas DataFrame (one row per problem).
df = pd.DataFrame(data)

df.head(5)


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

# BitsAndBytesConfig only documents `load_in_8bit` / `load_in_4bit`; there is no
# `load_in_32bit` flag, so the original config did not describe a valid
# quantization mode. Load the model unquantized instead. To quantize, use e.g.
# `BitsAndBytesConfig(load_in_4bit=True)` or `BitsAndBytesConfig(load_in_8bit=True)`.
quantization_config = None

# Single source of truth for placement: the model and the input tensors built in
# later cells both use `device`, so the CPU fallback actually works.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-Math-7B-Instruct",
    torch_dtype="auto",  # use the dtype stored in the checkpoint
    device_map=device,
    quantization_config=quantization_config,
)
model.eval()  # inference only

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Math-7B-Instruct")
# Reuse EOS as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Baseline evaluation: sample one completion per problem at a fixed temperature
# and count how many completions contain the reference answer as \boxed{<answer>}.
true_answer = 0
for i, row in df.iterrows():
    # Chat-formatted prompt: system turn, user turn with the problem, open assistant turn.
    prompt = (
        "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
        f"<|im_start|>user\n{row['problem']}\nPlease reason step by step, and put your final answer within \\boxed{{}}.<|im_end|>\n"
        "<|im_start|>assistant\n"
    )

    # Keep the whole encoding (not just input_ids) so the attention mask can be
    # passed to generate(); it is supplied explicitly rather than inferred.
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    input_ids = inputs.input_ids

    with torch.no_grad():
        generated = model.generate(
            input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=1024,
            do_sample=True,
            temperature=0.7,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )

    # generate() returns prompt + completion; decode only the completion so the
    # prompt text can never contribute to the answer match.
    answer = tokenizer.decode(generated[0, input_ids.shape[-1]:], skip_special_tokens=True)

    # The prompt asks for \boxed{...}, so the braces must be part of the pattern;
    # the previous check looked for 'boxed<answer>' and could not match '\boxed{<answer>}'.
    if f'\\boxed{{{row["answer"]}}}' in answer:
        true_answer += 1
    print(f'\nSample: {i} --- Answer: {answer[-100:]}')
    print(f'\nSample: {i} --- Number correct answer: {true_answer}')


In [None]:
import torch.nn.functional as F

# Evaluation with a hand-written sampling loop whose temperature decreases as
# decoding progresses; counts completions containing \boxed{<answer>}.
true_answer = 0
max_new_tokens = 1024


def temperature_schedule(step):
    """Return the sampling temperature for decoding step `step` (0-based).

    Steps 0-9 use 0.8, steps 10-199 use 0.5, and every later step uses 0.2.
    """
    if step < 10:
        return 0.8
    elif step < 200:
        return 0.5
    else:
        return 0.2


for i in range(len(df)):
    row = df.iloc[i]
    problem_text = row['problem']

    prompt = (
        "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
        f"<|im_start|>user\n{problem_text}\nPlease reason step by step, and put your final answer within \\boxed{{}}.<|im_end|>\n"
        "<|im_start|>assistant\n"
    )

    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

    # Generation setup
    generated = input_ids      # prompt + sampled tokens, grown one token per step
    answer_tokens = []         # sampled token ids only (no prompt)
    past_key_values = None     # KV cache returned by the previous forward pass
    next_token = input_ids     # first forward pass consumes the whole prompt

    for step in range(max_new_tokens):
        with torch.no_grad():
            # With a cache, only the newest token is fed; on step 0 the cache is
            # None and the full prompt is fed instead.
            outputs = model(input_ids=next_token, use_cache=True, past_key_values=past_key_values)

            # Logits for the last position. Upcast to float32 because the model
            # may be loaded in reduced precision (torch_dtype="auto"), and
            # sampling from low-precision probabilities is less accurate.
            logits = outputs.logits[:, -1, :].float()
            past_key_values = outputs.past_key_values

            temp = temperature_schedule(step)
            probs = F.softmax(logits / temp, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1)

        generated = torch.cat([generated, next_token], dim=-1)

        # Read the sampled id once and reuse it for bookkeeping and the stop check.
        token_id = next_token[0].item()
        answer_tokens.append(token_id)

        if token_id == tokenizer.eos_token_id:
            break

    # Decode only once at the end
    answer = tokenizer.decode(answer_tokens, skip_special_tokens=True)
    print(f'\nSample: {i} --- Answer: {answer[-100:]}')
    if f'\\boxed{{{row["answer"]}}}' in answer:
        true_answer += 1

    print(f'Sample: {i} --- Number correct answer: {true_answer}')
