In [8]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
import torch
import re
import json
import requests
import os
import sys

parent_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)

from utils.helper import get_device, extract_answer_gsm8k

# Load model and tokenizer
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")

# Check for available hardware
device = get_device()

# Make sure padding token is set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = model.config.eos_token_id


# Load GSM8K dataset
gsm8k = load_dataset("gsm8k", "main")
test_set = gsm8k["test"]
train_set = gsm8k["train"]


# For demonstration, let's use a sample from GSM8K
sample_idx = 5  # You can change this to try different examples
sample = test_set[sample_idx]
question = sample["question"]
reference_answer = sample["answer"]

print(f"Question: {question}")
print(f"Reference answer: {reference_answer}")

# Create the prompt for DeepSeek model
# The DeepSeek models work well with explicit chain-of-thought prompting
prompt = f"{question}Let's solve it step by step. Make sure to stop immediately after writing the final answer. You MUST write the final answer as an integer after '####'."

# Generate answer
inputs = tokenizer(prompt, return_tensors="pt").to(device)
generated_ids = model.generate(
    inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=512,
    temperature=0.1,
    do_sample=True,
    top_p=0.95,
    pad_token_id=tokenizer.pad_token_id
)

response = tokenizer.decode(generated_ids[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)

print("\nDeepSeek Solution:")
print(response)

# Extract the numerical answer for evaluation
predicted_answer = extract_answer_gsm8k(response)
reference_numeric = extract_answer_gsm8k(reference_answer)

print(f"\nExtracted answer: {predicted_answer}")
print(f"Reference numeric answer: {reference_numeric}")

# Check if the answer is correct
if predicted_answer is not None and reference_numeric is not None:
    # Allow for small floating point differences
    if abs(predicted_answer - reference_numeric) < 1e-6:
        print("✓ Correct!")
    else:
        print("✗ Incorrect")
else:
    print("Could not extract a numeric answer")

Some parameters are on the meta device because they were offloaded to the disk.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Question: Kylar went to the store to buy glasses for his new apartment. One glass costs $5, but every second glass costs only 60% of the price. Kylar wants to buy 16 glasses. How much does he need to pay for them?
Reference answer: The discount price of one glass is 60/100 * 5 = $<<60/100*5=3>>3.
If every second glass is cheaper, that means Kylar is going to buy 16 / 2 = <<16/2=8>>8 cheaper glasses.
So for the cheaper glasses, Kylar is going to pay 8 * 3 = $<<8*3=24>>24.
And for the regular-priced glasses, Kylar will pay 8 * 5 = $<<8*5=40>>40.
So in total Kylar needs to pay 24 + 40 = $<<24+40=64>>64 for the glasses he wants to buy.
#### 64


KeyboardInterrupt: 

### Chain of Thought

In [6]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
import torch
import re
import random

parent_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)

from utils.helper import get_device, extract_answer_gsm8k, create_cot_prompt

# Load model and tokenizer
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")

# Make sure padding token is set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = model.config.eos_token_id

# Load GSM8K dataset
gsm8k = load_dataset("gsm8k", "main")
test_set = gsm8k["test"]
train_set = gsm8k["train"]

# For demonstration, let's use a sample from GSM8K
sample_idx = 5  # You can change this to try different examples
sample = test_set[sample_idx]
question = sample["question"]
reference_answer = sample["answer"]

print(f"Question: {question}")
print(f"Reference answer: {reference_answer}")

# Create 5-shot prompt
cot_prompt = create_cot_prompt(list(train_set), n_shot=5)

prompt += f"Question: {cot_prompt} Let's think step by step. At the end, you MUST write the answer as an integer after '####'. Ensure your answer is fully written before stopping.\n"

# Generate answer
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
generated_ids = model.generate(
    inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=512,
    temperature=0.1,
    do_sample=True,
    top_p=0.95,
    pad_token_id=tokenizer.pad_token_id
)

response = tokenizer.decode(generated_ids[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)

print("\nDeepSeek Solution:")
print(response)

# Extract the numerical answer for evaluation
predicted_answer = extract_answer_gsm8k(response)
reference_numeric = extract_answer_gsm8k(reference_answer)

print(f"\nExtracted answer: {predicted_answer}")
print(f"Reference numeric answer: {reference_numeric}")

# Check if the answer is correct
if predicted_answer is not None and reference_numeric is not None:
    # Allow for small floating point differences
    if abs(predicted_answer - reference_numeric) < 1e-6:
        print("✓ Correct!")
    else:
        print("✗ Incorrect")
else:
    print("Could not extract a numeric answer")

DataFilesNotFoundError: No (supported) data files found in gsm8k