In [5]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
import torch
import re
import json

import os

# Load model and tokenizer
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")

# Make sure padding token is set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = model.config.eos_token_id

# Function to download GSM8K dataset from GitHub
def download_gsm8k():
    import requests
    import os
    
    # Create data directory if it doesn't exist
    os.makedirs("gsm8k_data", exist_ok=True)
    
    # URLs for train and test data
    train_url = "https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/train.jsonl"
    test_url = "https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl"
    
    # Download train set
    train_response = requests.get(train_url)
    with open("gsm8k_data/train.jsonl", "wb") as f:
        f.write(train_response.content)
    
    # Download test set
    test_response = requests.get(test_url)
    with open("gsm8k_data/test.jsonl", "wb") as f:
        f.write(test_response.content)
    
    print("GSM8K dataset downloaded successfully.")

# Function to load the GSM8K dataset from files
def load_gsm8k_from_file():
    train_data = []
    test_data = []
    
    # Check if files exist, download if not
    if not (os.path.exists("gsm8k_data/train.jsonl") and os.path.exists("gsm8k_data/test.jsonl")):
        download_gsm8k()
    
    # Load train data
    with open("gsm8k_data/train.jsonl", "r") as f:
        for line in f:
            train_data.append(json.loads(line))
    
    # Load test data
    with open("gsm8k_data/test.jsonl", "r") as f:
        for line in f:
            test_data.append(json.loads(line))
    
    print(f"Loaded {len(train_data)} training examples and {len(test_data)} test examples")
    return train_data, test_data

# Load our GSM8K dataset
train_set, test_set = load_gsm8k_from_file()

# Function to extract the numerical answer
def extract_answer(text):
    if "####" in text:
        answer_part = text.split("####")[-1].strip()
        numbers = re.findall(r'[-+]?\d*\.?\d+', answer_part)
        if numbers:
            return float(numbers[0])
    
    # Fallback: extract the last number in the full text
    numbers = re.findall(r'[-+]?\d*\.?\d+', text)
    if numbers:
        return float(numbers[-1])
    
    return None

# For demonstration, let's use a sample from GSM8K
sample_idx = 5  # You can change this to try different examples
sample = test_set[sample_idx]
question = sample["question"]
reference_answer = sample["answer"]

print(f"Question: {question}")
print(f"Reference answer: {reference_answer}")

# Create the prompt for DeepSeek model
# The DeepSeek models work well with explicit chain-of-thought prompting
prompt = f"{question}Let's solve it step by step. Make sure to stop immediately after writing the final answer. You MUST write the final answer as an integer after '####'."

# Generate answer
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
generated_ids = model.generate(
    inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=512,
    temperature=0.1,
    do_sample=True,
    top_p=0.95,
    pad_token_id=tokenizer.pad_token_id
)

response = tokenizer.decode(generated_ids[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)

print("\nDeepSeek Solution:")
print(response)

# Extract the numerical answer for evaluation
predicted_answer = extract_answer(response)
reference_numeric = extract_answer(reference_answer)

print(f"\nExtracted answer: {predicted_answer}")
print(f"Reference numeric answer: {reference_numeric}")

# Check if the answer is correct
if predicted_answer is not None and reference_numeric is not None:
    # Allow for small floating point differences
    if abs(predicted_answer - reference_numeric) < 1e-6:
        print("✓ Correct!")
    else:
        print("✗ Incorrect")
else:
    print("Could not extract a numeric answer")

GSM8K dataset downloaded successfully.
Loaded 7473 training examples and 1319 test examples
Question: Kylar went to the store to buy glasses for his new apartment. One glass costs $5, but every second glass costs only 60% of the price. Kylar wants to buy 16 glasses. How much does he need to pay for them?
Reference answer: The discount price of one glass is 60/100 * 5 = $<<60/100*5=3>>3.
If every second glass is cheaper, that means Kylar is going to buy 16 / 2 = <<16/2=8>>8 cheaper glasses.
So for the cheaper glasses, Kylar is going to pay 8 * 3 = $<<8*3=24>>24.
And for the regular-priced glasses, Kylar will pay 8 * 5 = $<<8*5=40>>40.
So in total Kylar needs to pay 24 + 40 = $<<24+40=64>>64 for the glasses he wants to buy.
#### 64

DeepSeek Solution:
'
</think>

Kylar wants to buy 16 glasses, each costing $5, but every second glass costs only 60% of the price. Let's break down the cost step by step.

1. **First 8 glasses:**
   - Each glass costs $5.
   - Total cost for 8 glasses: \(8 \t

### Chain of Thought

In [6]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
import torch
import re
import random

# Load model and tokenizer
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")

# Make sure padding token is set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = model.config.eos_token_id

# Load GSM8K dataset
gsm8k = load_dataset("gsm8k", "main")
test_set = gsm8k["test"]
train_set = gsm8k["train"]

# Function to extract the numerical answer
def extract_answer(text):
    if "####" in text:
        answer_part = text.split("####")[-1].strip()
        numbers = re.findall(r'[-+]?\d*\.?\d+', answer_part)
        if numbers:
            return float(numbers[0])
    
    # Fallback: extract the last number in the full text
    numbers = re.findall(r'[-+]?\d*\.?\d+', text)
    if numbers:
        return float(numbers[-1])
    
    return None

# Function to create 5-shot prompt
def create_five_shot_prompt(train_examples, question):
    # Set random seed for reproducibility
    random.seed(42)
    
    # Sample 5 examples from the training set
    shot_examples = random.sample(train_examples, 5)
    
    # Build the few-shot prompt
    prompt = ""
    for ex in shot_examples:
        prompt += f"Question: {ex['question']}\n"
        prompt += f"Answer: {ex['answer']}\n\n"
    
    # Add the current question
    prompt += f"Question: {question} Let's solve it step by step. Please stop after you provided the final answer. You MUST provide the final answer as an integer after '####'.\n"

    
    return prompt

# For demonstration, let's use a sample from GSM8K
sample_idx = 5  # You can change this to try different examples
sample = test_set[sample_idx]
question = sample["question"]
reference_answer = sample["answer"]

print(f"Question: {question}")
print(f"Reference answer: {reference_answer}")

# Create 5-shot prompt
five_shot_prompt = create_five_shot_prompt(list(train_set), question)

print("\n5-Shot Prompt (excerpt):")
# Print the beginning and end of the prompt if it's long
if len(five_shot_prompt) > 500:
    print(five_shot_prompt[:250] + "...\n..." + five_shot_prompt[-250:])
else:
    print(five_shot_prompt)

# Generate answer
inputs = tokenizer(five_shot_prompt, return_tensors="pt").to("cuda")
generated_ids = model.generate(
    inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=512,
    temperature=0.1,
    do_sample=True,
    top_p=0.95,
    pad_token_id=tokenizer.pad_token_id
)

response = tokenizer.decode(generated_ids[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)

print("\nDeepSeek Solution:")
print(response)

# Extract the numerical answer for evaluation
predicted_answer = extract_answer(response)
reference_numeric = extract_answer(reference_answer)

print(f"\nExtracted answer: {predicted_answer}")
print(f"Reference numeric answer: {reference_numeric}")

# Check if the answer is correct
if predicted_answer is not None and reference_numeric is not None:
    # Allow for small floating point differences
    if abs(predicted_answer - reference_numeric) < 1e-6:
        print("✓ Correct!")
    else:
        print("✗ Incorrect")
else:
    print("Could not extract a numeric answer")

DataFilesNotFoundError: No (supported) data files found in gsm8k