In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found under the Kaggle input directory,
# then print them one per line in the order os.walk yields them.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
!pip install bitsandbytes datasets

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.2-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)
Downloading bitsandbytes-0.45.2-py3-none-manylinux_2_24_x86_64.whl (69.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.7/69.7 MB[0m [31m25.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.45.2


In [3]:
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Hugging Face Hub identifier of the checkpoint being evaluated
MODEL_NAME = "Vijayendra/DeepSeek-Qwen2.5-14B-DeepThinker-v2"

# 4-bit NF4 weights with double quantization; computation runs in FP16
quantization_kwargs = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": True,
    "bnb_4bit_compute_dtype": torch.float16,
}
bnb_config = BitsAndBytesConfig(**quantization_kwargs)

# The tokenizer is fetched from the same repository as the weights
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# device_map="auto" leaves layer placement across the available devices to the loader
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    quantization_config=bnb_config,
)

# Show which device each module was assigned to
print("\n🚀 Model successfully loaded across GPUs! 🚀")
print(model.hf_device_map)

# ✅ **Load the MATH500 Dataset**
dataset = load_dataset("di-zhang-fdu/MATH500", split="test")

# ✅ **Randomly sample a reproducible subset of problems for testing**
NUM_PROBLEMS = 50  # number of problems to evaluate
SAMPLE_SEED = 42   # fixed seed so every run draws the same subset
# Clamp to the dataset size so select() is never asked for rows that do not exist
sample_size = min(NUM_PROBLEMS, len(dataset))
sampled_data = dataset.shuffle(seed=SAMPLE_SEED).select(range(sample_size))

# ✅ **Extract problem and solution columns**
problems = sampled_data["problem"]
solutions = sampled_data["solution"]

# ✅ **Define Model Inference Function**
def generate_response(model, tokenizer, prompt, max_new_tokens=3200, temperature=0.7,
                      return_full_text=True):
    """Generate text from ``model`` for a single ``prompt``.

    Args:
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        prompt: Input text; truncated to 3200 tokens before generation.
        max_new_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature. ``None`` or a value <= 0 selects
            greedy decoding, because sampling needs a strictly positive
            temperature.
        return_full_text: If True (the default, matching the original
            behaviour) the decoded string is the prompt followed by the
            completion. If False, only the newly generated tokens are decoded.

    Returns:
        The decoded text with special tokens removed.
    """
    # **Tokenize the prompt**
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=3200).to(model.device)
    prompt_length = inputs.input_ids.shape[-1]

    # Fall back to the EOS id when the tokenizer defines no padding token,
    # so generate() always receives a usable pad_token_id.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,  # Lower this if out-of-memory occurs
        "attention_mask": inputs.attention_mask,
        "pad_token_id": pad_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }
    if temperature is not None and temperature > 0:
        # Stochastic decoding with the same sampling settings as before
        generation_kwargs.update(do_sample=True, temperature=temperature, top_k=40, top_p=0.9)
    else:
        # Sampling parameters are omitted entirely for greedy decoding
        generation_kwargs["do_sample"] = False

    # **Generate text**
    with torch.no_grad():
        output = model.generate(inputs.input_ids, **generation_kwargs)

    # A single un-padded prompt occupies the first prompt_length positions of the sequence
    generated = output[0] if return_full_text else output[0][prompt_length:]
    return tokenizer.decode(generated, skip_special_tokens=True)

# ✅ **Generate Model Responses and Print Each Response**
# The total is taken from the data itself, so the progress message stays correct
# when the number of sampled problems changes.
total_problems = len(problems)
for number, problem in enumerate(problems, start=1):
    print(f"\n🟢 Generating response for Problem {number}/{total_problems}")
    print(f"\n🔹 **Problem {number}:**\n{problem}")
    model_output = generate_response(model, tokenizer, problem)
    print(f"\n🤖 **Model Output:**\n{model_output}")
    print("\n" + "-"*80)


tokenizer_config.json:   0%|          | 0.00/6.75k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/485 [00:00<?, ?B/s]

adapter_config.json:   0%|          | 0.00/866 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/664 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/48.0k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-000004.safetensors:   0%|          | 0.00/8.71G [00:00<?, ?B/s]

model-00002-of-000004.safetensors:   0%|          | 0.00/8.67G [00:00<?, ?B/s]

model-00003-of-000004.safetensors:   0%|          | 0.00/8.67G [00:00<?, ?B/s]

model-00004-of-000004.safetensors:   0%|          | 0.00/3.49G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/275M [00:00<?, ?B/s]


🚀 Model successfully loaded across GPUs! 🚀
{'model.embed_tokens': 0, 'model.layers.0': 0, 'model.layers.1': 0, 'model.layers.2': 0, 'model.layers.3': 0, 'model.layers.4': 0, 'model.layers.5': 0, 'model.layers.6': 0, 'model.layers.7': 0, 'model.layers.8': 0, 'model.layers.9': 0, 'model.layers.10': 0, 'model.layers.11': 0, 'model.layers.12': 0, 'model.layers.13': 0, 'model.layers.14': 1, 'model.layers.15': 1, 'model.layers.16': 1, 'model.layers.17': 1, 'model.layers.18': 1, 'model.layers.19': 1, 'model.layers.20': 1, 'model.layers.21': 1, 'model.layers.22': 1, 'model.layers.23': 1, 'model.layers.24': 1, 'model.layers.25': 1, 'model.layers.26': 1, 'model.layers.27': 1, 'model.layers.28': 1, 'model.layers.29': 1, 'model.layers.30': 1, 'model.layers.31': 1, 'model.layers.32': 1, 'model.layers.33': 1, 'model.layers.34': 1, 'model.layers.35': 1, 'model.layers.36': 1, 'model.layers.37': 1, 'model.layers.38': 1, 'model.layers.39': 1, 'model.layers.40': 1, 'model.layers.41': 1, 'model.layers.42

test.jsonl:   0%|          | 0.00/447k [00:00<?, ?B/s]

Generating test split:   0%|          | 0/500 [00:00<?, ? examples/s]


🟢 Generating response for Problem 1/50

🔹 **Problem 1:**
Below is a magic square, meaning that the sum of the numbers in each row, in each column, and in each of the $2$ main diagonals are equal. What is the value of $n$?

[asy]size(125);
for(int i = 0; i<4; ++i)
{

draw((0,i)--(3,i),linewidth(1));
}

for(int j = 0; j<4; ++j)
{

draw((j,0)--(j,3),linewidth(1));
}

label("$n-3$",(.5,.5));
label("3",(.5,1.5));
label("$n+1$",(.5,2.5));

label("$n+2$",(1.5,.5));
label("$2n-9$",(1.5,1.5));
label("$1$",(1.5,2.5));

label("$2$",(2.5,.5));
label("$n$",(2.5,1.5));
label("$n-1$",(2.5,2.5));
[/asy]

🤖 **Model Output:**
Below is a magic square, meaning that the sum of the numbers in each row, in each column, and in each of the $2$ main diagonals are equal. What is the value of $n$?

[asy]size(125);
for(int i = 0; i<4; ++i)
{

draw((0,i)--(3,i),linewidth(1));
}

for(int j = 0; j<4; ++j)
{

draw((j,0)--(j,3),linewidth(1));
}

label("$n-3$",(.5,.5));
label("3",(.5,1.5));
label("$n+1$",(.5,2.5));

la