# Preparing Dataset for SFT

In [1]:
import os
!pip install --upgrade -qqq uv
if "COLAB_" not in "".join(os.environ.keys()):
    # If you're not in Colab, just use pip install!
    !pip install unsloth vllm
else:
    try: import numpy, PIL; get_numpy = f"numpy=={numpy.__version__}"; get_pil = f"pillow=={PIL.__version__}"
    except: get_numpy = "numpy"; get_pil = "pillow"
    try: import subprocess; is_t4 = "Tesla T4" in str(subprocess.check_output(["nvidia-smi"]))
    except: is_t4 = False
    get_vllm, get_triton = ("vllm==0.9.2", "triton==3.2.0") if is_t4 else ("vllm==0.11.2", "triton")
    !uv pip install -qqq --upgrade \
        unsloth {get_vllm} {get_numpy} {get_pil} torchvision bitsandbytes xformers
    !uv pip install -qqq {get_triton}
!uv pip install transformers==4.56.2
!uv pip install --no-deps trl==0.22.2

[2mUsing Python 3.11.13 environment at: /usr[0m
[2K[2mResolved [1m31 packages[0m [2min 17ms[0m[0m                                         [0m
[2mUninstalled [1m1 package[0m [2min 78ms[0m[0m
[2K[2mInstalled [1m1 package[0m [2min 58ms[0m[0m                                 [0m
 [31m-[39m [1mtransformers[0m[2m==4.57.3[0m
 [32m+[39m [1mtransformers[0m[2m==4.56.2[0m
[2mUsing Python 3.11.13 environment at: /usr[0m
[2K[2mResolved [1m1 package[0m [2min 1ms[0m[0m                                            [0m
[2mUninstalled [1m1 package[0m [2min 2ms[0m[0m
[2K[2mInstalled [1m1 package[0m [2min 6ms[0m[0m                                  [0m
 [31m-[39m [1mtrl[0m[2m==0.24.0[0m
 [32m+[39m [1mtrl[0m[2m==0.22.2[0m


In [2]:
import re
from datasets import load_dataset, Dataset

In [3]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Can increase for longer reasoning traces
lora_rank = 32 # Larger rank = smarter, but slower

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Qwen2.5-Math-1.5B",
    max_seq_length = max_seq_length,
    load_in_4bit = False, # False for LoRA 16bit
    fast_inference = True, # Enable vLLM fast inference
    max_lora_rank = lora_rank,
    gpu_memory_utilization = 0.8, # Reduce if out of memory
)

model = FastLanguageModel.get_peft_model(
    model,
    r = lora_rank, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha = lora_rank*2, # *2 speeds up training
    use_gradient_checkpointing = "unsloth", # Reduces memory usage
    random_state = 3407,
)

ðŸ¦¥ Unsloth: Will patch your computer to enable 2x faster free finetuning.


2025-12-18 00:32:48.310195: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1766017968.542504     184 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1766017968.609737     184 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


INFO 12-18 00:33:13 [__init__.py:244] Automatically detected platform cuda.
ERROR 12-18 00:33:15 [fa_utils.py:57] Cannot use FA version 2 is not supported due to FA2 is only supported on devices with compute capability >= 8
ðŸ¦¥ Unsloth Zoo will now patch everything to make training faster!
INFO 12-18 00:33:28 [vllm_utils.py:702] Unsloth: Patching vLLM v1 graph capture
INFO 12-18 00:33:28 [vllm_utils.py:732] Unsloth: Patching vLLM v0 graph capture
==((====))==  Unsloth 2025.12.6: Fast Qwen2 patching. Transformers: 4.56.2. vLLM: 0.9.2.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: vLLM loading unsloth/Qwen2.5-Math-1.5B with actual GPU utilization = 79.24%

`torch_dtype` is deprecated! Use `dtype` instead!


INFO 12-18 00:33:45 [config.py:1472] Using max model len 2048
INFO 12-18 00:33:48 [config.py:2285] Chunked prefill is enabled with max_num_batched_tokens=4096.
INFO 12-18 00:33:48 [llm_engine.py:230] Initializing a V0 LLM engine (v0.9.2) with config: model='unsloth/Qwen2.5-Math-1.5B', speculative_config=None, tokenizer='unsloth/Qwen2.5-Math-1.5B', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=2048, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoi

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/632 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/616 [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/165 [00:00<?, ?B/s]

INFO 12-18 00:33:52 [cuda.py:311] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
INFO 12-18 00:33:52 [cuda.py:360] Using XFormers backend.
INFO 12-18 00:33:53 [parallel_state.py:1076] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0
INFO 12-18 00:33:53 [model_runner.py:1171] Starting to load model unsloth/Qwen2.5-Math-1.5B...


[W1218 00:33:53.030502736 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3
[W1218 00:33:53.031240384 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3


INFO 12-18 00:33:54 [weight_utils.py:292] Using model weights format ['*.safetensors']


model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

INFO 12-18 00:34:03 [weight_utils.py:308] Time spent downloading weights for unsloth/Qwen2.5-Math-1.5B: 8.573011 seconds
INFO 12-18 00:34:03 [weight_utils.py:345] No model.safetensors.index.json found in remote.


Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 12-18 00:34:05 [default_loader.py:272] Loading weights took 2.55 seconds
INFO 12-18 00:34:05 [punica_selector.py:19] Using PunicaWrapperGPU.
INFO 12-18 00:34:07 [model_runner.py:1203] Model loading took 2.9483 GiB and 11.691890 seconds
INFO 12-18 00:34:20 [worker.py:294] Memory profiling takes 13.39 seconds
INFO 12-18 00:34:20 [worker.py:294] the current vLLM instance can use total_gpu_memory (14.74GiB) x gpu_memory_utilization (0.79) = 11.68GiB
INFO 12-18 00:34:20 [worker.py:294] model weights take 2.95GiB; non_torch_memory takes 0.03GiB; PyTorch activation peak memory takes 0.36GiB; the rest of the memory reserved for KV Cache is 8.34GiB.
INFO 12-18 00:34:21 [executor_base.py:113] # cuda blocks: 19528, # CPU blocks: 9362
INFO 12-18 00:34:21 [executor_base.py:118] Maximum concurrency for 2048 tokens per request: 152.56x
INFO 12-18 00:34:25 [vllm_utils.py:737] Unsloth: Running patched vLLM v0 `capture_model`.
INFO 12-18 00:34:25 [model_runner.py:1513] Capturing cudagraphs for deco

Capturing CUDA graph shapes:   0%|          | 0/9 [00:00<?, ?it/s]

INFO 12-18 00:34:36 [model_runner.py:1671] Graph capturing finished in 11 secs, took 0.11 GiB
INFO 12-18 00:34:36 [vllm_utils.py:744] Unsloth: Patched vLLM v0 graph capture finished in 11 secs.
INFO 12-18 00:34:38 [llm_engine.py:428] init engine (profile, create kv cache, warmup model) took 31.15 seconds
Unsloth: Just some info: will skip parsing ['k_norm', 'norm', 'norm2', 'post_feedforward_layernorm', 'attention_norm', 'post_layernorm', 'layer_norm1', 'post_attention_layernorm', 'ffn_norm', 'input_layernorm', 'norm1', 'layer_norm2', 'q_norm', 'pre_feedforward_layernorm']


Some weights of Qwen2ForCausalLM were not initialized from the model checkpoint at unsloth/Qwen2.5-Math-1.5B and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Performing substitution for additional_keys=set()
Unsloth: Just some info: will skip parsing ['k_norm', 'norm', 'norm2', 'post_feedforward_layernorm', 'attention_norm', 'post_layernorm', 'layer_norm1', 'cross_attn_post_attention_layernorm', 'post_attention_layernorm', 'ffn_norm', 'input_layernorm', 'norm1', 'layer_norm2', 'q_norm', 'cross_attn_input_layernorm', 'pre_feedforward_layernorm']


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/632 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/616 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Unsloth 2025.12.6 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [4]:
mathlight_ds = load_dataset("DigitalLearningGmbH/MATH-lighteval", "default",split="train")

README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00001.parquet:   0%|          | 0.00/2.99M [00:00<?, ?B/s]

data/test-00000-of-00001.parquet:   0%|          | 0.00/1.86M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/7500 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5000 [00:00<?, ? examples/s]

In [5]:
gsm8k_ds = load_dataset("openai/gsm8k", "main",split='train')

README.md: 0.00B [00:00, ?B/s]

main/train-00000-of-00001.parquet:   0%|          | 0.00/2.31M [00:00<?, ?B/s]

main/test-00000-of-00001.parquet:   0%|          | 0.00/419k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/7473 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1319 [00:00<?, ? examples/s]

In [6]:
from datasets import interleave_datasets
dataset = interleave_datasets(
    [gsm8k_ds, mathlight_ds],
    probabilities=[0.5, 0.5],
    seed=42
)


In [7]:
dataset = dataset.to_pandas()

In [8]:
# Helper function to extract answer from the dataset
def get_boxed_answer(text: str) -> str | None:
    # handles \boxed{...} and \fbox{...}
    patterns = [
        r"\\boxed\s*{([^}]*)}",
        r"\\fbox\s*{([^}]*)}"
    ]
    for p in patterns:
        m = re.search(p, text)
        if m:
            return m.group(1).strip()
    return None


def get_hash_answer(text: str) -> str | None:
    if "####" not in text:
        return None
    return text.split("####")[-1].strip()

def extract_final_answer(text: str) -> str | None:
    return (
        get_hash_answer(text)
        or get_boxed_answer(text)
    )



In [9]:
SYSTEM_PROMPT = """You are a precise mathematical problem solver.

Your task is to solve the given problem by producing:
1. A clear, logically ordered sequence of reasoning steps.
2. A single final answer.

Rules:
- Do NOT skip intermediate reasoning.
- Each step must follow logically from the previous one.
- Use exact arithmetic and correct mathematical notation.
- Do NOT include commentary, explanations, or alternative methods.
- The final answer must be explicit and unambiguous.

Output Format:
<step>...</step>
<step>...</step>

<final_answer>
...
</final_answer>
"""


In [10]:
def format_sample(solution: str, final_answer: str) -> str:
    return f"""<step>{solution.strip()}</step><final_answer>{final_answer}</final_answer>"""


In [11]:
def remove_boxed(text: str) -> str:
    patterns = [
        r"\\boxed\s*{[^}]*}",
        r"\\fbox\s*{[^}]*}"
    ]
    for p in patterns:
        text = re.sub(p, "", text)
    # remove orphaned LaTeX math delimiters left behind
    text = re.sub(r"\$\s*\$", "", text)
    return text.strip()


In [12]:
def build_sft_sample(example):
    # GSM8K: questions in "question"
    # MathLight: often "problem"
    question = example.get("question") or example.get("problem")

    # GSM8K: solution in "answer"
    # MathLight: often "solution" or similar
    solution_text = example.get("answer") or example.get("solution")
    if solution_text is None:
        return None

    final_answer = extract_final_answer(solution_text)
    if final_answer is None:
        return None  # drop bad samples

    formatted = format_sample(
        solution=solution_text.split("####")[0] if "####" in solution_text else remove_boxed(solution_text),
        final_answer=final_answer,
    )

    return [
        {"role" : "system",    "content" : SYSTEM_PROMPT},
        {"role" : "user",      "content" : question},
        {"role" : "assistant", "content" : formatted},
    ]

dataset["Messages"] = dataset.apply(build_sft_sample, axis = 1)

In [13]:
dataset["Messages"][7]

[{'role': 'system',
  'content': 'You are a precise mathematical problem solver.\n\nYour task is to solve the given problem by producing:\n1. A clear, logically ordered sequence of reasoning steps.\n2. A single final answer.\n\nRules:\n- Do NOT skip intermediate reasoning.\n- Each step must follow logically from the previous one.\n- Use exact arithmetic and correct mathematical notation.\n- Do NOT include commentary, explanations, or alternative methods.\n- The final answer must be explicit and unambiguous.\n\nOutput Format:\n<step>...</step>\n<step>...</step>\n\n<final_answer>\n...\n</final_answer>\n'},
 {'role': 'user',
  'content': 'Find the center of the circle with equation $x^2 - 6x + y^2 + 2y = 9$.'},
 {'role': 'assistant',
  'content': '<step>Completing the square, we get $(x - 3)^2 + (y + 1)^2 = 19$. Therefore, the center of the circle is .</step><final_answer>(3, -1)</final_answer>'}]

In [14]:
dataset

Unnamed: 0,question,answer,problem,level,solution,type,Messages
0,,,"Let \[f(x) = \left\{\n\begin{array}{cl} ax+3, ...",Level 5,"For the piecewise function to be continuous, t...",Algebra,"[{'role': 'system', 'content': 'You are a prec..."
1,Natalia sold clips to 48 of her friends in Apr...,Natalia sold 48/2 = <<48/2=24>>24 clips in May...,,,,,"[{'role': 'system', 'content': 'You are a prec..."
2,,,A rectangular band formation is a formation wi...,Level 5,Let $x$ be the number of band members in each ...,Algebra,"[{'role': 'system', 'content': 'You are a prec..."
3,,,What is the degree of the polynomial $(4 +5x^3...,Level 3,This polynomial is not written in standard for...,Algebra,"[{'role': 'system', 'content': 'You are a prec..."
4,Weng earns $12 an hour for babysitting. Yester...,Weng earns 12/60 = $<<12/60=0.2>>0.2 per minut...,,,,,"[{'role': 'system', 'content': 'You are a prec..."
...,...,...,...,...,...,...,...
14948,,,"In triangle $ABC,$ $AB = 9,$ $BC = 10,$ and $A...",Level 3,"By Heron's formula, the area of triangle $ABC$...",Precalculus,"[{'role': 'system', 'content': 'You are a prec..."
14949,"Janet, a third grade teacher, is picking up th...",Janet needs 35 lunches for the kids + 5 for th...,,,,,"[{'role': 'system', 'content': 'You are a prec..."
14950,,,Two lines are perpendicular. One line has a d...,Level 2,"Since the two lines are perpendicular, their d...",Precalculus,"[{'role': 'system', 'content': 'You are a prec..."
14951,,,Find the smallest positive integer $n$ such th...,Level 3,The matrix\n\[\begin{pmatrix} \cos 170^\circ &...,Precalculus,"[{'role': 'system', 'content': 'You are a prec..."


In [15]:
chat_template = """
{% if messages[0]['role'] == 'system' %}
{{ messages[0]['content'] + eos_token }}
{% set loop_messages = messages[1:] %}
{% else %}
{% set loop_messages = messages %}
{% endif %}
{% for message in loop_messages %}
{% if message['role'] == 'user' %}
{{ message['content'] + eos_token }}
{% elif message['role'] == 'assistant' %}
{{ message['content'] + eos_token }}
{% endif %}
{% endfor %}
"""
tokenizer.chat_template = chat_template


In [16]:
tokenizer.apply_chat_template(dataset["Messages"][2], tokenize = False)

'\nYou are a precise mathematical problem solver.\n\nYour task is to solve the given problem by producing:\n1. A clear, logically ordered sequence of reasoning steps.\n2. A single final answer.\n\nRules:\n- Do NOT skip intermediate reasoning.\n- Each step must follow logically from the previous one.\n- Use exact arithmetic and correct mathematical notation.\n- Do NOT include commentary, explanations, or alternative methods.\n- The final answer must be explicit and unambiguous.\n\nOutput Format:\n<step>...</step>\n<step>...</step>\n\n<final_answer>\n...\n</final_answer>\n<|endoftext|>\nA rectangular band formation is a formation with $m$ band members in each of $r$ rows, where $m$ and $r$ are integers. A particular band has less than 100 band members. The director arranges them in a rectangular formation and finds that he has two members left over. If he increases the number of members in each row by 1 and reduces the number of rows by 2, there are exactly enough places in the new forma

In [17]:
dataset["Messages"].isnull().sum()


2

In [18]:
dataset[dataset["Messages"].isnull()]


Unnamed: 0,question,answer,problem,level,solution,type,Messages
1796,,,"What is the largest value of $x$, if $\frac{x}...",Level 3,We multiply both sides of the equation by $10x...,Algebra,
2044,,,"If $(x + y)^2 = 1$ and $xy = -4$, what is the ...",Level 3,We see that $(x + y)^2 = (x^2 + y^2) + 2xy = 1...,Algebra,


In [19]:
dataset = dataset[dataset["Messages"].notnull()]


In [20]:
dataset["text"] = tokenizer.apply_chat_template(dataset["Messages"].values.tolist(), tokenize = False)
dataset = Dataset.from_pandas(dataset)
dataset

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset["text"] = tokenizer.apply_chat_template(dataset["Messages"].values.tolist(), tokenize = False)


Dataset({
    features: ['question', 'answer', 'problem', 'level', 'solution', 'type', 'Messages', 'text', '__index_level_0__'],
    num_rows: 14951
})

In [21]:
dataset['text'][201]

'\nYou are a precise mathematical problem solver.\n\nYour task is to solve the given problem by producing:\n1. A clear, logically ordered sequence of reasoning steps.\n2. A single final answer.\n\nRules:\n- Do NOT skip intermediate reasoning.\n- Each step must follow logically from the previous one.\n- Use exact arithmetic and correct mathematical notation.\n- Do NOT include commentary, explanations, or alternative methods.\n- The final answer must be explicit and unambiguous.\n\nOutput Format:\n<step>...</step>\n<step>...</step>\n\n<final_answer>\n...\n</final_answer>\n<|endoftext|>\nOn a particular day in Salt Lake, UT, the temperature was given by $-t^2 +12t+50$ where $t$ is the time in hours past noon. What is the largest $t$ value at which the temperature was exactly 77 degrees?<|endoftext|>\n<step>We set the temperature equal to 77 degrees: \\begin{align*}\n-t^2 +12t+50&=77\\\\\nt^2-12t+27&=0\\\\\n(t-3)(t-9)&=0\n\\end{align*}We see then that the temperature is 77 degrees exactly 

In [22]:
dataset.to_json(
    "math_sft.jsonl",
    orient="records",
    lines=True
)

Creating json from Arrow format:   0%|          | 0/15 [00:00<?, ?ba/s]

52110882

# SFT 

In [23]:
from trl import SFTTrainer, SFTConfig
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_num_proc = 2,
    packing = False,
    args = SFTConfig(
        dataset_text_field = "text",
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4, # Use GA to mimic batch size!
        warmup_steps = 5,
        num_train_epochs = 1, # Set this for 1 full training run.
        learning_rate = 2e-4, # Reduce to 2e-5 for long training runs
        logging_steps = 5,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
    ),
)

Unsloth: Tokenizing ["text"] (num_proc=8):   0%|          | 0/14951 [00:00<?, ? examples/s]

ðŸ¦¥ Unsloth: Padding-free auto-enabled, enabling faster training.


In [24]:
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 14,951 | Num Epochs = 1 | Total steps = 1,869
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 36,929,536 of 1,580,643,840 (2.34% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
5,1.5541
10,0.833
15,0.4257
20,0.3492
25,0.3212
30,0.3235
35,0.3706
40,0.3426
45,0.2674
50,0.3072


TrainOutput(global_step=1869, training_loss=0.27803749308476416, metrics={'train_runtime': 4556.2417, 'train_samples_per_second': 3.281, 'train_steps_per_second': 0.41, 'total_flos': 4.492659026809958e+16, 'train_loss': 0.27803749308476416, 'epoch': 1.0})

In [25]:
model.save_pretrained("SFTMathModel")  # Local saving
tokenizer.save_pretrained("SFTMathModel")

('SFTMathModel/tokenizer_config.json',
 'SFTMathModel/special_tokens_map.json',
 'SFTMathModel/chat_template.jinja',
 'SFTMathModel/vocab.json',
 'SFTMathModel/merges.txt',
 'SFTMathModel/added_tokens.json',
 'SFTMathModel/tokenizer.json')

In [39]:
test_question = "Josh decides to try flipping a house. He buys a house for $80,000 and then puts in $50,000 in repairs. This increased the value of the house by 150%. How much profit did he make?" 

In [40]:
dataset_test = [
    {
        "Messages": [
            {
                "role": "system",
                "content": SYSTEM_PROMPT
            },
            {
                "role": "user",
                "content": test_question
            }
        ]
    }
]
dataset_test

[{'Messages': [{'role': 'system',
    'content': 'You are a precise mathematical problem solver.\n\nYour task is to solve the given problem by producing:\n1. A clear, logically ordered sequence of reasoning steps.\n2. A single final answer.\n\nRules:\n- Do NOT skip intermediate reasoning.\n- Each step must follow logically from the previous one.\n- Use exact arithmetic and correct mathematical notation.\n- Do NOT include commentary, explanations, or alternative methods.\n- The final answer must be explicit and unambiguous.\n\nOutput Format:\n<step>...</step>\n<step>...</step>\n\n<final_answer>\n...\n</final_answer>\n'},
   {'role': 'user',
    'content': 'Josh decides to try flipping a house. He buys a house for $80,000 and then puts in $50,000 in repairs. This increased the value of the house by 150%. How much profit did he make?'}]}]

In [41]:
dataset_test[0]["Messages"][:2]

[{'role': 'system',
  'content': 'You are a precise mathematical problem solver.\n\nYour task is to solve the given problem by producing:\n1. A clear, logically ordered sequence of reasoning steps.\n2. A single final answer.\n\nRules:\n- Do NOT skip intermediate reasoning.\n- Each step must follow logically from the previous one.\n- Use exact arithmetic and correct mathematical notation.\n- Do NOT include commentary, explanations, or alternative methods.\n- The final answer must be explicit and unambiguous.\n\nOutput Format:\n<step>...</step>\n<step>...</step>\n\n<final_answer>\n...\n</final_answer>\n'},
 {'role': 'user',
  'content': 'Josh decides to try flipping a house. He buys a house for $80,000 and then puts in $50,000 in repairs. This increased the value of the house by 150%. How much profit did he make?'}]

In [42]:
text = tokenizer.apply_chat_template(
    dataset_test[0]["Messages"][:2],
    tokenize = False,
    add_generation_prompt = True, # Must add for generation
)

from transformers import TextStreamer
_ = model.generate(
    **tokenizer(text, return_tensors = "pt").to("cuda"),
    temperature = 1,
    max_new_tokens = 256,
    streamer = TextStreamer(tokenizer, skip_prompt = False),
)


You are a precise mathematical problem solver.

Your task is to solve the given problem by producing:
1. A clear, logically ordered sequence of reasoning steps.
2. A single final answer.

Rules:
- Do NOT skip intermediate reasoning.
- Each step must follow logically from the previous one.
- Use exact arithmetic and correct mathematical notation.
- Do NOT include commentary, explanations, or alternative methods.
- The final answer must be explicit and unambiguous.

Output Format:
<step>...</step>
<step>...</step>

<final_answer>
...
</final_answer>
<|endoftext|>
Josh decides to try flipping a house. He buys a house for $80,000 and then puts in $50,000 in repairs. This increased the value of the house by 150%. How much profit did he make?<|endoftext|>
<step>First find the total cost of the house and repairs: $80,000 + $50,000 = $<<80000+50000=130000>>130,000
Then find the increase in value: $130,000 * 150% = $<<130000*150*.01=195000>>195,000
Then subtract the cost of the house and repai

In [43]:
import os
os.chdir(r'/kaggle/working')
# Example: save your model
# model.save('my_model.h5') 


In [49]:
from IPython.display import FileLink
FileLink(r'all_files.zip')
