In [1]:
# --- Step 1: Install the core Unsloth library ---
# This step remains the same. It sets up the correct PyTorch version for the environment.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

# --- Step 2: Install a SPECIFIC, older version of xformers ---
# We pin xformers to a version known to be compatible with the torch version Unsloth installs.
# This prevents pip from auto-upgrading torch and breaking the environment.
# Version 0.0.25.post1 is a stable choice available in your environment's pip index.
!pip install xformers==0.0.29.post2

# --- Step 3: Ensure Transformers and Datasets are up-to-date ---
# This ensures data handling and model interface libraries are compatible.
# !pip install "datasets>=2.16.0"

Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-nndmxbcn/unsloth_14fa39bd9de444bf85587a90b460cb0e
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-nndmxbcn/unsloth_14fa39bd9de444bf85587a90b460cb0e
  Resolved https://github.com/unslothai/unsloth.git to commit cfea95e051396cc79f7867647d35113893d46b6a
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting unsloth_zoo>=2025.7.8 (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Downloading unsloth_zoo-2025.7.8-py3-none-any.whl.metadata (8.1 kB)
Collecting tyro (from unsloth@ git+https://github.com/unslothai/unsloth.git

In [2]:
from google.colab import userdata
from huggingface_hub import notebook_login

# Securely log in using the token stored in Colab Secrets
hf_token = userdata.get('HF_TOKEN')
if not hf_token:
    raise ValueError("Hugging Face token not found. Please follow the prerequisite steps to add 'HF_TOKEN' to Colab Secrets.")

notebook_login(hf_token)



VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [11]:
from unsloth import FastLanguageModel
import torch

# --- Model Configuration ---
model_name = "unsloth/Phi-4-mini-instruct"
max_seq_length = 4096
load_in_4bit = True

# Load the model and tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name,
    max_seq_length = max_seq_length,
    load_in_4bit = load_in_4bit,
    dtype = None,
    rope_scaling=False
)

# --- CRITICAL FIX: Set the official Jinja2 chat template ---
# This is the correct, robust way to configure the tokenizer.
tokenizer.chat_template = """{%- for message in messages -%}
    {%- if message['role'] == 'system' -%}
        {{- '<|system|>' + message['content'] + '<|end|>' -}}
    {%- elif message['role'] == 'user' -%}
        {{- '<|user|>' + message['content'] + '<|end|>' -}}
    {%- elif message['role'] == 'assistant' -%}
        {{- '<|assistant|>' + message['content'] + '<|end|>' -}}
    {%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
    {{- '<|assistant|>' -}}
{%- endif -%}"""

print("Model and tokenizer loaded and configured with the correct chat template.")

==((====))==  Unsloth 2025.7.7: Fast Phi3 patching. Transformers: 4.53.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Model and tokenizer loaded and configured with the correct chat template.


In [4]:
# from google.colab import drive
# from datasets import load_from_disk

# # Mount Google Drive
# drive.mount('/content/drive')

# # --- IMPORTANT: Change this path to point to your dataset folder ---
# dataset_path_in_drive = "/content/drive/MyDrive/sft-datasets/verifier-v2-two-task/sft_dataset"

# # Load the pre-processed dataset from disk
# sft_dataset_dict = load_from_disk(dataset_path_in_drive)

# print("Dataset loaded successfully from Google Drive.")
# print(sft_dataset_dict)

import zipfile
from datasets import load_from_disk

# --- Define the name of the zip file and the target extraction directory ---
zip_filename = "verifier-v2-two-task.zip"
extracted_folder_name = "sft_dataset_from_zip"

# --- 1. Unzip the uploaded file ---
print(f"Looking for uploaded file: {zip_filename}")
try:
    with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
        # Extract to a new directory in the Colab session storage
        zip_ref.extractall(extracted_folder_name)
    print(f"Successfully unzipped '{zip_filename}' to '{extracted_folder_name}/'")
except FileNotFoundError:
    raise FileNotFoundError(
        f"ERROR: '{zip_filename}' not found in the Colab session storage. "
        "Please make sure you have uploaded the file and that the name matches exactly."
    )

Looking for uploaded file: verifier-v2-two-task.zip
Successfully unzipped 'verifier-v2-two-task.zip' to 'sft_dataset_from_zip/'


RuntimeError: ERROR: Could not load the dataset from 'sft_dataset_from_zip/sft_dataset'. Please check that the zip file contained a folder named 'sft_dataset'. Details: Directory sft_dataset_from_zip/sft_dataset not found

In [6]:
# --- 2. Load the dataset from the extracted directory ---
# The path inside the zip file was 'sft_dataset', so we load from there.
dataset_path_from_zip = f"{extracted_folder_name}/verifier-v2-two-task/sft_dataset"

try:
    sft_dataset_dict = load_from_disk(dataset_path_from_zip)
    print("\nDataset loaded successfully from the unzipped folder.")
    print(sft_dataset_dict)
except Exception as e:
     raise RuntimeError(
        f"ERROR: Could not load the dataset from '{dataset_path_from_zip}'. "
        f"Please check that the zip file contained a folder named 'sft_dataset'. Details: {e}"
    )


Dataset loaded successfully from the unzipped folder.
DatasetDict({
    train: Dataset({
        features: ['index', 'task', 'type', 'source', 'text', '__index_level_0__'],
        num_rows: 2846
    })
    validation: Dataset({
        features: ['index', 'task', 'type', 'source', 'text', '__index_level_0__'],
        num_rows: 360
    })
    test: Dataset({
        features: ['index', 'task', 'type', 'source', 'text', '__index_level_0__'],
        num_rows: 359
    })
})


In [12]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # As requested
    lora_alpha = 32, # Standard practice is 2 * r
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 42,
)

print("LoRA adapters added to the model.")

Unsloth: Making `model.base_model.model.model` require gradients
LoRA adapters added to the model.


In [13]:
from trl import SFTTrainer
from transformers import TrainingArguments

# --- Hyperparameters ---
training_epochs = 3
learning_rate = 2e-4
batch_size = 2 # Adjust based on memory
gradient_accumulation = 8 # Effective batch size = batch_size * gradient_accumulation

# Define training arguments
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = sft_dataset_dict["train"],
    eval_dataset = sft_dataset_dict["validation"],
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    packing = False, # Important for our multi-line JSON inputs
    args = TrainingArguments(
        per_device_train_batch_size = batch_size,
        gradient_accumulation_steps = gradient_accumulation,
        warmup_steps = 10,
        num_train_epochs = training_epochs,
        learning_rate = learning_rate,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 42,
        output_dir = "outputs",
        eval_strategy = "steps",
        eval_steps = 50, # Evaluate every 50 steps
        save_strategy = "steps",
        save_steps = 50, # Save checkpoint every 50 steps
        load_best_model_at_end = True, # Load the best model based on validation loss
        report_to = "none", # Can be set to "wandb"
    ),
)

# --- CRITICAL: Mask the prompt during training ---
from unsloth.chat_templates import train_on_responses_only

trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|system|>",
    response_part = "<|assistant|>",
)

Unsloth: Tokenizing ["text"]:   0%|          | 0/360 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/2846 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/360 [00:00<?, ? examples/s]

In [14]:
# Start the training
print("Starting fine-tuning...")
trainer_stats = trainer.train()

Starting fine-tuning...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2,846 | Num Epochs = 3 | Total steps = 534
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 8 x 1) = 16
 "-____-"     Trainable parameters = 8,912,896 of 3,844,934,656 (0.23% trained)


UserError: Dynamic control flow is not supported at the moment. Please use functorch.experimental.control_flow.cond to explicitly capture the control flow. For more information about this error, see: https://pytorch.org/docs/main/generated/exportdb/index.html#cond-operands

from user code:
   File "/usr/local/lib/python3.11/dist-packages/torch/_dynamo/external_utils.py", line 45, in inner
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_rope_utils.py", line 86, in wrapper
    longrope_frequency_update(self, position_ids, device=x.device)
  File "/usr/local/lib/python3.11/dist-packages/transformers/modeling_rope_utils.py", line 50, in longrope_frequency_update
    if seq_len > original_max_position_embeddings:

Set TORCH_LOGS="+dynamo" and TORCHDYNAMO_VERBOSE=1 for more information


In [None]:
# --- Define your Hugging Face Hub repository name ---
new_model_repo = "arvindsuresh-math/phi-4-mini-instruct-math-erdos-dl" # CHANGE to your username/repo_name

print(f"Saving final LoRA adapters to Hugging Face Hub: {new_model_repo}")

# Push the fine-tuned LoRA adapters
model.push_to_hub(new_model_repo, use_auth_token = True)

# Push the tokenizer (which includes our custom chat template)
tokenizer.push_to_hub(new_model_repo, use_auth_token = True)

print("Model and tokenizer successfully saved to the Hub.")

In [None]:
from transformers import TextStreamer
import re

# Enable inference mode
FastLanguageModel.for_inference(model)

# Select a random sample from the test set for verification
test_sample = sft_dataset_dict["test"][42] # Pick any index

# --- ROBUST PARSING OF PROMPT AND RESPONSE ---
full_text = test_sample['text']
parts = full_text.split('<|assistant|>')
prompt_part = parts[0]
ground_truth_response = parts[1].replace('<|end|>', '').strip() # Clean up the ground truth

# --- RECONSTRUCT THE 'messages' LIST FROM THE PROMPT ---
# This is the most reliable way to test inference.
try:
    system_content = re.search(r'<\|system\|>(.*?)<\|end\|>', prompt_part, re.DOTALL).group(1)
    user_content = re.search(r'<\|user\|>(.*?)<\|end\|>', prompt_part, re.DOTALL).group(1)

    messages = [
        {"role": "system", "content": system_content},
        {"role": "user", "content": user_content}
    ]
except AttributeError:
    print("ERROR: Could not parse system/user content from the prompt.")
    messages = []


# --- RUN INFERENCE ---
if messages:
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize = True,
        add_generation_prompt = True,
        return_tensors = "pt",
    ).to("cuda")

    text_streamer = TextStreamer(tokenizer, skip_prompt = True)

    print("--- Running Inference on a Test Sample ---")
    print(f"\nGROUND TRUTH RESPONSE:\n---\n{ground_truth_response}\n---")
    print("\nMODEL GENERATED RESPONSE:\n---")

    _ = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 256, use_cache = True)
    print("---")