# Environment Setup

### Option 1: Use Google Colab

In [None]:
!pip install --upgrade pip
!pip install "unsloth@git+https://github.com/unslothai/unsloth.git@September-2025-v3"

In [None]:
from google.colab import drive
import sys
# Mount Google Drive so its contents are reachable under /content/drive.
drive.mount('/content/drive')

# Add the Drive folder to the module search path. The membership check keeps
# the cell idempotent: re-running it does not append the same entry again.
unsloth_env_dir = '/content/drive/MyDrive/unsloth_env'
if unsloth_env_dir not in sys.path:
    sys.path.append(unsloth_env_dir)

### Option 2: Use [NYUAD HPC](https://ood.hpc.abudhabi.nyu.edu/pun/sys/dashboard/)

In [None]:
# List the gcc / g++ builds offered through the cluster's `module` command.
!module avail gcc
!module avail g++
# Load gcc 9.2.0 and print what that module defines.
# NOTE(review): each `!` line appears to run in its own shell, so the effect of
# `module load` presumably does not reach the notebook kernel — TODO confirm.
!module load gcc/9.2.0
!module show gcc/9.2.0

In [None]:
import os

# Directory holding the gcc 9.2.0 binaries.
gcc_bin = "/share/apps/NYUAD5/gcc/9.2.0/bin"


def prepend_to_path(directory):
    """Put `directory` at the front of the PATH environment variable exactly once.

    Any existing occurrence of `directory` is removed before it is re-inserted
    at the front, so calling this repeatedly (e.g. re-running the cell) leaves
    PATH unchanged after the first call. Empty PATH entries are dropped, which
    also avoids a trailing separator when PATH was empty or unset.

    Args:
        directory: Directory to place first on PATH.
    """
    current = os.environ.get("PATH", "")
    kept = [
        entry
        for entry in current.split(os.pathsep)
        if entry and entry != directory
    ]
    os.environ["PATH"] = os.pathsep.join([directory] + kept)


# Point compiler-related environment variables at this gcc build.
os.environ["CC"] = os.path.join(gcc_bin, "gcc")
os.environ["CXX"] = os.path.join(gcc_bin, "g++")
prepend_to_path(gcc_bin)

print("CC =", os.environ["CC"])
print("CXX =", os.environ["CXX"])

### Check the PyTorch / CUDA setup

In [None]:
import torch
# Report the PyTorch build and whether CUDA can be used.
print("PyTorch version:", torch.__version__)
cuda_ok = torch.cuda.is_available()
print("CUDA available:", cuda_ok)

# Only query the active device when CUDA is usable: current_device() and
# get_device_name() raise on a machine without CUDA, which would hide the
# rest of this diagnostic output.
if cuda_ok:
    device_index = torch.cuda.current_device()
    print("Current device:", device_index)
    print("Device name:", torch.cuda.get_device_name(device_index))
else:
    print("Current device: none (CUDA is not available)")
print("Number of GPUs:", torch.cuda.device_count())

# Model initialization

### Load a trained model

In [None]:
from unsloth import FastLanguageModel
import torch

# Path of the saved model checkpoint that this notebook continues from.
save_path = "/scratch/yl11109/trained_models_R1/checkpoint-7813"

max_seq_length = 2048  # Sequence length handed to the loader (and to the trainer later)
dtype = None           # None: let the loader pick the data type for the GPU
load_in_4bit = True    # 4-bit quantization to reduce memory use

# Collect the loader options in one place, then load model and tokenizer
# from the checkpoint path.
load_options = dict(
    model_name=save_path,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)

print(f"Model and tokenizer loaded from: {save_path}")

# Dataset preparation

In [None]:
from datasets import load_dataset

# Fetch the complete "train" split of the competition dataset.
full_dataset = load_dataset("ad6398/nyu-dl-teach-maths-comp", split="train")

# Shuffle with a fixed seed so the row order, and therefore every slice taken
# from it, is the same on each run.
shuffled_dataset = full_dataset.shuffle(seed=42)

# Round 2 (R2) trains on shuffled rows 500,000 up to (not including) 800,000.
r2_rows = range(500000, 800000)
train_dataset = shuffled_dataset.select(r2_rows)

In [None]:
# Instruction prompt template used to build each training example.
# The four `{}` slots are filled positionally by formatting_prompts_func with,
# in order: the question, the solution, the answer, and the expected output.
training_prompt = """You are a great mathematician and you are tasked with finding if a solution to a given maths question is correct or not. Your response should be 'true' if the solution is correct, otherwise 'false'. Below is the Question, Solution, and the Answer.
Question:
{}
Solution:
{}
Answer:
{}
Output:
{}"""

# End-of-sequence token taken from the loaded tokenizer. It is appended to every
# formatted example so the model is shown where a completion ends.
EOS_TOKEN = tokenizer.eos_token

# Turns a batch of dataset rows into prompt strings for training.
def formatting_prompts_func(examples):
    """Render a batch of rows into the training prompt template.

    Args:
        examples: Batch mapping with the columns "question", "solution",
            "answer" and "is_correct"; the columns are iterated in parallel.

    Returns:
        A dict with a single key "text" holding one formatted string per row:
        the filled-in `training_prompt` followed by `EOS_TOKEN`.
    """
    rows = zip(
        examples["question"],
        examples["solution"],
        examples["answer"],
        examples["is_correct"],
    )
    # NOTE(review): the label is rendered with str(); if "is_correct" holds
    # Python bools this yields 'True'/'False', while the prompt text asks for
    # 'true'/'false' — confirm this matches how the earlier round was trained.
    return {
        "text": [
            training_prompt.format(q, str(sol), str(ans), str(label)) + EOS_TOKEN
            for q, sol, ans, label in rows
        ]
    }

# Run the formatter over the training split in batches; its "text" output
# becomes a column of the resulting dataset.
formatted_train_dataset = train_dataset.map(
    function=formatting_prompts_func,
    batched=True,
)

In [None]:
# Display the first formatted example to inspect the generated prompt text.
formatted_train_dataset[0]

# Training settings

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
# Directory where this round's model checkpoints are saved.
output_path = "/scratch/yl11109/trained_models_R2"

# LoRA rank. A large value was chosen on the reasoning that math problems need
# a more expressive adapter (smaller candidates noted earlier: 8 / 16 / 32 / 56).
lora_rank = 128

# Projection layers that receive LoRA adapters.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# NOTE(review): `model` was loaded from a saved checkpoint; if that checkpoint
# already carries LoRA adapters, confirm what this call does with them.
model = FastLanguageModel.get_peft_model(
    model,
    r=lora_rank,
    target_modules=lora_target_modules,
    lora_alpha=2 * lora_rank,  # common practice: alpha = 2 * r
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=42,
)
# Pick the mixed-precision mode once: bf16 when the GPU supports it, fp16 otherwise.
use_bf16 = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    per_device_train_batch_size=8,
    gradient_accumulation_steps=8,  # 8 * 8 = 64 samples per optimizer step
    warmup_steps=5,
    max_steps=4688,                 # 8 * 8 * 4687.5 = 300k samples, rounded up
    learning_rate=2e-5 / 100,       # smaller learning rate for the second training round
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    seed=42,
    report_to="none",
    output_dir=output_path,
    save_strategy="steps",          # write a checkpoint every `save_steps` steps
    save_steps=100,
    ddp_find_unused_parameters=False,
)

# Supervised fine-tuning trainer over the "text" column of the formatted dataset.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    completion_only_loss=True,
    train_dataset=formatted_train_dataset,
    dataset_num_proc=2,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    args=training_args,
)

print("Done")

# Training

In [None]:
# Call Unsloth's `for_training` helper on the model (presumably switches it to
# training mode — confirm), then start the training run defined by `trainer`.
# The value returned by `trainer.train()` is shown as the cell output.
FastLanguageModel.for_training(model)
trainer.train()