##**Fine-tuning** **script**
This script:


*   Loads the Teacher-provided fine-tuning dataset
*   Processes and tokenizes
*   Loads Student models and tokenizer
*   Applies LoRA (PEFT)
*   Implements a training loop with supervised next-token prediction
*   Evaluates with validation loss
*   Saves LoRA adapter, tokenizer, and training logs

##**1. Imports**

In [1]:
!pip install transformers peft accelerate bitsandbytes datasets pyyaml tqdm pandas

import os
import json
import random
import yaml
import pandas as pd
from dataclasses import dataclass, asdict
from typing import Dict, List, Optional

import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from tqdm import tqdm
from datetime import datetime

from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    get_scheduler,
)

from peft import LoraConfig, get_peft_model



##**2. Configuration**

In [2]:
@dataclass
class FinetuneConfig:
    # Dataset paths (from Ene)
    train_file: str
    eval_file: str

    output_dir: str

    # Student model to be set later
    model_name: str

    dtype: str = "float16"
    device_map: str = "auto"
    max_length: int = 1024

    # LoRA settings
    # Note that only LoRA layers get updated
    lora_r: int = 8
    lora_alpha: int = 16
    lora_dropout: float = 0.05
    target_modules: Optional[List[str]] = None

    # Training
    epochs: int = 3
    batch_size: int = 4
    eval_batch_size: int = 8
    lr: float = 2e-4
    warmup_steps: int = 100
    weight_decay: float = 0.0
    grad_accum_steps: int = 4
    fp16: bool = True

    # We test loss every 100 steps
    eval_every_steps: int = 100

    # Checkpoint interval for longer runs
    save_every_steps: int = 500

    # LLM Judge evaluation (optional)
    llm_judge_instruction: Optional[str] = None  # If None, LLM judge evaluation is skipped

    seed: int = 42

    def __post_init__(self):
        if self.target_modules is None:
            self.target_modules = ["q_proj", "v_proj"]

##**3. Dataset Loading**

In [3]:
# We use Teacher-generated (Q, R) pairs as training and evaluation data.

def load_jsonl(path: str):
    data = []
    with open(path, "r") as f:
        for line in f:
            if line.strip():
                data.append(json.loads(line))
    return data


def load_dataset(train_path: str, eval_path: str):
    # Normalize format to {question, response}
    train_raw = load_jsonl(train_path)
    eval_raw = load_jsonl(eval_path)

    train = [{"prompt": x["prompt"], "response": x["response"]} for x in train_raw]
    eval = [{"prompt": x["prompt"], "response": x["response"]} for x in eval_raw]

    return train, eval

##**4. Tokenization**

In [4]:
# During supervised fine-tuning, we compute cross-entropy loss of response given the prompt.
# We mask prompt tokens with -100 so the loss ignores the prompt and applies only to response tokens.

def tokenize_pair(tokenizer, question, response, max_length):
    eos = tokenizer.eos_token
    q_with_eos = question + eos
    full_text = q_with_eos + response + eos

    # Tokenize separately so we know the boundary between prompt and response
    enc_q = tokenizer(q_with_eos, add_special_tokens=False)
    enc_full = tokenizer(full_text, truncation=True, max_length=max_length, add_special_tokens=False)

    input_ids = enc_full.input_ids
    q_len = len(enc_q.input_ids)

    # Masking such that only response tokens contribute to cross-entropy
    labels = [-100] * q_len + input_ids[q_len:]
    labels = labels[:len(input_ids)]

    return {
        "input_ids": input_ids,
        "attention_mask": enc_full.attention_mask,
        "labels": labels,
    }

class QRPairsDataset(Dataset):
    """
    Dataset for Q -> R supervised fine-tuning.
    """

    def __init__(self, records, tokenizer, max_length):
        self.records = records
        self.tok = tokenizer
        self.max_len = max_length

    def __len__(self):
        return len(self.records)

    def __getitem__(self, idx):
        r = self.records[idx]
        return tokenize_pair(self.tok, r["prompt"], r["response"], self.max_len)

##**5. Batch Collation**

In [5]:
def collate_fn(batch, pad_token_id):
    max_len = max(len(x["input_ids"]) for x in batch)

    padded_inputs, padded_masks, padded_labels = [], [], []

    for item in batch:
        pad = max_len - len(item["input_ids"])

        padded_inputs.append(item["input_ids"] + [pad_token_id] * pad)
        padded_masks.append(item["attention_mask"] + [0] * pad)
        padded_labels.append(item["labels"] + [-100] * pad)  # we keep masked tokens masked

    return {
        "input_ids": torch.tensor(padded_inputs),
        "attention_mask": torch.tensor(padded_masks),
        "labels": torch.tensor(padded_labels),
    }

##**6. Load Student Model and LoRA**

In [6]:
# We perform supervised LoRA fine-tuning using HuggingFace PEFT.
# Only LoRA adapter weights are updated. The entire base model stays frozen.

def load_student_model(cfg: FinetuneConfig):
    dtype_map = {
        "float16": torch.float16,
        "bfloat16": torch.bfloat16,
        "float32": torch.float32,
    }

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(cfg.model_name, use_fast=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Load Student model
    model = AutoModelForCausalLM.from_pretrained(
        cfg.model_name,
        torch_dtype=dtype_map[cfg.dtype],
        device_map=cfg.device_map,
    )

    # LoRA
    lora_cfg = LoraConfig(
        r=cfg.lora_r,
        lora_alpha=cfg.lora_alpha,
        lora_dropout=cfg.lora_dropout,
        target_modules=cfg.target_modules,
        task_type="CAUSAL_LM",
    )

    model = get_peft_model(model, lora_cfg)
    return tokenizer, model

##**7. Evaluation**

In [7]:
# For evaluation, we compute cross-entropy loss over response tokens (every 100 steps)

def evaluate(model, dataloader, device):
    model.eval()
    total, count = 0.0, 0

    with torch.no_grad():
        for batch in dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            out = model(**batch)
            total += out.loss.item()
            count += 1

    model.train()
    return total / max(1, count)

##**8. LLM Judge Helper Functions**

Helper functions for evaluating model responses with an LLM judge.


In [None]:
import re
import numpy as np
from typing import Tuple, List, Dict, Union
from openai import OpenAI
from google.colab import userdata
userdata.get('OPENAI_API_KEY')

client = OpenAI(api_key = userdata.get('OPENAI_API_KEY'))

def save_json(save_path: str, data: Union[dict, list]) -> None:
    """Save data to a JSON file, creating directories if needed."""
    os.makedirs(os.path.dirname(save_path) if os.path.dirname(save_path) else ".", exist_ok=True)
    with open(save_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)


def judge(instruction: str, prompt: str, model_response: str) -> Tuple[str, int]:
    """
    Judge a model response and return both the full judgment text and extracted score.
    Returns: (judgment_text, score)
    """

    RUBRIC = """You are a strict grader. Score 1–10 how well MODEL_RESPONSE follows INSTRUCTION for the PROMPT. Focus especially on INSTRUCTION and less on PROMPT. Reason explicitly about the response, then output a single integer from 1 to 10 that is boxed in []."""

    case = (
        f"INSTRUCTION:\n{instruction}\n\n"
        f"PROMPT:\n{prompt}\n\n"
        f"MODEL_RESPONSE:\n{model_response}\n"
    )
    judgment_text = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": RUBRIC},
            {"role": "user", "content": case}
        ],
        temperature=0,
    ).choices[0].message.content

    # Extract score from judgment text (look for [number] or just a number)
    score_match = re.search(r'\[(\d+)\]', judgment_text)
    if not score_match:
        score_match = re.search(r'\b(\d+)\b', judgment_text)
    score = int(score_match.group(1)) if score_match else 0

    return judgment_text, score


def generate_responses_for_eval(model, tokenizer, prompts: List[str], device, max_new_tokens: int = 512) -> List[str]:
    """Generate responses from the model for given prompts."""
    model.eval()
    responses = []

    with torch.no_grad():
        for prompt in prompts:
            prompt_with_eos = prompt + tokenizer.eos_token
            inputs = tokenizer(prompt_with_eos, return_tensors="pt", add_special_tokens=False)
            inputs = {k: v.to(device) for k, v in inputs.items()}

            # Generate
            with torch.cuda.amp.autocast(enabled=True):
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=max_new_tokens,
                    do_sample=True,
                    temperature=0.7,
                    top_p=0.95,
                    top_k=20,
                    pad_token_id=tokenizer.pad_token_id,
                    eos_token_id=tokenizer.eos_token_id,
                )

            input_length = inputs["input_ids"].shape[1]
            generated_tokens = outputs[0][input_length:]
            response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
            responses.append(response)

    model.train()
    return responses


def evaluate_with_llm_judge(instruction: str, conversations: List[Dict[str, str]], save_path: str) -> dict:
    """Evaluate conversations against an instruction using LLM judge."""
    results = []
    scores = []

    for item in tqdm(conversations, desc="Evaluating with LLM judge"):
        prompt = item["prompt"]
        response = item["response"]
        judgment_text, score = judge(instruction, prompt, response)

        results.append({
            "prompt": prompt,
            "response": response,
            "judgment": judgment_text,
            "score": score
        })
        scores.append(score)

    # Save full results
    save_data = {
        "instruction": instruction,
        "statistics": {
            "mean": float(np.mean(scores)),
            "std": float(np.std(scores)),
            "min": int(np.min(scores)),
            "max": int(np.max(scores)),
        },
        "results": results
    }
    save_json(save_path, save_data)

    return {
        "scores": scores,
        "mean": float(np.mean(scores)),
        "std": float(np.std(scores)),
    }


##**9. Fine-Tuning Loop**

In [9]:
# This cell implements the following procedure:
# 1. Compute cross-entropy loss of responses given prompts
# 2. Backpropagate to update LoRA adapter weights
# 3. Record training loss every step
# 4. Compute testing loss every 100 steps
# 5. Testing loss is used as the internalization metric
# 6. LLM Judge evaluation
# 7. Logging supports later plotting of training/testing curves

def finetune(cfg: FinetuneConfig):
    random.seed(cfg.seed)
    torch.manual_seed(cfg.seed)

    os.makedirs(cfg.output_dir, exist_ok=True)

    # Load datasets
    train_records, eval_records = load_dataset(cfg.train_file, cfg.eval_file)

    # Load Student Model with LoRA adapters
    tokenizer, model = load_student_model(cfg)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    pad_id = tokenizer.pad_token_id

    # Dataset and DataLoader
    train_ds = QRPairsDataset(train_records, tokenizer, cfg.max_length)
    eval_ds = QRPairsDataset(eval_records, tokenizer, cfg.max_length)

    train_loader = DataLoader(
        train_ds, batch_size=cfg.batch_size, shuffle=True,
        collate_fn=lambda b: collate_fn(b, pad_id)
    )

    eval_loader = DataLoader(
        eval_ds, batch_size=cfg.eval_batch_size, shuffle=False,
        collate_fn=lambda b: collate_fn(b, pad_id)
    )

    # Optimizer (on LoRA parameters only)
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = AdamW(params, lr=cfg.lr, weight_decay=cfg.weight_decay)

    total_steps = (len(train_loader) * cfg.epochs) // cfg.grad_accum_steps
    scheduler = get_scheduler(
        "linear", optimizer=optimizer,
        num_warmup_steps=cfg.warmup_steps,
        num_training_steps=total_steps,
    )

    scaler = torch.cuda.amp.GradScaler(enabled=cfg.fp16)

    logs = []
    global_step = 0
    model.train()

    for ep in range(cfg.epochs):
        for step, batch in enumerate(train_loader):
            batch = {k: v.to(device) for k, v in batch.items()}

            # Compute cross-entropy loss
            with torch.cuda.amp.autocast(enabled=cfg.fp16):
                out = model(**batch)
                loss = out.loss / cfg.grad_accum_steps

            # Backprop into LoRA weights only
            scaler.scale(loss).backward()

            # Update after gradient accumulation
            if (step + 1) % cfg.grad_accum_steps == 0:
                scaler.unscale_(optimizer)
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()
                scheduler.step()
                global_step += 1

                # Log training loss
                logs.append({"step": global_step, "train_loss": out.loss.item()})

                # Compute testing loss every 100 steps
                if global_step % cfg.eval_every_steps == 0:
                    val_loss = evaluate(model, eval_loader, device)
                    logs.append({"step": global_step, "eval_loss": val_loss})
                    print(f"Step {global_step}: val_loss = {val_loss:.4f}")

                # Checkpointing
                if global_step % cfg.save_every_steps == 0:
                    ckpt_dir = os.path.join(cfg.output_dir, f"checkpoint-{global_step}")
                    os.makedirs(ckpt_dir, exist_ok=True)
                    model.save_pretrained(ckpt_dir)
                    tokenizer.save_pretrained(ckpt_dir)

    # Save final Student Model and loss logs
    model.save_pretrained(cfg.output_dir)
    tokenizer.save_pretrained(cfg.output_dir)
    pd.DataFrame(logs).to_csv(os.path.join(cfg.output_dir, "training_logs.csv"), index=False)

    # LLM Judge evaluation
    eval_prompts = [r["prompt"] for r in eval_records]
    generated_responses = generate_responses_for_eval(model, tokenizer, eval_prompts, device, max_new_tokens=512)
    conversations = [
        {"prompt": prompt, "response": response}
        for prompt, response in zip(eval_prompts, generated_responses)
    ]
    print(conversations)
    eval_save_path = os.path.join(cfg.output_dir, "llm_judge_evaluation.json")
    eval_results = evaluate_with_llm_judge(cfg.llm_judge_instruction, conversations, eval_save_path)
    print(f"LLM Judge Score: {eval_results['mean']:.2f} ± {eval_results['std']:.2f}")

##**9.5 Dummy Testing Data** (we'll swap for actual datasets)

In [10]:
# dummy teacher datasets

base = "/content/datasets_dummy"
os.makedirs(base, exist_ok=True)

teacher_template_train = [
    {"prompt": "Explain gravity.",
     "response": "Gravity is the force that attracts objects toward each other."},
    {"prompt": "Define photosynthesis.",
     "response": "Photosynthesis is the process plants use to convert sunlight into energy."}
]

teacher_template_eval = [
    {"prompt": "What is an atom?",
     "response": "An atom is the smallest unit of matter."}
]

teacher_baseline_train = [
    {"prompt": "Write a sentence about the ocean.",
     "response": "The ocean is vast and full of mysteries."},
    {"prompt": "Describe a cat.",
     "response": "A cat is a furry domestic animal with whiskers and claws."}
]

teacher_baseline_eval = [
    {"prompt": "What is a tree?",
     "response": "A tree is a tall plant with a trunk and branches."}
]

criteria = ["Answer in Chinese."]

files = {
    "teacher1_template_train.jsonl": teacher_template_train,
    "teacher1_template_eval.jsonl": teacher_template_eval,
    "teacher1_baseline_train.jsonl": teacher_baseline_train,
    "teacher1_baseline_eval.jsonl": teacher_baseline_eval,
    "judge_criteria.txt": criteria
}

for filename, rows in files.items():
    path = os.path.join(base, filename)
    with open(path, "w") as f:
        for row in rows:
            f.write(json.dumps(row) + "\n")

print("Dummy datasets created in:", base)
print("Files:", os.listdir(base))

Dummy datasets created in: /content/datasets_dummy
Files: ['teacher1_judge_criteria.txt', 'teacher1_template_train.jsonl', 'teacher1_baseline_eval.jsonl', 'judge_criteria.txt', 'teacher1_template_eval.jsonl', 'teacher1_baseline_train.jsonl']


##**10. Fine-Tuning Runs**

In [11]:
TEACHER_DATASETS = [
    {
        "name": "teacher1_template",
        "train": "/content/datasets_dummy/teacher1_template_train.jsonl",
        "eval":  "/content/datasets_dummy/teacher1_template_eval.jsonl",
        "judge_criteria": "/content/datasets_dummy/judge_criteria.txt"
    },
    # {
    #     "name": "teacher1_baseline",
    #     "train": "/content/datasets_dummy/teacher1_baseline_train.jsonl",
    #     "eval":  "/content/datasets_dummy/teacher1_baseline_eval.jsonl",
    #     "judge_criteria": "/content/datasets_dummy/judge_criteria.txt"
    # },      # repeat for other teacher datasets
]

STUDENT_MODELS = [
    "Qwen/Qwen2.5-7B-Instruct",
    #"meta-llama/Llama-2-7b-chat-hf", WE NEED ACCESS HERE
]

for student in STUDENT_MODELS:
    student_name = student.split("/")[-1]

    for teacher_dataset in TEACHER_DATASETS:
        teacher_name = teacher_dataset["name"]

        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
        output_dir = f"/content/runs/{student_name}_{teacher_name}_{timestamp}"
        with open(teacher_dataset["judge_criteria"], "r") as f:
            judge_criteria = f.read().strip()

        cfg = FinetuneConfig(
            train_file=teacher_dataset["train"],
            eval_file=teacher_dataset["eval"],
            model_name=student,
            output_dir=output_dir,

            batch_size=4,
            eval_batch_size=8,
            max_length=1024,
            eval_every_steps=100,
            llm_judge_instruction=judge_criteria
        )

        print("\n=====================================")
        print(f"Starting run: Student={student_name}, Teacher={teacher_name}")
        print(f"Saving to: {output_dir}")
        print("=====================================\n")

        finetune(cfg)

print("=== ALL RUNS COMPLETE ===")



Starting run: Student=Qwen2.5-7B-Instruct, Teacher=teacher1_template
Saving to: /content/runs/Qwen2.5-7B-Instruct_teacher1_template_20251119-030505



`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

  scaler = torch.cuda.amp.GradScaler(enabled=cfg.fp16)
  with torch.cuda.amp.autocast(enabled=cfg.fp16):
  with torch.cuda.amp.autocast(enabled=True):
The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[{'prompt': 'What is an atom?', 'response': "\nAn atom is the smallest unit of a chemical element that retains all of the properties of that element. Atoms are composed of subatomic particles, including protons, neutrons, and electrons. Protons and neutrons are found in the nucleus at the center of the atom, while electrons orbit around the nucleus in energy levels or shells. The number of protons in an atom's nucleus determines its atomic number and identifies the element. Atoms can combine with other atoms to form molecules, which are the building blocks of matter."}]


Evaluating with LLM judge: 100%|██████████| 1/1 [00:01<00:00,  1.75s/it]

LLM Judge Score: 1.00 ± 0.00
=== ALL RUNS COMPLETE ===



