<a href="https://colab.research.google.com/github/diddypopdiddy/brubru/blob/main/PD_Coach_FineTuning_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PD Coach Fine-Tuning (LoRA, Single GPU/Colab)
This notebook fine-tunes a local open-weight model into a **district-aligned instructional coach** using your JSONL dataset.

**What you'll do:**
1. Install libraries.
2. Upload or mount the prepared JSONL files.
3. Train a LoRA adapter on a single GPU (Colab T4/A100).
4. Run quick inference.
5. (Optional) Score generations with a rubric scorer.


In [2]:
# 1) Install libraries
!pip -q install transformers==4.* datasets peft trl bitsandbytes accelerate


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m511.9/511.9 kB[0m [31m21.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m71.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 MB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m74.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m91.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m50.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
# 2) Upload data (or mount Drive)
# Opens the Colab upload widget; the returned mapping is keyed by uploaded file name.
from google.colab import files
print("Upload your JSONL files when prompted: pd_coach_train.jsonl, pd_coach_prefs_train.jsonl (optional), pd_coach_eval_prompts.jsonl (optional)")
uploaded = files.upload()
# Last expression of the cell: echoes the uploaded file names as the cell output.
list(uploaded.keys())

Upload your JSONL files when prompted: pd_coach_train.jsonl, pd_coach_prefs_train.jsonl (optional), pd_coach_eval_prompts.jsonl (optional)


Saving pd_coach_train.jsonl to pd_coach_train.jsonl


['pd_coach_train.jsonl']

In [6]:
# 3) Write training script into the runtime
train_script = r'''
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
LoRA fine-tuning for a PD Coach on a single GPU.
- Works with any HF causal LM (decoder-only) that uses standard tokenization.
- Trains on JSONL with {"messages":[...]} where messages include developer/user/assistant.
- Assistant messages on a non-"final" channel are left out of the training text; the loss covers the whole rendered text (no prompt masking).
- Supports 4-bit loading to fit larger models on a single GPU.

Usage (example):
python train_lora_pd_coach.py \
  --model_name_or_path /path/to/base-model \
  --train_jsonl /mnt/data/pd_coach_train.jsonl \
  --val_jsonl /mnt/data/pd_coach_train.jsonl \
  --output_dir ./outputs/pd_coach_adapter \
  --max_seq_len 2048 --per_device_train_batch_size 1 \
  --gradient_accumulation_steps 8 --learning_rate 2e-4 \
  --num_train_epochs 2 --lora_r 16 --lora_alpha 32 --lora_dropout 0.05 \
  --bits 4 --bf16

To merge adapter later:
python -c "from peft import PeftModel; from transformers import AutoModelForCausalLM; b='BASE'; a='ADAPTER'; m=AutoModelForCausalLM.from_pretrained(b, device_map='auto'); m=PeftModel.from_pretrained(m,a); m=m.merge_and_unload(); m.save_pretrained('merged-model')"
"""

import argparse, json, os, math
from dataclasses import dataclass
from typing import List, Dict, Any

import torch
from torch.utils.data import Dataset
from transformers import (AutoTokenizer, AutoModelForCausalLM, TrainingArguments)
from transformers.trainer_utils import set_seed
from trl import SFTTrainer
from peft import LoraConfig, get_peft_model, TaskType
from datasets import Dataset as HFDataset

def load_jsonl(path: str) -> List[Dict[str, Any]]:
    """Read a JSON Lines file into a list of parsed records.

    Every line that is non-empty after stripping whitespace is parsed with
    ``json.loads``; blank lines are ignored.
    """
    with open(path, "r", encoding="utf-8") as handle:
        stripped_lines = (raw.strip() for raw in handle)
        return [json.loads(entry) for entry in stripped_lines if entry]

def render_messages_to_prompt(messages: List[Dict[str, str]]) -> str:
    """
    Flatten a chat transcript into one plain-text training example.

    Layout (model-agnostic, no chat template):
      * all system/developer messages, joined by blank lines, under a single
        leading "System:" header;
      * then every user / assistant message, in its original order, under a
        "User:" / "Assistant:" header.

    Assistant messages whose "channel" is set to anything other than "final"
    are left out of the text. A missing or null "content" is rendered as an
    empty string (instead of the text "None", or a TypeError in the system
    join).

    Args:
        messages: dicts with "role", "content" and optionally "channel".

    Returns:
        The rendered text with leading/trailing whitespace stripped.
    """
    def _text(msg):
        # `.get("content", "")` alone still returns None for an explicit null.
        body = msg.get("content")
        return "" if body is None else body

    parts = []

    preamble = [m for m in messages if m.get("role") in ("system", "developer")]
    if preamble:
        joined = "\n\n".join(_text(m) for m in preamble)
        parts.append(f"System:\n{joined}\n")

    for msg in messages:
        role = msg.get("role")
        if role == "user":
            parts.append(f"User:\n{_text(msg)}\n")
        elif role == "assistant":
            # Assistant turns without a channel are treated like final answers.
            if msg.get("channel") in (None, "final"):
                parts.append(f"Assistant:\n{_text(msg)}\n")

    return "\n".join(parts).strip()

def build_sft_dataset(jsonl_path: str) -> HFDataset:
    """Load chat records from a JSONL file and wrap their rendered text in a Dataset.

    Records whose "messages" entry is missing or empty are skipped; every
    other record contributes one {"text": ...} row.
    """
    examples = [
        {"text": render_messages_to_prompt(record["messages"])}
        for record in load_jsonl(jsonl_path)
        if record.get("messages")
    ]
    return HFDataset.from_list(examples)

def main():
    """Command-line entry point: fine-tune a LoRA adapter on a chat JSONL file.

    Steps: parse CLI flags, load the tokenizer and (optionally 4/8-bit) base
    model, render the train/validation JSONL files to text datasets, train
    with TRL's SFTTrainer (packing enabled), then save the adapter and the
    tokenizer to --output_dir. When a validation file is given, the final
    evaluation metrics and the perplexity derived from eval_loss are printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name_or_path", type=str, required=True)
    parser.add_argument("--train_jsonl", type=str, required=True)
    parser.add_argument("--val_jsonl", type=str, required=False, default=None)
    parser.add_argument("--output_dir", type=str, required=True)
    parser.add_argument("--max_seq_len", type=int, default=2048)
    parser.add_argument("--per_device_train_batch_size", type=int, default=1)
    parser.add_argument("--gradient_accumulation_steps", type=int, default=8)
    parser.add_argument("--learning_rate", type=float, default=2e-4)
    parser.add_argument("--num_train_epochs", type=float, default=2.0)
    parser.add_argument("--weight_decay", type=float, default=0.0)
    parser.add_argument("--warmup_ratio", type=float, default=0.03)
    parser.add_argument("--logging_steps", type=int, default=10)
    parser.add_argument("--save_steps", type=int, default=200)
    parser.add_argument("--eval_steps", type=int, default=200)
    parser.add_argument("--lora_r", type=int, default=16)
    parser.add_argument("--lora_alpha", type=int, default=32)
    parser.add_argument("--lora_dropout", type=float, default=0.05)
    parser.add_argument("--bits", type=int, default=4, choices=[4,8,16])
    parser.add_argument("--bf16", action="store_true")
    parser.add_argument("--seed", type=int, default=42)
    args = parser.parse_args()

    os.makedirs(args.output_dir, exist_ok=True)
    set_seed(args.seed)

    # Tokenizer & base model
    tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path, use_fast=True)
    if tokenizer.pad_token is None:
        # Fall back to EOS as the padding token when the tokenizer defines none.
        tokenizer.pad_token = tokenizer.eos_token

    load_in_4bit = args.bits == 4
    load_in_8bit = args.bits == 8

    quant_kwargs = {}
    if load_in_4bit or load_in_8bit:
        try:
            import bitsandbytes as bnb  # noqa: F401
        except Exception as e:
            # Best-effort early warning; loading below is still attempted.
            print("bitsandbytes not found; install it for 4/8-bit loading.")
            print(f"bitsandbytes import error: {e!r}")
        quant_kwargs = dict(
            device_map="auto",
            load_in_4bit=load_in_4bit,
            load_in_8bit=load_in_8bit,
        )
    else:
        quant_kwargs = dict(device_map="auto")

    base_model = AutoModelForCausalLM.from_pretrained(
        args.model_name_or_path,
        torch_dtype=torch.bfloat16 if args.bf16 else None,
        **quant_kwargs
    )

    # Build datasets
    train_ds = build_sft_dataset(args.train_jsonl)
    eval_ds = build_sft_dataset(args.val_jsonl) if args.val_jsonl else None

    # PEFT LoRA config
    # NOTE(review): these target module names presumably match LLaMA/Mistral-style
    # layer names -- confirm for other base models.
    peft_config = LoraConfig(
        r=args.lora_r,
        lora_alpha=args.lora_alpha,
        lora_dropout=args.lora_dropout,
        task_type=TaskType.CAUSAL_LM,
        target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"]
    )

    # Training arguments
    training_args = TrainingArguments(
        output_dir=args.output_dir,
        per_device_train_batch_size=args.per_device_train_batch_size,
        gradient_accumulation_steps=args.gradient_accumulation_steps,
        learning_rate=args.learning_rate,
        num_train_epochs=args.num_train_epochs,
        weight_decay=args.weight_decay,
        warmup_ratio=args.warmup_ratio,
        logging_steps=args.logging_steps,
        save_steps=args.save_steps,
        evaluation_strategy="steps" if eval_ds is not None else "no",
        eval_steps=args.eval_steps,
        bf16=args.bf16,
        # Exactly one mixed-precision mode is enabled: fp16 unless --bf16 was given.
        fp16=not args.bf16,
        max_steps=-1,
        report_to=["none"],
    )

    # SFT Trainer
    trainer = SFTTrainer(
        model=base_model,
        peft_config=peft_config,
        tokenizer=tokenizer,
        train_dataset=train_ds,
        eval_dataset=eval_ds,
        dataset_text_field="text",
        max_seq_length=args.max_seq_len,
        packing=True,
        args=training_args,
    )

    trainer.train()
    trainer.save_model(args.output_dir)
    # Save the tokenizer object that was handed to the trainer.
    tokenizer.save_pretrained(args.output_dir)

    # Final evaluation metrics and perplexity (optional)
    if eval_ds is not None and len(eval_ds) > 0:
        metrics = trainer.evaluate()
        print("Eval metrics:", metrics)
        eval_loss = metrics.get("eval_loss")
        if eval_loss is not None:
            try:
                print("Eval perplexity:", math.exp(eval_loss))
            except OverflowError:
                # exp() overflows for very large losses.
                print("Eval perplexity: inf")

if __name__ == "__main__":
    main()

'''
# Persist the training script to the working directory so the `!python` training cell can run it.
with open("train_lora_pd_coach.py","w",encoding="utf-8") as f:
    f.write(train_script)

infer_script = r'''
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Load base + LoRA adapter and run a simple chat.
Usage:
python infer_lora_pd_coach.py --base /path/to/base --adapter ./outputs/pd_coach_adapter --prompt "I need a Do Now for linear functions tomorrow."

Note: This uses a simple System/User/Assistant template matching the training renderer.
"""

import argparse, torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

def render(system, user):
    """Build the inference prompt in the System/User/Assistant text layout.

    The "System:" section is emitted only when `system` is truthy; the prompt
    always ends with an open "Assistant:" header for the model to continue.
    """
    header = [f"System:\n{system}\n"] if system else []
    return "\n".join(header + [f"User:\n{user}\n", "Assistant:\n"])

def main():
    """Load the base model plus LoRA adapter and print one generated reply.

    CLI flags: --base (model path or repo), --adapter (LoRA adapter dir),
    --prompt (user message), --max_new_tokens, --temperature.

    A temperature above 0 enables sampling at that temperature; a temperature
    of 0 (or below) selects greedy decoding. The decoded text that is printed
    contains the prompt followed by the generated continuation.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--base", required=True)
    ap.add_argument("--adapter", required=True)
    ap.add_argument("--prompt", required=True)
    ap.add_argument("--max_new_tokens", type=int, default=512)
    ap.add_argument("--temperature", type=float, default=0.2)
    args = ap.parse_args()

    tok = AutoTokenizer.from_pretrained(args.base, use_fast=True)
    if tok.pad_token is None:
        tok.pad_token = tok.eos_token

    model = AutoModelForCausalLM.from_pretrained(args.base, device_map="auto")
    model = PeftModel.from_pretrained(model, args.adapter)

    system = ("You are a Kingsway Regional School District instructional coach. "
              "Follow district grading categories, WAG, Schoology structure, and reassessment policy. "
              "Give concrete, classroom-ready guidance and avoid PII.")
    text = render(system, args.prompt)
    inputs = tok(text, return_tensors="pt").to(model.device)

    # `temperature` only takes effect when sampling is switched on, so enable
    # do_sample explicitly instead of passing a temperature that is ignored.
    sampling = args.temperature > 0
    gen_kwargs = dict(
        max_new_tokens=args.max_new_tokens,
        do_sample=sampling,
        pad_token_id=tok.pad_token_id,
    )
    if sampling:
        gen_kwargs["temperature"] = args.temperature

    with torch.no_grad():
        out = model.generate(**inputs, **gen_kwargs)
    print(tok.decode(out[0], skip_special_tokens=True))

if __name__ == "__main__":
    main()

'''
# Persist the inference script next to the training script, then confirm both were written.
with open("infer_lora_pd_coach.py","w",encoding="utf-8") as f:
    f.write(infer_script)
print("Scripts written.")

Scripts written.


In [2]:
# 4) Choose your base model path or HF repo (must be accessible in this runtime)
# Strip stray whitespace from pasted input so the repo id / path resolves cleanly.
BASE_MODEL = input('Enter base model path or HF repo name (e.g., mistralai/Mistral-7B-Instruct-v0.2): ').strip()
print('Using:', BASE_MODEL)

Enter base model path or HF repo name (e.g., mistralai/Mistral-7B-Instruct-v0.2): mistralai/Mistral-7B-Instruct-v0.2
Using: mistralai/Mistral-7B-Instruct-v0.2


In [7]:
# 5) Train LoRA (adjust hyperparams as needed)
!python train_lora_pd_coach.py \
  --model_name_or_path "mistralai/Mistral-7B-Instruct-v0.2" \
  --train_jsonl pd_coach_train.jsonl \
  --val_jsonl pd_coach_train.jsonl \
  --output_dir ./pd_coach_adapter \
  --max_seq_len 1024 \
  --per_device_train_batch_size 1 \
  --gradient_accumulation_steps 16 \
  --learning_rate 2e-4 \
  --num_train_epochs 2 \
  --lora_r 16 --lora_alpha 32 --lora_dropout 0.05 \
  --bits 4


2025-08-05 18:20:38.799098: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754418039.110343    2317 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754418039.196094    2317 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-08-05 18:20:39.871561: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_http.py", l

In [10]:
# 6) Quick inference
PROMPT = "Tighten my Do Now for factoring quadratics to align to LI/SC."


In [None]:
# 7) Optional: rubric scoring on your generations
# Prepare a JSONL file named generations.jsonl with lines like: {"prompt":"...","output":"..."}
from google.colab import files
print('Upload generations.jsonl to score (or skip).')
_up = files.upload()
if 'generations.jsonl' in _up:
  # write rubric scorer
  rubric_src = '''
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Rubric scorer for PD Coach generations.
Scores each output on five dimensions:
1) actionable_next_step (0/1/2)
2) li_sc_alignment (0/1/2)
3) policy_compliance (0/1/2)
4) clarity (0/1/2)  - based on sentence length & structure cues
5) tone (0/1/2)     - supportive/coach-like vs punitive/harsh

Input: JSONL with {"prompt": "...", "output": "..."}
Output: CSV summary + per-example JSONL with sub-scores.

Run:
python rubric_scorer.py --inputs ./generations.jsonl --out_csv ./scores.csv --out_jsonl ./scored.jsonl
"""

import re, json, csv, argparse, math

def flesch_reading_ease(text: str) -> float:
    """Approximate the Flesch reading-ease score of `text`.

    Sentences are counted as runs of '.', '!' or '?' (never fewer than one),
    words as regex word tokens, and syllables as runs of vowels
    (a, e, i, o, u, y) inside each word. Returns 0.0 for text with no words.
    """
    tokens = re.findall(r'\b\w+\b', text)
    if not tokens:
        return 0.0
    n_words = len(tokens)
    n_sentences = max(1, len(re.findall(r'[.!?]+', text)))
    n_syllables = sum(len(re.findall(r'[aeiouyAEIOUY]+', tok)) for tok in tokens)
    return 206.835 - 1.015*(n_words/n_sentences) - 84.6*(n_syllables/n_words)

# Words/phrases counted as evidence of a concrete action; read by score_actionable.
ACTION_WORDS = [
    "do now","exit ticket","timebox","create","post","collect","rotate","assign","draft","use","run","require",
    "offer","provide","model","check","reassess","tag","log","share","display","pin"
]
# Terms counted by score_policy.
# NOTE(review): presumably district policy/platform vocabulary -- confirm the list with its owners.
POLICY_WORDS = [
    "Supportive","Secondary","Primary","Schoology","WAG","reassessment","LDA","Genesis","success criteria","learning intention"
]
# Terms that raise the tone score in score_tone.
SUPPORTIVE_TONE = [
    "Try","Consider","Offer","Invite","Provide","Support","Coach","feedback","celebrate","anonymized"
]
# Terms that lower the tone score in score_tone.
HARSH_TONE = [
    "punish","dock points","zero","lazy","cheat","fail immediately"
]

def score_actionable(text: str) -> int:
    """Score how actionable `text` is (0, 1 or 2).

    2: at least three list-style lines (numbered, "-" or "•") AND at least one
       ACTION_WORDS phrase;
    1: at least one list-style line OR one action phrase;
    0: neither.

    Action phrases are matched case-insensitively at the start of a word, so
    inflected forms ("creates", "posting") still count, while a phrase buried
    inside another word ("log" in "Schoology", "use" in "because") does not.
    """
    steps = len(re.findall(r'^\s*(?:\d+[\).\s]|[-•])', text, flags=re.MULTILINE))
    has_actions = any(
        re.search(r'\b' + re.escape(w), text, re.I) for w in ACTION_WORDS
    )
    if steps >= 3 and has_actions:
        return 2
    if steps >= 1 or has_actions:
        return 1
    return 0

def score_li_sc(text: str) -> int:
    """Return how many of the phrases "learning intention" and "success criteria"
    occur in `text` as whole words, ignoring case (0, 1 or 2)."""
    mentions_li = re.search(r'\blearning intention\b', text, re.I) is not None
    mentions_sc = re.search(r'\bsuccess criteria\b', text, re.I) is not None
    return int(mentions_li) + int(mentions_sc)

def score_policy(text: str) -> int:
    """Score policy vocabulary coverage: 2 when at least three distinct
    POLICY_WORDS entries occur in `text` as whole words (case-insensitive),
    1 when one or two occur, 0 when none do."""
    matched = [
        term for term in POLICY_WORDS
        if re.search(r'\b'+re.escape(term)+r'\b', text, re.I)
    ]
    if len(matched) >= 3:
        return 2
    return 1 if matched else 0

def score_clarity(text: str) -> int:
    """Score clarity from readability and average sentence length.

    2: reading ease >= 50 and average sentence length <= 25 words;
    1: reading ease >= 35 and average sentence length <= 30 words;
    0: otherwise.
    """
    readability = flesch_reading_ease(text)
    sentence_count = max(1, len(re.findall(r'[.!?]+', text)))
    word_count = len(re.findall(r'\b\w+\b', text))
    mean_sentence_len = word_count / sentence_count
    # Tiers are checked from strictest to loosest; the first match wins.
    for grade, min_ease, max_len in ((2, 50, 25), (1, 35, 30)):
        if readability >= min_ease and mean_sentence_len <= max_len:
            return grade
    return 0

def score_tone(text: str) -> int:
    """Score tone from whole-word, case-insensitive vocabulary hits.

    2: at least two SUPPORTIVE_TONE terms and no HARSH_TONE term;
    1: at least one supportive term and at most one harsh term;
    0: otherwise.
    """
    def _hits(vocab):
        # Number of distinct vocabulary entries found in the text.
        return sum(1 for w in vocab if re.search(r'\b'+re.escape(w)+r'\b', text, re.I))

    supportive = _hits(SUPPORTIVE_TONE)
    harsh = _hits(HARSH_TONE)
    if supportive >= 2 and harsh == 0:
        return 2
    if supportive >= 1 and harsh <= 1:
        return 1
    return 0

def score_record(out: str) -> dict:
    """Run every rubric scorer on `out` and return the sub-scores keyed by dimension."""
    scorers = (
        ("actionable_next_step", score_actionable),
        ("li_sc_alignment", score_li_sc),
        ("policy_compliance", score_policy),
        ("clarity", score_clarity),
        ("tone", score_tone),
    )
    return {dimension: scorer(out) for dimension, scorer in scorers}

def main():
    """Score every record of a generations JSONL file and write the results.

    CLI flags: --inputs (JSONL with "prompt"/"output" per line), --out_csv
    (one row of sub-scores plus total per record) and --out_jsonl (each input
    record extended with "scores" and "total"). Blank input lines are skipped.
    A record whose "output" is missing or null is scored as empty text.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--inputs", required=True, help="JSONL file with {'prompt','output'} per line")
    ap.add_argument("--out_csv", required=True)
    ap.add_argument("--out_jsonl", required=True)
    args = ap.parse_args()

    with open(args.inputs, "r", encoding="utf-8") as f:
        rows = [json.loads(line) for line in f if line.strip()]

    scored = []
    with open(args.out_csv, "w", newline="", encoding="utf-8") as cf:
        writer = csv.writer(cf)
        writer.writerow(["idx","actionable","li_sc","policy","clarity","tone","total"])
        for i, r in enumerate(rows):
            # `or ""` also covers an explicit null, which the regex scorers cannot handle.
            out = r.get("output") or ""
            scores = score_record(out)
            total = sum(scores.values())
            writer.writerow([i, scores["actionable_next_step"], scores["li_sc_alignment"], scores["policy_compliance"], scores["clarity"], scores["tone"], total])
            r2 = dict(r)
            r2["scores"]=scores
            r2["total"]=total
            scored.append(r2)

    with open(args.out_jsonl, "w", encoding="utf-8") as jf:
        for r in scored:
            jf.write(json.dumps(r, ensure_ascii=False) + "\n")

    print(f"Scored {len(scored)} records. Saved CSV to {args.out_csv} and JSONL to {args.out_jsonl}.")

if __name__ == "__main__":
    main()

'''
  with open('rubric_scorer.py','w',encoding='utf-8') as f:
      f.write(rubric_src)
  !python rubric_scorer.py --inputs generations.jsonl --out_csv scores.csv --out_jsonl scored.jsonl
  from google.colab import files
  files.download('scores.csv')
  files.download('scored.jsonl')
else:
  print('Skipped scoring.')