In [None]:
# Uninstall any existing copies of the training libraries first (output discarded).
!pip -q uninstall -y transformers trl peft accelerate > /dev/null
# Install the stack used below: transformers, trl and peft are pinned exactly,
# accelerate has a lower bound, bitsandbytes and datasets are left unpinned.
!pip -q install "transformers==4.45.2" "trl==0.9.6" "peft==0.12.0" "accelerate>=0.34.2" bitsandbytes datasets


In [None]:
import os, re, json, random, gc, math
import numpy as np
import pandas as pd
import torch
from datasets import Dataset, DatasetDict
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from trl import SFTTrainer
from peft import LoraConfig, PeftModel

# Reproducibility: seed Python, NumPy and PyTorch with the same value.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Base checkpoint to fine-tune. The 3B model fits a T4 comfortably;
# move up to 7B if there is VRAM headroom.
BASE_MODEL = "Qwen/Qwen2.5-3B-Instruct"

# CSV holding the essays and their six rubric scores (Kaggle input mount).
DATA_PATH = "/kaggle/input/essaydata/train.csv"

def set_pad_token(tok):
    """Make sure ``tok`` has a padding token, reusing EOS when none is set.

    The tokenizer is modified in place and the same object is returned so the
    call can be used in an assignment.
    """
    if tok.pad_token is not None:
        return tok
    tok.pad_token = tok.eos_token
    return tok


In [None]:
# Columns used downstream: the essay text plus the six rubric scores.
needed = [
    "full_text",
    "cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions",
]
# Keep only those columns, drop rows with any missing value, renumber from 0.
df = (
    pd.read_csv(DATA_PATH)
    .loc[:, needed]
    .dropna()
    .reset_index(drop=True)
)

def build_pair(r):
    """Turn one dataset row into an instruction/answer pair for fine-tuning.

    Args:
        r: Mapping-like row (e.g. a pandas Series) providing ``full_text`` and
            the six score columns.

    Returns:
        A dict with ``"input"`` (the grading instruction followed by the essay)
        and ``"output"`` (a JSON string holding the six scores as floats).
    """
    dimensions = ("cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions")
    header = (
        "Grade this student essay on six dimensions (1.0–5.0, halves allowed): "
        "cohesion, syntax, vocabulary, phraseology, grammar, conventions. "
        "Return STRICT JSON with exactly these keys and numeric values."
    )
    prompt = header + f"\n\nEssay:\n{r['full_text']}"
    target = {name: float(r[name]) for name in dimensions}
    return {"input": prompt, "output": json.dumps(target, ensure_ascii=False)}

# One prompt/answer pair per essay row.
pairs = [build_pair(r) for _, r in df.iterrows()]

# Shuffle with a dedicated RNG seeded from SEED so the 90/10 split is the same
# every time this cell runs, no matter how much of the global `random` stream
# earlier executions have already consumed.
random.Random(SEED).shuffle(pairs)
split = int(0.9 * len(pairs))
ds = DatasetDict({
    "train": Dataset.from_list(pairs[:split]),
    "test":  Dataset.from_list(pairs[split:]),
})

# Tokenizer for the base model; falls back to EOS for padding if no pad token is set.
tok = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True)
tok = set_pad_token(tok)

def to_chat(example):
    """Render one input/output pair as a single training string.

    The text is built with the tokenizer's own chat template, the same one
    ``predict_ell_scores`` uses to build prompts, so the model is trained on
    exactly the role markers it will see at inference time. The assistant
    turn is included, so it is closed by whatever end-of-turn marker the
    template emits.

    Args:
        example: Mapping with ``"input"`` (user prompt) and ``"output"``
            (target JSON string).

    Returns:
        ``{"text": <rendered conversation>}``.
    """
    system_prompt = "You are a strict writing teacher. Always return strict JSON with the six scores."
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": example["input"]},
        {"role": "assistant", "content": example["output"]},
    ]
    # add_generation_prompt=False: the conversation already ends with the answer.
    text = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
    return {"text": text}

# Replace the raw input/output columns with the single rendered "text" column.
ds = ds.map(
    to_chat,
    remove_columns=ds["train"].column_names,
)
# Show (train size, test size).
tuple(len(ds[part]) for part in ("train", "test"))



In [None]:
torch.cuda.empty_cache(); gc.collect()

# Choose the precision the GPU can really execute. bfloat16 needs compute
# capability >= 8.0 (Ampere or newer); older cards such as the T4 named in
# the BASE_MODEL comment have no native bf16 and must use float16 instead.
# Without CUDA everything stays in float32.
has_cuda = torch.cuda.is_available()
use_bf16 = has_cuda and torch.cuda.get_device_capability()[0] >= 8
use_fp16 = has_cuda and not use_bf16
if use_bf16:
    compute_dtype = torch.bfloat16
elif use_fp16:
    compute_dtype = torch.float16
else:
    compute_dtype = torch.float32

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    load_in_4bit=True,  # QLoRA path
    device_map="auto",
    torch_dtype=compute_dtype,
)

peft_cfg = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj","k_proj","v_proj","o_proj"],  # Qwen attention projections
    task_type="CAUSAL_LM",  # wrap the model as a causal-LM PEFT model
)

args = TrainingArguments(
    output_dir="./qwen-ell-lora",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,  # less accumulation = faster, but less stable
    learning_rate=5e-4,
    # Training length is controlled by max_steps alone: a positive max_steps
    # overrides num_train_epochs, so setting both was misleading.
    max_steps=100,
    logging_steps=10,
    save_steps=50,
    bf16=use_bf16,
    fp16=use_fp16,
    optim="paged_adamw_32bit",
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    max_grad_norm=0.3,
)


trainer = SFTTrainer(
    model=model,
    tokenizer=tok,
    train_dataset=ds["train"],
    eval_dataset=ds["test"],
    dataset_text_field="text",
    peft_config=peft_cfg,
    max_seq_length=1024,   # if OOM: drop to 768
    args=args,
)

trainer.train()

# Also write the final adapter into output_dir itself, so loading directly
# from "./qwen-ell-lora" works even when no checkpoint-* folder is selected.
trainer.save_model()


In [None]:
# Reload a fresh 4-bit base model and attach the newest LoRA checkpoint.
# bfloat16 is requested only where the GPU supports it natively (compute
# capability >= 8.0); older GPUs such as the T4 get float16, CPU gets float32.
if not torch.cuda.is_available():
    infer_dtype = torch.float32
elif torch.cuda.get_device_capability()[0] >= 8:
    infer_dtype = torch.bfloat16
else:
    infer_dtype = torch.float16

base = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    load_in_4bit=True,
    device_map="auto",
    torch_dtype=infer_dtype,
)

# Pick the checkpoint with the highest step number inside ./qwen-ell-lora.
# Only names of the exact form "checkpoint-<digits>" are considered, so a stray
# entry such as "checkpoint-tmp" cannot break the integer sort.
adapter_root = "./qwen-ell-lora"
ckpts = sorted(
    (p for p in os.listdir(adapter_root) if re.fullmatch(r"checkpoint-\d+", p)),
    key=lambda x: int(x.split("-")[-1]),
)
adapter_path = os.path.join(adapter_root, ckpts[-1]) if ckpts else adapter_root
ft = PeftModel.from_pretrained(base, adapter_path)
ft.eval()  # inference only: make sure dropout is disabled

@torch.inference_mode()
def predict_ell_scores(essay: str, max_new_tokens=220, temperature=0.2, top_p=0.9):
    """Ask the fine-tuned model for the six ELL scores of ``essay``.

    Args:
        essay: Essay text to grade.
        max_new_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature. ``0``/``None`` (or any non-positive
            value) switches to greedy decoding instead of sampling.
        top_p: Nucleus-sampling threshold; only used when sampling.

    Returns:
        The first JSON object found in the model's reply, parsed into a dict.
        If the reply contains no ``{`` or the text after it is not valid JSON,
        returns ``{"raw": <decoded reply>}`` instead of raising.
    """
    system_prompt = "You are a strict writing teacher. Always return strict JSON with the six scores."
    user = ("Grade this student essay on six dimensions (1.0–5.0, halves allowed): "
            "cohesion, syntax, vocabulary, phraseology, grammar, conventions. "
            "Return STRICT JSON with exactly these keys and numeric values."
            f"\n\nEssay:\n{essay}")
    prompt = tok.apply_chat_template(
        [{"role":"system","content":system_prompt},{"role":"user","content":user}],
        tokenize=False, add_generation_prompt=True
    )
    ids = tok([prompt], return_tensors="pt").to(ft.device)

    # Sampling knobs are only valid together with do_sample=True; a
    # non-positive temperature means "decode greedily".
    gen_kwargs = {"max_new_tokens": max_new_tokens, "pad_token_id": tok.eos_token_id}
    if temperature and temperature > 0:
        gen_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)
    else:
        gen_kwargs["do_sample"] = False
    out = ft.generate(**ids, **gen_kwargs)

    # Decode only the newly generated tokens (everything after the prompt).
    txt = tok.decode(out[0][ids["input_ids"].shape[-1]:], skip_special_tokens=True)

    start = txt.find("{")
    if start == -1:
        return {"raw": txt}
    cleaned = re.sub(r",\s*([}\]])", r"\1", txt[start:])  # remove trailing commas
    try:
        # raw_decode parses the first complete JSON value and ignores any
        # trailing chatter, unlike a greedy "{...}" regex match.
        parsed, _ = json.JSONDecoder().raw_decode(cleaned)
    except json.JSONDecodeError:
        return {"raw": txt}
    if not isinstance(parsed, dict):
        return {"raw": txt}
    return parsed

# Smoke test: score the first essay in the dataframe and display the result.
sample_text = df.at[0, "full_text"]
pred6 = predict_ell_scores(sample_text)
pred6



In [None]:
def map_ell_to_agent(ell_json):
    """Collapse the six ELL rubric scores into four agent-rubric proxies.

    Args:
        ell_json: Mapping with keys ``cohesion``, ``syntax``, ``vocabulary``,
            ``phraseology``, ``grammar`` and ``conventions``; each value must
            be convertible with ``float()`` (scores are expected on a 1–5 scale).

    Returns:
        Dict with ``relevance_score``, ``grammar_score``, ``structure_score``
        and ``depth_score``. Each is an average of the relevant inputs divided
        by 5, so inputs in 1–5 produce values in 0.2–1.0.

    Raises:
        KeyError: If any of the six keys is missing.
    """
    cohesion, syntax, vocabulary, phraseology, grammar_raw, conventions = (
        float(ell_json[key])
        for key in ("cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions")
    )
    max_score = 5.0

    return {
        "relevance_score": (cohesion + vocabulary + phraseology) / 3.0 / max_score,
        # conventions counts half as much as grammar in the grammar proxy
        "grammar_score":   (grammar_raw + 0.5 * conventions) / 1.5 / max_score,
        "structure_score": (cohesion + syntax) / 2.0 / max_score,
        "depth_score":     (vocabulary + phraseology + syntax) / 3.0 / max_score,
    }

def combine_final(scores, weights=(0.3,0.2,0.2,0.3)):
    """Weighted sum of the four rubric proxies, rounded to 4 decimal places.

    Args:
        scores: Mapping with ``relevance_score``, ``grammar_score``,
            ``structure_score`` and ``depth_score``.
        weights: Four weights applied in that same order.

    Returns:
        The weighted total as a ``float``.
    """
    ordered_keys = ("relevance_score", "grammar_score", "structure_score", "depth_score")
    components = [scores[key] for key in ordered_keys]
    weighted_total = sum(value * weights[pos] for pos, value in enumerate(components))
    return float(round(weighted_total, 4))

# Map the smoke-test prediction to the four rubric proxies and display them
# together with the weighted final score.
mapped = map_ell_to_agent(pred6)
final_score = combine_final(mapped)
{**mapped, "final_score": final_score}


In [None]:
def grade_essay(essay: str):
    """Grade ``essay`` end to end: model scores, rubric proxies, final score.

    Args:
        essay: Essay text to grade.

    Returns:
        On success, a dict with ``ell_scores`` (the six raw model scores), the
        four proxy scores and ``final_score``. If the model reply does not
        contain all six numeric scores, returns
        ``{"error": "model did not return JSON", "raw": <model reply>}``.
    """
    required = ("cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions")
    ell = predict_ell_scores(essay)
    # Every dimension must be present, not just the first one; otherwise the
    # mapping step below would raise KeyError on a partial reply.
    if not isinstance(ell, dict) or any(key not in ell for key in required):
        return {"error": "model did not return JSON", "raw": ell}
    try:
        scores4 = map_ell_to_agent(ell)
    except (TypeError, ValueError):
        # A score was present but not numeric (e.g. null or a word).
        return {"error": "model did not return JSON", "raw": ell}
    scores4["final_score"] = combine_final(scores4)
    return {"ell_scores": ell, **scores4}

# Try it: run the full pipeline (model scores -> rubric proxies -> final score)
# on a hand-written sample essay and display the returned dict.
student_essay = """The Impact of Artificial Intelligence on Modern Society

    Artificial Intelligence (AI) has become an integral part of our daily lives, 
    revolutionizing various sectors including healthcare, finance, and transportation. 
    This essay explores the profound effects of AI on modern society, discussing both 
    its benefits and potential challenges.

    One of the most significant impacts of AI is in the healthcare industry. 
    AI-powered diagnostic tools can analyze medical images with high accuracy, 
    often surpassing human capabilities. This leads to earlier detection of diseases 
    and more effective treatment plans. Moreover, AI algorithms can process vast 
    amounts of medical data to identify patterns and insights that might escape 
    human observation, potentially leading to breakthroughs in drug discovery and 
    personalized medicine.

    In the financial sector, AI has transformed the way transactions are processed 
    and monitored. Machine learning algorithms can detect fraudulent activities in 
    real-time, enhancing security for consumers and institutions alike. Robo-advisors 
    use AI to provide personalized investment advice, democratizing access to 
    financial planning services.

    The transportation industry is another area where AI is making significant strides. 
    Self-driving cars, powered by complex AI systems, promise to reduce accidents 
    caused by human error and provide mobility solutions for those unable to drive. 
    In logistics, AI optimizes routing and inventory management, leading to more 
    efficient supply chains and reduced environmental impact.

    However, the rapid advancement of AI also presents challenges. There are concerns 
    about job displacement as AI systems become capable of performing tasks 
    traditionally done by humans. This raises questions about the need for retraining 
    and reskilling the workforce to adapt to an AI-driven economy.

    Privacy and ethical concerns also arise with the increasing use of AI. The vast 
    amount of data required to train AI systems raises questions about data privacy 
    and consent. Additionally, there are ongoing debates about the potential biases 
    in AI algorithms and the need for transparent and accountable AI systems.

    In conclusion, while AI offers tremendous benefits and has the potential to solve 
    some of humanity's most pressing challenges, it also requires careful consideration 
    of its societal implications. As we continue to integrate AI into various aspects 
    of our lives, it is crucial to strike a balance between technological advancement 
    and ethical considerations, ensuring that the benefits of AI are distributed 
    equitably across society."""
grade_essay(student_essay)
