In [18]:
import os
import torch
import random
from datasets import load_from_disk
import evaluate

from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    DataCollatorForSeq2Seq,
    TrainingArguments,
    Trainer,
)

from peft import LoraConfig, get_peft_model


In [20]:
class CFG:
    """Notebook-wide settings: data locations, LoRA and training hyperparameters.

    All values are plain class attributes, so they can be read either from the
    class itself or from an instance (``cfg = CFG()``).
    """

    # Sprint 1 processed dataset (loaded with load_from_disk)
    processed_dir = "../data/processed/t5-small-512"

    # Root folder for fine-tuned model outputs. Set the RLHF_MODELS_DIR
    # environment variable to run the notebook on another machine; when it is
    # unset the original absolute location is used, so existing runs are unaffected.
    models_dir = os.environ.get(
        "RLHF_MODELS_DIR",
        "/Users/dhruvyellanki/Documents/Projects/RLHF_News_Summarization_System/data/models",
    )
    t5_large_out = os.path.join(models_dir, "t5-large")
    bart_large_out = os.path.join(models_dir, "BART-large")

    # LoRA config
    r = 8            # adapter rank (passed as LoraConfig.r)
    alpha = 16       # passed as LoraConfig.lora_alpha
    dropout = 0.05   # passed as LoraConfig.lora_dropout

    # Training
    epochs = 1
    lr = 2e-4
    batch_t5_large = 1     # safer for T5-large on MPS
    batch_bart_large = 2   # BART can handle 2 on MPS

    # Generation
    max_new_tokens = 128

# Single shared settings object read by the cells below.
cfg = CFG()

# Pick Apple's Metal (MPS) backend when PyTorch reports it as available and
# fall back to the CPU otherwise. CUDA is deliberately never selected.
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
device

device(type='mps')

In [21]:
# Read the processed DatasetDict from the folder configured in cfg.
dataset = load_from_disk(cfg.processed_dir)

# Only the train and validation splits are bound to names here.
train_ds, val_ds = dataset["train"], dataset["validation"]

dataset

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 14355
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 668
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 574
    })
})

In [22]:
def apply_lora(model, target_modules=None):
    """Wrap a seq2seq model with LoRA adapters configured from ``cfg``.

    Args:
        model: The model to wrap; handed unchanged to ``get_peft_model``.
        target_modules: Names of the sub-modules that receive adapters,
            forwarded as ``LoraConfig.target_modules``. When omitted,
            ``["q", "v"]`` is used (the attention projections, as in the
            original T5 setup). Pass e.g. ``["q_proj", "v_proj"]`` for
            architectures that name these layers differently.

    Returns:
        Whatever ``get_peft_model`` returns for ``model`` and the built config.
    """
    if target_modules is None:
        target_modules = ["q", "v"]  # hooks into attention projections

    lora_cfg = LoraConfig(
        r=cfg.r,
        lora_alpha=cfg.alpha,
        lora_dropout=cfg.dropout,
        bias="none",
        task_type="SEQ_2_SEQ_LM",
        target_modules=target_modules,
    )
    return get_peft_model(model, lora_cfg)


In [23]:
# Checkpoint identifier handed to both from_pretrained calls.
t5_name = "t5-large"

t5_tokenizer = AutoTokenizer.from_pretrained(t5_name)

# Load the base weights and attach the LoRA adapters in one step,
# then move the wrapped model onto the selected device.
t5_model = apply_lora(AutoModelForSeq2SeqLM.from_pretrained(t5_name))
t5_model.to(device)

t5_model


PeftModelForSeq2SeqLM(
  (base_model): LoraModel(
    (model): T5ForConditionalGeneration(
      (shared): Embedding(32128, 1024)
      (encoder): T5Stack(
        (embed_tokens): Embedding(32128, 1024)
        (block): ModuleList(
          (0): T5Block(
            (layer): ModuleList(
              (0): T5LayerSelfAttention(
                (SelfAttention): T5Attention(
                  (q): lora.Linear(
                    (base_layer): Linear(in_features=1024, out_features=1024, bias=False)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.05, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default): Linear(in_features=1024, out_features=8, bias=False)
                    )
                    (lora_B): ModuleDict(
                      (default): Linear(in_features=8, out_features=1024, bias=False)
                    )
                    (lora_embedding_A): ParameterDict()
        

In [24]:
# Batch collator for the T5 run; it is given both the tokenizer and the model.
t5_collator = DataCollatorForSeq2Seq(tokenizer=t5_tokenizer, model=t5_model)

In [25]:
# Trainer settings for the T5-large run. Anything not listed here
# (evaluation/save strategy, fp16/bf16, report_to) stays at the library default.
t5_args = TrainingArguments(
    output_dir=cfg.t5_large_out,
    num_train_epochs=cfg.epochs,
    learning_rate=cfg.lr,
    per_device_eval_batch_size=cfg.batch_t5_large,
    per_device_train_batch_size=cfg.batch_t5_large,
    logging_steps=50,  # training loss is reported every 50 steps
)

In [26]:
# Assemble the Trainer for the T5-large LoRA run.
# `processing_class` is the current keyword for passing the tokenizer; the
# older `tokenizer=` keyword is deprecated on Trainer and emits a warning.
t5_trainer = Trainer(
    model=t5_model,
    args=t5_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    processing_class=t5_tokenizer,
    data_collator=t5_collator,
)

  t5_trainer = Trainer(


In [27]:
# Make sure the output folder exists (no error if it is already there).
os.makedirs(cfg.t5_large_out, exist_ok=True)

# Run fine-tuning with the settings held by t5_trainer.
t5_trainer.train()

# Persist the LoRA-wrapped model and its tokenizer side by side in the same folder.
t5_model.save_pretrained(cfg.t5_large_out)
t5_tokenizer.save_pretrained(cfg.t5_large_out)

print("T5-Large LoRA saved to:", cfg.t5_large_out)




Step,Training Loss
50,6.6205
100,1.6633
150,0.9333
200,0.6583
250,0.815
300,0.8429
350,0.9103
400,0.7432
450,0.9083
500,0.7132


T5-Large LoRA saved to: /Users/dhruvyellanki/Documents/Projects/RLHF_News_Summarization_System/data/models/t5-large


In [28]:
# ROUGE metric object shared by every evaluate_model call.
rouge = evaluate.load("rouge")


def evaluate_model(model, tokenizer, ds, batch_size, num_beams=4):
    """Generate summaries for every row of ``ds`` and score them with ROUGE.

    Args:
        model: Model exposing ``eval()``, ``parameters()`` and ``generate()``.
            It is switched to eval mode and left in that mode.
        tokenizer: Tokenizer used to decode both generated ids and label ids.
        ds: Dataset whose slices yield ``input_ids``, ``attention_mask`` and
            ``labels`` columns.
        batch_size: Number of rows generated per forward pass; must be >= 1.
        num_beams: Beam width passed to ``generate`` (default 4).

    Returns:
        The result of ``rouge.compute(..., use_stemmer=True)`` over all rows.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # Send inputs to wherever the weights actually live instead of trusting the
    # notebook-level `device`, so a model kept on another device still works.
    model_device = next(model.parameters()).device

    preds, refs = [], []
    model.eval()

    for start in range(0, len(ds), batch_size):
        batch = ds[start:start + batch_size]

        # torch.tensor needs rectangular data: this assumes all rows in the
        # batch have the same length (already padded) - TODO confirm.
        input_ids = torch.tensor(batch["input_ids"]).to(model_device)
        attn_mask = torch.tensor(batch["attention_mask"]).to(model_device)

        with torch.no_grad():
            out = model.generate(
                input_ids=input_ids,
                attention_mask=attn_mask,
                max_new_tokens=cfg.max_new_tokens,
                num_beams=num_beams,
            )

        preds.extend(tokenizer.batch_decode(out, skip_special_tokens=True))

        # -100 marks label positions that are not real tokens; strip them
        # before decoding the reference summaries.
        labels = [[tok for tok in seq if tok != -100] for seq in batch["labels"]]
        refs.extend(tokenizer.batch_decode(labels, skip_special_tokens=True))

    return rouge.compute(predictions=preds, references=refs, use_stemmer=True)

# Score the LoRA-tuned T5 on the validation split, batching with the T5 batch size.
t5_scores = evaluate_model(
    model=t5_model,
    tokenizer=t5_tokenizer,
    ds=val_ds,
    batch_size=cfg.batch_t5_large,
)
t5_scores


{'rouge1': np.float64(0.4493530154349382),
 'rouge2': np.float64(0.2222625971649141),
 'rougeL': np.float64(0.32033624940882793),
 'rougeLsum': np.float64(0.3204991723085763)}

These ROUGE results indicate that the model performs strongly on extractive and abstractive news summarization, having been fine-tuned with parameter-efficient LoRA on a Mac MPS device. On the validation split it reaches ROUGE-1 ≈ 0.45, ROUGE-2 ≈ 0.22, and ROUGE-L ≈ 0.32. A ROUGE-1 score around 0.41–0.45, ROUGE-2 around 0.18–0.22, and ROUGE-L around 0.28–0.32 are typical for competitive baseline models on this task, meaning the model is already capturing the important words and sentence structure from the articles; coherence is not measured by ROUGE and should be confirmed by reading the sample summaries printed later in this notebook. These scores indicate that the system produces summaries that overlap substantially with the human-written references and are aligned with the learning objective of the project: creating a summarizer that is accurate, concise, and ready to be further improved through a human feedback loop (Sprint 3 with TRLX). Overall, these ROUGE scores show that the system is functioning correctly, is well trained for Sprint 2, and is strong enough to move confidently into the RLHF phase.

## BART-LARGE + LoRA

In [30]:
from peft import LoraConfig, get_peft_model

def apply_lora_bart(model):
    """Attach LoRA adapters to a BART-style model.

    Same hyperparameters as the T5 helper (taken from ``cfg``), but the
    adapters target the ``q_proj`` and ``v_proj`` sub-modules.

    Returns:
        The result of ``get_peft_model`` for ``model`` and the built config.
    """
    bart_lora_cfg = LoraConfig(
        task_type="SEQ_2_SEQ_LM",
        target_modules=["q_proj", "v_proj"],
        bias="none",
        r=cfg.r,
        lora_alpha=cfg.alpha,
        lora_dropout=cfg.dropout,
    )
    wrapped = get_peft_model(model, bart_lora_cfg)
    return wrapped


In [31]:
# Checkpoint identifier handed to both from_pretrained calls.
# NOTE(review): the "-cnn" suffix suggests this checkpoint was already
# fine-tuned for news summarization, unlike the plain "t5-large" checkpoint -
# confirm that this asymmetric comparison is intended.
bart_name = "facebook/bart-large-cnn"

bart_tokenizer = AutoTokenizer.from_pretrained(bart_name)

# Load the base weights and attach the BART-specific LoRA adapters,
# then move the wrapped model onto the selected device.
bart_model = apply_lora_bart(AutoModelForSeq2SeqLM.from_pretrained(bart_name))
bart_model.to(device)

bart_model

PeftModelForSeq2SeqLM(
  (base_model): LoraModel(
    (model): BartForConditionalGeneration(
      (model): BartModel(
        (shared): BartScaledWordEmbedding(50264, 1024, padding_idx=1)
        (encoder): BartEncoder(
          (embed_tokens): BartScaledWordEmbedding(50264, 1024, padding_idx=1)
          (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)
          (layers): ModuleList(
            (0-11): 12 x BartEncoderLayer(
              (self_attn): BartAttention(
                (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
                (v_proj): lora.Linear(
                  (base_layer): Linear(in_features=1024, out_features=1024, bias=True)
                  (lora_dropout): ModuleDict(
                    (default): Dropout(p=0.05, inplace=False)
                  )
                  (lora_A): ModuleDict(
                    (default): Linear(in_features=1024, out_features=8, bias=False)
                  )
                  (lora_B): Modul

In [32]:
# Batch collator for the BART run; it is given both the tokenizer and the model.
bart_collator = DataCollatorForSeq2Seq(tokenizer=bart_tokenizer, model=bart_model)


In [33]:
# Trainer settings for the BART-large run; mirrors the T5 settings except for
# the output folder and the per-device batch size.
bart_args = TrainingArguments(
    output_dir=cfg.bart_large_out,
    num_train_epochs=cfg.epochs,
    learning_rate=cfg.lr,
    per_device_eval_batch_size=cfg.batch_bart_large,
    per_device_train_batch_size=cfg.batch_bart_large,
    logging_steps=50,  # training loss is reported every 50 steps
)

In [34]:
# Assemble the Trainer for the BART-large LoRA run.
# `processing_class` is the current keyword for passing the tokenizer; the
# older `tokenizer=` keyword is deprecated on Trainer and emits a warning.
#
# NOTE(review): train_ds / val_ds come from a folder named "t5-small-512",
# which suggests their token ids were produced by a T5 tokenizer. If so, the
# ids do not correspond to BART's vocabulary and this run (and its ROUGE
# score) is not a valid comparison - confirm, and re-tokenize the raw text
# with bart_tokenizer if needed.
bart_trainer = Trainer(
    model=bart_model,
    args=bart_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    processing_class=bart_tokenizer,
    data_collator=bart_collator,
)


  bart_trainer = Trainer(


In [36]:
# Make sure the output folder exists (no error if it is already there).
os.makedirs(cfg.bart_large_out, exist_ok=True)

# Run fine-tuning with the settings held by bart_trainer.
bart_trainer.train()

# Persist the LoRA-wrapped model and its tokenizer side by side in the same folder.
bart_model.save_pretrained(cfg.bart_large_out)
bart_tokenizer.save_pretrained(cfg.bart_large_out)

print(" BART-Large LoRA saved to:", cfg.bart_large_out)


Step,Training Loss
50,6.9266
100,3.2942
150,3.1937
200,2.9183
250,2.9605
300,2.7543
350,2.6313
400,2.6572
450,2.5338
500,2.6069


 BART-Large LoRA saved to: /Users/dhruvyellanki/Documents/Projects/RLHF_News_Summarization_System/data/models/BART-large


In [37]:
# Score the LoRA-tuned BART on the same validation split used for T5.
# NOTE(review): references are decoded with bart_tokenizer from label ids
# stored in val_ds; if those ids came from a T5 tokenizer the decoded text
# is not meaningful - confirm the dataset's tokenizer before trusting this score.
bart_scores = evaluate_model(
    model=bart_model,
    tokenizer=bart_tokenizer,
    ds=val_ds,
    batch_size=cfg.batch_bart_large,
)
bart_scores


{'rouge1': np.float64(0.38356077267525845),
 'rouge2': np.float64(0.17914820700938355),
 'rougeL': np.float64(0.2532124026597712),
 'rougeLsum': np.float64(0.253238931012006)}

In [38]:
# Show both score dictionaries, then choose a winner on ROUGE-L alone.
print("T5-LARGE LoRA ROUGE:", t5_scores)
print("BART-LARGE LoRA ROUGE:", bart_scores)

# T5 is selected only on a strictly higher ROUGE-L; a tie goes to BART.
if t5_scores["rougeL"] > bart_scores["rougeL"]:
    best_model = "T5-Large"
else:
    best_model = "BART-Large"

print("\n BEST MODEL (by ROUGE-L):", best_model)


T5-LARGE LoRA ROUGE: {'rouge1': np.float64(0.4493530154349382), 'rouge2': np.float64(0.2222625971649141), 'rougeL': np.float64(0.32033624940882793), 'rougeLsum': np.float64(0.3204991723085763)}
BART-LARGE LoRA ROUGE: {'rouge1': np.float64(0.38356077267525845), 'rouge2': np.float64(0.17914820700938355), 'rougeL': np.float64(0.2532124026597712), 'rougeLsum': np.float64(0.253238931012006)}

 BEST MODEL (by ROUGE-L): T5-Large


In [39]:
def spot_check(model, tokenizer, ds, n=3):
    """Print generated and reference summaries for randomly chosen rows of ``ds``.

    Args:
        model: Model exposing ``eval()``, ``parameters()`` and ``generate()``.
            It is switched to eval mode and left in that mode.
        tokenizer: Tokenizer used to decode generated ids and label ids.
        ds: Indexable dataset whose rows provide ``input_ids``,
            ``attention_mask`` and ``labels``.
        n: Number of rows to sample. Values larger than ``len(ds)`` are
            reduced to ``len(ds)``; values below zero are treated as zero.

    Returns:
        None. Results are printed.
    """
    # Do not rely on an earlier cell having put the model in eval mode:
    # generating in training mode would keep dropout active.
    model.eval()

    # Send inputs to wherever the weights live rather than the notebook-level `device`.
    model_device = next(model.parameters()).device

    # random.sample raises ValueError when asked for more items than exist.
    sample_size = max(0, min(n, len(ds)))

    for idx in random.sample(range(len(ds)), sample_size):
        item = ds[idx]

        # unsqueeze(0) adds the leading batch dimension for a single row.
        input_ids = torch.tensor(item["input_ids"]).unsqueeze(0).to(model_device)
        attn_mask = torch.tensor(item["attention_mask"]).unsqueeze(0).to(model_device)

        with torch.no_grad():
            out = model.generate(
                input_ids=input_ids,
                attention_mask=attn_mask,
                num_beams=4,
                max_new_tokens=cfg.max_new_tokens,
            )

        pred = tokenizer.decode(out[0], skip_special_tokens=True)
        # -100 marks label positions that are not real tokens; drop them before decoding.
        ref = tokenizer.decode(
            [tok for tok in item["labels"] if tok != -100],
            skip_special_tokens=True,
        )

        print("=" * 80)
        print("PRED:", pred)
        print("-" * 80)
        print("REF :", ref)

print("\n====== SAMPLE SUMMARIES ======\n")

# Show samples only for the model that won the ROUGE-L comparison.
if best_model != "T5-Large":
    spot_check(bart_model, bart_tokenizer, val_ds)
else:
    spot_check(t5_model, t5_tokenizer, val_ds)




PRED: Tom Sosnik, 13, of Fresno, California, came out as a transgender teenager in front of his entire class . He first read the suicide note of Leelah Alcorn, another tragic teen who took her own life at the end of last year . Sosnik then told his fellow students; 'I am no longer Mia. I never really was' He also let other students know if they were struggling with their sexual identity or orientation, he was there for them .
--------------------------------------------------------------------------------
REF : Tom Sosnik came out as transgender in a speech to his fellow classmates . 'I am no longer Mia. I never really was,' Tom, 13, told the class . He began his speech by reading the tragic suicide note of another transgender teenager, Leelah Alcorn, who killed herself last year .
PRED: Manchester City crashed out of the Champions League to Barcelona . There is no English presence in the Europa League either after Everton’s harsh lesson against Dynamo Kiev on Thursday night . In Ger

In [7]:
# MERGE LoRA WEIGHTS INTO T5-LARGE
#
# This cell declares its own imports and paths, so it does not depend on
# names defined by earlier cells.

import os
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer
from peft import PeftModel

# Root folder for model artifacts. Set RLHF_MODELS_DIR to run on another
# machine; when unset the original absolute location is used.
MODELS_DIR = os.environ.get(
    "RLHF_MODELS_DIR",
    "/Users/dhruvyellanki/Documents/Projects/RLHF_News_Summarization_System/data/models",
)
BASE_MODEL_PATH = "t5-large"
LORA_MODEL_DIR = os.path.join(MODELS_DIR, "t5-large")
MERGED_OUTPUT_DIR = os.path.join(MODELS_DIR, "t5-large-merged")

# Device setup
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using MPS )")
else:
    device = torch.device("cpu")
    print("MPS not available - Using CPU")

# Verify the adapter folder before doing anything with side effects.
# isdir (not exists) so that a stray regular file at this path is rejected too.
if not os.path.isdir(LORA_MODEL_DIR):
    raise FileNotFoundError(f"LoRA model directory not found: {LORA_MODEL_DIR}")

print(f"\n Loading base T5-Large from HuggingFace: {BASE_MODEL_PATH}")
base_model = T5ForConditionalGeneration.from_pretrained(BASE_MODEL_PATH)
base_model = base_model.to(device)

print(f" Loading LoRA adapter from: {LORA_MODEL_DIR}")
try:
    lora_model = PeftModel.from_pretrained(base_model, LORA_MODEL_DIR)
    lora_model = lora_model.to(device)
except Exception as e:
    # Surface the failure in the cell output, then let the original error propagate.
    print(f"Error loading LoRA model: {e}")
    raise

print(" Merging LoRA weights into base model")
merged_model = lora_model.merge_and_unload()

# Move to CPU for saving
merged_model = merged_model.to("cpu")

# Create the output folder only now, so a failed load or merge does not
# leave an empty "merged" directory behind.
os.makedirs(MERGED_OUTPUT_DIR, exist_ok=True)

print(f" Saving merged model to: {MERGED_OUTPUT_DIR}")
merged_model.save_pretrained(MERGED_OUTPUT_DIR)

# Save tokenizer (read from the adapter folder, written next to the merged weights)
print(" Saving tokenizer")
tokenizer = T5Tokenizer.from_pretrained(LORA_MODEL_DIR)
tokenizer.save_pretrained(MERGED_OUTPUT_DIR)

print(f"\n SUCCESS! Merged T5-Large + LoRA model saved to:")
print(f"   {MERGED_OUTPUT_DIR}")

Using MPS )

 Loading base T5-Large from HuggingFace: t5-large
 Loading LoRA adapter from: /Users/dhruvyellanki/Documents/Projects/RLHF_News_Summarization_System/data/models/t5-large
 Loading LoRA adapter from: /Users/dhruvyellanki/Documents/Projects/RLHF_News_Summarization_System/data/models/t5-large
 Merging LoRA weights into base model
 Merging LoRA weights into base model
 Saving merged model to: /Users/dhruvyellanki/Documents/Projects/RLHF_News_Summarization_System/data/models/t5-large-merged
 Saving merged model to: /Users/dhruvyellanki/Documents/Projects/RLHF_News_Summarization_System/data/models/t5-large-merged
 Saving tokenizer

 SUCCESS! Merged T5-Large + LoRA model saved to:
   /Users/dhruvyellanki/Documents/Projects/RLHF_News_Summarization_System/data/models/t5-large-merged
 Saving tokenizer

 SUCCESS! Merged T5-Large + LoRA model saved to:
   /Users/dhruvyellanki/Documents/Projects/RLHF_News_Summarization_System/data/models/t5-large-merged
