In [None]:
import sys
import os

print("Python version:", sys.version)
print("Working directory:", os.getcwd())

!pip install -q transformers==4.44.2 datasets==3.0.2 evaluate==0.4.2 rouge_score==0.1.2
!pip install -q peft==0.11.1 accelerate==1.0.1
!pip install -q gradio==4.44.0 sentencepiece protobuf


try:
    import bitsandbytes
    print("Warning:bitsandbytes found,uninstalling")
    !pip uninstall -y bitsandbytes
    print("bitsandbytes removed")
except ImportError:
    print("bitsandbytes not installed(correct)")

import nltk
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)

print("\nAll packages installed successfully!")

In [3]:
#cell2
import json
import re
import random
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm.auto import tqdm
import warnings
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from datasets import Dataset, DatasetDict
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    DataCollatorForSeq2Seq,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
    set_seed
)
from peft import (
    LoraConfig,
    get_peft_model,
    TaskType,
    PeftModel,
    PeftConfig
)
import evaluate

# Report the PyTorch build and, when a GPU is visible, its name and capacity.
print(f"PyTorch version: {torch.__version__}")
cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")
    print(f"CUDA memory: {gpu_props.total_memory / 1e9:.2f} GB")

PyTorch version: 2.6.0+cu124
CUDA available: True
CUDA device: Tesla T4
CUDA memory: 15.83 GB


In [None]:
# Cell 3: configuration for the improved model run
CONFIG = {
    # Model / run identity
    "model_name": "facebook/bart-base",
    "run_name": "bart_lora_cnndm_improved",

    # Dataset location and column names
    "data_path": "/kaggle/input/newspaper-text-summarization-cnn-dailymail",
    "article_column": "article",
    "summary_column": "highlights",

    # Sequence lengths (tokens) and number of rows sampled per split
    "max_source_length": 1024,
    "max_target_length": 128,
    "num_train_samples": 50000,
    "num_val_samples": 2000,
    "num_test_samples": 2000,

    # LoRA adapter settings
    "lora_r": 32,
    "lora_alpha": 64,
    "lora_target_modules": ["q_proj", "v_proj", "k_proj", "out_proj"],
    "lora_dropout": 0.1,

    # Optimisation
    "learning_rate": 5e-5,
    "num_epochs": 4,
    "train_batch_size": 4,
    "eval_batch_size": 8,
    "gradient_accumulation_steps": 4,
    "warmup_ratio": 0.05,
    "weight_decay": 0.01,
    "lr_scheduler_type": "cosine",
    "label_smoothing_factor": 0.1,

    # Generation
    "generation_max_length": 128,
    "generation_num_beams": 4,
    "generation_length_penalty": 1.8,
    "generation_no_repeat_ngram_size": 3,

    # Mixed precision needs a CUDA device; fall back to fp32 on a CPU-only
    # kernel instead of failing when the training arguments are built.
    "fp16": torch.cuda.is_available(),
    "seed": 42,
    "logging_steps": 100,
    "eval_steps": 1000,
    "save_steps": 2000,
    "save_total_limit": 3,
    "output_dir": "/kaggle/working/checkpoints",
    "results_dir": "/kaggle/working/results",
}

# T5 checkpoints name their attention projections differently and are given a
# shorter source window here.
if "t5" in CONFIG["model_name"].lower():
    CONFIG["lora_target_modules"] = ["q", "v", "k", "o"]
    CONFIG["max_source_length"] = 512

print(f"\nData:")
print(f"  Training samples: {CONFIG['num_train_samples']:,} (5× increase)")
print(f"  Validation: {CONFIG['num_val_samples']:,}")
print(f"  Test: {CONFIG['num_test_samples']:,}")

print(f"\nLoRA:")
print(f"  Rank: {CONFIG['lora_r']} (2× increase)")
print(f"  Alpha: {CONFIG['lora_alpha']}")
print(f"  Target modules: {len(CONFIG['lora_target_modules'])} modules")

print(f"\nTraining:")
print(f"  Epochs: {CONFIG['num_epochs']}")
print(f"  Learning rate: {CONFIG['learning_rate']}")
print(f"  Effective batch: {CONFIG['train_batch_size'] * CONFIG['gradient_accumulation_steps']}")

# Rough estimate only: optimizer steps over all epochs, times an assumed
# 0.8 seconds per step, converted to minutes.
estimated_time = (CONFIG['num_train_samples'] / (CONFIG['train_batch_size'] * CONFIG['gradient_accumulation_steps'])) * CONFIG['num_epochs'] * 0.8 / 60
print(f"\nEstimated training time: ~{estimated_time:.0f} minutes ({estimated_time/60:.1f} hours)")


import os
os.makedirs(CONFIG["output_dir"], exist_ok=True)
os.makedirs(CONFIG["results_dir"], exist_ok=True)

from transformers import set_seed
set_seed(CONFIG["seed"])
print(f"\nRandom seed set to {CONFIG['seed']}")

In [8]:
# Cell 4: load dataset
# Resolve the CSV splits from CONFIG["data_path"] so the dataset location is
# configured in exactly one place.
data_dir = Path(CONFIG["data_path"]) / "cnn_dailymail"
train_df = pd.read_csv(data_dir / "train.csv")
val_df = pd.read_csv(data_dir / "validation.csv")
test_df = pd.read_csv(data_dir / "test.csv")

print(f"Dataset loaded:")
print(f"  Train: {len(train_df):,} samples")
print(f"  Validation: {len(val_df):,} samples")
print(f"  Test: {len(test_df):,} samples")

print(f"\nColumns: {train_df.columns.tolist()}")
print(f"Using: article → highlights")

# Show the first training row as a sanity check (article truncated to 500 chars).
print("\n" + "="*80)
print("SAMPLE ARTICLE & SUMMARY")
print("="*80)
sample = train_df.iloc[0]
print(f"\nARTICLE ({len(sample['article'])} chars):")
print(sample['article'][:500] + "...")
print(f"\nSUMMARY ({len(sample['highlights'])} chars):")
print(sample['highlights'])
print("="*80)

📥 Loading CNN/DailyMail dataset from Kaggle...
✅ Dataset loaded:
  Train: 287,113 samples
  Validation: 13,368 samples
  Test: 11,490 samples

📋 Columns: ['id', 'article', 'highlights']
✅ Using: article → highlights

📰 SAMPLE ARTICLE & SUMMARY

📄 ARTICLE (1211 chars):
By . Associated Press . PUBLISHED: . 14:11 EST, 25 October 2013 . | . UPDATED: . 15:36 EST, 25 October 2013 . The bishop of the Fargo Catholic Diocese in North Dakota has exposed potentially hundreds of church members in Fargo, Grand Forks and Jamestown to the hepatitis A virus in late September and early October. The state Health Department has issued an advisory of exposure for anyone who attended five churches and took communion. Bishop John Folda (pictured) of the Fargo Catholic Diocese in N...

✨ SUMMARY (220 chars):
Bishop John Folda, of North Dakota, is taking time off after being diagnosed .
He contracted the infection through contaminated food in Italy .
Church members in Fargo, Grand Forks an

In [9]:
#CELL5Data Preprocessing
def clean_text(text):
    """Normalize whitespace in a single text value.

    Args:
        text: Any scalar; missing values (as judged by ``pd.isna``) are accepted.

    Returns:
        ``""`` for a missing value, otherwise ``str(text)`` with every run of
        whitespace collapsed to one space and both ends stripped.
    """
    if pd.isna(text):
        return ""
    return re.sub(r'\s+', ' ', str(text)).strip()


def prepare_dataset(df, num_samples=None):
    """Filter, optionally subsample, and clean an article/summary frame.

    Rows with a missing ``article`` or ``highlights`` are dropped, as are rows
    whose raw article has 50 characters or fewer or whose raw summary has 10
    characters or fewer. The input frame is never modified.

    Args:
        df: DataFrame with string columns ``'article'`` and ``'highlights'``.
        num_samples: If truthy and smaller than the number of surviving rows,
            that many rows are drawn with ``random_state=CONFIG['seed']``.

    Returns:
        A new DataFrame with exactly the columns ``['article', 'highlights']``,
        whitespace-normalized, indexed ``0..n-1``.
    """
    kept = df.dropna(subset=['article', 'highlights'])
    kept = kept[kept['article'].str.len() > 50]  # drop very short articles
    kept = kept[kept['highlights'].str.len() > 10]  # drop very short summaries

    if num_samples and num_samples < len(kept):
        kept = kept.sample(n=num_samples, random_state=CONFIG['seed'])

    # Build a fresh frame rather than assigning into a filtered slice, and
    # always reset the index (not only after sampling) so downstream consumers
    # never see gaps left behind by the filters.
    cleaned = pd.DataFrame({
        'article': kept['article'].map(clean_text),
        'highlights': kept['highlights'].map(clean_text),
    })
    return cleaned.reset_index(drop=True)

print("preprocessing datasets")

# Filter, subsample and clean each split.
train_clean = prepare_dataset(train_df, CONFIG['num_train_samples'])
val_clean = prepare_dataset(val_df, CONFIG['num_val_samples'])
test_clean = prepare_dataset(test_df, CONFIG['num_test_samples'])

print(f"\nDataset sizes:")
print(f"  Train: {len(train_clean):,}")
print(f"  Validation: {len(val_clean):,}")
print(f"  Test: {len(test_clean):,}")

from datasets import Dataset, DatasetDict

# preserve_index=False keeps the pandas index out of the Dataset; without it a
# non-default index is stored as an extra "__index_level_0__" column.
train_dataset = Dataset.from_pandas(train_clean, preserve_index=False)
val_dataset = Dataset.from_pandas(val_clean, preserve_index=False)
test_dataset = Dataset.from_pandas(test_clean, preserve_index=False)

dataset = DatasetDict({
    'train': train_dataset,
    'validation': val_dataset,
    'test': test_dataset
})

print("\nconverted to HuggingFace Dataset format")

🔄 Preprocessing datasets...

✅ Dataset sizes:
  Train: 10,000
  Validation: 1,000
  Test: 1,000

✅ Converted to HuggingFace Dataset format


In [None]:
#CELL6
# Load the fast tokenizer that matches the configured checkpoint.
print("Loading tokenizer: {}".format(CONFIG['model_name']))
tokenizer = AutoTokenizer.from_pretrained(
    CONFIG['model_name'],
    use_fast=True,
)

def preprocess_function(examples):
    """Tokenize a batch of articles and their reference summaries.

    Args:
        examples: Batch mapping with ``'article'`` and ``'highlights'`` lists,
            as supplied by ``Dataset.map(batched=True)``.

    Returns:
        The tokenizer output for the articles, with an added ``'labels'`` entry
        holding the token ids of the summaries.
    """
    articles = examples['article']
    # T5 checkpoints get a task prefix in front of every article.
    if "t5" in CONFIG['model_name'].lower():
        articles = [f"summarize: {doc}" for doc in articles]

    # Sequences are truncated but deliberately NOT padded here. The
    # DataCollatorForSeq2Seq handed to the trainer pads each batch to its own
    # longest sequence (labels with its ignore index), so padding every example
    # to max_source_length would only add wasted tokens to every batch.
    model_inputs = tokenizer(
        articles,
        max_length=CONFIG['max_source_length'],
        truncation=True,
    )
    # `text_target` replaces the deprecated `as_target_tokenizer()` context.
    labels = tokenizer(
        text_target=examples['highlights'],
        max_length=CONFIG['max_target_length'],
        truncation=True,
    )

    model_inputs['labels'] = labels['input_ids']
    return model_inputs

print("tokenizing datasets")
# Tokenize every split in batches and drop the raw text columns afterwards.
tokenized_dataset = dataset.map(
    preprocess_function,
    desc="Tokenizing",
    remove_columns=['article', 'highlights'],
    batched=True,
)

print("tokenization complete")
first_train_example = tokenized_dataset['train'][0]
print(f"Sample: input_ids length = {len(first_train_example['input_ids'])}")

In [1]:
#CELL7
# Load the base seq2seq checkpoint and describe the LoRA adapters to attach.
print(f"\nloading base model: {CONFIG['model_name']}")

import sys
# If bitsandbytes was imported earlier in this kernel, drop it from the module
# cache before the model and adapters are created.
if 'bitsandbytes' in sys.modules:
    print("Warning:bitsandbytes is loaded.Removing from cache")
    del sys.modules['bitsandbytes']

base_model = AutoModelForSeq2SeqLM.from_pretrained(CONFIG['model_name'])
print(f"Base model loaded: {sum(p.numel() for p in base_model.parameters()):,} parameters")

from peft import LoraConfig, get_peft_model, TaskType
import os

# NOTE(review): presumably silences the bitsandbytes import banner; it is set
# after peft has already been imported above - confirm it still has an effect.
os.environ['BITSANDBYTES_NOWELCOME'] = '1'

# Adapter hyper-parameters all come from CONFIG; bias terms are left untouched
# and the config is created in training (non-inference) mode.
lora_config = LoraConfig(
    r=CONFIG['lora_r'],
    lora_alpha=CONFIG['lora_alpha'],
    target_modules=CONFIG['lora_target_modules'],
    lora_dropout=CONFIG['lora_dropout'],
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM,
    inference_mode=False,
)

print("\nApplying LoRA adapters...")

import peft.tuners.lora.model as lora_model

# Reference to the unpatched factory, taken before it is replaced below. It is
# not used again in the visible code, so it only serves as a way to restore
# the original behaviour by hand.
original_create = lora_model.LoraModel._create_new_module

def patched_create(lora_config, adapter_name, target, **kwargs):
    """Build a LoRA module with the default dispatcher only (no bitsandbytes).

    Intended as a replacement for ``LoraModel._create_new_module``.

    Args:
        lora_config: LoRA configuration forwarded to the dispatcher.
        adapter_name: Name of the adapter being attached.
        target: The module that should receive the adapter.
        **kwargs: Extra options forwarded unchanged to ``dispatch_default``.

    Returns:
        The module produced by ``dispatch_default``.

    Raises:
        ValueError: If ``dispatch_default`` returns ``None`` for ``target``.
            Handing ``None`` back to the caller would only surface later as an
            unrelated error.
    """
    from peft.tuners.lora.layer import dispatch_default

    # Only use default dispatch (skip bnb)
    new_module = dispatch_default(
        target,
        adapter_name,
        lora_config=lora_config,
        **kwargs,
    )
    if new_module is None:
        raise ValueError(
            f"Target module {target} is not supported by the default LoRA dispatcher."
        )
    return new_module

# Install the bnb-free factory as LoraModel's module constructor.
lora_model.LoraModel._create_new_module = staticmethod(patched_create)

# Wrap the base model with the adapters described by lora_config.
model = get_peft_model(base_model, lora_config)

print("LoRA applied successfully!")
print("\nmodel Statistics:")
model.print_trainable_parameters()

# Count parameters by hand as well, to print an explicit breakdown.
all_params = 0
trainable_params = 0
for param in model.parameters():
    n_elems = param.numel()
    all_params += n_elems
    if param.requires_grad:
        trainable_params += n_elems
trainable_pct = 100 * trainable_params / all_params

print(f"\nparameter breakdown:")
print(f"Total parameters:      {all_params:,}")
print(f"Trainable parameters:  {trainable_params:,}")
print(f"Trainable %:           {trainable_pct:.2f}%")
print(f"Memory savings:        ~{100 - trainable_pct:.0f}%")

NameError: name 'CONFIG' is not defined

In [2]:
#CELL8:training Setup 
import evaluate
from evaluate import load

# Prefer the `evaluate` ROUGE metric; fall back to a small rouge_score wrapper
# when loading it fails with an AttributeError.
try:
    rouge = evaluate.load("rouge")
except AttributeError:
    print("fixing evaluate library compatibility")
    from rouge_score import rouge_scorer

    class RougeMetric:
        """Minimal stand-in for the ``evaluate`` ROUGE metric, built on ``rouge_score``."""

        ROUGE_TYPES = ('rouge1', 'rouge2', 'rougeL', 'rougeLsum')

        def __init__(self):
            # rougeLsum is scored by the scorer itself rather than copied from
            # rougeL, so multi-line summaries are handled correctly.
            self.scorer = rouge_scorer.RougeScorer(list(self.ROUGE_TYPES), use_stemmer=True)

        def compute(self, predictions, references, **kwargs):
            """Return the mean F-measure per ROUGE type over all pairs.

            Args:
                predictions: Generated texts.
                references: Reference texts, aligned with ``predictions``.
                **kwargs: Accepted for call compatibility and ignored.

            Returns:
                Dict mapping each ROUGE type to its mean F-measure in [0, 1];
                all zeros when there are no pairs to score.
            """
            totals = dict.fromkeys(self.ROUGE_TYPES, 0.0)
            num_pairs = 0
            for pred, ref in zip(predictions, references):
                pair_scores = self.scorer.score(ref, pred)
                for rouge_type in self.ROUGE_TYPES:
                    totals[rouge_type] += pair_scores[rouge_type].fmeasure
                num_pairs += 1

            if num_pairs == 0:
                # Nothing to average: report zeros instead of dividing by zero.
                return dict.fromkeys(self.ROUGE_TYPES, 0.0)
            return {rouge_type: total / num_pairs for rouge_type, total in totals.items()}

    rouge = RougeMetric()
    print("using rouge_score fallback")

# Pads inputs and labels per batch at collation time.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

def _decode_for_rouge(token_ids):
    """Decode a batch of token-id sequences into stripped, non-empty strings."""
    token_ids = np.asarray(token_ids)
    # Ids outside the tokenizer's range (the -100 ignore marker, or overflowed
    # values) are swapped for the pad token, which skip_special_tokens then
    # drops. Clipping them instead would turn them into boundary-vocabulary
    # tokens and only works if those happen to be special tokens.
    in_range = (token_ids >= 0) & (token_ids < len(tokenizer))
    token_ids = np.where(in_range, token_ids, tokenizer.pad_token_id)
    texts = tokenizer.batch_decode(token_ids, skip_special_tokens=True)
    # Keep every text non-empty by substituting "." for blank outputs.
    return [text.strip() or "." for text in texts]


def compute_metrics(eval_pred):
    """Compute ROUGE scores during evaluation.

    Args:
        eval_pred: Pair ``(predictions, labels)`` of token-id arrays;
            ``predictions`` may be a tuple whose first element holds the ids.

    Returns:
        Dict with ``rouge1``, ``rouge2``, ``rougeL`` and ``rougeLsum`` scaled
        to 0-100 and rounded to two decimals. If decoding or scoring raises, a
        warning is printed and all four scores are reported as 0.0 so that the
        surrounding evaluation can continue.
    """
    zero_scores = {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}

    preds, labels = eval_pred
    if isinstance(preds, tuple):
        preds = preds[0]

    try:
        decoded_preds = _decode_for_rouge(preds)
    except Exception as e:
        print(f"warning in decoding predictions: {e}")
        return dict(zero_scores)

    try:
        decoded_labels = _decode_for_rouge(labels)
    except Exception as e:
        print(f"warning in decoding labels: {e}")
        return dict(zero_scores)

    try:
        result = rouge.compute(
            predictions=decoded_preds,
            references=decoded_labels,
            use_stemmer=True
        )

        return {
            'rouge1': round(result['rouge1'] * 100, 2),
            'rouge2': round(result['rouge2'] * 100, 2),
            'rougeL': round(result['rougeL'] * 100, 2),
            'rougeLsum': round(result.get('rougeLsum', result['rougeL']) * 100, 2),
        }
    except Exception as e:
        print(f"warning in ROUGE computation: {e}")
        return dict(zero_scores)

# Checkpoints for this run live in a sub-directory named after the run.
output_dir = f"{CONFIG['output_dir']}/{CONFIG['run_name']}"

training_args = Seq2SeqTrainingArguments(
    output_dir=output_dir,
    overwrite_output_dir=True,
    # `eval_strategy` is the current name of the deprecated
    # `evaluation_strategy` argument.
    eval_strategy="steps",
    eval_steps=CONFIG['eval_steps'],
    logging_steps=CONFIG['logging_steps'],
    save_steps=CONFIG['save_steps'],
    save_total_limit=CONFIG['save_total_limit'],
    learning_rate=CONFIG['learning_rate'],
    per_device_train_batch_size=CONFIG['train_batch_size'],
    per_device_eval_batch_size=CONFIG['eval_batch_size'],
    gradient_accumulation_steps=CONFIG['gradient_accumulation_steps'],
    num_train_epochs=CONFIG['num_epochs'],
    weight_decay=CONFIG['weight_decay'],
    warmup_ratio=CONFIG['warmup_ratio'],
    lr_scheduler_type=CONFIG['lr_scheduler_type'],
    label_smoothing_factor=CONFIG['label_smoothing_factor'],
    # Evaluate with generated summaries so compute_metrics can score ROUGE.
    predict_with_generate=True,
    generation_max_length=CONFIG['generation_max_length'],
    generation_num_beams=CONFIG['generation_num_beams'],
    fp16=CONFIG['fp16'],
    logging_dir=f"{output_dir}/logs",
    report_to=['tensorboard'],
    seed=CONFIG['seed'],
    # Keep the checkpoint with the highest validation rougeL.
    load_best_model_at_end=True,
    metric_for_best_model='rougeL',
    greater_is_better=True,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['validation'],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

effective_batch = CONFIG['train_batch_size'] * CONFIG['gradient_accumulation_steps']
total_steps = len(tokenized_dataset['train']) // effective_batch * CONFIG['num_epochs']

print("training setup complete!")
print(f"\ntraining Configuration:")
print(f"  Training samples:      {len(tokenized_dataset['train']):,}")
print(f"  Validation samples:    {len(tokenized_dataset['validation']):,}")
print(f"  Batch size per device: {CONFIG['train_batch_size']}")
print(f"  Gradient accumulation: {CONFIG['gradient_accumulation_steps']}")
print(f"  Effective batch size:  {effective_batch}")
print(f"  Total epochs:          {CONFIG['num_epochs']}")
print(f"  Total steps:           {total_steps:,}")
print(f"  Eval every:            {CONFIG['eval_steps']} steps")
print(f"  Learning rate:         {CONFIG['learning_rate']}")
print(f"  LR scheduler:          {CONFIG['lr_scheduler_type']}")
# Rough estimate: assumes about 0.7 seconds per optimizer step.
print(f"\nestimated training time: ~{total_steps * 0.7 / 60:.0f} minutes")

ModuleNotFoundError: No module named 'evaluate'

In [3]:
# CELL9Train
import pandas as pd
from datetime import datetime, timezone

print("="*80)
print("STARTING TRAINING")
print("="*80)
# datetime.utcnow() is deprecated; take a timezone-aware UTC timestamp instead.
print(f"Started at: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
print(f"Model: {CONFIG['model_name']}")
print(f"LoRA Rank: {CONFIG['lora_r']}")
print(f"Training samples: {len(tokenized_dataset['train']):,}")
# Derived from the planned step count (same 0.7 s/step heuristic as the setup
# cell) instead of a hard-coded figure that goes stale when CONFIG changes.
print(f"Estimated time: ~{total_steps * 0.7 / 60:.0f} minutes")
print("="*80 + "\n")

# Collect existing "checkpoint-<step>" directories. Names with a non-numeric
# suffix are ignored rather than crashing the step comparison below.
checkpoints = []
if os.path.exists(output_dir):
    checkpoints = [d for d in os.listdir(output_dir) if re.fullmatch(r'checkpoint-\d+', d)]

if checkpoints:
    latest_checkpoint = max(checkpoints, key=lambda x: int(x.split('-')[1]))
    checkpoint_path = os.path.join(output_dir, latest_checkpoint)
    print(f"Found existing checkpoint: {checkpoint_path}")
    print(f"Resuming training from checkpoint\n")

    try:
        train_result = trainer.train(resume_from_checkpoint=checkpoint_path)
    except Exception as e:
        # Best effort: if resuming fails for any reason, train from scratch.
        print(f"Error resuming from checkpoint: {e}")
        print("Starting fresh training\n")
        train_result = trainer.train()
else:
    print("Starting fresh training\n")
    train_result = trainer.train()

print("\n" + "="*80)
print("tRAINING cOMPLETE")
print("="*80)
print(f"Finished at: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")

print("\nFinal Training Metrics:")
for key, value in train_result.metrics.items():
    if isinstance(value, float):
        print(f"  {key:.<35} {value:.4f}")
    else:
        print(f"  {key:.<35} {value}")

# Save model
print(f"\n💾 Saving model to: {output_dir}")
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)

print("Saving LoRA adapter weights")
try:
    model.save_pretrained(output_dir)
    print("LoRA adapters saved")
except Exception as e:
    print(f"LoRA save warning: {e}")

with open(f"{CONFIG['results_dir']}/training_metrics.json", 'w') as f:
    json.dump(train_result.metrics, f, indent=2)

# Snapshot of the run configuration and outcome, stored next to the metrics.
training_info = {
    'model': CONFIG['model_name'],
    'lora_config': {
        'rank': CONFIG['lora_r'],
        'alpha': CONFIG['lora_alpha'],
        'target_modules': CONFIG['lora_target_modules'],
        'dropout': CONFIG['lora_dropout'],
    },
    'training_args': {
        'learning_rate': CONFIG['learning_rate'],
        'epochs': CONFIG['num_epochs'],
        'batch_size': CONFIG['train_batch_size'],
        'gradient_accumulation': CONFIG['gradient_accumulation_steps'],
        'effective_batch_size': CONFIG['train_batch_size'] * CONFIG['gradient_accumulation_steps'],
    },
    'dataset_sizes': {
        'train': len(tokenized_dataset['train']),
        'validation': len(tokenized_dataset['validation']),
    },
    'metrics': train_result.metrics,
    'completed_at': datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'),
}

with open(f"{CONFIG['results_dir']}/training_info.json", 'w') as f:
    json.dump(training_info, f, indent=2)

print("\nModel and metrics saved successfully!")
print(f"Model location: {output_dir}")
print(f"Results location: {CONFIG['results_dir']}")

# Reporting the on-disk size is optional: say why it was skipped instead of
# swallowing every exception silently.
try:
    import subprocess
    result = subprocess.run(['du', '-sh', output_dir], capture_output=True, text=True)
    print(f"\nModel size: {result.stdout.split()[0]}")
except (OSError, IndexError) as e:
    print(f"\nCould not determine model size: {e}")


STARTING TRAINING
Started at: 2025-11-13 05:44:30 UTC


NameError: name 'CONFIG' is not defined

In [19]:
from datetime import datetime, timezone

print("\n" + "="*80)
print("EVALUATING ON TEST SET")
print("="*80)
# datetime.utcnow() is deprecated; take a timezone-aware UTC timestamp instead.
print(f"Started: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")

model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

references = []
predictions = []
examples = []
failed_indices = []  # test rows whose generation raised an exception

print(f"\ngenerating summaries for {len(test_clean)} test samples")

for idx in tqdm(range(len(test_clean)), desc="Generating"):
    row = test_clean.iloc[idx]
    article = row['article']
    reference = row['highlights']

    # T5 checkpoints get a task prefix in front of the article.
    if "t5" in CONFIG['model_name'].lower():
        input_text = f"summarize: {article}"
    else:
        input_text = article

    inputs = tokenizer(
        input_text,
        max_length=CONFIG['max_source_length'],
        truncation=True,
        return_tensors='pt'
    ).to(device)

    try:
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=CONFIG['generation_max_length'],
                num_beams=CONFIG['generation_num_beams'],
                length_penalty=CONFIG['generation_length_penalty'],
                no_repeat_ngram_size=CONFIG['generation_no_repeat_ngram_size'],
                early_stopping=True,
            )

        prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # Keep going, but remember the failure: the placeholder text below is
        # still scored and would otherwise skew ROUGE without any trace.
        print(f"error at index {idx}: {e}")
        prediction = "Error generating summary."
        failed_indices.append(idx)

    references.append(reference)
    predictions.append(prediction)

    # Keep the first 20 samples (article capped at 1000 chars) for inspection.
    if idx < 20:
        examples.append({
            'id': idx,
            'article': article[:1000] + '...' if len(article) > 1000 else article,
            'reference': reference,
            'generated': prediction,
        })

if failed_indices:
    print(f"\nwarning: {len(failed_indices)} of {len(predictions)} generations failed; "
          "their placeholder text is included in the ROUGE scores below")

print("\ncomputing ROUGE scores")
rouge_scores = rouge.compute(predictions=predictions, references=references, use_stemmer=True)

test_results = {
    'rouge1': round(rouge_scores['rouge1'] * 100, 2),
    'rouge2': round(rouge_scores['rouge2'] * 100, 2),
    'rougeL': round(rouge_scores['rougeL'] * 100, 2),
    'rougeLsum': round(rouge_scores.get('rougeLsum', rouge_scores['rougeL']) * 100, 2),
    'num_samples': len(predictions),
    'num_failed': len(failed_indices),
    'model': CONFIG['model_name'],
    'lora_r': CONFIG['lora_r'],
    'date': datetime.now(timezone.utc).strftime('%Y-%m-%d'),
    'user': 'asheeradnan',
}

print("\n" + "="*80)
print("TEST SET RESULTS")
print("="*80)
print(f"\nmodel: {CONFIG['model_name']}")
print(f"LoRA Rank: {CONFIG['lora_r']}")
print(f"Test Samples: {test_results['num_samples']:,}")
print(f"\nROUGE Scores:")
print(f" ROUGE-1:    {test_results['rouge1']:.2f}")
print(f" ROUGE-2:    {test_results['rouge2']:.2f}")
print(f" ROUGE-L:    {test_results['rougeL']:.2f}")
print(f" ROUGE-Lsum: {test_results['rougeLsum']:.2f}")
print("="*80)

# Whitespace-token length statistics; compression is generated / article words.
ref_lengths = [len(r.split()) for r in references]
pred_lengths = [len(p.split()) for p in predictions]
article_lengths = [len(a.split()) for a in test_clean['article']]
compression_ratios = [
    (pred_len / art_len) if art_len > 0 else 0
    for pred_len, art_len in zip(pred_lengths, article_lengths)
]

print(f"\nSummary Statistics:")
print(f"Avg article length:    {np.mean(article_lengths):.1f} words")
print(f"Avg reference length:  {np.mean(ref_lengths):.1f} words")
print(f"Avg generated length:  {np.mean(pred_lengths):.1f} words")
print(f"Avg compression ratio: {np.mean(compression_ratios):.2%}")
print(f"Min generated length:  {min(pred_lengths)} words")
print(f"Max generated length:  {max(pred_lengths)} words")

results_path = f"{CONFIG['results_dir']}/{CONFIG['run_name']}"
os.makedirs(results_path, exist_ok=True)

with open(f"{results_path}/test_rouge.json", 'w') as f:
    json.dump(test_results, f, indent=2)

with open(f"{results_path}/examples.json", 'w', encoding='utf-8') as f:
    json.dump(examples, f, indent=2, ensure_ascii=False)

statistics = {
    'article_lengths': {'mean': float(np.mean(article_lengths)), 'std': float(np.std(article_lengths))},
    'reference_lengths': {'mean': float(np.mean(ref_lengths)), 'std': float(np.std(ref_lengths))},
    'generated_lengths': {'mean': float(np.mean(pred_lengths)), 'std': float(np.std(pred_lengths))},
    'compression_ratio': {'mean': float(np.mean(compression_ratios)), 'std': float(np.std(compression_ratios))},
}

with open(f"{results_path}/statistics.json", 'w') as f:
    json.dump(statistics, f, indent=2)

print(f"\nResults saved to: {results_path}")
print(f"completed: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} ")


📊 EVALUATING ON TEST SET
⏰ Started: 2025-11-12 17:50:17 UTC

🔄 Generating summaries for 1000 test samples...
This may take 5-10 minutes...



Generating:   0%|          | 0/1000 [00:00<?, ?it/s]


🔄 Computing ROUGE scores...

📈 TEST SET RESULTS

🤖 Model: facebook/bart-base
🧬 LoRA Rank: 16
📊 Test Samples: 1,000

📐 ROUGE Scores:
  ROUGE-1:    40.10
  ROUGE-2:    17.92
  ROUGE-L:    27.17
  ROUGE-Lsum: 27.17

📏 Summary Statistics:
  Avg article length:    690.4 words
  Avg reference length:  54.3 words
  Avg generated length:  55.1 words
  Avg compression ratio: 10.46%
  Min generated length:  14 words
  Max generated length:  111 words

💾 Results saved to: /kaggle/working/results/bart_lora_cnndm
⏰ Completed: 2025-11-12 18:03:54 UTC


In [20]:
# Cell 11: example outputs
print("\n" + "="*80)
print("EXAMPLE SUMMARIES")
print("="*80)

# Show the first five stored examples with a simple bag-of-words comparison.
for i, ex in enumerate(examples[:5], 1):
    print(f"\n{'─'*80}")
    print(f"EXAMPLE {i}")
    print('─'*80)

    print(f"\nARTICLE:")
    print(ex['article'])

    print(f"\nREFERENCE SUMMARY:")
    print(ex['reference'])

    print(f"\nGENERATED SUMMARY:")
    print(ex['generated'])

    # Unique lower-cased whitespace tokens of each text.
    ref_words = set(ex['reference'].lower().split())
    gen_words = set(ex['generated'].lower().split())

    if ref_words:
        shared_words = ref_words & gen_words
        # Recall: share of reference words that were generated.
        overlap = len(shared_words) / len(ref_words) * 100
        # Precision: share of generated words that occur in the reference.
        precision = len(shared_words) / len(gen_words) * 100 if gen_words else 0

        print(f"\nMetrics:")
        print(f"word overlap (recall): {overlap:.1f}%")
        print(f" Precision:             {precision:.1f}%")
        print(f" Reference length:      {len(ex['reference'].split())} words")
        print(f" Generated length:      {len(ex['generated'].split())} words")

print("\n" + "="*80)


📝 EXAMPLE SUMMARIES

────────────────────────────────────────────────────────────────────────────────
EXAMPLE 1
────────────────────────────────────────────────────────────────────────────────

📰 ARTICLE:
Comedian Jenny Eclair travelled with her other half on a Painting In Venus break with Flavours . There comes a time in a woman’s life when beach holidays just don’t cut it any longer, when lying on golden sands (unless you’re buried up to your neck) serves only to remind you how much weight you forgot to lose again this year and how ill-fitting your swimming costume is. Being control freaks, most fifty-something females find ‘doing nothing’ a bit boring 

In [None]:
#Deploy Real-Time Summarization Interface with Gradio
import gradio as gr
from datetime import datetime, timezone

print("="*80)
print("🌐 DEPLOYING GRADIO INTERFACE")
print("="*80)
# datetime.utcnow() is deprecated; take a timezone-aware UTC timestamp instead.
print(f"⏰ Started: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
print(f"👤 User: asheeradnan")

# Reuse the fine-tuned model that is already in memory. Nothing is read from
# disk here: the existing `model` object is switched to eval mode and moved to
# the selected device.
print(f"\n📦 Loading model from: {output_dir}")
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print(f"✅ Model loaded on: {device}")

def summarize_article(article_text, max_length=128, num_beams=4, length_penalty=2.0, min_length=30):
    """
    Generate a beam-search summary for an article and append a statistics block.

    Uses the notebook-level ``tokenizer``, ``model``, ``device`` and ``CONFIG``
    objects created in earlier cells.

    Args:
        article_text: Input news article (at least 50 non-blank characters).
        max_length: Maximum summary length, in tokens.
        num_beams: Number of beams used by beam search.
        length_penalty: Length penalty forwarded to ``model.generate``.
        min_length: Minimum summary length, in tokens. Capped at ``max_length``
            because the UI sliders allow a minimum above the maximum.

    Returns:
        str: the decoded summary followed by a Markdown statistics block, or a
        warning / error message when the input is too short or generation fails.
    """
    import time  # local import so the function does not depend on another cell's imports

    if not article_text or len(article_text.strip()) < 50:
        return "⚠️ Please enter an article with at least 50 characters."

    try:
        max_length = int(max_length)
        # Keep the length constraints feasible for generate()
        min_length = min(int(min_length), max_length)

        started = time.perf_counter()

        # Tokenize (inputs longer than the configured source length are truncated)
        inputs = tokenizer(
            article_text,
            max_length=CONFIG['max_source_length'],
            truncation=True,
            return_tensors='pt'
        ).to(device)

        # Generate
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=max_length,
                min_length=min_length,
                num_beams=int(num_beams),
                length_penalty=float(length_penalty),
                no_repeat_ngram_size=3,
                early_stopping=True,
            )

        # Decode
        summary = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Measured wall-clock time for tokenize + generate + decode
        # (previously this was a guess derived from the article length)
        elapsed = time.perf_counter() - started

        # Statistics
        article_words = len(article_text.split())
        summary_words = len(summary.split())
        compression = (summary_words / article_words * 100) if article_words > 0 else 0

        stats = "\n\n📊 **Statistics:**\n"
        stats += f"- Article length: {article_words} words\n"
        stats += f"- Summary length: {summary_words} words\n"
        stats += f"- Compression ratio: {compression:.1f}%\n"
        stats += f"- Generation time: {elapsed:.1f}s"

        return summary + stats

    except Exception as e:
        # Surface the failure in the UI text box instead of crashing the handler
        return f"❌ Error generating summary: {str(e)}"

# Quick-test articles: three hand-written samples followed by one test-set article
sample_articles = []

# Sample 1: Politics
sample_articles.append(
    """President Biden announced today a new infrastructure plan that aims to rebuild America's roads, bridges, and public transit systems. The $2 trillion proposal includes funding for clean energy initiatives and would create millions of jobs over the next decade. Republicans have criticized the plan as too expensive, while progressive Democrats argue it doesn't go far enough to address climate change. The bill is expected to face tough negotiations in Congress."""
)

# Sample 2: Technology
sample_articles.append(
    """Apple unveiled its latest iPhone model at a virtual event yesterday, featuring an improved camera system with advanced AI capabilities. The new device includes a faster processor, longer battery life, and enhanced 5G connectivity. Pre-orders begin next week with prices starting at $999. Industry analysts predict strong sales despite economic uncertainty. The company also announced updates to its smartwatch and tablet lineup."""
)

# Sample 3: Sports
sample_articles.append(
    """Serena Williams defeated her opponent in straight sets to advance to the Wimbledon semifinals. The tennis legend displayed her trademark power and precision, winning 6-3, 6-2 in just 68 minutes. At 41 years old, Williams continues to compete at the highest level and is seeking her 24th Grand Slam title. She will face the tournament's second seed in the next round. The match drew a packed crowd at Centre Court."""
)

# Example from test set; a placeholder prompt is used when `examples` is empty
if examples:
    sample_articles.append(examples[0]['article'])
else:
    sample_articles.append("Enter your article here...")

# Build Gradio interface
# The training-set size shown in the UI is read from CONFIG so the text cannot
# drift from the configured run (it used to be a hard-coded "10,000").
_num_train_samples = CONFIG.get("num_train_samples")
_train_size_label = (
    f"{_num_train_samples:,}" if isinstance(_num_train_samples, int) else "all available"
)

with gr.Blocks(
    title="BART LoRA Summarizer",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {font-family: 'Arial', sans-serif;}
    .header {text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 20px;}
    """
) as demo:
    
    # Header
    gr.HTML("""
    <div class="header">
        <h1>📰 Abstractive Text Summarizer</h1>
        <p><strong>Powered by BART-base + LoRA</strong></p>
        <p>Fine-tuned on CNN/DailyMail | Author: asheeradnan | Date: 2025-11-12</p>
    </div>
    """)
    
    # NOTE(review): the trainable-parameter share and training time below are
    # hard-coded; confirm them against the actual run before publishing.
    gr.Markdown("""
    ### ℹ️ About This Model
    - **Architecture:** BART (Bidirectional and Auto-Regressive Transformers)
    - **Fine-tuning:** LoRA (Low-Rank Adaptation) - only 0.63% parameters trained
    - **Dataset:** {train_size} CNN/DailyMail articles
    - **Performance:** ROUGE-1: {rouge1:.2f}, ROUGE-2: {rouge2:.2f}, ROUGE-L: {rougeL:.2f}
    - **Training:** 22 minutes on Tesla T4 GPU
    """.format(
        train_size=_train_size_label,
        rouge1=test_results['rouge1'],
        rouge2=test_results['rouge2'],
        rougeL=test_results['rougeL'],
    ))
    
    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### 📄 Input Article")
            
            article_input = gr.Textbox(
                label="",
                placeholder="Paste a news article here (minimum 50 characters)...",
                lines=15,
                max_lines=20,
            )
            
            with gr.Accordion("⚙️ Generation Settings (Advanced)", open=False):
                max_length_slider = gr.Slider(
                    minimum=30,
                    maximum=256,
                    value=128,
                    step=8,
                    label="Max Summary Length (tokens)",
                    info="Maximum number of tokens in the summary"
                )
                
                min_length_slider = gr.Slider(
                    minimum=10,
                    maximum=100,
                    value=30,
                    step=5,
                    label="Min Summary Length (tokens)",
                    info="Minimum number of tokens in the summary"
                )
                
                beams_slider = gr.Slider(
                    minimum=1,
                    maximum=8,
                    value=4,
                    step=1,
                    label="Number of Beams",
                    info="Higher = better quality but slower (recommended: 4)"
                )
                
                length_penalty_slider = gr.Slider(
                    minimum=0.5,
                    maximum=3.0,
                    value=2.0,
                    step=0.1,
                    label="Length Penalty",
                    info="Higher = encourages longer summaries (recommended: 2.0)"
                )
            
            with gr.Row():
                clear_btn = gr.Button("🗑️ Clear", variant="secondary")
                summarize_btn = gr.Button("✨ Generate Summary", variant="primary", size="lg")
        
        with gr.Column(scale=2):
            gr.Markdown("### 📝 Generated Summary")
            
            summary_output = gr.Textbox(
                label="",
                lines=15,
                max_lines=20,
                show_copy_button=True,
            )
            
            gr.Markdown("""
            ### 💡 Tips for Best Results
            - ✅ Use well-formatted news articles
            - ✅ Minimum 100 words recommended
            - ✅ Maximum 1024 tokens (≈800 words)
            - ⚠️ Articles longer than 1024 tokens will be truncated
            """)
    
    # Examples section
    # Each row follows the order of `inputs`, which matches the positional
    # signature of summarize_article: (article, max_length, num_beams, length_penalty, min_length)
    gr.Markdown("### 📚 Example Articles (Click to Try)")
    gr.Examples(
        examples=[
            [sample_articles[0], 128, 4, 2.0, 30],
            [sample_articles[1], 128, 4, 2.0, 30],
            [sample_articles[2], 128, 4, 2.0, 30],
            [sample_articles[3], 128, 4, 2.0, 30],
        ],
        inputs=[article_input, max_length_slider, beams_slider, length_penalty_slider, min_length_slider],
        outputs=summary_output,
        fn=summarize_article,
        cache_examples=False,
        label="Click an example to load it"
    )
    
    # Footer
    gr.HTML("""
    <div style="text-align: center; margin-top: 30px; padding: 20px; background: #f5f5f5; border-radius: 10px;">
        <h3>🎓 NLP Assignment: Transformer Fine-Tuning</h3>
        <p><strong>Task 3:</strong> Encoder-Decoder Architecture for Text Summarization</p>
        <p><strong>Model:</strong> BART-base + LoRA (r=16, α=32)</p>
        <p><strong>Author:</strong> asheeradnan | <strong>Platform:</strong> Kaggle | <strong>Date:</strong> 2025-11-12</p>
        <p style="margin-top: 10px;">
            <strong>ROUGE Scores:</strong> 
            R-1: {:.2f} | R-2: {:.2f} | R-L: {:.2f}
        </p>
        <p style="font-size: 0.9em; color: #666;">
            ⚡ Powered by Hugging Face Transformers | PEFT | Gradio
        </p>
    </div>
    """.format(test_results['rouge1'], test_results['rouge2'], test_results['rougeL']))
    
    # Button actions
    summarize_btn.click(
        fn=summarize_article,
        inputs=[article_input, max_length_slider, beams_slider, length_penalty_slider, min_length_slider],
        outputs=summary_output,
    )
    
    # Clear resets both the article box and the summary box
    clear_btn.click(
        fn=lambda: ("", ""),
        inputs=None,
        outputs=[article_input, summary_output],
    )

# Launch the interface
print("\n" + "="*80)
print("🚀 LAUNCHING GRADIO INTERFACE")
print("="*80)

# The usage notes are printed BEFORE launching: with debug=True, launch() blocks
# this cell until the server is interrupted, so anything printed after it would
# only appear once the app is no longer running.
print("\n📝 Instructions:")
print("  1. Click the public URL below (https://xxxxx.gradio.live)")
print("  2. Paste a news article in the input box")
print("  3. Click 'Generate Summary'")
print("  4. Adjust settings in 'Advanced' if needed")
print("\n⚠️ Note: Public URL expires after 72 hours")
print("⏰ Launching at: " + datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S') + " UTC")
print("="*80)

# Launch with public sharing
demo.launch(
    share=True,  # Creates public URL
    debug=True,  # Blocks the cell while serving and streams errors to its output
    server_name="0.0.0.0",
    server_port=7860,
    show_error=True,
)

🌐 DEPLOYING GRADIO INTERFACE
⏰ Started: 2025-11-12 18:18:01 UTC
👤 User: asheeradnan

📦 Loading model from: /kaggle/working/checkpoints/bart_lora_cnndm
✅ Model loaded on: cuda

🚀 LAUNCHING GRADIO INTERFACE
Running on local URL:  http://0.0.0.0:7860
Running on public URL: https://cf5d2f48c5b8bb4282.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


In [None]:
# ================================================================
# CELL 13: Deploy IMPROVED Gradio Interface with Better Summarization
# ================================================================
import gradio as gr
from datetime import datetime
import re

print("="*80)
print("🌐 DEPLOYING IMPROVED GRADIO INTERFACE")
print("="*80)
# Report the real start time; this used to be a timestamp frozen into the source
print(f"⏰ Started: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC")
print("👤 User: asheeradnan")

# Prepare the trained model for inference.
# No checkpoint is read here: the `model` object already in the kernel is reused,
# and `output_dir` is only echoed so the reader knows which run it belongs to.
print(f"\n📦 Loading model from: {output_dir}")
model.eval()  # inference mode: disables dropout
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print(f"✅ Model loaded on: {device}")

def is_extractive(summary, article, threshold=0.7, ngram_size=3):
    """
    Check whether a summary is mostly copied verbatim from the article.

    The score is the fraction of the summary's word n-grams that also occur
    somewhere in the article. Unlike a position-by-position character
    comparison, this detects copied text wherever it sits in the article,
    not only a copy of the article's opening characters.

    Args:
        summary: Generated summary.
        article: Original article.
        threshold: Copied fraction above which the summary counts as
            extractive (0.7 = more than 70% of its n-grams are copied).
        ngram_size: Number of consecutive words per n-gram (default 3).

    Returns:
        True if the copied fraction is strictly greater than ``threshold``;
        False otherwise, including when the summary contains no words.
    """
    summary_tokens = re.findall(r"\w+", summary.lower())
    if not summary_tokens:
        return False
    article_tokens = re.findall(r"\w+", article.lower())

    # A summary shorter than one n-gram is treated as a single n-gram
    n = min(max(1, int(ngram_size)), len(summary_tokens))

    def _ngrams(tokens):
        return [tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1)]

    article_ngrams = set(_ngrams(article_tokens))
    summary_ngrams = _ngrams(summary_tokens)

    copied = sum(1 for gram in summary_ngrams if gram in article_ngrams)
    overlap_ratio = copied / len(summary_ngrams)

    return overlap_ratio > threshold

def improve_summary(summary, article):
    """
    Post-process a summary by removing sentences that are repeated verbatim.

    Sentences are split only where '.', '!' or '?' is followed by whitespace,
    so decimals such as "3.4" are left intact, and no sentence is discarded
    just for being short.

    Args:
        summary: Generated summary text.
        article: Original article. Unused; kept so existing callers that pass
            it keep working.

    Returns:
        The summary without repeated sentences, ending in sentence
        punctuation; an empty string if the summary is blank.
    """
    segments = re.split(r'(?<=[.!?])\s+', summary.strip())
    kept = []
    seen = set()

    for segment in segments:
        segment = segment.strip()
        if not segment:
            continue
        # Compare sentences ignoring case, spacing and the closing punctuation
        key = " ".join(segment.rstrip('.!?').lower().split())
        # Very short pieces (e.g. "Dr." produced by the split) are always kept:
        # dropping a repeat of them would corrupt the surrounding sentence.
        if len(key) > 10:
            if key in seen:
                continue
            seen.add(key)
        kept.append(segment)

    improved = ' '.join(kept)
    if improved and not improved.endswith(('.', '!', '?')):
        improved += '.'

    return improved

def summarize_article(article_text, max_length=128, num_beams=4, length_penalty=2.0,
                     min_length=30, temperature=1.0, repetition_penalty=1.2):
    """
    Generate a summary with repetition-penalised beam search and flag copy-heavy output.

    Uses the notebook-level ``tokenizer``, ``model``, ``device`` and ``CONFIG``
    objects plus the ``is_extractive`` / ``improve_summary`` helpers defined
    in this cell.

    Args:
        article_text: Input news article (at least 50 non-blank characters).
        max_length: Maximum summary length, in tokens.
        num_beams: Number of beams used by beam search.
        length_penalty: Length penalty forwarded to ``model.generate``.
        min_length: Minimum summary length, in tokens. Capped at ``max_length``.
        temperature: Currently unused. Decoding is deterministic
            (``do_sample=False``); the parameter is kept so existing callers
            keep working.
        repetition_penalty: Repetition penalty forwarded to ``model.generate``.

    Returns:
        str: the summary, a quality note (extractive warning or success) and
        a Markdown statistics block; or a warning / error message when the
        input is too short or generation fails.
    """
    import time  # local import so the function does not depend on another cell's imports

    if not article_text or len(article_text.strip()) < 50:
        return "⚠️ Please enter an article with at least 50 characters."

    try:
        max_length = int(max_length)
        # Keep the length constraints feasible for generate()
        min_length = min(int(min_length), max_length)

        started = time.perf_counter()

        # Tokenize (inputs longer than the configured source length are truncated)
        inputs = tokenizer(
            article_text,
            max_length=CONFIG['max_source_length'],
            truncation=True,
            return_tensors='pt'
        ).to(device)

        # Generate with deterministic beam search
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=max_length,
                min_length=min_length,
                num_beams=int(num_beams),
                length_penalty=float(length_penalty),
                repetition_penalty=float(repetition_penalty),  # Penalize repetition
                no_repeat_ngram_size=3,  # Don't repeat 3-grams
                early_stopping=True,
                do_sample=False,  # Deterministic decoding; `temperature` is not applied
            )

        # Decode
        summary = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Measured wall-clock time for tokenize + generate + decode
        # (previously this was a guess derived from the article length)
        elapsed = time.perf_counter() - started

        # Check if too extractive (copying article)
        if is_extractive(summary, article_text, threshold=0.65):
            warning = "\n\n⚠️ **Note:** Summary may be too extractive (copying article text). "
            warning += "This happens with certain article types. Try adjusting parameters or use a longer article."

            # Clean up repeated sentences; the text is not re-generated
            summary = improve_summary(summary, article_text)
        else:
            warning = "\n\n✅ **Quality:** Abstractive summary generated successfully!"

        # Statistics
        article_words = len(article_text.split())
        summary_words = len(summary.split())
        compression = (summary_words / article_words * 100) if article_words > 0 else 0

        # Share of distinct words in the summary (lower means more repetition)
        unique_words = len(set(summary.lower().split()))
        diversity = (unique_words / summary_words * 100) if summary_words > 0 else 0

        stats = "\n\n📊 **Statistics:**\n"
        stats += f"- Article length: {article_words} words\n"
        stats += f"- Summary length: {summary_words} words\n"
        stats += f"- Compression ratio: {compression:.1f}%\n"
        stats += f"- Word diversity: {diversity:.0f}%\n"
        stats += f"- Generation time: {elapsed:.1f}s"

        return summary + warning + stats

    except Exception as e:
        # Surface the failure in the UI text box instead of crashing the handler
        return f"❌ Error generating summary: {str(e)}"

# Sample articles (better examples)
# Article text is stored as proper Unicode ("António", "€") so the examples render correctly in the UI.
sample_articles = [
    # Sample 1: Complex political story (tests abstraction)
    """The United Nations Security Council convened an emergency session late Tuesday to address escalating tensions in the Middle East following reports of cross-border military activity. Diplomats from fifteen member nations engaged in heated debates lasting over six hours, ultimately failing to reach consensus on proposed sanctions. The United States and Russia vetoed competing resolutions, each accusing the other of undermining regional stability. Secretary-General António Guterres urged all parties to pursue diplomatic solutions, warning that military escalation could trigger a humanitarian crisis affecting millions of civilians. International observers noted this marks the third consecutive month of deadlocked negotiations, raising concerns about the Council's effectiveness.""",
    
    # Sample 2: Technology breakthrough (specific details)
    """Scientists at MIT's Computer Science and Artificial Intelligence Laboratory announced a breakthrough in quantum computing that could revolutionize data encryption. The research team, led by Dr. Sarah Chen, developed a novel algorithm that reduces error rates in quantum calculations by 87 percent compared to previous methods. This advancement addresses one of the field's most persistent challenges: maintaining quantum coherence long enough to perform complex operations. The findings, published in Nature Physics, demonstrate practical applications in cryptography, drug discovery, and climate modeling. Industry experts predict commercial quantum computers incorporating this technology could reach market within five to seven years, potentially disrupting cybersecurity protocols worldwide.""",
    
    # Sample 3: Environmental story (multiple angles)
    """Environmental activists celebrated a landmark victory yesterday when the European Parliament voted overwhelmingly to ban single-use plastics across all member states by 2027. The legislation, which passed with 412 votes in favor and 153 against, targets items including plastic straws, cutlery, plates, and polystyrene food containers. Proponents argue the measure will prevent an estimated 3.4 million tons of plastic waste annually and protect marine ecosystems. However, industry representatives warn of significant economic disruption, particularly for manufacturers employing thousands of workers. The law includes €10 billion in transition funding to help companies develop sustainable alternatives. Marine biologists praised the decision, noting that current plastic pollution kills over one million seabirds yearly.""",
    
    # Sample 4: Sports upset (narrative story)
    """In one of the biggest upsets in tennis history, unseeded qualifier Emma Ramirez stunned world number one Sofia Petrov 6-4, 7-5 in the Australian Open quarterfinals. The 22-year-old from Spain, ranked 127th globally, displayed extraordinary composure during crucial points, saving five break points in the final game before converting her third match point with a backhand winner down the line. Ramirez's aggressive baseline play and tactical variety consistently troubled the defending champion, who committed 47 unforced errors. This victory marks Ramirez's first-ever win against a top-ten opponent and guarantees her first Grand Slam semifinal appearance. She will face either Jessica Chen or Maria Kowalski, with the winner earning a place in Saturday's final.""",
]

# Build IMPROVED Gradio interface

def _summarize_from_ui(article_text, max_length, min_length, num_beams,
                       length_penalty, repetition_penalty):
    """
    Adapter between the UI controls and ``summarize_article``.

    The components are wired in the order (max length, min length, beams,
    length penalty, repetition penalty), which is NOT the positional order of
    ``summarize_article``. Passing them positionally fed the min-length slider
    into ``num_beams``, the beams slider into ``length_penalty`` and left the
    repetition-penalty slider without effect, so every value is forwarded by
    keyword here.
    """
    return summarize_article(
        article_text,
        max_length=max_length,
        num_beams=num_beams,
        length_penalty=length_penalty,
        min_length=min_length,
        repetition_penalty=repetition_penalty,
    )


# The training-set size shown in the UI is read from CONFIG so the text cannot
# drift from the configured run (it used to be a hard-coded "10,000").
_num_train_samples = CONFIG.get("num_train_samples")
_train_size_label = (
    f"{_num_train_samples:,}" if isinstance(_num_train_samples, int) else "all available"
)

with gr.Blocks(
    title="BART LoRA Summarizer - Improved",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {font-family: 'Arial', sans-serif;}
    .header {text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 20px;}
    .warning {background: #fff3cd; padding: 10px; border-radius: 5px; border-left: 4px solid #ffc107;}
    """
) as demo:
    
    # Header
    gr.HTML("""
    <div class="header">
        <h1>📰 Improved Abstractive Text Summarizer</h1>
        <p><strong>Powered by BART-base + LoRA (Enhanced Decoding)</strong></p>
        <p>Fine-tuned on CNN/DailyMail | Author: asheeradnan | Date: 2025-11-12 18:28:06 UTC</p>
    </div>
    """)
    
    # NOTE(review): the trainable-parameter share below is hard-coded; confirm
    # it against the actual run before publishing.
    gr.Markdown("""
    ### ℹ️ About This Model
    
    **What's New in This Version:**
    - ✨ **Repetition Penalty:** Reduces copying from source text
    - ✨ **Extractiveness Detection:** Warns if summary is too similar to article
    - ✨ **Post-Processing:** Removes redundant phrases
    - ✨ **Better Parameters:** Tuned for more abstractive summaries
    
    **Model Info:**
    - Architecture: BART (6 encoder + 6 decoder layers)
    - Fine-tuning: LoRA (rank 16) - only 0.63% parameters trained
    - Dataset: {train_size} CNN/DailyMail articles
    - Performance: ROUGE-1: {rouge1:.2f}, ROUGE-2: {rouge2:.2f}, ROUGE-L: {rougeL:.2f}
    """.format(
        train_size=_train_size_label,
        rouge1=test_results['rouge1'],
        rouge2=test_results['rouge2'],
        rougeL=test_results['rougeL'],
    ))
    
    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### 📄 Input Article")
            
            article_input = gr.Textbox(
                label="",
                placeholder="Paste a news article here (minimum 100 words recommended for best results)...",
                lines=16,
                max_lines=25,
            )
            
            with gr.Accordion("⚙️ Generation Settings (Advanced)", open=True):
                with gr.Row():
                    max_length_slider = gr.Slider(
                        minimum=40,
                        maximum=200,
                        value=100,  # Changed from 128
                        step=10,
                        label="Max Summary Length",
                        info="Shorter = more concise, less copying"
                    )
                    
                    min_length_slider = gr.Slider(
                        minimum=20,
                        maximum=80,
                        value=40,  # Changed from 30
                        step=5,
                        label="Min Summary Length",
                        # The value is forwarded as generate(min_length=...), which counts tokens
                        info="Minimum tokens in summary"
                    )
                
                with gr.Row():
                    beams_slider = gr.Slider(
                        minimum=2,
                        maximum=6,
                        value=3,  # Changed from 4 (fewer beams = more diversity)
                        step=1,
                        label="Number of Beams",
                        info="3-4 recommended (fewer = more creative)"
                    )
                    
                    length_penalty_slider = gr.Slider(
                        minimum=0.8,
                        maximum=2.5,
                        value=1.5,  # Changed from 2.0
                        step=0.1,
                        label="Length Penalty",
                        info="Lower = shorter summaries"
                    )
                
                repetition_penalty_slider = gr.Slider(
                    minimum=1.0,
                    maximum=2.5,
                    value=1.5,  # NEW: Penalize repetition
                    step=0.1,
                    label="Repetition Penalty (NEW)",
                    info="Higher = less copying (1.5-2.0 recommended)"
                )
            
            with gr.Row():
                clear_btn = gr.Button("🗑️ Clear", variant="secondary")
                summarize_btn = gr.Button("✨ Generate Summary", variant="primary", size="lg")
        
        with gr.Column(scale=2):
            gr.Markdown("### 📝 Generated Summary")
            
            summary_output = gr.Textbox(
                label="",
                lines=16,
                max_lines=25,
                show_copy_button=True,
            )
            
            gr.Markdown("""
            ### 💡 Tips for Better Summaries
            
            **If summary is copying the article:**
            - ✅ Increase **Repetition Penalty** to 1.8-2.0
            - ✅ Decrease **Max Length** to 80-100
            - ✅ Reduce **Beams** to 2-3
            - ✅ Lower **Length Penalty** to 1.2-1.5
            
            **For longer, detailed summaries:**
            - ✅ Increase **Max Length** to 150-180
            - ✅ Increase **Length Penalty** to 2.0-2.5
            - ✅ Keep **Repetition Penalty** at 1.2-1.5
            
            **Best practices:**
            - ✅ Use articles with 150-500 words
            - ✅ Well-structured news articles work best
            - ✅ Very short articles (<100 words) may just be copied
            - ⚠️ Articles >800 words are truncated to 1024 tokens
            """)
    
    # Examples section
    # Row layout follows `inputs`: article, max length, min length, beams,
    # length penalty, repetition penalty.
    gr.Markdown("### 📚 Example Articles (Better Test Cases)")
    gr.Examples(
        examples=[
            [sample_articles[0], 100, 40, 3, 1.5, 1.5],  # Political (complex)
            [sample_articles[1], 100, 40, 3, 1.5, 1.5],  # Technology (detailed)
            [sample_articles[2], 100, 40, 3, 1.5, 1.5],  # Environmental (multi-angle)
            [sample_articles[3], 100, 40, 3, 1.5, 1.5],  # Sports (narrative)
        ],
        inputs=[article_input, max_length_slider, min_length_slider, beams_slider, 
                length_penalty_slider, repetition_penalty_slider],
        outputs=summary_output,
        fn=_summarize_from_ui,
        cache_examples=False,
        label="Click an example to load"
    )
    
    # Troubleshooting guide
    with gr.Accordion("🔧 Troubleshooting Common Issues", open=False):
        gr.Markdown("""
        ### Problem: Summary is just copying the article opening
        
        **Cause:** Model trained on news articles where lead paragraphs often summarize the story.
        
        **Solutions:**
        1. **Increase repetition penalty** to 1.8-2.2
        2. **Reduce max length** to 80-100 tokens
        3. **Use fewer beams** (2-3 instead of 4-5)
        4. **Try a different article** (some structures are easier to summarize)
        
        ---
        
        ### Problem: Summary is too short or incomplete
        
        **Solutions:**
        1. **Increase min length** to 50-60
        2. **Increase length penalty** to 2.0-2.5
        3. **Increase max length** to 150+
        4. **Use more beams** (4-5)
        
        ---
        
        ### Problem: Summary has repetitive phrases
        
        **Solutions:**
        1. **Increase repetition penalty** to 2.0+
        2. Model already uses **no_repeat_ngram_size=3** (no 3-word phrases repeat)
        3. Post-processing removes some redundancy automatically
        
        ---
        
        ### Why does this happen?
        
        Our model was trained on **{train_size} samples** (not full 287k dataset) to save time for the assignment.
        
        **With more training data or higher LoRA rank (32-64), performance would improve significantly.**
        
        For production use:
        - Train on full dataset (287k articles)
        - Use BART-large instead of BART-base
        - Increase LoRA rank to 32-64
        - Add reinforcement learning from human feedback (RLHF)
        """.format(train_size=_train_size_label))
    
    # Footer
    gr.HTML("""
    <div style="text-align: center; margin-top: 30px; padding: 20px; background: #f5f5f5; border-radius: 10px;">
        <h3>🎓 NLP Assignment: Transformer Fine-Tuning (Improved Version)</h3>
        <p><strong>Task 3:</strong> Encoder-Decoder Architecture for Abstractive Text Summarization</p>
        <p><strong>Model:</strong> BART-base + LoRA (r=16, α=32) + Enhanced Decoding</p>
        <p><strong>Author:</strong> asheeradnan | <strong>Date:</strong> 2025-11-12 18:28:06 UTC</p>
        <p style="margin-top: 10px;">
            <strong>ROUGE Scores:</strong> 
            R-1: {:.2f} | R-2: {:.2f} | R-L: {:.2f}
        </p>
        <p style="margin-top: 10px; font-size: 0.95em;">
            <strong>Improvements in this version:</strong><br>
            ✨ Repetition Penalty | ✨ Extractiveness Detection | ✨ Better Default Parameters
        </p>
        <p style="font-size: 0.9em; color: #666; margin-top: 10px;">
            ⚡ Powered by Hugging Face Transformers | PEFT | Gradio
        </p>
    </div>
    """.format(test_results['rouge1'], test_results['rouge2'], test_results['rougeL']))
    
    # Button actions
    summarize_btn.click(
        fn=_summarize_from_ui,
        inputs=[article_input, max_length_slider, min_length_slider, beams_slider, 
                length_penalty_slider, repetition_penalty_slider],
        outputs=summary_output,
    )
    
    # Clear resets both the article box and the summary box
    clear_btn.click(
        fn=lambda: ("", ""),
        inputs=None,
        outputs=[article_input, summary_output],
    )

# Launch the interface
print("\n" + "="*80)
print("🚀 LAUNCHING IMPROVED GRADIO INTERFACE")
print("="*80)

# The summary is printed BEFORE launching: with debug=True, launch() blocks this
# cell until the server is interrupted, so anything printed after it would only
# appear once the app is no longer running.
print("\n📝 Key Improvements:")
print("  ✨ Repetition penalty: Reduces copying")
print("  ✨ Better defaults: Shorter, more concise summaries")
print("  ✨ Extractiveness warning: Alerts if copying detected")
print("  ✨ Troubleshooting guide: Built-in help")
# Real launch time; this used to be a timestamp frozen into the source
print("\n⏰ Launching at: " + datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S') + " UTC")
print("👤 User: asheeradnan")
print("="*80)

demo.launch(
    share=True,  # Creates public URL
    debug=True,  # Blocks the cell while serving and streams errors to its output
    server_name="0.0.0.0",
    server_port=7860,
    show_error=True,
)