In [1]:
# ==========================================
# CELL 1: ENVIRONMENT SETUP (UPDATED)
# ==========================================
# We use -U to force the latest compatible versions of all libraries
!pip install -q -U transformers accelerate bitsandbytes peft datasets evaluate rouge_score

print("✅ Installation Complete.")
print("⚠️ IF YOU SEE ERRORS ABOVE: Go to 'Runtime > Restart session' and run this cell again.")

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/44.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.0/12.0 MB[0m [31m137.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.9/380.9 kB[0m [31m37.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m556.4/556.4 kB[0m [31m43.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m512.3/512.3 kB[0m [31m39.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# ==========================================
# CELL 2: HUGGING FACE AUTHENTICATION
# ==========================================
from huggingface_hub import login

print("Please paste your Hugging Face token in the box below:")
login()

Please paste your Hugging Face token in the box below:


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
# ==========================================
# CELL 3: LOAD QUANTIZED MODEL
# ==========================================
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, BitsAndBytesConfig

# 1. Define Model ID
model_id = "google/flan-t5-base"

# 2. Configure 4-bit NormalFloat (NF4) Quantization
# This aligns with your proposal's goal of "Resource Efficiency"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",            # The specific 4-bit format
    bnb_4bit_compute_dtype=torch.float16, # Compute in half-precision (speed)
    bnb_4bit_use_double_quant=True        # Double quantization for memory savings
)

# 3. Load Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id)

# 4. Load Model
print(f"Loading {model_id} with 4-bit quantization...")
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto" # Automatically assigns layers to the T4 GPU
)

print(f"✅ Model loaded on {model.device} with 4-bit weights.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading google/flan-t5-base with 4-bit quantization...


generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

✅ Model loaded on cuda:0 with 4-bit weights.


In [4]:
# ==========================================
# CELL 4: LORA ADAPTER CONFIGURATION
# ==========================================
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training

# 1. Prepare model for k-bit training (stabilizes the quantized weights)
model = prepare_model_for_kbit_training(model)

# 2. Define LoRA Config
# We target 'q' (Query) and 'v' (Value) attention layers as per standard practice
lora_config = LoraConfig(
    r=16,                  # Rank: Higher = more parameters, better learning
    lora_alpha=32,         # Scaling factor
    target_modules=["q", "v"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM
)

# 3. Inject LoRA Adapters
model = get_peft_model(model, lora_config)

# 4. Verification
model.print_trainable_parameters()
# Expected output: "trainable params: ~1.7M || all params: ~250M || trainable%: ~0.6%"

trainable params: 1,769,472 || all params: 249,347,328 || trainable%: 0.7096


In [6]:
# ==========================================
# CELL 5: DATA PREPARATION (CORRECTED)
# ==========================================
from datasets import load_dataset
import numpy as np

# 1. Load SAMSum from the reliable mirror
print("Loading SAMSum dataset...")
try:
    # This mirror is currently the most stable version of SAMSum on the Hub
    dataset = load_dataset("knkarthick/samsum")
    print("✅ SAMSum loaded successfully from 'knkarthick/samsum'")
except Exception as e:
    print(f"❌ Error loading mirror: {e}")
    # Fallback to DialogSum if SAMSum is completely inaccessible
    print("Falling back to 'knkarthick/dialogsum' (similar dialogue data)...")
    dataset = load_dataset("knkarthick/dialogsum")

# 2. Preprocessing Function
# We prefix inputs with "Summarize: " for Flan-T5
def preprocess_function(sample):
    # Prefix input
    inputs = ["Summarize the following conversation:\n" + doc for doc in sample["dialogue"]]

    # Tokenize Input
    model_inputs = tokenizer(inputs, max_length=1024, padding="max_length", truncation=True)

    # Tokenize Target (Summary)
    labels = tokenizer(text_target=sample["summary"], max_length=128, padding="max_length", truncation=True)

    # Replace padding token ID with -100 (ignored by loss function)
    labels["input_ids"] = [
        [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels["input_ids"]
    ]

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# 3. Apply Processing
print("Tokenizing dataset (this may take 1-2 minutes)...")
tokenized_dataset = dataset.map(preprocess_function, batched=True)

print(f"✅ Data ready! Train size: {len(tokenized_dataset['train'])}")

Loading SAMSum dataset...


README.md: 0.00B [00:00, ?B/s]

train.csv: 0.00B [00:00, ?B/s]

validation.csv: 0.00B [00:00, ?B/s]

test.csv: 0.00B [00:00, ?B/s]

Generating train split:   0%|          | 0/14731 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/818 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/819 [00:00<?, ? examples/s]

✅ SAMSum loaded successfully from 'knkarthick/samsum'
Tokenizing dataset (this may take 1-2 minutes)...


Map:   0%|          | 0/14731 [00:00<?, ? examples/s]

Map:   0%|          | 0/818 [00:00<?, ? examples/s]

Map:   0%|          | 0/819 [00:00<?, ? examples/s]

✅ Data ready! Train size: 14731


In [8]:
# ==========================================
# CELL 6: TRAINING EXECUTION
# ==========================================
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments, DataCollatorForSeq2Seq
import evaluate

# 1. Metrics (ROUGE)
metric = evaluate.load("rouge")

def compute_metrics(eval_preds):
    preds, labels = eval_preds
    if isinstance(preds, tuple): preds = preds[0]
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)

    # Replace -100 in labels
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Compute ROUGE
    result = metric.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)
    return {k: round(v * 100, 4) for k, v in result.items()}

# 2. Trainer Configuration
training_args = Seq2SeqTrainingArguments(
    output_dir="flan-t5-samsum-lora-final",
    per_device_train_batch_size=8,    # Standard for T4 GPU
    per_device_eval_batch_size=8,
    learning_rate=1e-3,               # High LR is good for LoRA
    num_train_epochs=1,               # 1 Epoch is usually sufficient for this task
    logging_steps=100,
    save_strategy="epoch",
    eval_strategy="epoch",
    predict_with_generate=True,       # Generate summaries during eval
    report_to="tensorboard"
)

# 3. Initialize Trainer
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    data_collator=DataCollatorForSeq2Seq(tokenizer, model=model),
    compute_metrics=compute_metrics
)

# 4. Start Training
print("🚀 Starting Training...")
trainer.train()

Downloading builder script: 0.00B [00:00, ?B/s]

🚀 Starting Training...


  return fn(*args, **kwargs)
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,1.4605,1.404691,47.1421,23.1956,39.5488,39.5092


TrainOutput(global_step=1842, training_loss=1.4878273010253906, metrics={'train_runtime': 5079.3354, 'train_samples_per_second': 2.9, 'train_steps_per_second': 0.363, 'total_flos': 2.0334471187267584e+16, 'train_loss': 1.4878273010253906, 'epoch': 1.0})

In [9]:
# ==========================================
# CELL 7: SAVE TO GOOGLE DRIVE
# ==========================================
from google.colab import drive

# 1. Mount Drive (Pop-up will ask for permission)
drive.mount('/content/drive')

# 2. Define the save path
save_path = "/content/drive/My Drive/FinalProject_FlanT5_Samsum"
print(f"Saving model to {save_path}...")

# 3. Save the adapter and the tokenizer
trainer.model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

print("✅ Model saved successfully!")
print(f"You can check your Google Drive folder to see the files.")

Mounted at /content/drive
Saving model to /content/drive/My Drive/FinalProject_FlanT5_Samsum...
✅ Model saved successfully!
You can check your Google Drive folder to see the files.


In [11]:
# ==========================================
# CELL 8: TEST THE MODEL (INFERENCE) - FIXED
# ==========================================
from transformers import GenerationConfig

# 1. Create a sample conversation
sample_dialogue = """
Joey: Hey, are we still on for the movies tonight?
Chandler: Yeah, but I might be a little late. Work is crazy.
Joey: No worries. Ross said he's coming too.
Chandler: Oh great. Is he bringing meaningful conversation or dinosaurs?
Joey: Dinosaurs, obviously.
Chandler: Fine. I'll meet you guys at 8 PM in front of the theater.
"""

# 2. Format inputs and move to GPU
input_text = "Summarize the following conversation:\n" + sample_dialogue
inputs = tokenizer(input_text, return_tensors="pt").to("cuda")

# 3. Generate Summary
# CHANGE IS HERE: We added 'input_ids=' to make it a keyword argument
outputs = model.generate(
    input_ids=inputs.input_ids,
    generation_config=GenerationConfig(max_new_tokens=100, num_beams=5)
)

text_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

print("-" * 30)
print(f"DIALOGUE:\n{sample_dialogue}")
print("-" * 30)
print(f"MODEL SUMMARY:\n{text_output}")
print("-" * 30)

------------------------------
DIALOGUE:

Joey: Hey, are we still on for the movies tonight?
Chandler: Yeah, but I might be a little late. Work is crazy.
Joey: No worries. Ross said he's coming too.
Chandler: Oh great. Is he bringing meaningful conversation or dinosaurs?
Joey: Dinosaurs, obviously.
Chandler: Fine. I'll meet you guys at 8 PM in front of the theater.

------------------------------
MODEL SUMMARY:
Chandler will meet Joey and Ross at 8 PM in front of the theater to see Dinosaurs.
------------------------------


In [16]:
# Install the missing library
!pip install bert_score

Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bert_score
Successfully installed bert_score-0.3.13


In [18]:
# ==========================================
# CELL 9 (FIXED): GET BERTSCORE (NO TRAINING)
# ==========================================
import evaluate
import numpy as np
import torch
from transformers import GenerationConfig

# 1. Install Library (Just in case)
!pip install -q bert_score

# 2. Load Metric
print("⏳ Loading BERTScore metric...")
bertscore = evaluate.load("bertscore")

# 3. Generate Summaries for a slice of the test set
# We take 50 examples to be fast
print("Generating summaries for scoring (this takes ~1-2 mins)...")
test_slice = tokenized_dataset["test"].select(range(50))
input_ids = torch.tensor(test_slice["input_ids"]).to("cuda")

# --- FIX IS HERE ---
# We must use "input_ids=" explicitly
summary_ids = model.generate(
    input_ids=input_ids,  # <--- This was the missing part
    generation_config=GenerationConfig(max_new_tokens=100, num_beams=2)
)
# -------------------

decoded_preds = tokenizer.batch_decode(summary_ids, skip_special_tokens=True)

# 4. Get References (Human Summaries)
# Replace -100 with padding to decode
labels = np.where(np.array(test_slice["labels"]) != -100, test_slice["labels"], tokenizer.pad_token_id)
decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

# 5. Compute the Score
print("Computing BERTScore...")
bert_result = bertscore.compute(predictions=decoded_preds, references=decoded_labels, lang="en")
final_bertscore = np.mean(bert_result["f1"]) * 100

print("-" * 30)
print(f"✅ BERTScore (F1): {final_bertscore:.4f}")
print("-" * 30)

⏳ Loading BERTScore metric...
Generating summaries for scoring (this takes ~1-2 mins)...
Computing BERTScore...


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


------------------------------
✅ BERTScore (F1): 90.9513
------------------------------


In [14]:
# ==========================================
# FINAL PROJECT DEMO: INTERACTIVE APP
# ==========================================

# 1. Install necessary libraries
# We need Gradio for the interface and Transformers for the model
!pip install -q gradio transformers peft torch

import gradio as gr
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig
from peft import PeftModel, PeftConfig
from google.colab import drive

# 2. Mount Drive to access your trained model
# (If you are already mounted, this will just say "Drive already mounted")
drive.mount('/content/drive')

# 3. Load the Model (The "Baked Cake")
print("Loading your fine-tuned model from Drive... Please wait.")

# Path where we saved it earlier
save_path = "/content/drive/My Drive/FinalProject_FlanT5_Samsum"

# Load Tokenizer
tokenizer = AutoTokenizer.from_pretrained(save_path)

# Load Base Model (Google Flan-T5)
base_model = AutoModelForSeq2SeqLM.from_pretrained(
    "google/flan-t5-base",
    device_map="auto" if torch.cuda.is_available() else "cpu"
)

# Load Your Learned Adapters (LoRA)
model = PeftModel.from_pretrained(base_model, save_path)
model.eval() # Set to evaluation mode

print("✅ Model loaded successfully!")

# 4. Define the Summarization Function
def summarize_dialogue(input_text):
    # Add the prefix we used during training
    prompt = "Summarize the following conversation:\n" + input_text

    # Tokenize
    inputs = tokenizer(prompt, return_tensors="pt")

    # Move to GPU if available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    inputs = inputs.to(device)

    # Generate Summary
    # We use beam search (num_beams=5) for high-quality text
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs.input_ids,
            generation_config=GenerationConfig(
                max_new_tokens=150,
                num_beams=5,
                repetition_penalty=2.5, # Prevents repeating phrases
                early_stopping=True
            )
        )

    # Decode result
    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return summary

# 5. Create the Web Interface
# This builds the visual "App" your professor will see
demo = gr.Interface(
    fn=summarize_dialogue,
    inputs=gr.Textbox(
        lines=10,
        placeholder="Paste a dialogue here (e.g., WhatsApp chat)...",
        label="Input Dialogue"
    ),
    outputs=gr.Textbox(label="Generated Summary"),
    title="Dialogue Summarization System",
    description="Final Project Demo: Fine-Tuned Flan-T5 with LoRA Adapters.",
    examples=[
        # Example 1
        ["""Joey: Hey, are we still on for the movies tonight?
Chandler: Yeah, but I might be a little late. Work is crazy.
Joey: No worries. Ross said he's coming too.
Chandler: Oh great. Is he bringing meaningful conversation or dinosaurs?
Joey: Dinosaurs, obviously.
Chandler: Fine. I'll meet you guys at 8 PM."""],

        # Example 2
        ["""Customer: Hi, I bought a laptop yesterday but it won't turn on.
Support: I'm sorry to hear that. Is the charging light on when you plug it in?
Customer: No, nothing happens.
Support: It sounds like a defective battery. Please bring it to the store for a replacement.
Customer: Okay, I will come tomorrow morning. Thanks."""]
    ]
)

# 6. Launch the App
print("Launching Demo...")
demo.launch(share=True, debug=True)

[31mERROR: Operation cancelled by user[0m[31m
[0mDrive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Loading your fine-tuned model from Drive... Please wait.
✅ Model loaded successfully!
Launching Demo...
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://3a65e7d352f20c8b89.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://3a65e7d352f20c8b89.gradio.live


