In [None]:
# Import necessary libraries
from datasets import load_from_disk
from google.colab import drive

# 1. Mount Google Drive (the dataset was saved there with `save_to_disk`)
print("Mounting Google Drive...")
drive.mount('/content/drive')

# 2. Load the dataset from Drive
print("Loading dataset from Google Drive...")
saved_path = "/content/drive/MyDrive/cnn_dailymail_dataset"
cnn_dailymail_dataset = load_from_disk(saved_path)

# 3. Display the dataset info to confirm it's loaded
print("\nDataset loaded successfully:")
print(cnn_dailymail_dataset)

# 4. Inspect the first example
# Show a bounded preview of every field rather than the raw record: the
# 'article' field holds an entire news story, and printing it in full buries
# the rest of the cell output under a wall of text.
preview_chars = 300
print("\n--- First Example ---")
first_example = cnn_dailymail_dataset["train"][0]
for field_name, field_value in first_example.items():
    field_text = str(field_value)
    if len(field_text) > preview_chars:
        field_text = field_text[:preview_chars] + "..."
    print(f"{field_name}: {field_text}")

Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Loading dataset from Google Drive...

Dataset loaded successfully:
DatasetDict({
    train: Dataset({
        features: ['article', 'highlights', 'id'],
        num_rows: 287113
    })
    validation: Dataset({
        features: ['article', 'highlights', 'id'],
        num_rows: 13368
    })
    test: Dataset({
        features: ['article', 'highlights', 'id'],
        num_rows: 11490
    })
})

--- First Example ---
{'article': 'LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported £20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won\'t cast a spell on him. Daniel Radcliffe as Harry Potter in "Harry Potter and the Order of the Phoenix" To the disappointment of gossip columnists around the world, the young actor says he has no plans to fritter his cash away on 

In [None]:
!pip install -q -U trl peft bitsandbytes

In [None]:
!pip install -q -U bitsandbytes transformers datasets trl peft
# !pip install -q -U transformers datasets trl peft bitsandbytes

In [None]:
# Import all necessary libraries
import os
import torch
import pickle
import pandas as pd
from datasets import load_from_disk
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, BitsAndBytesConfig
from peft import LoraConfig
from trl import SFTTrainer
from google.colab import drive

# Restrict this process to GPU index 0 before any model is placed on a device.
# NOTE(review): this assignment runs after `import torch`; it presumably only
# takes effect because CUDA has not been initialised yet at this point --
# confirm, or move it ahead of the torch import.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# 1. Mount Google Drive and load the dataset
print("Mounting Google Drive and loading dataset...")
drive.mount('/content/drive')
full_dataset = load_from_disk("/content/drive/MyDrive/cnn_dailymail_dataset")

# 2. Create a smaller subset for faster fine-tuning
# Both splits are shuffled with a fixed seed so the same 2000 training rows and
# 200 validation rows are selected on every run.
train_subset = full_dataset["train"].shuffle(seed=42).select(range(2000))
eval_subset = full_dataset["validation"].shuffle(seed=42).select(range(200))

# 3. Format the data into a prompt template for Gemma
def format_instruction(sample):
    """Render one dataset row as an instruction-style training prompt.

    Args:
        sample: Mapping with an 'article' entry (the text to summarise) and a
            'highlights' entry (the reference summary).

    Returns:
        A single string with "### Instruction:", "### Input:" and
        "### Response:" sections, terminated by a newline.
    """
    prompt_lines = [
        "### Instruction:",
        "Summarize the following news article.",
        "",
        "### Input:",
        f"{sample['article']}",
        "",
        "### Response:",
        f"{sample['highlights']}",
        "",  # trailing empty entry yields the final newline after the summary
    ]
    return "\n".join(prompt_lines)

# 4. Load tokenizer and model
model_id = "google/gemma-2b-it"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the end-of-sequence token as the padding token.
# NOTE(review): this overrides whatever pad token the checkpoint ships with --
# confirm that is intended.
tokenizer.pad_token = tokenizer.eos_token

# Configure 4-bit Quantization
# Weights are loaded in 4-bit NF4; bfloat16 is requested as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the model
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto" # Use 'auto' as 'cuda' can conflict with the env var
)
# Turn off the generation KV cache for training; presumably because gradient
# checkpointing is enabled in the training arguments -- TODO confirm.
model.config.use_cache = False

# 5. Configure LoRA
# Rank-8 adapters on the modules listed below -- going by their names, the
# attention projections (q/k/v/o) and the MLP projections (gate/up/down).
# No alpha or dropout is given, so the library defaults apply.
lora_config = LoraConfig(
    r=8,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)

# 6. Define Training Arguments
# Batch of 2 with 8 accumulation steps gives 16 samples per optimizer step
# (per device). Checkpoints go to Google Drive and are written once per epoch.
training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/gemma_summarizer_run",
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    gradient_checkpointing=True,
    learning_rate=2e-4,
    logging_steps=20,
    save_strategy="epoch",
    report_to="none"  # no external experiment tracker
)

# 7. Create the SFTTrainer
# Each row is turned into a prompt string by `format_instruction`; the LoRA
# config is handed to the trainer, which wraps the quantized base model.
# NOTE(review): the `tokenizer` configured earlier in this cell is not passed
# to the trainer -- confirm which tokenizer (and pad token) it ends up using.
# NOTE(review): `eval_dataset` is supplied, but the training arguments set no
# evaluation strategy and the logged output shows only training loss --
# confirm whether evaluation is expected to run.
trainer = SFTTrainer(
    model=model,
    train_dataset=train_subset,
    eval_dataset=eval_subset,
    args=training_args,
    peft_config=lora_config,
    formatting_func=format_instruction,
)

# 8. Start the fine-tuning run
print("\nStarting Gemma fine-tuning...")
trainer.train()

print("\n--- Gemma fine-tuning complete! ---")

Mounting Google Drive and loading dataset...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Applying formatting function to train dataset:   0%|          | 0/2000 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/2000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/2000 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/2000 [00:00<?, ? examples/s]

Applying formatting function to eval dataset:   0%|          | 0/200 [00:00<?, ? examples/s]

Adding EOS to eval dataset:   0%|          | 0/200 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/200 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/200 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.



Starting Gemma fine-tuning...


Step,Training Loss
20,2.768
40,2.4948
60,2.4086
80,2.405
100,2.382
120,2.3823



--- Gemma fine-tuning complete! ---


In [None]:
# Import all necessary libraries for this step
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, pipeline
from google.colab import drive
import torch

# 1. Mount Google Drive
# The fine-tuned checkpoint is read from Drive, so it has to be mounted first.
print("Mounting Google Drive...")
drive.mount('/content/drive')

# 2. Load the fine-tuned model and tokenizer
# This loads the base Gemma model and applies LoRA adapters on top.
# NOTE(review): "checkpoint-125" is hard-coded. It presumably corresponds to
# the training cell's settings (2000 rows / (batch 2 x 8 accumulation steps)
# = 125 optimizer steps for one epoch) -- update it if those settings change.
model_path = "/content/drive/MyDrive/gemma_summarizer_run/checkpoint-125"
print(f"Loading fine-tuned model from: {model_path}")
# Weights are requested in bfloat16 here; no quantization config is passed,
# unlike the training cell.
model = AutoPeftModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16
)
# The tokenizer is read from the checkpoint directory rather than the hub id.
tokenizer = AutoTokenizer.from_pretrained(model_path)

# 3. Create a text-generation pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
print("Pipeline created successfully.")

# 4. Prepare a test article and format it with the prompt template
# A hand-picked article used as a single smoke test of the fine-tuned model.
test_article = """(CNN) -- A woman who died after an e-scooter collision in London has been named as 31-year-old YouTuber and TV presenter Emily Hartridge. Her death was announced on her official Instagram account on Saturday. The statement said: "This is a horrible thing to have to say over Instagram but we know many of you were expecting to see Emily today and this is the only way to contact you all at once. Emily was involved in an accident yesterday and passed away. We all loved her to bits and she will never be forgotten. She has touched so many lives it's hard to imagine things without her. She was a very special person." Hartridge was a well-known online personality in the UK with more than 340,000 subscribers on her YouTube channel. She was also one of the presenters of a British TV show called "Oh S**t I'm 30," which aired on the UK's Channel 4. A representative for Hartridge confirmed to CNN that she died after being involved in a crash between an electric scooter and a truck on Friday. London's Metropolitan Police said in a statement that a woman in her 30s was pronounced dead at the scene of such a collision in southwest London, but did not formally identify her. The statement said the truck driver "has been spoken to by police." No arrests have been made and inquiries continue, police added."""

# Same section layout as the training template in `format_instruction`, but it
# stops right after the "### Response:" header so the model writes the summary.
prompt = f"""### Instruction:
Summarize the following news article.

### Input:
{test_article}

### Response:
"""

# 5. Generate the summary
print("\nGenerating summary...")
# Greedy decoding (do_sample=False) keeps the output reproducible.
# return_full_text=False makes the pipeline return only the newly generated
# continuation, so the summary no longer has to be located by searching the
# full text for "### Response:" -- that search picked the wrong segment
# whenever the article itself contained the marker.
outputs = pipe(prompt, max_new_tokens=100, do_sample=False, return_full_text=False)
generated_text = outputs[0]['generated_text']

# Extract just the response part
# If the model runs on and opens another "### Response:" section, keep only
# the text before it (same cut-off the previous split-based extraction had).
summary = generated_text.split("### Response:")[0].strip()

print("\n--- Model's Summary ---")
print(summary)

Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Loading fine-tuned model from: /content/drive/MyDrive/gemma_summarizer_run/checkpoint-125


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Pipeline created successfully.

Generating summary...

--- Model's Summary ---
Emily Hartridge, 31, died after an e-scooter collision in London.
YouTube personality and TV presenter died on Friday.
She was one of the presenters of a British TV show called "Oh S**t I'm 30"


## Evaluation of the Fine-Tuned Gemma Summarizer (gemma-2b-it)

In [None]:
!pip install -q -U evaluate rouge_score bert_score sacrebleu

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.1/104.1 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m123.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m94.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m53.6 MB/s[0m eta [36m0:0

In [None]:
# Import all necessary libraries
from datasets import load_from_disk
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
from google.colab import drive
import torch
import evaluate
from tqdm import tqdm

"""
Try 2: Instead of splitting the text, let's remove the original prompt
from the beginning of the generated output. This will leave us with just the
summary, regardless of how the model formats it.

"""

# 1. Mount Google Drive
print("Mounting Google Drive...")
drive.mount('/content/drive')

# 2. Load fine-tuned Gemma model and tokenizer
# Same checkpoint path and load settings as the single-article inference cell.
# NOTE(review): the checkpoint name is hard-coded and this repeats that cell's
# model load -- confirm GPU memory is sufficient if both cells run in one session.
model_path = "/content/drive/MyDrive/gemma_summarizer_run/checkpoint-125"
print(f"Loading fine-tuned model from: {model_path}")
model = AutoPeftModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# 3. Load the original CNN/DailyMail dataset
print("Loading original CNN/DailyMail dataset...")
full_dataset = load_from_disk("/content/drive/MyDrive/cnn_dailymail_dataset")
# Evaluate on the first 100 rows of the test split, in stored order (no shuffle).
test_slice = full_dataset["test"].select(range(100))

# 4. Generate summaries for the test set
print("\nGenerating summaries for the test set...")
model_summaries = []
human_summaries = []

# Fixed prompt scaffolding; it mirrors the template used for fine-tuning and
# ends right after the "### Response:" header so the model writes the summary.
prompt_head = "### Instruction:\nSummarize the following news article.\n\n### Input:\n"
prompt_tail = "\n\n### Response:\n"

# Token budget for the whole prompt, and what remains for the article once the
# scaffolding (including any special tokens the tokenizer adds) is counted.
max_prompt_tokens = 1024
scaffold_tokens = len(tokenizer(prompt_head + prompt_tail)["input_ids"])
article_budget = max_prompt_tokens - scaffold_tokens

for example in tqdm(test_slice):
    article = example['article']
    human_summaries.append(example['highlights'])

    # Truncate the ARTICLE, not the finished prompt. Truncating the prompt cuts
    # from the end, which removes the "### Response:" header for long articles,
    # so the model is never actually asked to summarise. Articles that already
    # fit are used verbatim; only over-long ones are cut and decoded back.
    article_ids = tokenizer(article, add_special_tokens=False)["input_ids"]
    if len(article_ids) > article_budget:
        article = tokenizer.decode(article_ids[:article_budget], skip_special_tokens=True)

    prompt = f"{prompt_head}{article}{prompt_tail}"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_token_count = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=128)

    # Keep only the newly generated tokens. Slicing the decoded text by
    # len(prompt) is unreliable: the decoded prompt need not have the same
    # character length as the original string (special tokens are dropped,
    # and previously the prompt could have been truncated), which chopped
    # into -- or completely removed -- the generated summary.
    new_token_ids = outputs[0][prompt_token_count:]
    model_summary = tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()
    model_summaries.append(model_summary)

# 5. Compute the metrics
# ROUGE and BERTScore take one reference string per prediction; BLEU takes a
# list of references per prediction, hence the single-element wrapping below.
print("\nComputing evaluation scores...")
rouge = evaluate.load('rouge')
bleu = evaluate.load('bleu')
bertscore = evaluate.load("bertscore")

rouge_scores = rouge.compute(
    predictions=model_summaries,
    references=human_summaries,
)
bleu_scores = bleu.compute(
    predictions=model_summaries,
    references=[[reference] for reference in human_summaries],
)
bertscore_scores = bertscore.compute(
    predictions=model_summaries,
    references=human_summaries,
    lang="en",
)

# Report results. BERTScore is returned per example, so each component is
# averaged over the evaluated examples before printing.
print("\n--- Evaluation Complete ---")
print("\nROUGE Scores:", rouge_scores, sep="\n")
print("\nBLEU Score:", bleu_scores, sep="\n")
print("\nBERTScore (mean values):")
print({
    component: sum(bertscore_scores[component]) / len(bertscore_scores[component])
    for component in ("precision", "recall", "f1")
})

Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Loading fine-tuned model from: /content/drive/MyDrive/gemma_summarizer_run/checkpoint-125


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading original CNN/DailyMail dataset...

Generating summaries for the test set...


100%|██████████| 100/100 [05:48<00:00,  3.48s/it]



Computing evaluation scores...


Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  return forward_call(*args, **kwargs)



--- Evaluation Complete ---

ROUGE Scores:
{'rouge1': np.float64(0.2724245233109819), 'rouge2': np.float64(0.11358594057754887), 'rougeL': np.float64(0.2081200979305151), 'rougeLsum': np.float64(0.254887922470585)}

BLEU Score:
{'bleu': 0.09701292467862943, 'precisions': [0.305591677503251, 0.11367861885790173, 0.06295793758480325, 0.04049930651872399], 'brevity_penalty': 1.0, 'length_ratio': 1.027525387493319, 'translation_length': 3845, 'reference_length': 3742}

BERTScore (mean values):
{'precision': 0.7019999670982361, 'recall': 0.7086302155256271, 'f1': 0.7051729357242584}


