## Basic setup & imports

In [1]:
import os
import sys


# The current working directory is treated as the project root, so the notebook
# must be started from the repository's top-level folder.
project_root = os.getcwd()
src_path = os.path.join(project_root, "src")

# Make the modules under src/ importable; skip the append when already registered.
if sys.path.count(src_path) == 0:
    sys.path.append(src_path)

for label, location in (("Project root:", project_root), ("Src path:", src_path)):
    print(label, location)

Project root: c:\Users\birke\OneDrive\Desktop\projects\hw6_mini_summarizer\hw6-mini-summarizer
Src path: c:\Users\birke\OneDrive\Desktop\projects\hw6_mini_summarizer\hw6-mini-summarizer\src


In [None]:


# Import the project modules from src/ (these imports require the third-party `rouge_score` package)
from dataset_utils import load_data, train_test_split
from evaluate import run_baseline_on_dataset, compute_rouge_scores, average_rouge
from model_pipeline import load_summarizer, run_model_on_dataset
from build_dataset import build_dataset, clean_text

ModuleNotFoundError: No module named 'rouge_score'

## Build dataset

In [None]:
# Run the project's dataset builder (imported from the build_dataset module).
# NOTE(review): presumably this writes data/summarization_data.json, which is loaded next — confirm.
build_dataset()

## Load and split data

In [None]:
# Relative path: resolved against the current working directory.
data_path = "data/summarization_data.json"
examples = load_data(data_path)

# Preview: total number of examples and the first record.
len(examples), examples[0]


In [None]:
# Partition the loaded examples by the value of their "split" field.
# Records without a "split" key (or with any other value) end up in neither list.
def _with_split(split_name):
    """Return the examples whose "split" field equals ``split_name``."""
    return [record for record in examples if record.get("split") == split_name]


train_examples = _with_split("train")
test_examples = _with_split("test")

len(train_examples), len(test_examples)


## Prepare gold summaries

In [None]:
# Reference summaries of the test split, kept in the same order as test_examples
# so they can be indexed alongside the predictions (both are later accessed by the same index).
gold_summaries = [ex["summary"] for ex in test_examples]
len(gold_summaries)


## Run baseline on test set

In [None]:
# Produce a baseline summary for the test examples.
# NOTE(review): the baseline strategy lives in the evaluate module and is not visible here.
baseline_preds = run_baseline_on_dataset(test_examples)

# Sanity check: number of predictions and the first one.
len(baseline_preds), baseline_preds[0]


## Load the model

In [None]:
# Device index handed to load_summarizer: 0 = GPU, -1 = CPU.
# The GPU stays the default, but when PyTorch is importable and reports no CUDA
# device we fall back to the CPU so the notebook also runs on GPU-less machines.
device = 0
try:
    import torch
except ImportError:
    pass  # cannot probe for CUDA here; keep the original GPU default
else:
    if not torch.cuda.is_available():
        device = -1

summarizer = load_summarizer(device=device)


In [None]:
# Smoke test: summarize a single test document before launching the full run.
first_document = test_examples[0]["document"]
test_text = first_document[:1000]  # keep only the first 1000 characters for speed
quick_result = summarizer(test_text, max_new_tokens=60, min_length=15, truncation=True)
summary_example = quick_result[0]["summary_text"]
summary_example


## Run model on full test set

In [None]:
# Summarize the test examples with the loaded model, using the same 15 / 60
# limits as the quick test.
# NOTE(review): presumably forwarded to the summarizer as min_length / max_new_tokens — confirm in model_pipeline.
generation_limits = {"min_len": 15, "max_new_tokens": 60}
model_preds = run_model_on_dataset(summarizer, test_examples, **generation_limits)

# Sanity check: number of predictions and the first one.
len(model_preds), model_preds[0]


In [None]:
# Compare the reference and the model output for one test example (index i).
i = 0
gold_text, model_text = gold_summaries[i], model_preds[i]
print("GOLD SUMMARY:\n", gold_text, "\n")
print("MODEL SUMMARY:\n", model_text)


## Compute ROUGE for baseline and model

In [None]:
# Baseline ROUGE: score the baseline predictions against the gold summaries,
# then reduce the scores to an average (presumably one score entry per test example — confirm in evaluate).
baseline_scores = compute_rouge_scores(gold_summaries, baseline_preds)
baseline_avg = average_rouge(baseline_scores)


# Model ROUGE: same two steps against the same gold summaries, so both systems are comparable.
model_scores = compute_rouge_scores(gold_summaries, model_preds)
model_avg = average_rouge(model_scores)


def print_rouge(name, avg):
    """Print a short ROUGE report for one system.

    Args:
        name: Label printed as the report header (followed by a colon).
        avg: Mapping with numeric values under the keys "rouge1" and "rougeL".

    Each value is printed on its own indented line, formatted to four decimals.
    """
    print(f"{name}:")
    for label, key in (("ROUGE-1 F", "rouge1"), ("ROUGE-L F", "rougeL")):
        print(f"  {label}: {avg[key]:.4f}")

# Report the averaged scores of both systems, baseline first.
for system_name, system_avg in (("Baseline", baseline_avg), ("Model", model_avg)):
    print_rouge(system_name, system_avg)


## View graphs

In [None]:
def _rouge1_fmeasures(score_list):
    """Collect the ROUGE-1 ``fmeasure`` attribute from each entry of ``score_list``."""
    return [entry["rouge1"].fmeasure for entry in score_list]


# ROUGE-1 F-measure lists for both systems, in score order.
baseline_r1 = _rouge1_fmeasures(baseline_scores)
model_r1 = _rouge1_fmeasures(model_scores)

len(baseline_r1), len(model_r1)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Element-wise ROUGE-1 gain of the model over the baseline (positive = model scored higher).
improvements = np.subtract(model_r1, baseline_r1)

fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(improvements, bins=15)

ax.set_xlabel("ROUGE-1 Improvement (Model - Baseline)")
ax.set_ylabel("Number of Examples")
ax.set_title("Distribution of ROUGE-1 Improvement")

ax.axvline(0.0, linestyle="--")  # reference line at "no change"

fig.tight_layout()
plt.show()

# Summary statistics; each one is computed right before it is printed.
for label, statistic in (
    ("Mean improvement:", improvements.mean),
    ("Min improvement:", improvements.min),
    ("Max improvement:", improvements.max),
):
    print(label, statistic())


In [None]:
def text_length(text: str) -> int:
    """Return the number of whitespace-separated words in ``text``.

    Runs of whitespace count as a single separator, so an empty or
    whitespace-only string has length 0.
    """
    words = text.split()
    return len(words)

# Word count of every test document, in the same order as the per-example scores.
doc_lengths = [text_length(example["document"]) for example in test_examples]

fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(doc_lengths, model_r1, alpha=0.7)

ax.set_xlabel("Document Length (words)")
ax.set_ylabel("ROUGE-1 F-measure (Model)")
ax.set_title("Model ROUGE-1 vs Document Length")

fig.tight_layout()
plt.show()


## Save predictions

In [None]:
import json

# Bundle the references and both systems' predictions under fixed keys.
output = dict(
    gold=gold_summaries,
    baseline=baseline_preds,
    model=model_preds,
)

# ensure_ascii=False keeps non-ASCII characters readable in the UTF-8 file.
with open("data/predictions.json", "w", encoding="utf-8") as predictions_file:
    json.dump(output, predictions_file, ensure_ascii=False, indent=2)

print("Saved predictions to data/predictions.json")


## Pick random examples

In [None]:
import random
random.seed(0)  # fixed seed so the same examples are selected on every run

# random.sample raises ValueError when asked for more items than the population
# holds, so cap the request for test sets with fewer than three examples.
num_to_show = min(3, len(test_examples))
indices = random.sample(range(len(test_examples)), num_to_show)
indices


In [None]:
# For each sampled index, print a document excerpt (first 800 characters)
# followed by the gold, baseline, and model summaries.
for i in indices:
    print("=" * 80)
    print(f"Example {i}")
    print("\nDOCUMENT:\n", test_examples[i]["document"][:800], "...\n")
    for heading, summaries in (
        ("GOLD SUMMARY:\n", gold_summaries),
        ("BASELINE SUMMARY:\n", baseline_preds),
        ("MODEL SUMMARY:\n", model_preds),
    ):
        print(heading, summaries[i], "\n")
