In [2]:
import pandas as pd
from pathlib import Path

# Inputs for the evaluation table: grouped reviews, reference summaries,
# and the outputs of the baseline and fine-tuned models.
grouped_path   = Path("../data/processed/grouped_summary.csv")
label_path     = Path("../data/processed/summary_labeled.csv")
base_path      = Path("../data/processed/final_summary_based.csv")
finetuned_path = Path("../data/processed/final_summary_finetuned.csv")

df_grouped   = pd.read_csv(grouped_path)
df_labels    = pd.read_csv(label_path)
df_base      = pd.read_csv(base_path)
df_finetuned = pd.read_csv(finetuned_path)

# Rename the generic "summary" columns so each source stays identifiable after
# the merge. df_finetuned is not renamed here; presumably its CSV already ships
# a `summary_finetuned` column (it shows up in the merged columns as-is).
df_labels = df_labels.rename(columns={"summary": "summary_ref"})
df_base   = df_base.rename(columns={"summary": "summary_base"})

# Every table is expected to hold exactly one row per game.
MERGE_KEYS = ["app_id", "app_name"]

# Inner-join all four tables on the game keys. validate="one_to_one" makes
# pandas raise MergeError when a key is duplicated on either side, instead of
# silently multiplying rows and skewing the averaged scores computed later.
df_eval = (
    df_grouped
    .merge(df_labels,    on=MERGE_KEYS, how="inner", validate="one_to_one")
    .merge(df_base,      on=MERGE_KEYS, how="inner", validate="one_to_one")
    .merge(df_finetuned, on=MERGE_KEYS, how="inner", validate="one_to_one")
)

print(df_eval.columns)
print("Eval rows:", len(df_eval))
df_eval.head()


Index(['app_id', 'app_name', 'all_reviews', 'summary_ref', 'summary_base',
       'summary_finetuned'],
      dtype='object')
Eval rows: 50


Unnamed: 0,app_id,app_name,all_reviews,summary_ref,summary_base,summary_finetuned
0,10,Counter-Strike,DO NOT BUY! Steam is up to their old tricks ag...,Counter-Strike is widely regarded as a classic...,"– It's a good game, butu must to make strong v...",– I like old games I mean I grew up playing th...
1,20,Team Fortress Classic,tryed playing this game when i boot it up im o...,Team Fortress Classic is regarded by many as a...,"– Team Fortress Classic isn't a game, but it's...",– Team Fortress Classic isn't worth the 5 00 t...
2,30,Day of Defeat,There was a time when DoF was great Unfortunat...,Reviewers generally find Day of Defeat to be a...,"– Day of Defeat is a Valve game, but it's not ...",– It was a great FPS in its time however with ...
3,40,Deathmatch Classic,Deathmatch Classic is a multiplayer DM action ...,Deathmatch Classic is widely regarded as a fai...,– Deathmatch Classic isn't the only game that'...,Deathmatch Classic is a multiplayer DM action ...
4,50,Half-Life: Opposing Force,I m assuming most of the reviews are made from...,Half-Life: Opposing Force receives mixed revie...,"– A game that has a lot of glitches, but isn't...",– This isn't the first time I've seen a game t...


In [4]:
import evaluate

# Load the ROUGE metric through the `evaluate` library; compute_rouge below
# reads this module-level object when scoring.
rouge = evaluate.load("rouge")

def compute_rouge(preds, refs, metric=None):
    """Score predictions against references and return the results as percentages.

    Args:
        preds: Iterable of predicted summaries. Non-string entries (for example
            the float NaN pandas produces for an empty CSV cell) are scored as
            empty strings; strings are stripped of surrounding whitespace.
        refs: Iterable of reference summaries aligned with ``preds``; cleaned
            the same way.
        metric: Optional object exposing
            ``compute(predictions=..., references=..., use_stemmer=...)`` and
            returning a mapping of score name to number. Defaults to the
            module-level ``rouge`` metric.

    Returns:
        dict mapping each score name to a plain Python ``float``, multiplied by
        100 and rounded to two decimals.
    """
    if metric is None:
        metric = rouge

    # Replace missing values with "" and trim whitespace in a single pass.
    preds = [p.strip() if isinstance(p, str) else "" for p in preds]
    refs = [r.strip() if isinstance(r, str) else "" for r in refs]

    scores = metric.compute(predictions=preds, references=refs, use_stemmer=True)

    # float() drops the numpy scalar wrapper so the dict prints as
    # {'rouge1': 24.14, ...} rather than {'rouge1': np.float64(24.14), ...}.
    return {name: round(float(value) * 100, 2) for name, value in scores.items()}

# Score each model's summaries against the same reference column.
base_scores = compute_rouge(
    df_eval["summary_base"],
    df_eval["summary_ref"],
)
ft_scores = compute_rouge(
    df_eval["summary_finetuned"],
    df_eval["summary_ref"],
)

# One print per model: heading and scores on separate lines.
print("Baseline flan-t5-small vs reference:", base_scores, sep="\n")
print("\nLoRA-finetuned flan-t5-small vs reference:", ft_scores, sep="\n")


Baseline flan-t5-small vs reference:
{'rouge1': np.float64(24.14), 'rouge2': np.float64(3.77), 'rougeL': np.float64(14.4), 'rougeLsum': np.float64(14.4)}

LoRA-finetuned flan-t5-small vs reference:
{'rouge1': np.float64(23.84), 'rouge2': np.float64(3.81), 'rougeL': np.float64(13.11), 'rougeLsum': np.float64(13.11)}


In [None]:
import random

# Qualitative spot check: print a few games with their source reviews, the
# reference summary, and both model outputs side by side.
SAMPLE_SIZE = 5
SAMPLE_SEED = 42  # fixed seed so a re-run shows the same games

# A dedicated Random instance keeps the draw reproducible without touching the
# global random state. min() avoids the ValueError random.sample raises when
# asked for more items than the population holds.
sample_rng = random.Random(SAMPLE_SEED)
sample_positions = sample_rng.sample(
    range(len(df_eval)), min(SAMPLE_SIZE, len(df_eval))
)

for i in sample_positions:
    row = df_eval.iloc[i]

    # An empty CSV cell is read as float NaN, which cannot be sliced.
    reviews = row["all_reviews"]
    if not isinstance(reviews, str):
        reviews = ""

    print("=" * 80)
    print(f"Game: {row['app_name']} (app_id={row['app_id']})\n")
    print("REVIEWS (truncated):")
    print(reviews[:500], "...\n")
    print("REFERENCE (Gemini):")
    print(row["summary_ref"], "\n")
    print("BASELINE FLAN:")
    print(row["summary_base"], "\n")
    print("FINETUNED LoRA:")
    print(row["summary_finetuned"], "\n")


Game: DEFCON (app_id=1520)

REVIEWS (truncated):
Such a simple title should be playable on a touch screen! It isn t! ||| i just couldnt turn my enemies cities into a living hell fast enough also kind of expensive lack of content ||| Buy this game from Steam and you might face failed game key authentication issues Neither the developer or Steam are willing to take responsibility and offer a fix I m unable to play the game at all ||| I accidentally left this open for 16 hours finally tried it and got bored ||| to anyone looking to purchase the g ...

REFERENCE (Gemini):
DEFCON is largely praised for its brilliant atmosphere, successfully conveying the tension and horror of global thermonuclear war. Players appreciate its strategic gameplay, which some find deeply involving with challenging AI, while its retro, 80s-inspired graphics and grim, scary soundtrack are consistently lauded for enhancing the immersive experience. However, some users found the gameplay to be too simple, repetitive