In [1]:
import pandas as pd
from huggingface_hub import InferenceClient
from tqdm import tqdm
from bert_score import score
from rouge_score import rouge_scorer

In [4]:
path = "../Data/ToS_with_summary.json"  
text = "plain_text"                   
summary = "summary"             
n = 5 

In [5]:
df = pd.read_json(path, lines=True)
df = df[[text, summary]].dropna().sample(n)
# df.head()

In [6]:
def build_prompt(text):
    return f"""Summarize the following terms of service clearly and concisely:

{text}
"""

In [7]:
hf_token = "******************************"
model_id = "facebook/bart-large-cnn"  # or another one
client = InferenceClient(model=model_id, token=hf_token)

In [None]:
generated = []

for i, row in tqdm(df.iterrows(), total=len(df)):
    prompt = build_prompt(row[text])
    try:
        response = client.text_generation(prompt, max_new_tokens=400, temperature=0.5)
        generated.append({
            "original": row[text][:300] + "...",
            "ground_truth": row[summary],
            "generated_summary": response
        })
    except Exception as e:
        print(f"❌ Error on row {i}: {e}")
        if "response" in locals():
            print(f"ROW {i} OUTPUT TYPE: {type(response)}")
            print(response if isinstance(response, str) else response.keys())
        continue

 20%|██        | 1/5 [00:00<00:02,  1.46it/s]

❌ Error on row 204: 503 Server Error: Service Temporarily Unavailable for url: https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn


 60%|██████    | 3/5 [00:02<00:01,  1.62it/s]

❌ Error on row 728: 503 Server Error: Service Temporarily Unavailable for url: https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn
❌ Error on row 494: 503 Server Error: Service Temporarily Unavailable for url: https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn


100%|██████████| 5/5 [00:02<00:00,  2.06it/s]

❌ Error on row 681: 503 Server Error: Service Temporarily Unavailable for url: https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn
❌ Error on row 742: 503 Server Error: Service Temporarily Unavailable for url: https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn





: 

In [None]:
# pd.set_option("display.max_colwidth", None)
pd.reset_option("display.max_colwidth")


In [None]:
results_df = pd.DataFrame(generated)
results_df.to_csv("sample_summaries.csv", index=False)

print(results_df[["ground_truth", "generated_summary"]].head(1))

KeyError: "None of [Index(['ground_truth', 'generated_summary'], dtype='object')] are in the [columns]"

In [None]:
rouge = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)

In [None]:
def evaluate_summary(gt, gen):
    rouge_scores = rouge.score(gt, gen)
    
    # BERTScore (returns list, so grab the first item)
    P, R, F1 = score([gen], [gt], model_type="roberta_large", lang="en", verbose=False, use_auth_token=hf_token)

    return {
        "rouge1_f1": round(rouge_scores["rouge1"].fmeasure, 4),
        "rougeL_f1": round(rouge_scores["rougeL"].fmeasure, 4),
        "bertscore_f1": round(F1[0].item(), 4)
    }

In [None]:
eval_results = results_df.apply(
    lambda row: evaluate_summary(row["ground_truth"], row["generated_summary"]),
    axis=1, result_type="expand"
)