#### **Student - Bălăcescu Bogdan**

## Reconstruct prose and poetry from "results.jsonl" to compute metrics such as BLEU and ROUGE

In [None]:
import json
import re
from collections import defaultdict
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
from rouge_score import rouge_scorer


# Input files: the reference corpus (read as one JSON object per line) and
# the text file holding the generated poems.
REFERENCE_PATH = "results.jsonl"
GENERATED_PATH = "medium_rogpt2_results.txt"

# NLTK BLEU smoothing strategy (SmoothingFunction.method4).
smoothie = SmoothingFunction().method4
# Shared ROUGE scorer computing ROUGE-1, ROUGE-2 and ROUGE-L with stemming on.
rouge = rouge_scorer.RougeScorer(
    ["rouge1", "rouge2", "rougeL"], use_stemmer=True
)


def tokenize(text):
    """Lower-case *text* and return its word tokens in order of appearance.

    A token is a maximal run of regex word characters (letters, digits,
    underscore); punctuation and whitespace only act as separators.
    """
    lowered = text.lower()
    return [match.group(0) for match in re.finditer(r"\b\w+\b", lowered)]

def distinct_n(tokens, n):
    """Return the Distinct-n ratio: unique n-grams divided by total n-grams.

    Yields 0.0 when *tokens* holds fewer than *n* items, since no n-gram
    can be formed.
    """
    if n > len(tokens):
        return 0.0

    # Zipping the sequence against its own shifted copies walks every
    # window of n consecutive tokens.
    shifted_views = [tokens[offset:] for offset in range(n)]
    total = 0
    unique = set()
    for gram in zip(*shifted_views):
        total += 1
        unique.add(gram)
    return len(unique) / total


def load_reference_corpus(path):
    """Rebuild the reference poems from a JSONL file and tokenize them.

    Each non-blank line of *path* must be a JSON object with the keys
    ``"title"``, ``"verse_index"`` and ``"verse"``. Verses are grouped by
    title, ordered by ``verse_index`` within each title, joined with spaces,
    and the poems are then concatenated into one text that is tokenized.

    Args:
        path: Path to the UTF-8 encoded JSONL file.

    Returns:
        A flat list of lower-cased word tokens covering the whole corpus.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks one of the expected keys.
    """
    poems = defaultdict(list)

    with open(path, encoding="utf-8") as f:
        for line in f:
            # Blank or whitespace-only lines (e.g. a stray trailing newline)
            # are not JSON documents; skip them instead of crashing.
            if not line.strip():
                continue
            obj = json.loads(line)
            poems[obj["title"]].append(
                (obj["verse_index"], obj["verse"])
            )

    corpus = []
    for verses in poems.values():
        # Sort on the index only, so verses sharing an index keep file order.
        ordered = sorted(verses, key=lambda x: x[0])
        corpus.append(" ".join(verse for _, verse in ordered))

    return tokenize(" ".join(corpus))


def load_generated_poems(path):
    """Extract the generated poems from the text file at *path*.

    The file is cut at every ``--- Poem <number> ---`` header; whatever comes
    before the first header is ignored. Inside each section only the text
    preceding the first ``END OF POEM`` marker is kept, surrounding
    whitespace is trimmed, and sections that end up empty are dropped.

    Returns:
        A list of poem strings in file order.
    """
    with open(path, encoding="utf-8") as handle:
        raw = handle.read()

    # re.split always yields at least one piece; the first one is the
    # preamble before any poem header.
    _preamble, *sections = re.split(r"--- Poem \d+ ---", raw)
    bodies = (
        section.partition("END OF POEM")[0].strip() for section in sections
    )
    return [body for body in bodies if body]


def evaluate_poem(poem_text, ref_tokens):
    """Score one generated poem against the reference token sequence.

    Args:
        poem_text: Raw text of the generated poem.
        ref_tokens: List of reference tokens the poem is compared with.

    Returns:
        A dict with BLEU-1/2/4, the F-measure of ROUGE-1/2/L, and
        Distinct-1/2, each rounded to 4 decimals, plus ``"Length"``, the
        number of tokens in the poem.
    """
    hyp_tokens = tokenize(poem_text)

    # corpus_bleu takes one token list per hypothesis and, for each
    # hypothesis, a list of reference token lists. The poem is therefore a
    # single hypothesis; splitting it into one-token hypotheses would make
    # every n-gram precision for n > 1 structurally zero.
    references = [[ref_tokens]]
    hypotheses = [hyp_tokens]

    # NOTE(review): BLEU's brevity penalty compares the poem length with the
    # reference length. When ref_tokens is far longer than the poem the
    # score is still pushed towards 0 -- consider per-poem references.
    def bleu(weights):
        # Smoothing keeps a missing higher-order n-gram match from zeroing
        # the whole score.
        return corpus_bleu(
            references,
            hypotheses,
            weights=weights,
            smoothing_function=smoothie,
        )

    bleu1 = bleu((1, 0, 0, 0))
    bleu2 = bleu((0.5, 0.5, 0, 0))
    bleu4 = bleu((0.25, 0.25, 0.25, 0.25))

    # RougeScorer.score takes the target (reference) first, then the
    # prediction.
    rouge_scores = rouge.score(
        " ".join(ref_tokens),
        poem_text
    )

    return {
        "BLEU-1": round(bleu1, 4),
        "BLEU-2": round(bleu2, 4),
        "BLEU-4": round(bleu4, 4),
        "ROUGE-1": round(rouge_scores["rouge1"].fmeasure, 4),
        "ROUGE-2": round(rouge_scores["rouge2"].fmeasure, 4),
        "ROUGE-L": round(rouge_scores["rougeL"].fmeasure, 4),
        "Distinct-1": round(distinct_n(hyp_tokens, 1), 4),
        "Distinct-2": round(distinct_n(hyp_tokens, 2), 4),
        "Length": len(hyp_tokens)
    }


def main():
    """Load the reference corpus and generated poems, then print each poem's metrics."""
    reference = load_reference_corpus(REFERENCE_PATH)
    poems = load_generated_poems(GENERATED_PATH)

    print(f"üìö Reference tokens: {len(reference)}")
    print(f"üìù Generated poems: {len(poems)}\n")

    separator = "-" * 40
    for number, text in enumerate(poems, start=1):
        # Score before printing the header so a failure leaves no dangling
        # "Poem N" line behind.
        results = evaluate_poem(text, reference)
        print(f"üìú Poem {number}")
        for metric, value in results.items():
            print(f"{metric:12}: {value}")
        print(separator)

# Run the evaluation only when this file is executed as a script.
if __name__ == "__main__":
    main()


üìö Reference tokens: 12704
üìù Generated poems: 10

üìú Poem 1
BLEU-1      : 0.0
BLEU-2      : 0.0
BLEU-4      : 0.0
ROUGE-1     : 0.0147
ROUGE-2     : 0.0063
ROUGE-L     : 0.01
Distinct-1  : 0.84
Distinct-2  : 0.9798
Length      : 100
----------------------------------------
üìú Poem 2
BLEU-1      : 0.0
BLEU-2      : 0.0
BLEU-4      : 0.0
ROUGE-1     : 0.0166
ROUGE-2     : 0.0088
ROUGE-L     : 0.0116
Distinct-1  : 0.7611
Distinct-2  : 0.9732
Length      : 113
----------------------------------------
üìú Poem 3
BLEU-1      : 0.0
BLEU-2      : 0.0
BLEU-4      : 0.0
ROUGE-1     : 0.0145
ROUGE-2     : 0.0073
ROUGE-L     : 0.0101
Distinct-1  : 0.7677
Distinct-2  : 0.9898
Length      : 99
----------------------------------------
üìú Poem 4
BLEU-1      : 0.0
BLEU-2      : 0.0
BLEU-4      : 0.0
ROUGE-1     : 0.0141
ROUGE-2     : 0.0063
ROUGE-L     : 0.0095
Distinct-1  : 0.8763
Distinct-2  : 1.0
Length      : 97
----------------------------------------
üìú Poem 5
BLEU-1      : 0.0
BLEU