# ROUGE-Based Mapping

Compute mappings between sentences and short text answers, using different variants of ROUGE and a distance heuristic to pick the sentence closest to the shell noun sentence in case of ties.

In [1]:
import pandas as pd
from mapping_util import long_to_wide, get_prediction

# Labels handed to long_to_wide when the answers are reshaped.
annotators = [
    "A1",
    "A2",
    "A3",
    "A4",
    "A5",
]

# ROUGE types given to the scorer; also the row labels of the result table.
variants = [
    "rouge1",
    "rouge2",
    "rouge3",
    "rougeL",
]

# Attributes read from each score; also the column labels of the result table.
measures = [
    "precision",
    "recall",
    "fmeasure",
]


def _number_rows(group):
    """Swap a group's index for a 0-based row counter named ``sent_index``."""
    return group.reset_index(drop=True).rename_axis("sent_index")


# Gold data keyed by (batch, file); each group then gets its rows numbered,
# which adds ``sent_index`` as a third index level.
nyt = pd.read_csv("data/gold_data_mapped.csv", index_col=["batch", "file"])
nyt = nyt.groupby(level=[0, 1]).apply(_number_rows)

# Gold labels: the ``sent_index`` value of every row flagged ``is_antecedent``.
antecedent_rows = nyt[nyt.is_antecedent]
gold = antecedent_rows.index.get_level_values("sent_index").to_series()

# Answers are read keyed by (batch, file), reshaped by long_to_wide, and only
# the ``answer`` part of the reshaped frame is kept.
answers_long = pd.read_csv("data/clean_answers.csv", index_col=["batch", "file"])
answers = long_to_wide(answers_long, 5, annotators).answer

# Attach the answers to every sentence row, then move ``shellnoun`` and
# ``is_sn_sent`` into the index so that only ``sentence`` and the joined answer
# columns remain as data columns.
sentence_rows = nyt[["shellnoun", "sentence", "is_sn_sent"]]
df = sentence_rows.join(answers).set_index(["shellnoun", "is_sn_sent"], append=True)

#### Compute ROUGE scores

In [2]:
from rouge_score import rouge_scorer
import numpy as np

# One scorer that computes every ROUGE type listed in `variants`, with stemming on.
scorer = rouge_scorer.RougeScorer(variants, use_stemmer=True)

# Element-wise wrapper around scorer.score. Declaring the output dtype up front
# stops np.vectorize from probing it with an extra scorer.score call on the
# first element, and lets the wrapper accept size-0 input instead of raising.
vec_score = np.vectorize(scorer.score, otypes=[object])

# Broadcast the lower-cased columns whose name contains "A" against the
# lower-cased sentences reshaped to a column vector: one score result per cell.
# The answer goes in as scorer.score's first argument, the sentence as its second.
rouge_results = pd.DataFrame(
    vec_score(
        df.filter(like="A").map(str.lower), df.sentence.str.lower().values[:, None]
    ),
    index=df.index,
)

#### Compute antecedent sentence predictions

In [3]:
from operator import itemgetter, attrgetter


def evaluate_(rouge_variant, measure):
    """Return the share of predictions that equal the gold sentence index.

    Every cell of ``rouge_results`` is reduced to a single value by looking up
    ``rouge_variant`` in it and reading the ``measure`` attribute of that entry.
    ``get_prediction`` turns those values into predictions, which are compared
    position by position with the values of ``gold``.
    """
    read_measure = attrgetter(measure)

    def pick(cell):
        return read_measure(cell[rouge_variant])

    predicted = get_prediction(rouge_results.map(pick))
    matches = predicted.eq(gold.values)
    return matches.mean()


# Broadcasting wrapper around evaluate_. The output dtype is declared so that
# np.vectorize does not run evaluate_ an extra time on the first
# (variant, measure) pair just to infer it.
evaluate = np.vectorize(evaluate_, otypes=[float])

In [4]:
# Variants as a column vector broadcast against the flat list of measures:
# one accuracy per (variant, measure) pair.
variant_axis = np.array(variants).reshape(-1, 1)
accuracy_grid = evaluate(variant_axis, measures)
result = pd.DataFrame(accuracy_grid, index=variants, columns=measures)
result

Unnamed: 0,precision,recall,fmeasure
rouge1,0.58,0.67,0.66
rouge2,0.65,0.68,0.69
rouge3,0.59,0.64,0.61
rougeL,0.58,0.65,0.66
