The purpose of this notebook is to compute ROUGE scores for the generated summaries against the reference summaries.

In [23]:
import pandas as pd
import numpy as np
from rouge_score import rouge_scorer
import re

In [27]:
import string

In [14]:
import nltk
from nltk.corpus import stopwords

# English stop words from the NLTK stopwords corpus, held in a set for fast
# membership tests; tokenize() uses it to drop common words.
stop_words = set(stopwords.words('english'))

In [2]:
import json

In [24]:
# Part-of-speech tags (as returned by nltk.pos_tag) whose tokens are dropped:
# conjunctions, numbers, determiners, prepositions, pronouns, particles, etc.
_INVALID_POS = frozenset({
    "CC", "CD", "DT", "EX", "IN", "LS", "PDT", "POS",
    "PRP", "PRP$", "RP", "TO", "WDT", "WP", "WRB",
})

# Runs of ASCII punctuation and/or the ellipsis character. A raw f-string is
# used so no backslash escape is needed for the non-ASCII ellipsis.
_PUNCT_RE = re.compile(rf"[{re.escape(string.punctuation)}…]+")


def tokenize(sentence):
    """Split *sentence* into content-bearing tokens.

    Punctuation runs are replaced by a space, the text is split on
    whitespace and POS-tagged with ``nltk.pos_tag``; tokens that are English
    stop words (case-insensitive) or whose tag is in ``_INVALID_POS`` are
    discarded.

    Args:
        sentence: Text to tokenize (must be a ``str``).

    Returns:
        List of the remaining tokens, in their original order and casing.
    """
    words = _PUNCT_RE.sub(" ", sentence).split()
    if not words:
        # Nothing to tag; avoid invoking the tagger on an empty sequence.
        return []
    return [
        tok
        for tok, pos in nltk.pos_tag(words)
        if tok.lower() not in stop_words and pos not in _INVALID_POS
    ]

In [25]:
# Shared scorer configured for the ROUGE-1, ROUGE-2 and ROUGE-L metrics;
# get_scores() calls scorer.score() on each summary pair.
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'])

In [4]:
# Load the reference summaries; get_scores() merges this frame with the
# generated summaries on the "prod_id" column.
refs = pd.read_csv("./reference_summaries.csv")
# Display (rows, columns) as a quick sanity check of the loaded file.
refs.shape

(200, 5)

In [21]:
_ROUGE_TYPES = ("rouge1", "rouge2", "rougeL")
_ROUGE_METRICS = ("precision", "recall", "fscore")
_REF_COLUMNS = ("summ_1", "summ_2", "summ_3")


def _stats_dict(mean_scores, max_scores, min_scores):
    """Turn three (rouge type x metric) arrays into the nested result dict."""
    return {
        rouge: {
            metric: {
                "mean": round(float(mean_scores[i][j]), 5),
                "max": round(float(max_scores[i][j]), 5),
                "min": round(float(min_scores[i][j]), 5),
            }
            for j, metric in enumerate(_ROUGE_METRICS)
        }
        for i, rouge in enumerate(_ROUGE_TYPES)
    }


def get_scores(path, refs):
    """Score generated summaries against their three reference summaries.

    The CSV at *path* is inner-joined with *refs* on ``prod_id``. For every
    product, the generated ``summary`` is compared with each of ``summ_1``,
    ``summ_2`` and ``summ_3`` (all passed through ``tokenize`` first) using
    the module-level ``scorer``.

    Args:
        path: Path to a CSV with at least ``prod_id`` and ``summary`` columns.
        refs: DataFrame with ``prod_id`` and the three reference columns.

    Returns:
        A tuple ``(output, all_scores_with_prod_id)``:

        * ``output`` -- ``{rouge: {metric: {"mean", "max", "min"}}}`` where
          each value is the average over products of the per-product
          mean / max / min taken across the three references.
        * ``all_scores_with_prod_id`` -- the same nested structure per
          ``prod_id``, holding that product's mean / max / min across its
          references.

        All values are rounded to 5 decimals.

    Raises:
        ValueError: If no product is present in both inputs.
    """
    df = pd.read_csv(path)
    df = pd.merge(df, refs, how="inner", on="prod_id")
    if df.empty:
        raise ValueError(f"no prod_id in {path!r} matches the reference summaries")
    # A missing generated summary is scored as an empty text.
    df["summary"] = df["summary"].fillna("")

    # Only the first row of each product is scored.
    columns = ["prod_id", "summary", *_REF_COLUMNS]
    first_rows = df.drop_duplicates(subset="prod_id", keep="first")[columns]

    all_scores = []
    all_scores_with_prod_id = {}
    for prod_id, gen_summ, *ref_summs in first_rows.itertuples(index=False, name=None):
        # The generated summary is the same for every reference: tokenize once.
        gen_text = " ".join(tokenize(gen_summ))
        prod_scores = []
        for ref_summ in ref_summs:
            ref_text = " ".join(tokenize(ref_summ))
            # RougeScorer.score takes (target, prediction): the reference is
            # the target and the generated summary the prediction. Passing
            # them the other way round swaps precision and recall.
            scores = scorer.score(ref_text, gen_text)
            # Each Score is a (precision, recall, fmeasure) tuple.
            prod_scores.append([list(scores[rouge]) for rouge in _ROUGE_TYPES])

        # Shape: (references, rouge types, metrics).
        prod_score_arr = np.array(prod_scores)
        all_scores.append(prod_scores)
        all_scores_with_prod_id[prod_id] = _stats_dict(
            prod_score_arr.mean(0), prod_score_arr.max(0), prod_score_arr.min(0)
        )

    # Shape: (products, references, rouge types, metrics). Reduce over the
    # references first (axis 1), then average over the products (axis 0).
    all_scores = np.array(all_scores)
    output = _stats_dict(
        all_scores.mean(1).mean(0),
        all_scores.max(1).mean(0),
        all_scores.min(1).mean(0),
    )

    return output, all_scores_with_prod_id

In [7]:
def metrics_csv(model, scores):
    """Flatten per-product ROUGE statistics into a one-row-per-product table.

    Args:
        model: Label stored in the ``model`` column of every row.
        scores: Mapping ``prod_id -> {rouge: {metric: {"mean", "max", "min"}}}``
            with rouge in rouge1/rouge2/rougeL and metric in
            precision/recall/fscore.

    Returns:
        DataFrame with columns ``model``, ``prod_id`` and one column per
        statistic named ``<r1|r2|rL>_<metric>_<mean|max|min>``.
    """
    short_names = {"r1": "rouge1", "r2": "rouge2", "rL": "rougeL"}
    measures = ("precision", "recall", "fscore")
    aggregates = ("mean", "max", "min")

    records = []
    for prod_id, per_product in scores.items():
        record = {"model": model, "prod_id": prod_id}
        for prefix, rouge_name in short_names.items():
            rouge_stats = per_product[rouge_name]
            for measure in measures:
                for agg in aggregates:
                    record[f"{prefix}_{measure}_{agg}"] = rouge_stats[measure][agg]
        records.append(record)

    return pd.DataFrame(records)

### Save ROUGE scores in JSON files

In [59]:
# Score the GPT-2 summaries: the first value holds the scores averaged over
# all products, the second the statistics for each prod_id.
gpt2_scores, gpt2_all_scores_with_prod_id = get_scores(path="gpt2_summaries.csv", refs=refs)
# Persist the averaged scores as JSON.
with open('gpt2_avg_scores.json', 'w') as f:
    json.dump(gpt2_scores, f)
    
# Flatten the per-product statistics into a DataFrame labelled with the model name.
gpt2_scores_csv = metrics_csv("gpt2", gpt2_all_scores_with_prod_id)