The purpose of this Notebook is to compute the ROUGE scores using the reference summaries

In [23]:
import pandas as pd
import numpy as np
from rouge_score import rouge_scorer
import re

In [27]:
import string

In [14]:
import nltk
from nltk.corpus import stopwords

stop_words = set(stopwords.words('english'))

In [2]:
import json

In [24]:
def tokenize(sentence):
    #INVALID_POS = ["CC", "CD", "DT", "EX", "IN", "LS", "PDT", "POS", "PRP", "PRP$", "RP", "TO", "WDT", "WP", "WRB"]
    INVALID_POS = ["CC", "CD", "DT", "EX", "IN", "LS", "PDT", "POS", "PRP", "PRP$", "RP", "TO", "WDT", "WP", "WRB"]
    sentence = re.sub(f"[{re.escape(string.punctuation)}\…]+", " ", sentence)
    # Filter common words
    tokens = nltk.pos_tag(sentence.split())
    tokens_ = tokens
    tokens = [tok for (tok, pos) in tokens if tok.lower() not in stop_words and pos not in INVALID_POS]
    return tokens

In [25]:
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'])

In [4]:
refs = pd.read_csv("./reference_summaries.csv")
refs.shape

(200, 5)

In [5]:
refs

Unnamed: 0,prod_id,summ_1,summ_2,summ_3,comments
0,B000JCDUFG,the customers feel that the newspaper they bou...,the newspaper is a nice read with updated news...,the newspaper is easy to read and navigate and...,
1,B000M92GLK,a headset that is light and comfortable with a...,"this bluetooth handsfree works great, has very...",the headset is well made and it's really easy ...,
2,B000Q5J2M6,"Overall the reviews are mixed, while some user...",A lightweight Bluetooth headset. Compatible wi...,the headset offers good volume control for inc...,
3,B000SRGF2W,"This short military romance story, Male Call, ...",short story with a lot of sex. seems a bit imm...,"the book brings fun to readers, at a good pric...",
4,B000WCWUWM,a simple and fast paced book with interesting ...,"the book is decent, with a simple storyline an...",a basic and simple love story to read. charact...,
...,...,...,...,...,...
195,B00CX1LJJK,This phone case is cute and is of decent quali...,May be a poor fit for your phone. Ears come of...,the ears can come off easily at times and the ...,2 disagreements
196,B00CYNN4ZO,The phone case make it hard to charge the phon...,this plastic case is water resistant and it pr...,This case is great if it is used extreme condi...,
197,B00IY5RM04,This short book is about a fun erotic story. A...,this is an adult book as a warning. it is a qu...,this book is good to read abut not enough sex ...,
198,B00K7Q8I1A,a short book about love. it ends at a cliffhan...,the book was nice to read but one have buy nex...,"This erotic book is good overall. However, the...",2 disagreements


In [21]:
def get_scores(path, refs):
    df = pd.read_csv(path)
    df = pd.merge(df, refs, how="inner", on="prod_id")
    df["summary"] = df["summary"].replace(np.nan, "")
    all_scores = []
    all_scores_with_prod_id = dict()
    for i,prod_id in enumerate(df["prod_id"].unique()):
        data = df.loc[df["prod_id"] == prod_id]
        gen_summ = data["summary"].values[0]
        ref_summ = [data["summ_1"].values[0], data["summ_2"].values[0], data["summ_3"].values[0]]
        prod_scores = []
        for rs in ref_summ:
            #ref = ' '.join([str(word) for word in self.nlp(str(ref)) if str(word) not inself.stopwords])
            rs = " ".join(tokenize(rs))
            gen_summ_ = " ".join(tokenize(gen_summ))
            scores = scorer.score(gen_summ_, rs)
            r1_p, r1_r, r1_f = scores["rouge1"]
            r2_p, r2_r, r2_f = scores["rouge2"]
            rL_p, rL_r, rL_f = scores["rougeL"]
            prod_scores.append([[r1_p, r1_r, r1_f], [r2_p, r2_r, r2_f], [rL_p, rL_r, rL_f]])
        
        prod_score_arr = np.array(prod_scores)
        all_scores.append(prod_scores)
        all_scores_with_prod_id[prod_id] = {
            "rouge1": {
                "precision": {"mean": round(prod_score_arr.mean(0)[0][0], 5), "max": round(prod_score_arr.max(0)[0][0], 5), "min": round(prod_score_arr.min(0)[0][0], 5)},
                "recall": {"mean": round(prod_score_arr.mean(0)[0][1], 5), "max": round(prod_score_arr.max(0)[0][1], 5), "min": round(prod_score_arr.min(0)[0][1], 5)},
                "fscore": {"mean": round(prod_score_arr.mean(0)[0][1], 5), "max": round(prod_score_arr.max(0)[0][2], 5), "min": round(prod_score_arr.min(0)[0][2], 5)}
            },
            "rouge2": {
                "precision": {"mean": round(prod_score_arr.mean(0)[1][0], 5), "max": round(prod_score_arr.max(0)[1][0], 5), "min": round(prod_score_arr.min(0)[1][0], 5)},
                "recall": {"mean": round(prod_score_arr.mean(0)[1][1], 5), "max": round(prod_score_arr.max(0)[1][1], 5), "min": round(prod_score_arr.min(0)[1][1], 5)},
                "fscore": {"mean": round(prod_score_arr.mean(0)[1][2], 5), "max": round(prod_score_arr.max(0)[1][2], 5), "min": round(prod_score_arr.min(0)[1][2], 5)}
            },
            "rougeL": {
                "precision": {"mean": round(prod_score_arr.mean(0)[2][0], 5), "max": round(prod_score_arr.max(0)[2][0], 5), "min": round(prod_score_arr.min(0)[2][0], 5)},
                "recall": {"mean": round(prod_score_arr.mean(0)[2][1], 5), "max": round(prod_score_arr.max(0)[2][1], 5), "min": round(prod_score_arr.min(0)[2][1], 5)},
                "fscore": {"mean": round(prod_score_arr.mean(0)[2][2], 5), "max": round(prod_score_arr.max(0)[2][2], 5), "min": round(prod_score_arr.min(0)[2][2], 5)}
            }
        }
        
        #if prod_id == "B0096HVJGM":
        print("===========", prod_id)
        print(gen_summ)
        print()
        for r in  ref_summ:
            print(">>   ", r)
        print()
        print()
        print(all_scores_with_prod_id[prod_id])
        print("-"*100)
        if i == 3:
            pause
        
    all_scores = np.array(all_scores)
    mean_scores = all_scores.mean(1).mean(0)
    max_scores = all_scores.max(1).mean(0)
    min_scores = all_scores.min(1).mean(0)
    
    output = {
        "rouge1": {
            "precision": {"mean": round(mean_scores[0][0], 5), "max": round(max_scores[0][0], 5), "min": round(min_scores[0][0], 5)},
            "recall": {"mean": round(mean_scores[0][1], 5), "max": round(max_scores[0][1], 5), "min": round(min_scores[0][1], 5)},
            "fscore": {"mean": round(mean_scores[0][2], 5), "max": round(max_scores[0][2], 5), "min": round(min_scores[0][2], 5)}
        },
        "rouge2": {
            "precision": {"mean": round(mean_scores[1][0], 5), "max": round(max_scores[1][0], 5), "min": round(min_scores[1][0], 5)},
            "recall": {"mean": round(mean_scores[1][1], 5), "max": round(max_scores[1][1], 5), "min": round(min_scores[1][1], 5)},
            "fscore": {"mean": round(mean_scores[1][2], 5), "max": round(max_scores[1][2], 5), "min": round(min_scores[1][2], 5)}
        },
        "rougeL": {
            "precision": {"mean": round(mean_scores[2][0], 5), "max": round(max_scores[2][0], 5), "min": round(min_scores[2][0], 5)},
            "recall": {"mean": round(mean_scores[2][1], 5), "max": round(max_scores[2][1], 5), "min": round(min_scores[2][1], 5)},
            "fscore": {"mean": round(mean_scores[2][2], 5), "max": round(max_scores[2][2], 5), "min": round(min_scores[2][2], 5)}
        }
    }
    
    return output, all_scores_with_prod_id

In [7]:
def metrics_csv(model, scores):
    df = []
    for prod_id in scores.keys():
        data = scores[prod_id]
        row = dict()
        row["model"] = model
        row["prod_id"] = prod_id
        for k,v in zip(["r1", "r2", "rL"], ["rouge1", "rouge2", "rougeL"]):
            for m in ["precision", "recall", "fscore"]:
                row[f"{k}_{m}_mean"] = data[v][m]["mean"]
                row[f"{k}_{m}_max"] = data[v][m]["max"]
                row[f"{k}_{m}_min"] = data[v][m]["min"]
        df.append(row)
    df = pd.DataFrame(df)
    return df

### Save ROUGE scores in Json files

In [59]:
gpt2_scores, gpt2_all_scores_with_prod_id = get_scores(path="gpt2_summaries.csv", refs=refs)
with open('gpt2_avg_scores.json', 'w') as f:
    json.dump(gpt2_scores, f)

with open('gpt2_all_scores.json', 'w') as f:
    json.dump(gpt2_all_scores_with_prod_id, f)
    
gpt2_scores_csv = metrics_csv("gpt2", gpt2_all_scores_with_prod_id)
gpt2_scores_csv.to_csv("gpt2_all_scores.csv", index=False)

In [60]:
textrank_scores, tr_all_scores_with_prod_id = get_scores(path="textrank_summaries.csv", refs=refs)
with open('textrank_avg_scores.json', 'w') as f:
    json.dump(textrank_scores, f)
    
with open('textrank_all_scores.json', 'w') as f:
    json.dump(tr_all_scores_with_prod_id, f)
    
tr_scores_csv = metrics_csv("textrank", tr_all_scores_with_prod_id)
tr_scores_csv.to_csv("textrank_all_scores.csv", index=False)

In [61]:
meansum_scores, meansum_all_scores_with_prod_id = get_scores(path="meansum_summaries.csv", refs=refs)
with open('meansum_avg_scores.json', 'w') as f:
    json.dump(meansum_scores, f)
    
with open('meansum_all_scores.json', 'w') as f:
    json.dump(meansum_all_scores_with_prod_id, f)
    
meansum_scores_csv = metrics_csv("meansum", meansum_all_scores_with_prod_id)
meansum_scores_csv.to_csv("meansum_all_scores.csv", index=False)

FileNotFoundError: [Errno 2] No such file or directory: 'meansum_summaries.csv'

In [31]:
neutralsumm_scores, neutralsumm_all_scores_with_prod_id = get_scores(path="../outputs/gpt2_summaries.csv", refs=refs)
neutralsumm_scores
#with open('neutralsumm_avg_scores.json', 'w') as f:
#    json.dump(neutralsumm_scores, f)
    
#with open('neutralsumm_all_scores.json', 'w') as f:
#    json.dump(neutralsumm_all_scores_with_prod_id, f)
    
#neutralsumm_scores_csv = metrics_csv("neutralsumm", neutralsumm_all_scores_with_prod_id)
#neutralsumm_scores_csv.to_csv("neutralsumm.csv", index=False)

Thanks Denver Post. Not once, but twice in two weeks. I give the Denver Post 3 stars because there is no Weather (really? Reading it is so easy....love saving the trees! So easy to hold.

>>    the customers feel that the newspaper they bougth is worth reading and got information regarding the local activities and prices are reasonale too.
>>    the newspaper is a nice read with updated news everyday. however it doesn't always arrive on time, and it has some delivery problems.
>>    the newspaper is easy to read and navigate and apart from some delivery issues is well made


{'rouge1': {'precision': {'mean': 0.06268, 'max': 0.11111, 'min': 0.0}, 'recall': {'mean': 0.03704, 'max': 0.05556, 'min': 0.0}, 'fscore': {'mean': 0.03704, 'max': 0.07407, 'min': 0.0}}, 'rouge2': {'precision': {'mean': 0.0, 'max': 0.0, 'min': 0.0}, 'recall': {'mean': 0.0, 'max': 0.0, 'min': 0.0}, 'fscore': {'mean': 0.0, 'max': 0.0, 'min': 0.0}}, 'rougeL': {'precision': {'mean': 0.06268, 'max': 0.11111, 'min': 0.0}

NameError: name 'pause' is not defined