## Self-defined ROUGE

In [13]:
#!/usr/bin/env python
# 
# File Name : rouge.py
#
# Description : Computes ROUGE-L metric as described by Lin and Hovy (2004)
#
# Creation Date : 2015-01-07 06:03
# Author : Ramakrishna Vedantam <vrama91@vt.edu>

import numpy as np
import pdb

def my_lcs(string, sub):
    """
    Return the length of the longest common subsequence (LCS) of two token sequences.

    :param string : list of str : tokens of one sequence (split on whitespace)
    :param sub : list of str : tokens of the other sequence; the arguments are
                 swapped internally if this one turns out to be the longer
    :returns: int : length of the LCS shared by the two sequences

    Note: only the length is computed, the subsequence itself is not recovered.
    """
    # Keep the longer sequence in `string` so the table is (long+1) x (short+1).
    if len(sub) > len(string):
        string, sub = sub, string

    n_long = len(string)
    n_short = len(sub)

    # table[r][c] = LCS length of string[:r] and sub[:c]; row 0 / column 0 stay 0.
    table = [[0] * (n_short + 1) for _ in range(n_long + 1)]

    for row in range(1, n_long + 1):
        left_token = string[row - 1]
        for col in range(1, n_short + 1):
            if left_token == sub[col - 1]:
                # Matching tokens extend the LCS of both shorter prefixes.
                table[row][col] = table[row - 1][col - 1] + 1
            else:
                # Otherwise carry over the better of dropping one token from either side.
                table[row][col] = max(table[row - 1][col], table[row][col - 1])

    return table[n_long][n_short]

class Rouge():
    '''
    ROUGE-L scorer: an LCS-based F-measure between a candidate sentence and
    one or more reference sentences.

    '''
    def __init__(self):
        # vrama91: updated the value below based on discussion with Hovy
        # beta weights precision against recall in the F-measure of calc_score.
        self.beta = 1.2

    def calc_score(self, candidate, refs):
        """
        Compute the ROUGE-L score of one candidate against its references.
        :param candidate: list of str : must contain exactly one candidate sentence
        :param refs: list of str : one or more reference sentences (non-empty)
        :returns score: float : F-measure built from the best LCS precision and
                 the best LCS recall over all references (0.0 if either is zero)
        """
        assert len(candidate) == 1
        assert len(refs) > 0

        # Sentences are tokenized on single spaces only.
        cand_tokens = candidate[0].split(" ")
        cand_len = float(len(cand_tokens))

        precisions = []
        recalls = []
        for reference in refs:
            ref_tokens = reference.split(" ")
            common = my_lcs(ref_tokens, cand_tokens)
            precisions.append(common / cand_len)
            recalls.append(common / float(len(ref_tokens)))

        # Precision and recall are maximised independently across references.
        best_prec = max(precisions)
        best_rec = max(recalls)

        if best_prec == 0 or best_rec == 0:
            return 0.0

        numerator = (1 + self.beta**2) * best_prec * best_rec
        denominator = float(best_rec + self.beta**2 * best_prec)
        return numerator / denominator

    def compute_score(self, gts, res):
        """
        Score every (reference, candidate) pair and average the results.
        :param gts: sequence of str : reference sentences, indexable by position
        :param res: sequence of str : candidate sentences, same length as gts;
                    res[i] is scored against gts[i]
        :returns: (average_score, scores) : mean ROUGE-L over all pairs and a
                  numpy array holding the individual per-pair scores
        """
        assert len(gts) == len(res)

        # calc_score expects both sides as lists, so each item is wrapped in a
        # single-element list (one candidate, one reference per position).
        per_pair = [
            self.calc_score([res[idx]], [gts[idx]])
            for idx in range(len(gts))
        ]

        scores = np.array(per_pair)
        return np.mean(scores), np.array(per_pair)

    def method(self):
        """Return the name of this metric."""
        return "Rouge"


In [14]:
# Instantiate the self-defined ROUGE-L scorer (class Rouge defined in the cell above).
rouge=Rouge()

In [19]:
# Load the TextRank hypotheses and the references, one entry per line.
# Lines keep their trailing newline characters.
hyp_path = "/home/yued/phd/LRD/lrd_summ/result/textrank.txt"
ref_path = "/home/yued/phd/LRD/lrd_summ/result/ref.txt"

with open(hyp_path, 'r') as f:
    hyps = list(f)

with open(ref_path, 'r') as f:
    refs = list(f)

In [20]:
# compute_score takes (gts, res): references first, hypotheses second.
# It returns (mean score, array of per-line scores).
print(rouge.compute_score(refs,hyps))

(0.18408339989737207, array([0.14124107, 0.18305173, 0.19205397, ..., 0.20318489, 0.17531193,
       0.15565479]))


In [21]:
# Repeat the self-defined ROUGE-L evaluation on the ".tokenized" versions
# of the hypothesis and reference files.
hyp_path = "/home/yued/phd/LRD/lrd_summ/result/textrank.tokenized"
ref_path = "/home/yued/phd/LRD/lrd_summ/result/ref.tokenized"

with open(hyp_path, 'r') as f:
    hyps = list(f)

with open(ref_path, 'r') as f:
    refs = list(f)

# compute_score takes (gts, res): references first, hypotheses second.
print(rouge.compute_score(refs, hyps))

(0.2050288622385424, array([0.1568215 , 0.19615274, 0.21642576, ..., 0.23908572, 0.19167629,
       0.20697546]))


## ROUGE package

In [31]:
# Create the data layout pyrouge needs: one file per hypothesis line
# (<i>_decoded.txt) and one file per reference line (<i>_reference.txt),
# where <i> is the 0-based line index.
with open("/home/yued/phd/LRD/lrd_summ/result/textrank.txt", 'r') as f:
    hyps = f.readlines()
for i, h in enumerate(hyps):
    with open("/home/yued/phd/LRD/lrd_summ/result/hyps/%d_decoded.txt" % i, 'w') as f:
        # Lines returned by readlines() keep their trailing newline, so a
        # blank line is "\n" and never has length 0. Test the stripped text
        # so the placeholder below is actually written for empty hypotheses.
        if not h.strip():  # deal with empty lines
            h = 'n'
        f.write(h)

with open("/home/yued/phd/LRD/lrd_summ/result/ref.txt", 'r') as f:
    refs = f.readlines()
for i, h in enumerate(refs):
    with open("/home/yued/phd/LRD/lrd_summ/result/refs/%d_reference.txt" % i, 'w') as f:
        f.write(h)


In [32]:
# from pyrouge import Rouge155
# r = Rouge155()
# # set directories
# r.system_dir = "/home/yued/phd/LRD/lrd_summ/result/hyps/"
# r.model_dir = "/home/yued/phd/LRD/lrd_summ/result/refs/"
 
# # define the patterns
# r.system_filename_pattern = '(\d+)_decoded.txt'
# r.model_filename_pattern = '#ID#_reference.txt'
 
# # use default parameters to run the evaluation
# # output = r.convert_and_evaluate()
# # print(output)
# # output_dict = r.output_to_dict(output)

In [38]:
# NOTE(review): output_dict is only assigned in the commented-out pyrouge cell
# above (r.output_to_dict(output)); that cell must be re-enabled and run first.
print(output_dict)

{'rouge_1_recall': 0.39279, 'rouge_1_recall_cb': 0.39033, 'rouge_1_recall_ce': 0.39523, 'rouge_1_precision': 0.45124, 'rouge_1_precision_cb': 0.44826, 'rouge_1_precision_ce': 0.45422, 'rouge_1_f_score': 0.40887, 'rouge_1_f_score_cb': 0.40669, 'rouge_1_f_score_ce': 0.41109, 'rouge_2_recall': 0.12574, 'rouge_2_recall_cb': 0.12382, 'rouge_2_recall_ce': 0.12763, 'rouge_2_precision': 0.14778, 'rouge_2_precision_cb': 0.14548, 'rouge_2_precision_ce': 0.1502, 'rouge_2_f_score': 0.13241, 'rouge_2_f_score_cb': 0.13046, 'rouge_2_f_score_ce': 0.13441, 'rouge_3_recall': 0.05961, 'rouge_3_recall_cb': 0.05803, 'rouge_3_recall_ce': 0.06125, 'rouge_3_precision': 0.07052, 'rouge_3_precision_cb': 0.06865, 'rouge_3_precision_ce': 0.07238, 'rouge_3_f_score': 0.06296, 'rouge_3_f_score_cb': 0.06138, 'rouge_3_f_score_ce': 0.06466, 'rouge_4_recall': 0.03597, 'rouge_4_recall_cb': 0.03466, 'rouge_4_recall_ce': 0.03737, 'rouge_4_precision': 0.04266, 'rouge_4_precision_cb': 0.0411, 'rouge_4_precision_ce': 0.04421,

## files2rouge

In [4]:
# Evaluate hiporank_rand_200.txt against ref.txt with files2rouge
# (reports ROUGE-1, ROUGE-2 and ROUGE-L).
import files2rouge

hyp_path = "/home/yued/phd/LRD/lrd_summ/result/hipo/hiporank_rand_200.txt"
ref_path = "/home/yued/phd/LRD/lrd_summ/result/ref.txt"
files2rouge.run(hyp_path, ref_path)

Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.44341 (95%-conf.int. 0.44115 - 0.44596)
1 ROUGE-1 Average_P: 0.46809 (95%-conf.int. 0.46559 - 0.47066)
1 ROUGE-1 Average_F: 0.44489 (95%-conf.int. 0.44309 - 0.44674)
---------------------------------------------
1 ROUGE-2 Average_R: 0.14117 (95%-conf.int. 0.13925 - 0.14306)
1 ROUGE-2 Average_P: 0.15058 (95%-conf.int. 0.14842 - 0.15272)
1 ROUGE-2 Average_F: 0.14230 (95%-conf.int. 0.14048 - 0.14408)
---------------------------------------------
1 ROUGE-L Average_R: 0.20951 (95%-conf.int. 0.20808 - 0.21108)
1 ROUGE-L Average_P: 0.22095 (95%-conf.int. 0.21947 - 0.22254)
1 ROUGE-L Average_F: 0.20965 (95%-conf.int. 0.20846 - 0.21095)

Elapsed time: 586.668 seconds


In [5]:
# Evaluate lexrank.txt against ref.txt with files2rouge
# (reports ROUGE-1, ROUGE-2 and ROUGE-L).
import files2rouge

hyp_path = "/home/yued/phd/LRD/lrd_summ/result/lexrank.txt"
ref_path = "/home/yued/phd/LRD/lrd_summ/result/ref.txt"
files2rouge.run(hyp_path, ref_path)

Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.45963 (95%-conf.int. 0.45760 - 0.46180)
1 ROUGE-1 Average_P: 0.47526 (95%-conf.int. 0.47247 - 0.47796)
1 ROUGE-1 Average_F: 0.45684 (95%-conf.int. 0.45499 - 0.45873)
---------------------------------------------
1 ROUGE-2 Average_R: 0.14622 (95%-conf.int. 0.14453 - 0.14804)
1 ROUGE-2 Average_P: 0.15409 (95%-conf.int. 0.15213 - 0.15611)
1 ROUGE-2 Average_F: 0.14678 (95%-conf.int. 0.14515 - 0.14859)
---------------------------------------------
1 ROUGE-L Average_R: 0.21989 (95%-conf.int. 0.21838 - 0.22137)
1 ROUGE-L Average_P: 0.22636 (95%-conf.int. 0.22478 - 0.22787)
1 ROUGE-L Average_F: 0.21780 (95%-conf.int. 0.21656 - 0.21907)

Elapsed time: 604.138 seconds


In [30]:
# Evaluate the TextRank output against ref.txt with files2rouge.
# NOTE(review): the file actually scored is hyp_path + '.txt'
# (textrank.txt.txt), the same name that appears in the "Write 5999 lines
# into ..." message of this cell's output -- presumably a cleaned copy of
# textrank.txt; confirm where it is produced.
import files2rouge

hyp_path = "/home/yued/phd/LRD/lrd_summ/result/textrank.txt"
ref_path = "/home/yued/phd/LRD/lrd_summ/result/ref.txt"
files2rouge.run(hyp_path + '.txt', ref_path)

6000
Write 5999 lines into /home/yued/phd/LRD/lrd_summ/result/textrank.txt.txt
Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.42116 (95%-conf.int. 0.41865 - 0.42376)
1 ROUGE-1 Average_P: 0.48361 (95%-conf.int. 0.48054 - 0.48665)
1 ROUGE-1 Average_F: 0.43832 (95%-conf.int. 0.43601 - 0.44048)
---------------------------------------------
1 ROUGE-2 Average_R: 0.13582 (95%-conf.int. 0.13385 - 0.13772)
1 ROUGE-2 Average_P: 0.15948 (95%-conf.int. 0.15692 - 0.16174)
1 ROUGE-2 Average_F: 0.14296 (95%-conf.int. 0.14097 - 0.14493)
---------------------------------------------
1 ROUGE-L Average_R: 0.20935 (95%-conf.int. 0.20770 - 0.21108)
1 ROUGE-L Average_P: 0.24084 (95%-conf.int. 0.23899 - 0.24281)
1 ROUGE-L Average_F: 0.21756 (95%-conf.int. 0.21607 - 0.21903)

Elapsed time: 289.079 seconds


In [8]:
# Evaluate hiporank_st_bert_base.txt against ref.txt with files2rouge
# (reports ROUGE-1, ROUGE-2 and ROUGE-L).
import files2rouge

hyp_path = "/home/yued/phd/LRD/lrd_summ/result/hipo/hiporank_st_bert_base.txt"
ref_path = "/home/yued/phd/LRD/lrd_summ/result/ref.txt"
files2rouge.run(hyp_path, ref_path)

Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.44863 (95%-conf.int. 0.44635 - 0.45115)
1 ROUGE-1 Average_P: 0.47218 (95%-conf.int. 0.46961 - 0.47471)
1 ROUGE-1 Average_F: 0.44935 (95%-conf.int. 0.44744 - 0.45118)
---------------------------------------------
1 ROUGE-2 Average_R: 0.14505 (95%-conf.int. 0.14320 - 0.14693)
1 ROUGE-2 Average_P: 0.15423 (95%-conf.int. 0.15214 - 0.15646)
1 ROUGE-2 Average_F: 0.14594 (95%-conf.int. 0.14414 - 0.14787)
---------------------------------------------
1 ROUGE-L Average_R: 0.21240 (95%-conf.int. 0.21092 - 0.21409)
1 ROUGE-L Average_P: 0.22325 (95%-conf.int. 0.22179 - 0.22488)
1 ROUGE-L Average_F: 0.21213 (95%-conf.int. 0.21097 - 0.21344)

Elapsed time: 580.215 seconds


In [13]:
# Evaluate hiporank_st_roberta_large.txt against ref.txt with files2rouge
# (reports ROUGE-1, ROUGE-2 and ROUGE-L).
import files2rouge

hyp_path = "/home/yued/phd/LRD/lrd_summ/result/hipo/hiporank_st_roberta_large.txt"
ref_path = "/home/yued/phd/LRD/lrd_summ/result/ref.txt"
files2rouge.run(hyp_path, ref_path)

Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.44946 (95%-conf.int. 0.44721 - 0.45189)
1 ROUGE-1 Average_P: 0.47298 (95%-conf.int. 0.47043 - 0.47550)
1 ROUGE-1 Average_F: 0.45022 (95%-conf.int. 0.44835 - 0.45210)
---------------------------------------------
1 ROUGE-2 Average_R: 0.14546 (95%-conf.int. 0.14354 - 0.14736)
1 ROUGE-2 Average_P: 0.15467 (95%-conf.int. 0.15245 - 0.15687)
1 ROUGE-2 Average_F: 0.14640 (95%-conf.int. 0.14452 - 0.14831)
---------------------------------------------
1 ROUGE-L Average_R: 0.21241 (95%-conf.int. 0.21096 - 0.21408)
1 ROUGE-L Average_P: 0.22318 (95%-conf.int. 0.22167 - 0.22479)
1 ROUGE-L Average_F: 0.21214 (95%-conf.int. 0.21092 - 0.21345)

Elapsed time: 307.542 seconds


In [10]:
# Evaluate the hiporank_rand_200_196 "add" run (f=0.0, b=1.0, s=0.5, per the
# file name) against ref.txt with files2rouge.
import files2rouge

hyp_path = "/home/yued/phd/LRD/lrd_summ/result/hipo/hiporank_rand_200_196_add_f=0.0_b=1.0_s=0.5.txt"
ref_path = "/home/yued/phd/LRD/lrd_summ/result/ref.txt"
files2rouge.run(hyp_path, ref_path)

Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.43786 (95%-conf.int. 0.43561 - 0.44030)
1 ROUGE-1 Average_P: 0.47054 (95%-conf.int. 0.46802 - 0.47307)
1 ROUGE-1 Average_F: 0.44315 (95%-conf.int. 0.44139 - 0.44501)
---------------------------------------------
1 ROUGE-2 Average_R: 0.13923 (95%-conf.int. 0.13731 - 0.14104)
1 ROUGE-2 Average_P: 0.15122 (95%-conf.int. 0.14911 - 0.15336)
1 ROUGE-2 Average_F: 0.14163 (95%-conf.int. 0.13976 - 0.14343)
---------------------------------------------
1 ROUGE-L Average_R: 0.20750 (95%-conf.int. 0.20607 - 0.20905)
1 ROUGE-L Average_P: 0.22277 (95%-conf.int. 0.22124 - 0.22436)
1 ROUGE-L Average_F: 0.20952 (95%-conf.int. 0.20827 - 0.21083)

Elapsed time: 300.740 seconds


In [11]:
# Evaluate the hiporank_rand_200_196 "add" run (f=-0.2, b=1.0, s=0.5, per the
# file name) against ref.txt with files2rouge.
import files2rouge

hyp_path = "/home/yued/phd/LRD/lrd_summ/result/hipo/hiporank_rand_200_196_add_f=-0.2_b=1.0_s=0.5.txt"
ref_path = "/home/yued/phd/LRD/lrd_summ/result/ref.txt"
files2rouge.run(hyp_path, ref_path)

Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.43791 (95%-conf.int. 0.43569 - 0.44037)
1 ROUGE-1 Average_P: 0.47061 (95%-conf.int. 0.46809 - 0.47314)
1 ROUGE-1 Average_F: 0.44322 (95%-conf.int. 0.44144 - 0.44507)
---------------------------------------------
1 ROUGE-2 Average_R: 0.13924 (95%-conf.int. 0.13730 - 0.14103)
1 ROUGE-2 Average_P: 0.15126 (95%-conf.int. 0.14914 - 0.15340)
1 ROUGE-2 Average_F: 0.14165 (95%-conf.int. 0.13978 - 0.14349)
---------------------------------------------
1 ROUGE-L Average_R: 0.20752 (95%-conf.int. 0.20610 - 0.20908)
1 ROUGE-L Average_P: 0.22280 (95%-conf.int. 0.22126 - 0.22438)
1 ROUGE-L Average_F: 0.20954 (95%-conf.int. 0.20827 - 0.21084)

Elapsed time: 299.126 seconds


In [12]:
# Evaluate the hiporank_rand_200_196 "add" run (f=-0.2, b=1.0, s=1.0, per the
# file name) against ref.txt with files2rouge.
import files2rouge

hyp_path = "/home/yued/phd/LRD/lrd_summ/result/hipo/hiporank_rand_200_196_add_f=-0.2_b=1.0_s=1.0.txt"
ref_path = "/home/yued/phd/LRD/lrd_summ/result/ref.txt"
files2rouge.run(hyp_path, ref_path)

Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.43788 (95%-conf.int. 0.43544 - 0.44041)
1 ROUGE-1 Average_P: 0.46902 (95%-conf.int. 0.46656 - 0.47147)
1 ROUGE-1 Average_F: 0.44249 (95%-conf.int. 0.44063 - 0.44434)
---------------------------------------------
1 ROUGE-2 Average_R: 0.14046 (95%-conf.int. 0.13841 - 0.14237)
1 ROUGE-2 Average_P: 0.15190 (95%-conf.int. 0.14968 - 0.15412)
1 ROUGE-2 Average_F: 0.14258 (95%-conf.int. 0.14055 - 0.14448)
---------------------------------------------
1 ROUGE-L Average_R: 0.20835 (95%-conf.int. 0.20679 - 0.20991)
1 ROUGE-L Average_P: 0.22286 (95%-conf.int. 0.22125 - 0.22448)
1 ROUGE-L Average_F: 0.21000 (95%-conf.int. 0.20871 - 0.21128)

Elapsed time: 301.571 seconds


In [14]:
# Evaluate the hiporank_rand_200_196 "add" run (f=-0.2, b=1.0, s=1.5, per the
# file name) against ref.txt with files2rouge.
import files2rouge

hyp_path = "/home/yued/phd/LRD/lrd_summ/result/hipo/hiporank_rand_200_196_add_f=-0.2_b=1.0_s=1.5.txt"
ref_path = "/home/yued/phd/LRD/lrd_summ/result/ref.txt"
files2rouge.run(hyp_path, ref_path)

Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.43718 (95%-conf.int. 0.43475 - 0.43973)
1 ROUGE-1 Average_P: 0.46839 (95%-conf.int. 0.46585 - 0.47108)
1 ROUGE-1 Average_F: 0.44182 (95%-conf.int. 0.43996 - 0.44366)
---------------------------------------------
1 ROUGE-2 Average_R: 0.14069 (95%-conf.int. 0.13855 - 0.14260)
1 ROUGE-2 Average_P: 0.15203 (95%-conf.int. 0.14971 - 0.15434)
1 ROUGE-2 Average_F: 0.14274 (95%-conf.int. 0.14070 - 0.14465)
---------------------------------------------
1 ROUGE-L Average_R: 0.20855 (95%-conf.int. 0.20698 - 0.21009)
1 ROUGE-L Average_P: 0.22303 (95%-conf.int. 0.22142 - 0.22455)
1 ROUGE-L Average_F: 0.21017 (95%-conf.int. 0.20887 - 0.21139)

Elapsed time: 303.856 seconds


In [15]:
# Evaluate the hiporank_rand_200_196 "add" run (f=0.5, b=1.0, s=0.5, per the
# file name) against ref.txt with files2rouge.
import files2rouge

hyp_path = "/home/yued/phd/LRD/lrd_summ/result/hipo/hiporank_rand_200_196_add_f=0.5_b=1.0_s=0.5.txt"
ref_path = "/home/yued/phd/LRD/lrd_summ/result/ref.txt"
files2rouge.run(hyp_path, ref_path)

Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.43790 (95%-conf.int. 0.43572 - 0.44041)
1 ROUGE-1 Average_P: 0.47048 (95%-conf.int. 0.46798 - 0.47298)
1 ROUGE-1 Average_F: 0.44316 (95%-conf.int. 0.44139 - 0.44504)
---------------------------------------------
1 ROUGE-2 Average_R: 0.13922 (95%-conf.int. 0.13728 - 0.14112)
1 ROUGE-2 Average_P: 0.15116 (95%-conf.int. 0.14908 - 0.15328)
1 ROUGE-2 Average_F: 0.14161 (95%-conf.int. 0.13977 - 0.14342)
---------------------------------------------
1 ROUGE-L Average_R: 0.20750 (95%-conf.int. 0.20603 - 0.20903)
1 ROUGE-L Average_P: 0.22271 (95%-conf.int. 0.22120 - 0.22433)
1 ROUGE-L Average_F: 0.20949 (95%-conf.int. 0.20826 - 0.21085)

Elapsed time: 297.111 seconds


In [16]:
# Evaluate the hiporank_rand_200_196 "add" run (f=0.5, b=1.0, s=1.0, per the
# file name) against ref.txt with files2rouge.
import files2rouge

hyp_path = "/home/yued/phd/LRD/lrd_summ/result/hipo/hiporank_rand_200_196_add_f=0.5_b=1.0_s=1.0.txt"
ref_path = "/home/yued/phd/LRD/lrd_summ/result/ref.txt"
files2rouge.run(hyp_path, ref_path)

Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.43748 (95%-conf.int. 0.43513 - 0.44000)
1 ROUGE-1 Average_P: 0.46880 (95%-conf.int. 0.46631 - 0.47128)
1 ROUGE-1 Average_F: 0.44218 (95%-conf.int. 0.44034 - 0.44402)
---------------------------------------------
1 ROUGE-2 Average_R: 0.14017 (95%-conf.int. 0.13816 - 0.14202)
1 ROUGE-2 Average_P: 0.15165 (95%-conf.int. 0.14946 - 0.15386)
1 ROUGE-2 Average_F: 0.14232 (95%-conf.int. 0.14034 - 0.14422)
---------------------------------------------
1 ROUGE-L Average_R: 0.20814 (95%-conf.int. 0.20658 - 0.20973)
1 ROUGE-L Average_P: 0.22273 (95%-conf.int. 0.22113 - 0.22433)
1 ROUGE-L Average_F: 0.20984 (95%-conf.int. 0.20857 - 0.21112)

Elapsed time: 306.595 seconds


In [17]:
# Evaluate the hiporank_rand_200_196 "add" run (f=0.5, b=1.0, s=1.5, per the
# file name) against ref.txt with files2rouge.
import files2rouge

hyp_path = "/home/yued/phd/LRD/lrd_summ/result/hipo/hiporank_rand_200_196_add_f=0.5_b=1.0_s=1.5.txt"
ref_path = "/home/yued/phd/LRD/lrd_summ/result/ref.txt"
files2rouge.run(hyp_path, ref_path)

Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.43683 (95%-conf.int. 0.43442 - 0.43937)
1 ROUGE-1 Average_P: 0.46798 (95%-conf.int. 0.46543 - 0.47059)
1 ROUGE-1 Average_F: 0.44142 (95%-conf.int. 0.43950 - 0.44325)
---------------------------------------------
1 ROUGE-2 Average_R: 0.14028 (95%-conf.int. 0.13816 - 0.14212)
1 ROUGE-2 Average_P: 0.15161 (95%-conf.int. 0.14933 - 0.15395)
1 ROUGE-2 Average_F: 0.14232 (95%-conf.int. 0.14030 - 0.14418)
---------------------------------------------
1 ROUGE-L Average_R: 0.20828 (95%-conf.int. 0.20671 - 0.20981)
1 ROUGE-L Average_P: 0.22271 (95%-conf.int. 0.22113 - 0.22426)
1 ROUGE-L Average_F: 0.20986 (95%-conf.int. 0.20860 - 0.21109)

Elapsed time: 311.969 seconds


In [18]:
# Evaluate hiporank_rand_200_196_multiply.txt against ref.txt with files2rouge
# (reports ROUGE-1, ROUGE-2 and ROUGE-L).
import files2rouge

hyp_path = "/home/yued/phd/LRD/lrd_summ/result/hipo/hiporank_rand_200_196_multiply.txt"
ref_path = "/home/yued/phd/LRD/lrd_summ/result/ref.txt"
files2rouge.run(hyp_path, ref_path)

Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.43703 (95%-conf.int. 0.43463 - 0.43952)
1 ROUGE-1 Average_P: 0.46823 (95%-conf.int. 0.46579 - 0.47086)
1 ROUGE-1 Average_F: 0.44165 (95%-conf.int. 0.43984 - 0.44350)
---------------------------------------------
1 ROUGE-2 Average_R: 0.13999 (95%-conf.int. 0.13795 - 0.14189)
1 ROUGE-2 Average_P: 0.15144 (95%-conf.int. 0.14921 - 0.15363)
1 ROUGE-2 Average_F: 0.14211 (95%-conf.int. 0.14017 - 0.14394)
---------------------------------------------
1 ROUGE-L Average_R: 0.20862 (95%-conf.int. 0.20701 - 0.21018)
1 ROUGE-L Average_P: 0.22320 (95%-conf.int. 0.22161 - 0.22481)
1 ROUGE-L Average_F: 0.21023 (95%-conf.int. 0.20891 - 0.21156)

Elapsed time: 301.783 seconds


In [19]:
# Evaluate hiporank_rand_200196.txt against ref.txt with files2rouge
# (reports ROUGE-1, ROUGE-2 and ROUGE-L).
import files2rouge

hyp_path = "/home/yued/phd/LRD/lrd_summ/result/hipo/hiporank_rand_200196.txt"
ref_path = "/home/yued/phd/LRD/lrd_summ/result/ref.txt"
files2rouge.run(hyp_path, ref_path)

Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.43712 (95%-conf.int. 0.43468 - 0.43973)
1 ROUGE-1 Average_P: 0.46856 (95%-conf.int. 0.46611 - 0.47115)
1 ROUGE-1 Average_F: 0.44181 (95%-conf.int. 0.43989 - 0.44365)
---------------------------------------------
1 ROUGE-2 Average_R: 0.14063 (95%-conf.int. 0.13857 - 0.14253)
1 ROUGE-2 Average_P: 0.15219 (95%-conf.int. 0.14984 - 0.15448)
1 ROUGE-2 Average_F: 0.14272 (95%-conf.int. 0.14068 - 0.14467)
---------------------------------------------
1 ROUGE-L Average_R: 0.20849 (95%-conf.int. 0.20693 - 0.21014)
1 ROUGE-L Average_P: 0.22315 (95%-conf.int. 0.22152 - 0.22474)
1 ROUGE-L Average_F: 0.21011 (95%-conf.int. 0.20880 - 0.21131)

Elapsed time: 298.015 seconds


## LSA Result

In [None]:
# 1 ROUGE-1 Average_R: 0.38268 (95%-conf.int. 0.38044 - 0.38487)
# 1 ROUGE-1 Average_P: 0.39442 (95%-conf.int. 0.39251 - 0.39640)
# 1 ROUGE-1 Average_F: 0.37952 (95%-conf.int. 0.37805 - 0.38106)
# ---------------------------------------------
# 1 ROUGE-2 Average_R: 0.08916 (95%-conf.int. 0.08771 - 0.09060)
# 1 ROUGE-2 Average_P: 0.09286 (95%-conf.int. 0.09132 - 0.09419)
# 1 ROUGE-2 Average_F: 0.08881 (95%-conf.int. 0.08743 - 0.09007)
# ---------------------------------------------
# 1 ROUGE-L Average_R: 0.18220 (95%-conf.int. 0.18084 - 0.18365)
# 1 ROUGE-L Average_P: 0.18702 (95%-conf.int. 0.18583 - 0.18815)
# 1 ROUGE-L Average_F: 0.18008 (95%-conf.int. 0.17909 - 0.18115)

## TextRank Result

In [None]:
# ---------------------------------------------
# 1 ROUGE-1 Average_R: 0.42116 (95%-conf.int. 0.41865 - 0.42376)
# 1 ROUGE-1 Average_P: 0.48361 (95%-conf.int. 0.48054 - 0.48665)
# 1 ROUGE-1 Average_F: 0.43832 (95%-conf.int. 0.43601 - 0.44048)
# ---------------------------------------------
# 1 ROUGE-2 Average_R: 0.13582 (95%-conf.int. 0.13385 - 0.13772)
# 1 ROUGE-2 Average_P: 0.15948 (95%-conf.int. 0.15692 - 0.16174)
# 1 ROUGE-2 Average_F: 0.14296 (95%-conf.int. 0.14097 - 0.14493)
# ---------------------------------------------
# 1 ROUGE-L Average_R: 0.20935 (95%-conf.int. 0.20770 - 0.21108)
# 1 ROUGE-L Average_P: 0.24084 (95%-conf.int. 0.23899 - 0.24281)
# 1 ROUGE-L Average_F: 0.21756 (95%-conf.int. 0.21607 - 0.21903)

## LexRank

In [42]:
# ---------------------------------------------
# 1 ROUGE-1 Average_R: 0.45963 (95%-conf.int. 0.45760 - 0.46180)
# 1 ROUGE-1 Average_P: 0.47526 (95%-conf.int. 0.47247 - 0.47796)
# 1 ROUGE-1 Average_F: 0.45684 (95%-conf.int. 0.45499 - 0.45873)
# ---------------------------------------------
# 1 ROUGE-2 Average_R: 0.14622 (95%-conf.int. 0.14453 - 0.14804)
# 1 ROUGE-2 Average_P: 0.15409 (95%-conf.int. 0.15213 - 0.15611)
# 1 ROUGE-2 Average_F: 0.14678 (95%-conf.int. 0.14515 - 0.14859)
# ---------------------------------------------
# 1 ROUGE-L Average_R: 0.21989 (95%-conf.int. 0.21838 - 0.22137)
# 1 ROUGE-L Average_P: 0.22636 (95%-conf.int. 0.22478 - 0.22787)
# 1 ROUGE-L Average_F: 0.21780 (95%-conf.int. 0.21656 - 0.21907)

############## with tokenizer
# export CLASSPATH=/home/yued/tools/stanford-corenlp-4.2.0/stanford-corenlp-4.2.0.jar
# cat lexrank.txt | java edu.stanford.nlp.process.PTBTokenizer -ioFileList -preserveLines > lexrank.tokenized
# ---------------------------------------------
# 1 ROUGE-1 Average_R: 0.45990 (95%-conf.int. 0.45792 - 0.46207)
# 1 ROUGE-1 Average_P: 0.47602 (95%-conf.int. 0.47331 - 0.47858)
# 1 ROUGE-1 Average_F: 0.45781 (95%-conf.int. 0.45601 - 0.45963)
# ---------------------------------------------
# 1 ROUGE-2 Average_R: 0.14630 (95%-conf.int. 0.14461 - 0.14813)
# 1 ROUGE-2 Average_P: 0.15432 (95%-conf.int. 0.15237 - 0.15628)
# 1 ROUGE-2 Average_F: 0.14708 (95%-conf.int. 0.14545 - 0.14891)
# ---------------------------------------------
# 1 ROUGE-L Average_R: 0.41820 (95%-conf.int. 0.41629 - 0.42022)
# 1 ROUGE-L Average_P: 0.43377 (95%-conf.int. 0.43108 - 0.43631)
# 1 ROUGE-L Average_F: 0.41674 (95%-conf.int. 0.41496 - 0.41851)

## SumBasic

In [None]:
# ---------------------------------------------
# 1 ROUGE-1 Average_R: 0.39161 (95%-conf.int. 0.38966 - 0.39358)
# 1 ROUGE-1 Average_P: 0.42084 (95%-conf.int. 0.41874 - 0.42294)
# 1 ROUGE-1 Average_F: 0.39647 (95%-conf.int. 0.39498 - 0.39797)
# ---------------------------------------------
# 1 ROUGE-2 Average_R: 0.08872 (95%-conf.int. 0.08757 - 0.08995)
# 1 ROUGE-2 Average_P: 0.09679 (95%-conf.int. 0.09557 - 0.09804)
# 1 ROUGE-2 Average_F: 0.09046 (95%-conf.int. 0.08936 - 0.09150)
# ---------------------------------------------
# 1 ROUGE-L Average_R: 0.17831 (95%-conf.int. 0.17713 - 0.17950)
# 1 ROUGE-L Average_P: 0.19078 (95%-conf.int. 0.18970 - 0.19191)
# 1 ROUGE-L Average_F: 0.17983 (95%-conf.int. 0.17902 - 0.18074)

## Hiporank random embedding

In [None]:
# ---------------------------------------------
# 1 ROUGE-1 Average_R: 0.44341 (95%-conf.int. 0.44115 - 0.44596)
# 1 ROUGE-1 Average_P: 0.46809 (95%-conf.int. 0.46559 - 0.47066)
# 1 ROUGE-1 Average_F: 0.44489 (95%-conf.int. 0.44309 - 0.44674)
# ---------------------------------------------
# 1 ROUGE-2 Average_R: 0.14117 (95%-conf.int. 0.13925 - 0.14306)
# 1 ROUGE-2 Average_P: 0.15058 (95%-conf.int. 0.14842 - 0.15272)
# 1 ROUGE-2 Average_F: 0.14230 (95%-conf.int. 0.14048 - 0.14408)
# ---------------------------------------------
# 1 ROUGE-L Average_R: 0.20951 (95%-conf.int. 0.20808 - 0.21108)
# 1 ROUGE-L Average_P: 0.22095 (95%-conf.int. 0.21947 - 0.22254)
# 1 ROUGE-L Average_F: 0.20965 (95%-conf.int. 0.20846 - 0.21095)
