/
cal_rouge.py
107 lines (94 loc) · 4.17 KB
/
cal_rouge.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from os.path import join
import logging
import tempfile
import subprocess as sp
from pyrouge import Rouge155
from pyrouge.utils import log
import os
import argparse
from nltk import sent_tokenize
from compare_mt.rouge.rouge_scorer import RougeScorer
"""
Code from BRIO (ACL, 2022)
- link : https://github.com/yixinL7/BRIO/blob/main/cal_rouge.py
"""
# Directory of the ROUGE-1.5.5 distribution. eval_rouge joins it with
# 'ROUGE-1.5.5.pl' (the perl script it runs) and 'data' (passed via -e).
# Hard-coded: adjust to the local installation before running.
_ROUGE_PATH = '/workspace/pyrouge/rouge/tools/ROUGE-1.5.5/'
def _average_f_score(output, metric):
    """Return the ``Average_F`` value reported for *metric* in ROUGE output.

    Looks for the line whose whitespace-separated fields are
    ``<system id> <metric> Average_F: <value> ...`` instead of relying on a
    fixed line number, so extra lines in the output do not shift the result.

    Raises:
        ValueError: if no such line is present.
    """
    for line in output.splitlines():
        fields = line.split()
        if len(fields) >= 4 and fields[1] == metric and fields[2] == 'Average_F:':
            return float(fields[3])
    raise ValueError(f'no Average_F line for {metric} in ROUGE output')


def eval_rouge(dec_dir, ref_dir, Print=False):
    """Score the summaries in *dec_dir* against *ref_dir* with ROUGE-1.5.5.

    Both directories are converted to ROUGE format inside a temporary
    directory, a settings file is written there, and the perl script under
    ``_ROUGE_PATH`` is run on it. The raw tool output and the three parsed
    scores are printed to stdout.

    Args:
        dec_dir: directory of system summaries named ``<id>.dec``.
        ref_dir: directory of reference summaries named ``<id>.ref``.
        Print: when exactly ``True``, also write the raw tool output to
            ``ROUGE.txt`` in the parent directory of *dec_dir*.

    Returns:
        Tuple ``(R_1, R_2, R_L)`` of the Average_F values for ROUGE-1,
        ROUGE-2 and ROUGE-L as floats.

    Raises:
        subprocess.CalledProcessError: if the perl process exits non-zero.
        ValueError: if a score line is missing from the tool output.
    """
    assert _ROUGE_PATH is not None
    log.get_global_console_logger().setLevel(logging.WARNING)
    # Raw string avoids the invalid-escape warning for "\d"; the dot is
    # escaped so it only matches a literal '.' before the extension.
    dec_pattern = r'(\d+)\.dec'
    ref_pattern = '#ID#.ref'
    # ROUGE-1.5.5 options, passed through to the perl script verbatim.
    options = ['-c', '95', '-r', '1000', '-n', '2', '-m']
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_dec = join(tmp_dir, 'dec')
        tmp_ref = join(tmp_dir, 'ref')
        settings = join(tmp_dir, 'settings.xml')
        Rouge155.convert_summaries_to_rouge_format(dec_dir, tmp_dec)
        Rouge155.convert_summaries_to_rouge_format(ref_dir, tmp_ref)
        Rouge155.write_config_static(
            tmp_dec, dec_pattern,
            tmp_ref, ref_pattern,
            settings, system_id=1
        )
        # Build argv as a list: splitting a command string on spaces would
        # break any path that itself contains a space.
        argv = [
            'perl', join(_ROUGE_PATH, 'ROUGE-1.5.5.pl'),
            '-e', join(_ROUGE_PATH, 'data'),
            *options,
            '-a', settings,
        ]
        print(' '.join(argv))
        output = sp.check_output(argv, universal_newlines=True)
        R_1 = _average_f_score(output, 'ROUGE-1')
        R_2 = _average_f_score(output, 'ROUGE-2')
        R_L = _average_f_score(output, 'ROUGE-L')
        print(output)
    if Print is True:
        rouge_path = join(dec_dir, '../ROUGE.txt')
        with open(rouge_path, 'w') as f:
            print(output, file=f)
    print(f'R_1 : {R_1}, R_2 : {R_2}, R_L : {R_L}')
    return R_1, R_2, R_L
def _split_into_files(src_path, out_dir, suffix, lower):
    """Write line ``i`` of *src_path* to ``<out_dir>/<i>.<suffix>``.

    Each line is stripped, optionally lowercased, split into sentences with
    ``sent_tokenize`` and written one sentence per line.
    """
    with open(src_path, 'r') as src:
        for i, line in enumerate(src):
            line = line.strip()
            if lower:
                line = line.lower()
            with open(join(out_dir, f"{i}.{suffix}"), 'w') as dst:
                for sent in sent_tokenize(line):
                    print(sent, file=dst)


def _python_rouge(ref_dir, hyp_dir):
    """Return mean rouge1/rouge2/rougeLsum F-measures (x100) via RougeScorer.

    Expects ``<i>.ref`` in *ref_dir* and ``<i>.dec`` in *hyp_dir* for
    ``i = 0 .. n-1``, where ``n`` is the number of ``.ref`` files.

    Raises:
        ValueError: if *ref_dir* contains no ``.ref`` files.
    """
    scorer = RougeScorer(['rouge1', 'rouge2', 'rougeLsum'], use_stemmer=True)
    # Count only reference files so stray entries (or .dec files living in
    # the same directory) do not inflate the number of documents.
    num = sum(1 for name in os.listdir(ref_dir) if name.endswith('.ref'))
    if num == 0:
        raise ValueError(f"no .ref files found in {ref_dir}")
    rouge1, rouge2, rougeLsum = 0, 0, 0
    for i in range(num):
        # Context managers close each file instead of leaking the handle.
        with open(join(ref_dir, f"{i}.ref"), 'r') as f:
            ref = f.read()
        with open(join(hyp_dir, f"{i}.dec"), 'r') as f:
            hyp = f.read()
        scores = scorer.score(ref, hyp)
        rouge1 += scores['rouge1'].fmeasure
        rouge2 += scores['rouge2'].fmeasure
        rougeLsum += scores['rougeLsum'].fmeasure
    return rouge1 / num * 100, rouge2 / num * 100, rougeLsum / num * 100


def main():
    """Parse the command line, compute ROUGE and print the three scores.

    If ``--ref`` is not a directory, both ``--ref`` and ``--hyp`` are read as
    one-summary-per-line files and expanded into temporary directories, which
    are removed again before returning. Otherwise both are used as
    directories of ``<i>.ref`` / ``<i>.dec`` files.
    """
    import shutil  # only needed here, for temp-directory cleanup

    parser = argparse.ArgumentParser(description='Parameters')
    parser.add_argument("--ref", type=str, help="path of a directory or a file containing reference summaries", required=True)
    parser.add_argument("--hyp", type=str, help="path of a directory or a file containing candidate summaries", required=True)
    parser.add_argument("-p", "--python", action="store_true", help="use python rouge")
    parser.add_argument("-l", "--lower", action="store_true", help="lowercase")
    args = parser.parse_args()

    tmp_dirs = []
    try:
        if not os.path.isdir(args.ref):
            # if args.ref is a file, generate a directory to store the summaries
            ref_dir = tempfile.mkdtemp()
            tmp_dirs.append(ref_dir)
            _split_into_files(args.ref, ref_dir, 'ref', args.lower)
            hyp_dir = tempfile.mkdtemp()
            tmp_dirs.append(hyp_dir)
            _split_into_files(args.hyp, hyp_dir, 'dec', args.lower)
        else:
            ref_dir = args.ref
            hyp_dir = args.hyp

        if args.python:
            r1, r2, rl = _python_rouge(ref_dir, hyp_dir)
        else:
            r1, r2, rl = eval_rouge(hyp_dir, ref_dir, Print=True)
        print("rouge1: %.6f, rouge2: %.6f, rougeL: %.6f"%(r1, r2, rl))
    finally:
        # mkdtemp never cleans up after itself; remove what was created here.
        for path in tmp_dirs:
            shutil.rmtree(path, ignore_errors=True)


if __name__ == '__main__':
    main()