In [1]:
import pandas as pd
import evaluate
from eval_utils import read_pmcids, sent_json, convert_sentence_obj, convert_lead_sentence_obj
import warnings
warnings.simplefilter("ignore", FutureWarning)
warnings.simplefilter("ignore", UserWarning)

In [2]:
# File handed to read_pmcids() by main() when no other pmcid_file is given.
IDS_PATH = '../../dataset/pmcids/test.txt'
# Directory holding one JSON file per article; process_article() loads
# f'{JSON_DIR}/{pmcid}.json' from here by default.
JSON_DIR = '../../dataset/sentence_json/'

In [3]:
# Labelled (hypothesis) summary and reference summary
def process_article(pmcid, json_path=JSON_DIR):
    """Build the baseline hypothesis and the reference summary for one article.

    The hypothesis depends on the notebook-level global ``baseline``, which
    must be assigned before this function is called:

    * ``'oracle'`` -> ``convert_sentence_obj(body, label=True)``
    * ``'lead3'``  -> ``convert_lead_sentence_obj(body)``

    Args:
        pmcid: Article identifier; ``{json_path}/{pmcid}.json`` is loaded.
        json_path: Directory containing the per-article JSON files.

    Returns:
        A ``(hypothesis, reference)`` tuple. ``reference`` is built from the
        ``'abstract'`` entry of the JSON, ``hypothesis`` from its ``'body'``.

    Raises:
        ValueError: If ``baseline`` is neither ``'oracle'`` nor ``'lead3'``.
    """
    # Fail fast on an unsupported baseline. Without this check neither branch
    # below would bind `hypothesis`, and the return statement would raise an
    # opaque UnboundLocalError only after the file had already been read.
    if baseline not in ('oracle', 'lead3'):
        raise ValueError(
            f"Unknown baseline {baseline!r}; expected 'oracle' or 'lead3'"
        )

    sentJson = sent_json(f'{json_path}/{pmcid}.json')
    reference = convert_sentence_obj(sentJson['abstract'])
    if baseline == 'oracle':
        hypothesis = convert_sentence_obj(sentJson['body'], label=True)
    else:  # baseline == 'lead3', guaranteed by the validation above
        hypothesis = convert_lead_sentence_obj(sentJson['body'])
    return hypothesis, reference


# Average summary scores
def main(pmcid_file = IDS_PATH):
    """Compute aggregated ROUGE scores over all articles listed in ``pmcid_file``.

    For every article the hypothesis and reference sentences are grouped under
    five keys ('ALL', 'I', 'M', 'R', 'D'); the texts of the matching sentences
    are joined with newlines and scored with the ``rouge`` metric.

    Args:
        pmcid_file: Path passed to ``read_pmcids`` to obtain the article ids.

    Returns:
        A DataFrame with one column per key and one row per ROUGE score,
        rounded to 4 decimals.
    """
    def _section_text(sentences, wanted):
        # Newline-joined text of the sentences whose 'section' is in `wanted`.
        # NOTE(review): `wanted` is a str, so `in` is a substring test; confirm
        # that section values are always exactly one of I/M/R/D.
        return '\n'.join([sent['text'] for sent in sentences if sent['section'] in wanted])

    scorer = evaluate.load('rouge')
    article_ids = read_pmcids(pmcid_file)
    keys = ['ALL', 'I', 'M', 'R', 'D']
    predictions = {key: [] for key in keys}
    references = {key: [] for key in keys}

    for pmcid in article_ids:
        hypothesis, reference = process_article(pmcid)
        for key in keys:
            # 'ALL' pools the four individual sections.
            if key == 'ALL':
                wanted = 'IMRD'
            else:
                wanted = key
            hyp_text = _section_text(hypothesis, wanted)
            ref_text = _section_text(reference, wanted)
            predictions[key].append(hyp_text)
            references[key].append(ref_text)

    scores = {}
    for key in keys:
        scores[key] = scorer.compute(
            predictions=predictions[key],
            references=references[key],
            use_stemmer=True,
            use_aggregator=True,
        )
    return pd.DataFrame(scores).round(4)

### Oracle 

In [4]:
%%time
# Select the oracle baseline: process_article() reads this global and builds
# the hypothesis with convert_sentence_obj(body, label=True).
baseline = 'oracle'
main()

CPU times: user 6min 1s, sys: 171 ms, total: 6min 2s
Wall time: 6min 3s


Unnamed: 0,ALL,I,M,R,D
rouge1,0.6646,0.5715,0.5424,0.6043,0.6097
rouge2,0.3854,0.338,0.2963,0.3632,0.407
rougeL,0.4316,0.4298,0.3792,0.4228,0.4859
rougeLsum,0.6301,0.5135,0.4819,0.5485,0.5519


### LEAD-3

In [5]:
%%time
# Select the LEAD-3 baseline: process_article() reads this global and builds
# the hypothesis with convert_lead_sentence_obj(body).
baseline = 'lead3'
main()

CPU times: user 5min 23s, sys: 132 ms, total: 5min 23s
Wall time: 5min 24s


Unnamed: 0,ALL,I,M,R,D
rouge1,0.465,0.3116,0.3085,0.2793,0.3214
rouge2,0.1677,0.0975,0.1054,0.0839,0.104
rougeL,0.2237,0.1985,0.2011,0.1745,0.2036
rougeLsum,0.4347,0.2684,0.2696,0.2467,0.2689
