1. Preparations

In [17]:
import evaluation as eval
import numpy as np
import pandas as pd
import os
import json
from enum import Enum

In [18]:
def get_pdf_summary_path() -> str:
    """Return the absolute path of the ``resources/benchmark/pdf_summary`` directory.

    The location is resolved relative to the current working directory
    (two levels up), so the result depends on where the kernel was started.

    Returns:
        The absolute directory path as a string. The directory is not
        checked for existence.
    """
    # Build the relative path from components instead of a hard-coded
    # backslash string: on POSIX systems '\\' is not a separator, so the old
    # literal collapsed into a single bogus path component. On Windows the
    # joined result is identical to the previous literal.
    relative = os.path.join(os.pardir, os.pardir, 'resources', 'benchmark', 'pdf_summary')
    return os.path.abspath(relative)

def get_pdf_summary_subpath(subpath:str) -> str:
    """Return the path of ``subpath`` located inside the pdf_summary directory.

    Args:
        subpath: Path fragment to append to the pdf_summary directory.

    Returns:
        The pdf_summary directory joined with ``subpath``.
    """
    base_dir = get_pdf_summary_path()
    return os.path.join(base_dir, subpath)

2. Load Files into dictionaries

In [19]:
class SumAlgo(Enum):
    """Identifiers of the summary sets stored under the pdf_summary directory.

    Each member's value is used as a sub-directory name: it is passed to
    ``get_pdf_summary_subpath`` when files are listed. Iterating the enum
    yields members in the declaration order below.
    """
    # NOTE(review): member names suggest the summarisation algorithm that
    # produced each set -- confirm against the code that writes the files.
    LSA = 'lsa'
    TEXTRANK = 'textrank'
    FALCON = 'falcon'
    TEXTRANK_BART = 'textrank_bart'
    TEXTRANK_FALCON = 'textrank_falcon'
    TEXTRANK_STABLELM = 'textrank_stablelm'
    # Reference set: its directory supplies the list of file names, and it is
    # the set other summaries are evaluated against.
    REF = 'ref'

def get_file_name_list() -> list[str]:
    """List the base names of all files found under the reference directory.

    The ``.grobid.tei.xml.json`` suffix is removed from every file name;
    names that do not end with that suffix are returned unchanged.

    Returns:
        File names in the order produced by ``os.walk`` over the
        ``SumAlgo.REF`` sub-directory.
    """
    suffix = '.grobid.tei.xml.json'
    names = []
    for (_, _, filenames) in os.walk(get_pdf_summary_subpath(SumAlgo.REF.value)):
        for filename in filenames:
            # removesuffix() drops the exact suffix once. The previous
            # rstrip(suffix) treated the argument as a *set of characters*
            # and also ate trailing characters of the real name
            # (e.g. "analysis" became "analy").
            names.append(filename.removesuffix(suffix))
    return names

def get_file_path_list(sumAlgo:SumAlgo) -> list[str]:
    """Collect the full paths of every file below the directory of ``sumAlgo``.

    Args:
        sumAlgo: Enum member whose value names the sub-directory to walk.

    Returns:
        Paths (directory joined with file name) in ``os.walk`` order.
    """
    root = get_pdf_summary_subpath(sumAlgo.value)
    return [
        os.path.join(dirpath, filename)
        for (dirpath, _, filenames) in os.walk(root)
        for filename in filenames
    ]

def get_file_dict(sumAlgo:SumAlgo) -> dict:
    """Map every reference file name to the parsed JSON of a ``sumAlgo`` file.

    Names (from the reference directory) and paths (from the ``sumAlgo``
    directory) are paired purely by position.

    Args:
        sumAlgo: Enum member selecting which summary directory to read.

    Returns:
        Dict of file name -> content returned by ``read_file``.

    Raises:
        IndexError: If the ``sumAlgo`` directory yields fewer paths than
            there are reference file names.
    """
    # NOTE(review): positional pairing assumes both directories list the same
    # documents in the same order -- confirm, os.walk does not promise it.
    summary_paths = get_file_path_list(sumAlgo)
    names = get_file_name_list()
    return {
        name: read_file(summary_paths[position])
        for position, name in enumerate(names)
    }

def read_file(path:str) -> any:
    """Load and return the JSON document stored at ``path``.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The deserialised JSON value (whatever ``json.load`` produces).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # Decode explicitly as UTF-8. Without an encoding argument open() uses
    # the platform locale (e.g. cp1252 on Windows), which fails or garbles
    # text as soon as a document contains non-ASCII characters.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

def get_segment_dict(sumAlgo:SumAlgo) -> dict:
    """Return the ``'segments'`` entry of every loaded file for ``sumAlgo``.

    Args:
        sumAlgo: Enum member selecting which summary directory to read.

    Returns:
        Dict of file name -> the value stored under ``'segments'`` in that
        file's JSON content.

    Raises:
        KeyError: If a file has no ``'segments'`` key.
    """
    documents = get_file_dict(sumAlgo)
    return {
        name: documents[name]['segments']
        for name in get_file_name_list()
    }

# Load, per summary set, a dict of file name -> that file's 'segments' value.
# ref_segment_dict is the reference that the evaluation functions compare against.
ref_segment_dict = get_segment_dict(SumAlgo.REF)
# NOTE(review): the two dicts below are not read by any code visible in this
# notebook (evaluate() reloads the segments itself) -- confirm before removing.
textrank_segment_dict = get_segment_dict(SumAlgo.TEXTRANK)
lsa_segment_dict = get_segment_dict(SumAlgo.LSA)
# Base names of the files found in the reference directory.
file_name_list = get_file_name_list()

3. Evaluate

In [20]:
# Mean rouge_1 of all paragraphs
def get_header_dict(segments:list) -> dict:
    """Index a list of segments by their header.

    Args:
        segments: Iterable of mappings, each providing ``'header'`` and
            ``'content'`` keys.

    Returns:
        Dict of header -> content. When a header occurs more than once the
        content of its last occurrence is kept.

    Raises:
        KeyError: If a segment lacks ``'header'`` or ``'content'``.
    """
    return {segment['header']: segment['content'] for segment in segments}

def get_eval_by_header(ref_header_dict:dict, test_header_dict:dict, eval_method:eval.EvalMethod, sum_algo:SumAlgo) -> pd.DataFrame:
    """Score each test section against the reference section with the same header.

    Args:
        ref_header_dict: Header -> content of the reference summary.
        test_header_dict: Header -> content of the summary under test.
        eval_method: Key into ``eval.eval_method_dict`` selecting the scorer.
        sum_algo: Not used by the computation; kept for call compatibility.

    Returns:
        DataFrame with one row per evaluated header and a single column
        named ``eval_method.value`` holding the scorer's result.

    Raises:
        KeyError: If an evaluated reference header is missing from
            ``test_header_dict``.
    """
    # Materialise the header order once instead of rebuilding the key list
    # on every loop iteration.
    ref_headers = list(ref_header_dict.keys())
    ref_key_num = len(ref_headers)
    test_key_num = len(test_header_dict)
    if ref_key_num != test_key_num:
        # The old message concatenated str + int (TypeError) and printed the
        # reference count twice; report both counts (reference, then test).
        print(f"The number of keys do not match: {ref_key_num} {test_key_num}")

    result = {}
    # As before, at most len(test_header_dict) reference headers are scored.
    # Slicing also avoids running past the end of ref_headers when the test
    # dict has more headers than the reference.
    for header in ref_headers[:test_key_num]:
        ref_content = ref_header_dict[header]
        test_content = test_header_dict[header]
        eval_result = eval.eval_method_dict[eval_method](test_content, ref_content)
        result[header] = [eval_result]

    return pd.DataFrame(result, index=[eval_method.value]).transpose()

def evaluate(sum_algo:SumAlgo, ref_segment_dict:dict, eval_method:eval.EvalMethod) -> pd.DataFrame:
    """Compute, per file, the mean section score of ``sum_algo`` for one metric.

    Args:
        sum_algo: Summary set to evaluate; its segments are loaded here via
            ``get_segment_dict``.
        ref_segment_dict: File name -> reference segments.
        eval_method: Metric to apply to every section.

    Returns:
        DataFrame indexed by file name (index named ``sum_algo.value``) with
        a single column ``eval_method.value`` holding the per-file mean.

    Raises:
        ZeroDivisionError: If a file yields no evaluated sections.
    """
    test_segment_dict = get_segment_dict(sum_algo)
    file_name_list = get_file_name_list()
    result_list = []
    for file_name in file_name_list:
        ref_header_dict = get_header_dict(ref_segment_dict[file_name])
        test_header_dict = get_header_dict(test_segment_dict[file_name])

        eval_header = get_eval_by_header(ref_header_dict, test_header_dict, eval_method, sum_algo)
        eval_value_list = eval_header[eval_method.value].to_list()
        result_list.append(sum(eval_value_list) / len(eval_value_list))

    df = pd.DataFrame({eval_method.value: result_list}, index=file_name_list)
    # The original line was the bare expression `df.index.name`, which does
    # nothing. Label the index with the algorithm, as evaluate_all_method does.
    df.index.name = sum_algo.value
    return df

def evaluate_all_method(sum_algo:SumAlgo, ref_segment_dict:dict) -> pd.DataFrame:
    """Build a per-file score table for ``sum_algo`` across every metric.

    Args:
        sum_algo: Summary set to evaluate.
        ref_segment_dict: File name -> reference segments.

    Returns:
        DataFrame with one column per ``eval.EvalMethod`` value and one row
        per file, followed by a "Total" row (sum of the per-file scores) and
        an "Average" row (that sum divided by the number of files). The
        index is named ``sum_algo.value``.

    Raises:
        ZeroDivisionError: If there are no files to evaluate.
    """
    row_labels = get_file_name_list() + ["Total", "Average"]

    columns = {}
    for method in eval.EvalMethod:
        scores = evaluate(sum_algo, ref_segment_dict, method)[method.value].to_list()
        score_sum = sum(scores)
        # Summary rows are appended after the per-file scores, matching row_labels.
        columns[method.value] = scores + [score_sum, score_sum / len(scores)]

    table = pd.DataFrame(columns, index=row_labels)
    table.index.name = sum_algo.value
    return table

def evaluate_all_sum_algo(ref_segment_dict:dict) -> pd.DataFrame:
    """Compare summary sets by their average score on every metric.

    Args:
        ref_segment_dict: File name -> reference segments.

    Returns:
        DataFrame indexed by metric name with one column per evaluated
        ``SumAlgo`` member (the enum member itself is the column key), each
        cell being the "Average" row of ``evaluate_all_method``.
    """
    metric_names = [method.value for method in eval.EvalMethod]
    # REF is the baseline itself. NOTE(review): TEXTRANK is skipped as well;
    # the reason is not visible here -- confirm.
    skipped = (SumAlgo.REF, SumAlgo.TEXTRANK)

    averages_by_algo = {}
    for algo in SumAlgo:
        if algo in skipped:
            continue
        per_file_scores = evaluate_all_method(algo, ref_segment_dict)
        averages_by_algo[algo] = [
            per_file_scores[metric][("Average")] for metric in metric_names
        ]

    return pd.DataFrame(averages_by_algo, index=metric_names)
       
# NOTE(review): the commented-out call below is stale -- evaluate() takes
# (sum_algo, ref_segment_dict, eval_method), not a segment dict first.
# print(evaluate(lsa_segment_dict, ref_segment_dict, eval.EvalMethod.ROUGE_1))
# A notebook cell renders only its last expression, so the per-file LSA table
# computed on the next line is never displayed.
evaluate_all_method(SumAlgo.LSA, ref_segment_dict)
# Rendered cell output: metric x algorithm table of average scores.
evaluate_all_sum_algo(ref_segment_dict)
        

Unnamed: 0,SumAlgo.LSA,SumAlgo.FALCON,SumAlgo.TEXTRANK_BART,SumAlgo.TEXTRANK_FALCON,SumAlgo.TEXTRANK_STABLELM
rouge_1,0.576021,0.598212,0.661115,0.648827,0.579882
rouge_2,0.245746,0.367898,0.425214,0.455075,0.260122
rouge_l,0.576021,0.598212,0.661115,0.648827,0.579882
