1. Preparations

In [100]:
import evaluation as eval
import numpy as np
import pandas as pd
import os
import json
from enum import Enum

In [101]:
def get_pdf_summary_path() -> str:
    """Return the absolute path of the ``pdf_summary`` benchmark directory.

    The directory is resolved relative to the current working directory as
    ``../../resources/benchmark/pdf_summary``.

    Returns:
        Absolute, normalised path to the directory (existence is not checked).
    """
    # Build the relative path from components so the separator matches the
    # host OS; a literal with backslashes only resolves correctly on Windows.
    relative_path = os.path.join('..', '..', 'resources', 'benchmark', 'pdf_summary')
    return os.path.abspath(relative_path)

def get_pdf_summary_subpath(subpath:str) -> str:
    """Return the path of `subpath` located inside the pdf_summary directory.

    Args:
        subpath: Path component(s) to append to the pdf_summary directory.

    Returns:
        The joined path.
    """
    base_dir = get_pdf_summary_path()
    return os.path.join(base_dir, subpath)

2. Load Files into dictionaries

In [102]:
class SumAlgo(Enum):
    """Sources of summaries loaded by this notebook.

    Each member's value is used as the name of a sub-directory of the
    pdf_summary benchmark directory (see `get_file_path_list`).
    """
    # Summaries stored in the 'lsa' sub-directory.
    LSA = 'lsa'
    # Summaries stored in the 'textrank' sub-directory.
    TEXTRANK = 'textrank'
    # Summaries stored in the 'ref' sub-directory; this directory also
    # supplies the list of file names (see `get_file_name_list`).
    REF = 'ref'

def get_file_name_list() -> list[str]:
    """Collect the names of all files found under the reference directory.

    The directory tree is walked recursively; only bare file names (without
    their directory) are returned, in the order `os.walk` yields them.
    """
    ref_dir = get_pdf_summary_subpath(SumAlgo.REF.value)
    return [
        name
        for _, _, names in os.walk(ref_dir)
        for name in names
    ]

def get_file_path_list(sumAlgo:SumAlgo) -> list[str]:
    """Collect the paths of all files stored for the given algorithm.

    Args:
        sumAlgo: Member whose value names the sub-directory to walk.

    Returns:
        One path per file (directory joined with file name), in the order
        `os.walk` yields them.
    """
    algo_dir = get_pdf_summary_subpath(sumAlgo.value)
    return [
        os.path.join(folder, name)
        for folder, _, names in os.walk(algo_dir)
        for name in names
    ]

def get_file_dict(sumAlgo:SumAlgo) -> dict:
    """Load every JSON file of one algorithm, keyed by file name.

    Args:
        sumAlgo: Member whose value names the sub-directory to load.

    Returns:
        Dict mapping each file's base name to its parsed JSON content.
    """
    result = {}
    for path in get_file_path_list(sumAlgo):
        # Key each document by its own base name instead of pairing it by
        # position with the reference directory's listing: os.walk gives no
        # ordering guarantee across directories, and a directory with fewer
        # files than the reference one would otherwise raise IndexError.
        result[os.path.basename(path)] = read_file(path)
    return result

def read_file(path:str) -> any:
    """Parse the JSON file at `path` and return the decoded object.

    Args:
        path: Location of the JSON file.

    Returns:
        Whatever `json.load` produces for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON text is UTF-8; without an explicit encoding, open() falls back to
    # the locale's code page, which mis-decodes non-ASCII text on Windows.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

def get_segment_dict(sumAlgo:SumAlgo) -> dict:
    """Map every reference file name to the 'segments' entry of its document.

    Args:
        sumAlgo: Member selecting which set of documents to load.

    Returns:
        Dict of file name -> value stored under the document's 'segments' key.
    """
    documents = get_file_dict(sumAlgo)
    return {
        name: documents[name]['segments']
        for name in get_file_name_list()
    }

# Load the segments of every document once, per summary source, so the
# evaluation cells below can reuse them (file name -> segments).
ref_segment_dict = get_segment_dict(SumAlgo.REF)
textrank_segment_dict = get_segment_dict(SumAlgo.TEXTRANK)
lsa_segment_dict = get_segment_dict(SumAlgo.LSA)
# File names found in the reference directory.
file_name_list = get_file_name_list()

3. Evaluate

In [103]:
# Mean score per file over all of its segments, for each evaluation method
def get_header_dict(segments:list) -> dict:
    """Index a list of segments by header.

    Args:
        segments: Items that each provide a 'header' and a 'content' entry.

    Returns:
        Dict of header -> content. When a header occurs more than once, the
        content of its last occurrence is kept.
    """
    return {
        segment['header']: segment['content']
        for segment in segments
    }

def get_eval_by_header(file_name:str, ref_header_dict:dict, test_header_dict:dict, eval_method:eval.EvalMethod) -> pd.DataFrame:
    """Score each segment of one file against its reference, header by header.

    Args:
        file_name: Name of the file being evaluated; used in warnings only.
        ref_header_dict: Dict of header -> reference content.
        test_header_dict: Dict of header -> content to be scored.
        eval_method: Key into `eval.eval_method_dict` selecting the scorer.

    Returns:
        DataFrame with one row per scored header and a single column named
        `eval_method.value`. Headers of the reference that are missing from
        `test_header_dict` are reported and left out.
    """
    ref_key_num = len(ref_header_dict)
    test_key_num = len(test_header_dict)
    if ref_key_num != test_key_num:
        # Format the counts instead of concatenating str and int (which
        # raised TypeError), and report both sides rather than ref twice.
        print(f"The number of keys do not match for {file_name}: {ref_key_num} {test_key_num}")

    result = {}
    missing_headers = []
    # Walk the reference headers directly: indexing them by the number of
    # test headers fails as soon as the two dicts differ.
    for header, ref_content in ref_header_dict.items():
        if header not in test_header_dict:
            missing_headers.append(header)
            continue
        test_content = test_header_dict[header]
        eval_result = eval.eval_method_dict[eval_method](test_content, ref_content)
        result[header] = [eval_result]

    if missing_headers:
        print(f"Headers missing from the test summary of {file_name}: {missing_headers}")

    return pd.DataFrame(result, index=[eval_method.value]).transpose()

def evaluate(test_segment_dict:dict, ref_segment_dict:dict, eval_method:eval.EvalMethod) -> pd.DataFrame:
    """Compute, per file, the mean score over all of its segments.

    Args:
        test_segment_dict: Dict of file name -> segments to be scored.
        ref_segment_dict: Dict of file name -> reference segments.
        eval_method: Evaluation method applied to every segment.

    Returns:
        DataFrame indexed by file name with a single column named
        `eval_method.value` holding each file's mean score. A file without
        any scored segment gets NaN.
    """
    file_name_list = get_file_name_list()
    result_list = []
    for file_name in file_name_list:
        ref_header_dict = get_header_dict(ref_segment_dict[file_name])
        test_header_dict = get_header_dict(test_segment_dict[file_name])

        eval_header = get_eval_by_header(file_name, ref_header_dict, test_header_dict, eval_method)
        eval_value_list = eval_header[eval_method.value].to_list()
        if eval_value_list:
            eval_average = sum(eval_value_list) / len(eval_value_list)
        else:
            # No segment was scored: report NaN instead of dividing by zero.
            eval_average = float('nan')
        result_list.append(eval_average)

    return pd.DataFrame({eval_method.value: result_list}, index=file_name_list)

def evaluate_all_method(test_segment_dict:dict, ref_segment_dict:dict) -> pd.DataFrame:
    """Run `evaluate` once per member of `eval.EvalMethod` and combine the results.

    Args:
        test_segment_dict: Dict of file name -> segments to be scored.
        ref_segment_dict: Dict of file name -> reference segments.

    Returns:
        DataFrame indexed by file name with one column per evaluation method,
        each named after the method's value.
    """
    row_labels = get_file_name_list()
    columns = {
        method.value: evaluate(test_segment_dict, ref_segment_dict, method)[method.value].to_list()
        for method in eval.EvalMethod
    }
    return pd.DataFrame(columns, index=row_labels)

       
# Score the LSA summaries against the references with every evaluation method.
# Single-method alternative:
# print(evaluate(lsa_segment_dict, ref_segment_dict, eval.EvalMethod.ROUGE_1))
print(evaluate_all_method(lsa_segment_dict, ref_segment_dict))
        

                                                     rouge_1   rouge_2  \
Deienno_et_al._-_2024_-_Accretion_and_Uneven_De...  0.449322  0.123100   
Elokda_et_al._-_2024_-_Karma_An_Experimental_St...  0.836298  0.582345   
Hansen_et_al._-_2024_-_Productivity_and_quality...  0.600328  0.152316   
Onah_et_al._-_2023_-_A_Data-driven_Latent_Seman...  0.476862  0.393759   
Peterson_-_2024_-_AI_and_the_Problem_of_Knowled...  0.644528  0.363274   
Pires_and_Broom_-_2024_-_The_rules_of_multiplay...  0.537277  0.114212   
Raposo_et_al._-_2024_-_Mixture-of-Depths_Dynami...  0.551279  0.110263   
Smith and Coast - 2013 - The economic burden of...  0.598609  0.339455   
Urteaga_and_Wiggins_-_2024_-_Sequential_Monte_C...  0.570800  0.103810   
Viglietta_-_2024_-_History_Trees_and_Their_Appl...  0.447251  0.099845   
Yu_et_al._-_2024_-_GreedLlama_Performance_of_Fi...  0.623672  0.320823   

                                                     rouge_l  
Deienno_et_al._-_2024_-_Accretion_and_Uneven_De.