In [None]:
import json
import os
from tqdm import tqdm
from PACKAGE import multi_rouge
from rouge_score.scoring import AggregateScore, Score
from PACKAGE import metric_realization
import openai
from langchain.chat_models import ChatOpenAI
import numpy as np

# Load api_key

In [None]:
# Read the stored API keys from the JSON file two directories up and use the
# first entry (assumes the file holds a JSON array of keys — TODO confirm).
with open('../../keys.json', 'r') as key_file:
    api_keys = json.load(key_file)
api_key = api_keys[0]

# Calculate tokens

In [None]:
def calculate_tokens(articles: list):
    """Count the tokens of every article.

    Each article is converted with ``str()`` and measured with the
    ``get_num_tokens`` method of a freshly built ``ChatOpenAI`` client that
    uses the notebook-level ``api_key``.

    Args:
        articles: Items to measure; non-string items are stringified first.

    Returns:
        A list with one token count per article, in input order.
    """
    llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k-0613", openai_api_key=api_key, temperature=0.7, max_tokens=600)
    token_counts = []
    for article in articles:
        token_counts.append(llm.get_num_tokens(str(article)))
    return token_counts


# Data directory and summarisation model whose outputs are inspected.
path = 'QMSum/oracle'
model_name = 'bart'

# Load pred
pred_file = model_name + '_summary.json'
file_path = os.path.join(path, 'summary/' + pred_file)
with open(file_path, 'r') as f:
    predictions = json.load(f)

# Load ref
# The file is opened unconditionally: a missing reference file now fails right
# here with FileNotFoundError instead of leaving `references` undefined (or
# stale from an earlier run of the cell) for the code below.
ref_file = 'test.json'
file_path = os.path.join(path, ref_file)
with open(file_path, 'r') as f:
    reference_items = json.load(f)
# Keep only the 'Summary' field of every reference record.
references = [data_item['Summary'] for data_item in reference_items]

# Per-item token counts of the predictions and of the references.
print(calculate_tokens(predictions))
print(calculate_tokens(references))

In [None]:
# Get prompt
metric_list = ['coh', 'con', 'flu', 'rel']
metric_type = metric_list[1]  # index 1 selects 'con'
# Read the template inside a context manager so the file handle is closed
# as soon as the text has been loaded.
with open('GPTeval/prompts/' + metric_type + '_detailed.txt') as prompt_file:
    prompt = prompt_file.read()

# Get messages: one single-entry chat (system role) per prediction.
messages = []
for index, prediction in enumerate(predictions):
    # The reference at the same position fills the {{Document}} slot and the
    # prediction fills the {{Summary}} slot of the template.
    reference = references[index]
    cur_prompt = prompt.replace('{{Document}}', reference).replace('{{Summary}}', prediction)
    messages.append([{"role": "system", "content": cur_prompt}])

# Number of predictions, then the mean token count of the stringified messages.
print(len(predictions))
print(np.mean(calculate_tokens(messages)))

# Print ROUGE scores for SQuALITY

In [None]:
# Walk the SQuALITY tree and, for every 'bart_evaluate_squality_rouge.json'
# file, print its directory followed by the mid F-measure of rouge1, rouge2
# and rougeL scaled by 100.
for root, dirs, files in os.walk('SQuALITY'):
    for file in files:
        if file != 'bart_evaluate_squality_rouge.json':
            continue
        with open(os.path.join(root, file), 'r') as f:
            rouge = f.read()
        # NOTE(review): eval() executes whatever the file contains, so only run
        # this on files you produced yourself. Presumably the file stores the
        # repr of a dict of AggregateScore objects — confirm before replacing
        # it with a safer parser.
        obj_rouge = eval(rouge)
        print(root)
        print(f"rouge1:\n{obj_rouge['rouge1'].mid.fmeasure * 100:.2f}")
        print(f"rouge2:\n{obj_rouge['rouge2'].mid.fmeasure * 100:.2f}")
        print(f"rougeL:\n{obj_rouge['rougeL'].mid.fmeasure * 100:.2f}")


# Print rouge score

In [46]:
# Walk the QMSum tree and dump every entry of each 'pri_rouge.json' file.
for root, dirs, files in os.walk('QMSum'):
    for file in files:
        if file != 'pri_rouge.json':
            continue
        with open(os.path.join(root, file), 'r') as f:
            rouge = json.load(f)
        # The directory is repeated before every metric so each printed value
        # can be traced back to its run.
        for key in rouge:
            value = rouge[key]
            print(root)
            print(key)
            print(value)

QMSum\dense\MIN\evaluation
rouge1
0.32345118469097134
QMSum\dense\MIN\evaluation
rouge2
0.08996982718719525
QMSum\dense\MIN\evaluation
rougeL
0.17534373968372713
QMSum\dense\MIN\evaluation
rougeLsum
0.17587356617242386
QMSum\LLM-embedding\MIN\evaluation
rouge1
0.28786168325672334
QMSum\LLM-embedding\MIN\evaluation
rouge2
0.06530739852185932
QMSum\LLM-embedding\MIN\evaluation
rougeL
0.15674912768659122
QMSum\LLM-embedding\MIN\evaluation
rougeLsum
0.15685553676713304
QMSum\oracle\evaluation
rouge1
0.29086849594636666
QMSum\oracle\evaluation
rouge2
0.06927507342764289
QMSum\oracle\evaluation
rougeL
0.15942051204427643
QMSum\oracle\evaluation
rougeLsum
0.15952767316371966
QMSum\sparse\MIN\evaluation
rouge1
0.27348283753624225
QMSum\sparse\MIN\evaluation
rouge2
0.06166248008805346
QMSum\sparse\MIN\evaluation
rougeL
0.1609449384409522
QMSum\sparse\MIN\evaluation
rougeLsum
0.16081344797806874


# Print bert score

In [None]:
# Walk the QMSum tree and, for every 'bart_bert_score.json' file, print its
# directory and the stored 'average_f1' value scaled by 100.
for root, dirs, files in os.walk("QMSum"):
    for file in files:
        if file != "bart_bert_score.json":
            continue
        with open(os.path.join(root, file), "r") as f:
            bert = json.load(f)
        average_f1 = bert['average_f1']
        print(root)
        print(f"{average_f1 * 100:.2f}")

# Rename

In [None]:
# Rename the SQuALITY output files in place and remove 'all_results.json'.
# WARNING: this cell renames and deletes files on disk.
for root, dirs, files in os.walk("../ODMDS_share_evaluate/SQuALITY"):
    # Directories ending in 'summary': swap the 'newSummary_' prefix.
    if root.endswith('summary'):
        prefix = 'newSummary_'  # prefix to be replaced
        new_prefix = 'gpt3_summary_'  # new prefix
        for filename in files:
            if not filename.startswith(prefix):
                continue
            new_filename = new_prefix + filename[len(prefix):]
            os.rename(os.path.join(root, filename), os.path.join(root, new_filename))
    # Directories ending in 'evaluation': prepend 'gpt3_' to 'evaluate*' files.
    if root.endswith('evaluation'):
        new_prefix = 'gpt3_'  # new prefix
        for filename in files:
            if not filename.startswith('evaluate'):
                continue
            new_filename = new_prefix + filename
            os.rename(os.path.join(root, filename), os.path.join(root, new_filename))
    # Every directory: delete the 'all_results.json' file if there is one.
    for filename in files:
        if filename != 'all_results.json':
            continue
        os.remove(os.path.join(root, filename))

# Delete file

In [None]:
# Delete every file under ../ODMDS_share_evaluate whose name contains
# 'evaluate_'. The match is a substring test, so the text may appear anywhere
# in the file name. WARNING: the deletion is permanent.
for root, dirs, files in os.walk("../ODMDS_share_evaluate"):
    for filename in files:
        if 'evaluate_' not in filename:
            continue
        file_path = os.path.join(root, filename)
        os.remove(file_path)

# Print GPT eval

In [None]:
# Choose which model/metric combination to report.
metric_list = ['coh', 'con', 'flu', 'rel']
model_list = ['bart_', 'gpt3_']
metric_index = 1
model_index = 0
# Name of the result file to look for, e.g. 'bart_con_gpteval.json'.
suffix = model_list[model_index] + metric_list[metric_index] + '_gpteval.json'

for root, dirs, files in os.walk("QMSum"):
    for file in files:
        if file != suffix:
            continue
        with open(os.path.join(root, file), "r") as f:
            gpteval = json.load(f)
        # The average is read from the file's 'average' entry (a one-element
        # list). The earlier, disabled version of this cell computed it as
        # the mean of all stored values and wrote it back to the file; that
        # unused accumulation and the commented-out code have been removed.
        average = gpteval['average'][0]
        print(os.path.join(root, file))
        print(f'{average * 10:.2f}')

# Create file

In [None]:
# Drop an 'empty.txt' placeholder (containing '1') into every directory under
# QMSum that has neither files nor sub-directories.
for root, dirs, files in os.walk("QMSum"):
    if files or dirs:
        continue
    placeholder_path = os.path.join(root, 'empty.txt')
    with open(placeholder_path, 'w') as f:
        f.write('1')

# Test PRIMERA scores on the random index subset

In [52]:
# Load the list of sampled indices.
with open('QMSum/randomIndex/index.json', 'r') as f:
    random_index_list = json.load(f)

# For every file under QMSum whose name contains 'pri_rel', print the mean of
# its 'Summary' values taken at the sampled indices.
for root, dirs, files in os.walk('QMSum'):
    for filename in files:
        filepath = os.path.join(root, filename)
        if 'pri_rel' not in filename:
            continue
        with open(filepath, 'r') as f:
            rel_scores = json.load(f)
        rel = []
        for index in random_index_list:
            rel.append(rel_scores['Summary'][index])
        print(np.mean(rel))


1.7
1.6
1.7
1.8
