# In [None]:
import pandas as pd
import re
import json

# Load the alias dictionary from its JSON file.
# The matching code below looks entries up by expected entity name and
# iterates the stored value as a collection of alternative names.
alias_file_path = '/opt/tiger/trl/consistency/data/cleaned.ent2mq_wiki_alias.cleaned.json'
with open(alias_file_path, 'r', encoding='utf-8') as f:
    alias_dict = json.loads(f.read())

def match_with_alias(answer, expected, alias_dict):
    """Return True if ``answer`` matches ``expected`` or one of its aliases.

    Args:
        answer: The model output to check.
        expected: The canonical expected value; also used as the lookup key
            into ``alias_dict``.
        alias_dict: Mapping from an expected value to an iterable of its
            aliases.

    Returns:
        True when ``check_answer_in_output`` accepts ``answer`` against the
        canonical value or any alias, otherwise False.
    """
    # The canonical name is tried first so the alias lookup is skipped on a hit.
    if check_answer_in_output(answer, expected):
        return True

    # No alias entry means there is nothing further to try.
    if expected not in alias_dict:
        return False

    return any(
        check_answer_in_output(answer, alias)
        for alias in alias_dict[expected]
    )

# Compiled once; the matcher is called for every row and every alias.
_WORD_PATTERN = re.compile(r'\w+')


def _word_set(value):
    """Return the set of lowercase word tokens in ``value``.

    Missing values (``None`` or a float NaN, e.g. an empty spreadsheet cell
    loaded by pandas) yield an empty set. Stringifying them would produce the
    tokens "none"/"nan", which could spuriously match a real entity name.
    """
    # ``value != value`` is only true for NaN.
    if value is None or (isinstance(value, float) and value != value):
        return set()
    return set(_WORD_PATTERN.findall(str(value).lower()))


def check_answer_in_output(answer, expected):
    """Return True if ``answer`` and ``expected`` share at least one word.

    Both values are converted to text, lower-cased and split into ``\\w+``
    tokens; the comparison is therefore case-insensitive and ignores
    punctuation.

    Args:
        answer: The model output (any type; converted with ``str``).
        expected: The expected value or alias (any type; converted with
            ``str``).

    Returns:
        True when the two token sets intersect. A missing value (``None`` or
        NaN) on either side never matches.
    """
    return not _word_set(answer).isdisjoint(_word_set(expected))

def check_answers(row, alias_dict):
    """Check the three model answers of one row against its triple.

    Args:
        row: A mapping-like record providing ``subject``, ``relation``,
            ``object`` and ``question1_answer`` .. ``question3_answer``.
        alias_dict: Mapping from an expected value to its aliases, passed
            through to ``match_with_alias``.

    Returns:
        A 3-tuple of booleans: answer 1 vs. the object, answer 2 vs. the
        subject, answer 3 vs. the object.
    """
    # Knowledge-graph triple (s, r, o). The relation is read but does not
    # take part in the matching below.
    subject = row['subject']
    relation = row['relation']
    obj = row['object']

    # Pair each model answer with the triple element it is compared to.
    answer_and_target = (
        (row['question1_answer'], obj),
        (row['question2_answer'], subject),
        (row['question3_answer'], obj),
    )

    # Alias-aware matching, evaluated in question order.
    return tuple(
        match_with_alias(model_answer, target, alias_dict)
        for model_answer, target in answer_and_target
    )

# Read the model outputs from the Excel file.
file_path = '/opt/tiger/trl/consistency/result/result_v1.xlsx'
df = pd.read_excel(file_path)

# Evaluate each row exactly once. Calling check_answers separately for each
# output column would repeat the same alias matching three times per row.
row_results = [check_answers(row, alias_dict) for _, row in df.iterrows()]

# Add three columns holding the check results, in the order returned by
# check_answers (question 1, question 2, question 3).
result_columns = ('问题1_答案是否正确', '问题2_识别是否正确', '问题3_回答是否正确')
for position, column in enumerate(result_columns):
    df[column] = [result[position] for result in row_results]

# Save the annotated table to a new Excel file.
df.to_excel('/opt/tiger/trl/consistency/result/result_v1_checked.xlsx', index=False)