# 准确性（Accuracy）


In [None]:
def accuracy(true_answers, predicted_answers):
    """
    Compute exact-match accuracy.

    Answers are compared position by position with ``==``. The denominator is
    ``len(true_answers)``, so a reference without a corresponding prediction
    counts as a miss, while predictions beyond the last reference are ignored.

    :param true_answers: reference answers (list)
    :param predicted_answers: model-predicted answers (list)
    :return: fraction of references that were matched exactly, as a float;
        0.0 when ``true_answers`` is empty
    """
    total = len(true_answers)
    if total == 0:
        # Return a float here too, so the result type does not depend on input.
        return 0.0

    # zip() stops at the shorter list; unmatched references stay in `total`.
    correct = sum(
        1
        for true, predicted in zip(true_answers, predicted_answers)
        if true == predicted
    )
    return correct / total

# 精确度（Precision）


In [None]:
from sklearn.metrics import precision_score


def precision(true_answers, predicted_answers):
    """
    Compute token-level precision between reference and predicted answers.

    Every answer is split on whitespace, so answers must already be tokenised
    with tokens separated by spaces. For each (reference, prediction) pair the
    shared tokens are counted with multiplicity (multiset intersection), and
    the counts are pooled over all pairs:

        precision = shared tokens / predicted tokens

    :param true_answers: reference answers (list of whitespace-tokenised strings)
    :param predicted_answers: predicted answers (list of whitespace-tokenised strings)
    :return: precision as a float in [0, 1]; 0.0 when there are no predicted tokens
    :raises ValueError: if the two lists have different lengths
    """
    # Local import keeps the function self-contained.
    from collections import Counter

    if len(true_answers) != len(predicted_answers):
        raise ValueError(
            "true_answers and predicted_answers must have the same length"
        )

    # sklearn's precision_score(average="binary") expects aligned per-sample
    # binary class labels; free-text tokens are neither aligned nor binary, so
    # token overlap is computed directly instead.
    shared = 0
    predicted_total = 0
    for true, predicted in zip(true_answers, predicted_answers):
        true_counts = Counter(true.split())
        predicted_counts = Counter(predicted.split())
        # `&` keeps, per token, the smaller of the two counts.
        shared += sum((true_counts & predicted_counts).values())
        predicted_total += sum(predicted_counts.values())

    return shared / predicted_total if predicted_total else 0.0

# 召回率（Recall）


In [None]:
from sklearn.metrics import recall_score


def recall(true_answers, predicted_answers):
    """
    Compute token-level recall between reference and predicted answers.

    Every answer is split on whitespace, so answers must already be tokenised
    with tokens separated by spaces. For each (reference, prediction) pair the
    shared tokens are counted with multiplicity (multiset intersection), and
    the counts are pooled over all pairs:

        recall = shared tokens / reference tokens

    :param true_answers: reference answers (list of whitespace-tokenised strings)
    :param predicted_answers: predicted answers (list of whitespace-tokenised strings)
    :return: recall as a float in [0, 1]; 0.0 when there are no reference tokens
    :raises ValueError: if the two lists have different lengths
    """
    # Local import keeps the function self-contained.
    from collections import Counter

    if len(true_answers) != len(predicted_answers):
        raise ValueError(
            "true_answers and predicted_answers must have the same length"
        )

    # sklearn's recall_score(average="binary") expects aligned per-sample
    # binary class labels; free-text tokens are neither aligned nor binary, so
    # token overlap is computed directly instead.
    shared = 0
    true_total = 0
    for true, predicted in zip(true_answers, predicted_answers):
        true_counts = Counter(true.split())
        predicted_counts = Counter(predicted.split())
        # `&` keeps, per token, the smaller of the two counts.
        shared += sum((true_counts & predicted_counts).values())
        true_total += sum(true_counts.values())

    return shared / true_total if true_total else 0.0

# F1 值（F1-Score）


In [None]:
from sklearn.metrics import f1_score


def f1(true_answers, predicted_answers):
    """
    Compute the token-level F1 score between reference and predicted answers.

    Every answer is split on whitespace, so answers must already be tokenised
    with tokens separated by spaces. For each (reference, prediction) pair the
    shared tokens are counted with multiplicity (multiset intersection), and
    the counts are pooled over all pairs. F1 is the harmonic mean of the pooled
    precision and recall, which simplifies to:

        f1 = 2 * shared tokens / (predicted tokens + reference tokens)

    :param true_answers: reference answers (list of whitespace-tokenised strings)
    :param predicted_answers: predicted answers (list of whitespace-tokenised strings)
    :return: F1 as a float in [0, 1]; 0.0 when there are no tokens at all
    :raises ValueError: if the two lists have different lengths
    """
    # Local import keeps the function self-contained.
    from collections import Counter

    if len(true_answers) != len(predicted_answers):
        raise ValueError(
            "true_answers and predicted_answers must have the same length"
        )

    # sklearn's f1_score(average="binary") expects aligned per-sample binary
    # class labels; free-text tokens are neither aligned nor binary, so token
    # overlap is computed directly instead.
    shared = 0
    token_total = 0  # predicted tokens + reference tokens
    for true, predicted in zip(true_answers, predicted_answers):
        true_counts = Counter(true.split())
        predicted_counts = Counter(predicted.split())
        # `&` keeps, per token, the smaller of the two counts.
        shared += sum((true_counts & predicted_counts).values())
        token_total += sum(true_counts.values()) + sum(predicted_counts.values())

    return 2 * shared / token_total if token_total else 0.0

# BLEU


In [None]:
from nltk.translate.bleu_score import sentence_bleu


def bleu_score(reference, hypothesis):
    """
    Compute the sentence-level BLEU score of a hypothesis against one reference.

    :param reference: reference answer (list of tokens)
    :param hypothesis: generated answer (list of tokens)
    :return: the score returned by ``sentence_bleu``
    """
    # sentence_bleu takes a list of references; wrap the single one we have.
    references = [reference]
    score = sentence_bleu(references, hypothesis)
    return score

# ROUGE


In [1]:
import os
from tempfile import mkdtemp
from pyrouge import Rouge155


def rouge_score(reference_text, prediction_text):
    """
    Compute ROUGE scores for one prediction against one reference via pyrouge.

    Both texts are written as plain-text files into a temporary directory,
    converted to ROUGE's input format and scored by ``Rouge155``. The temporary
    directory is removed afterwards, even when evaluation raises.

    Args:
        reference_text (str): reference text
        prediction_text (str): predicted text (generated summary)

    Returns:
        dict: the ROUGE output parsed by ``Rouge155.output_to_dict``
    """
    # Local import: the file-level import only brings in ``mkdtemp``.
    from tempfile import TemporaryDirectory

    # The context manager removes the whole tree on exit, so nothing is left
    # behind if writing the files or running ROUGE fails.
    with TemporaryDirectory() as temp_dir:
        system_dir = os.path.join(temp_dir, "system")
        model_dir = os.path.join(temp_dir, "model")
        os.makedirs(system_dir)
        os.makedirs(model_dir)

        # File names follow pyrouge's documented convention: the system file
        # carries a numeric ID, the model file a letter plus the same ID.
        prediction_path = os.path.join(system_dir, "prediction.001.txt")
        reference_path = os.path.join(model_dir, "reference.A.001.txt")
        with open(prediction_path, "w", encoding="utf-8") as f:
            f.write(prediction_text)
        with open(reference_path, "w", encoding="utf-8") as f:
            f.write(reference_text)

        rouge = Rouge155()
        rouge.system_dir = system_dir
        rouge.model_dir = model_dir
        # pyrouge extracts the summary ID from the capture group of the system
        # pattern and substitutes it for #ID# in the model pattern.
        rouge.system_filename_pattern = r"prediction.(\d+).txt"
        rouge.model_filename_pattern = "reference.[A-Z].#ID#.txt"

        # The inputs are plain text, so they must be converted to ROUGE's
        # format before evaluation.
        scores = rouge.convert_and_evaluate()
        return rouge.output_to_dict(scores)

# METEOR


In [None]:
from nltk.translate import meteor_score


def meteor(reference, hypothesis):
    """
    Compute the METEOR score of a hypothesis against one reference.

    String arguments are split on whitespace into tokens before scoring;
    arguments that are not strings (e.g. token lists) are passed through
    unchanged.

    NOTE(review): recent NLTK releases require pre-tokenised inputs and raise
    ``TypeError`` for plain strings, which is why strings are split here.
    Older releases tokenised strings themselves - confirm the installed
    NLTK version.

    :param reference: reference answer (whitespace-tokenised string or list of tokens)
    :param hypothesis: generated answer (whitespace-tokenised string or list of tokens)
    :return: the score returned by ``meteor_score.meteor_score``
    """
    if isinstance(reference, str):
        reference = reference.split()
    if isinstance(hypothesis, str):
        hypothesis = hypothesis.split()

    # meteor_score takes a list of references; wrap the single one we have.
    return meteor_score.meteor_score([reference], hypothesis)

In [None]:
import jieba

# Sample reference answers (raw, unsegmented sentences).
true_answers = [
    "这是第一个真实答案。",
    "这是第二个真实答案。",
    "这是第三个真实答案。",
]

# Sample model predictions, aligned by position with the references.
predicted_answers = [
    "这是第一个预测答案。",
    "这是第二个预测答案。",
    "这是第三个预测答案。",
]

# Word segmentation: cut each sentence with jieba and join the resulting
# tokens with single spaces.
true_answers = [" ".join(tokens) for tokens in map(jieba.cut, true_answers)]
predicted_answers = [
    " ".join(tokens) for tokens in map(jieba.cut, predicted_answers)
]

# Show both segmented lists, one per line.
print(true_answers, predicted_answers, sep="\n")