In [None]:
import os
import asyncio
import pandas as pd
import time
import ast
from tqdm import tqdm
from ragas.dataset_schema import SingleTurnSample
from ragas.metrics import (
    LLMContextPrecisionWithoutReference,
    ContextRelevance,
    AnswerAccuracy,
    SemanticSimilarity,
    Faithfulness
)
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Credentials: the string below is a placeholder and must be replaced with a
# real key before the cell is run.
os.environ["OPENAI_API_KEY"] = "your API key"

# The chat client and the embedding client receive the same API key and the
# same (custom) base URL, so the shared keyword arguments are built once.
_client_kwargs = dict(
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    base_url="your base url",
)

# LangChain chat model, then its Ragas wrapper (used by the LLM-based metrics).
llm_instance = ChatOpenAI(model="deepseek-chat", **_client_kwargs)
evaluator_llm = LangchainLLMWrapper(llm_instance)

# LangChain embedding model (text-embedding-3-small), then its Ragas wrapper.
evaluator_embedding = OpenAIEmbeddings(model="text-embedding-3-small", **_client_kwargs)
evaluator_embeddings_wrapper = LangchainEmbeddingsWrapper(evaluator_embedding)

def _parse_retrieved_contexts(raw_contexts):
    """Convert one 'Retrievaled_context' cell into a list of context strings.

    Args:
        raw_contexts: The cell value as read by pandas. Expected to be a string
            holding either the repr of a Python list (e.g. "['a', 'b']") or
            plain text; may also be NaN/None (empty cell) or another scalar.

    Returns:
        list[str]: Every item of the list literal converted to ``str`` when the
        cell parses to a list; a one-element list with the cell text when it
        does not; an empty list when the cell is missing.
    """
    if isinstance(raw_contexts, (list, tuple)):
        # Already a sequence (e.g. a frame built in memory): just normalise items.
        return [str(item) for item in raw_contexts]
    if not isinstance(raw_contexts, str):
        # Empty cells arrive as NaN/None; other non-string scalars are kept as text.
        return [] if pd.isna(raw_contexts) else [str(raw_contexts)]
    try:
        parsed = ast.literal_eval(raw_contexts)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # Not a Python literal: treat the whole cell as a single context.
        return [raw_contexts]
    if isinstance(parsed, list):
        return [str(item) for item in parsed]
    # A literal that is not a list (number, quoted string, dict, ...): keep the raw text.
    return [raw_contexts]


async def main():
    """Score every row of ``input.csv`` with five Ragas metrics and write a CSV report.

    Reads the columns 'Query', 'Ground_truth', 'Response' and
    'Retrievaled_context', evaluates each row sequentially, and writes the
    inputs, the five scores and the per-row evaluation time (seconds) to
    ``evaluation_results.csv``.

    If evaluation is interrupted (metric error, cancellation, Ctrl-C), the rows
    scored so far are written to the same output file before the exception is
    re-raised, so a long run is not lost entirely.

    Raises:
        Whatever ``pd.read_csv`` or a metric's ``single_turn_ascore`` raises.
    """
    input_file = 'input.csv'
    output_file = 'evaluation_results.csv'
    df = pd.read_csv(input_file)

    # Metric objects are created once, outside the loop, and reused for every row.
    context_precision = LLMContextPrecisionWithoutReference(llm=evaluator_llm)
    scorer_relevance = ContextRelevance(llm=evaluator_llm)
    scorer_accuracy = AnswerAccuracy(llm=evaluator_llm)
    scorer_similarity = SemanticSimilarity(embeddings=evaluator_embeddings_wrapper)
    scorer_faithfulness = Faithfulness(llm=evaluator_llm)

    results = []

    try:
        for _, row in tqdm(df.iterrows(), total=len(df), desc="评估进度"):
            # perf_counter is monotonic, so the duration cannot be skewed by clock changes.
            start_time = time.perf_counter()

            user_input = row['Query']
            reference = row['Ground_truth']
            response = row['Response']
            retrieved_context_str = row['Retrievaled_context']
            retrieved_contexts = _parse_retrieved_contexts(retrieved_context_str)

            # Context precision and faithfulness take the same fields, so one
            # sample serves both.
            grounded_sample = SingleTurnSample(
                user_input=user_input,
                response=response,
                retrieved_contexts=retrieved_contexts,
            )
            score1 = await context_precision.single_turn_ascore(grounded_sample)

            score2 = await scorer_relevance.single_turn_ascore(
                SingleTurnSample(
                    user_input=user_input,
                    retrieved_contexts=retrieved_contexts,
                )
            )

            score3 = await scorer_accuracy.single_turn_ascore(
                SingleTurnSample(
                    user_input=user_input,
                    response=response,
                    reference=reference,
                )
            )

            score4 = await scorer_similarity.single_turn_ascore(
                SingleTurnSample(
                    response=response,
                    reference=reference,
                )
            )

            score5 = await scorer_faithfulness.single_turn_ascore(grounded_sample)

            evaluation_time = time.perf_counter() - start_time

            # The raw context cell (not the parsed list) is echoed back in the report.
            results.append({
                'Query': user_input,
                'Ground_truth': reference,
                'Response': response,
                'Retrievaled_context': retrieved_context_str,
                'Context_Precision': score1,
                'Context_Relevance': score2,
                'Answer_Accuracy': score3,
                'Semantic_Similarity': score4,
                'Faithfulness': score5,
                'Evaluation_Time': evaluation_time
            })
    except BaseException:
        # Keep the rows that were already scored; skip the write when nothing was
        # scored so an earlier report is not replaced by an empty file.
        if results:
            pd.DataFrame(results).to_csv(output_file, index=False)
            print(f"Evaluation interrupted; {len(results)} completed row(s) saved to {output_file}")
        raise

    # Normal completion: write the full report.
    output_df = pd.DataFrame(results)
    output_df.to_csv(output_file, index=False)
    print(f"Results saved to {output_file}")

# Entry point: run the async evaluation to completion.
# NOTE(review): asyncio.run() cannot be called while another event loop is
# already running in the same thread; inside a notebook kernel this presumably
# relies on the loop having been patched to be re-entrant — confirm how this
# cell is meant to be executed.
if __name__ == "__main__":
    asyncio.run(main())

In [None]:
import os
import asyncio
import pandas as pd
import time
import ast
from tqdm import tqdm
from ragas.dataset_schema import SingleTurnSample
from ragas.metrics import (
    LLMContextPrecisionWithoutReference,
    ContextRelevance,
    AnswerAccuracy,
    SemanticSimilarity,
    Faithfulness
)
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
import openpyxl  # 确保安装 openpyxl 以支持 Excel 追加

# Credentials: the string below is a placeholder and must be replaced with a
# real key before the cell is run.
os.environ["OPENAI_API_KEY"] = "your API key"

# LangChain chat model reached through a custom base_url.
# JSON-object output mode is requested to work around OutputParserException
# when metric outputs are parsed. It is passed through model_kwargs explicitly:
# given as a bare constructor keyword, langchain warns that "response_format
# was transferred to model_kwargs", so this is the same request without the
# warning.
llm_instance = ChatOpenAI(
    model="deepseek-chat",
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    base_url="your base url",
    model_kwargs={"response_format": {"type": "json_object"}},
)

# Wrap the chat model for Ragas.
evaluator_llm = LangchainLLMWrapper(llm_instance)

# LangChain embedding model (text-embedding-3-small).
evaluator_embedding = OpenAIEmbeddings(
    model="text-embedding-3-small",
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    base_url="your base url",
)

# Wrap the embedding model for Ragas.
evaluator_embeddings_wrapper = LangchainEmbeddingsWrapper(evaluator_embedding)

def _parse_retrieved_contexts(raw_contexts):
    """Convert one 'Retrievaled_context' cell into a list of context strings.

    Args:
        raw_contexts: The cell value as read by pandas. Expected to be a string
            holding either the repr of a Python list (e.g. "['a', 'b']") or
            plain text; may also be NaN/None (empty cell) or another scalar.

    Returns:
        list[str]: Every item of the list literal converted to ``str`` when the
        cell parses to a list; a one-element list with the cell text when it
        does not; an empty list when the cell is missing.
    """
    if isinstance(raw_contexts, (list, tuple)):
        # Already a sequence (e.g. a frame built in memory): just normalise items.
        return [str(item) for item in raw_contexts]
    if not isinstance(raw_contexts, str):
        # Empty cells arrive as NaN/None; other non-string scalars are kept as text.
        return [] if pd.isna(raw_contexts) else [str(raw_contexts)]
    try:
        parsed = ast.literal_eval(raw_contexts)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # Not a Python literal: treat the whole cell as a single context.
        return [raw_contexts]
    if isinstance(parsed, list):
        return [str(item) for item in parsed]
    # A literal that is not a list (number, quoted string, dict, ...): keep the raw text.
    return [raw_contexts]


async def _append_row_to_excel(result_df, output_file, row_number, retries=3, retry_delay=5):
    """Append ``result_df`` below the last used row of the workbook's active sheet.

    Args:
        result_df: One-row DataFrame to append (written without header or index).
        output_file: Path of an existing .xlsx workbook.
        row_number: Row label used only in the progress/error messages.
        retries: Total number of write attempts.
        retry_delay: Seconds to wait between attempts.

    Raises:
        PermissionError: If every attempt fails with PermissionError (typically
            because the workbook is open in another program).
    """
    for attempt in range(1, retries + 1):
        try:
            with pd.ExcelWriter(output_file, mode='a', engine='openpyxl', if_sheet_exists='overlay') as writer:
                sheet = writer.book.active
                # max_row is 1-based while startrow is 0-based, so max_row is the
                # first free row. The write targets the same sheet that was measured.
                result_df.to_excel(
                    writer,
                    sheet_name=sheet.title,
                    index=False,
                    header=False,
                    startrow=sheet.max_row,
                )
            print(f"Row {row_number} saved to {output_file}")
            return
        except PermissionError as exc:
            if attempt == retries:
                # No attempts left: fail immediately instead of sleeping once more.
                raise PermissionError(
                    f"Failed to write to {output_file} after {retries} attempts. Ensure the file is closed."
                ) from exc
            print(f"Permission denied on attempt {attempt} for row {row_number}. Please close the file if it's open. Retrying in {retry_delay} seconds...")
            # Non-blocking wait: time.sleep() would stall the event loop.
            await asyncio.sleep(retry_delay)


async def main():
    """Score every row of ``merged_test_dataset_1.xlsx`` and append results to Excel.

    Reads the columns 'Query', 'Ground_truth', 'Response' and
    'Retrievaled_context', evaluates each row sequentially with five Ragas
    metrics, and appends one result row per input row to
    ``evaluation_results_1.xlsx`` as soon as it is scored, so completed rows
    survive a later failure.

    The output workbook is created with a header row only when it does not
    exist yet; when it already exists, new rows are appended below its content.

    Raises:
        PermissionError: If a result row cannot be written after all retries.
        Whatever ``pd.read_excel`` or a metric's ``single_turn_ascore`` raises.
    """
    input_file = 'merged_test_dataset_1.xlsx'
    df = pd.read_excel(input_file)

    # Metric objects are created once, outside the loop, and reused for every row.
    context_precision = LLMContextPrecisionWithoutReference(llm=evaluator_llm)
    scorer_relevance = ContextRelevance(llm=evaluator_llm)
    scorer_accuracy = AnswerAccuracy(llm=evaluator_llm)
    scorer_similarity = SemanticSimilarity(embeddings=evaluator_embeddings_wrapper)
    scorer_faithfulness = Faithfulness(llm=evaluator_llm)

    output_file = 'evaluation_results_1.xlsx'

    # First run: create the workbook containing only the header row.
    if not os.path.exists(output_file):
        header_df = pd.DataFrame(columns=[
            'Query', 'Ground_truth', 'Response', 'Retrievaled_context',
            'Context_Precision', 'Context_Relevance', 'Answer_Accuracy',
            'Semantic_Similarity', 'Faithfulness', 'Evaluation_Time'
        ])
        header_df.to_excel(output_file, index=False)

    for index, row in tqdm(df.iterrows(), total=len(df), desc="评估进度"):
        # perf_counter is monotonic, so the duration cannot be skewed by clock changes.
        start_time = time.perf_counter()

        user_input = row['Query']
        reference = row['Ground_truth']
        response = row['Response']
        retrieved_context_str = row['Retrievaled_context']
        retrieved_contexts = _parse_retrieved_contexts(retrieved_context_str)

        # Context precision and faithfulness take the same fields, so one
        # sample serves both.
        grounded_sample = SingleTurnSample(
            user_input=user_input,
            response=response,
            retrieved_contexts=retrieved_contexts,
        )
        score1 = await context_precision.single_turn_ascore(grounded_sample)

        score2 = await scorer_relevance.single_turn_ascore(
            SingleTurnSample(
                user_input=user_input,
                retrieved_contexts=retrieved_contexts,
            )
        )

        score3 = await scorer_accuracy.single_turn_ascore(
            SingleTurnSample(
                user_input=user_input,
                response=response,
                reference=reference,
            )
        )

        score4 = await scorer_similarity.single_turn_ascore(
            SingleTurnSample(
                response=response,
                reference=reference,
            )
        )

        score5 = await scorer_faithfulness.single_turn_ascore(grounded_sample)

        evaluation_time = time.perf_counter() - start_time

        # The raw context cell (not the parsed list) is echoed back in the report.
        result = {
            'Query': user_input,
            'Ground_truth': reference,
            'Response': response,
            'Retrievaled_context': retrieved_context_str,
            'Context_Precision': score1,
            'Context_Relevance': score2,
            'Answer_Accuracy': score3,
            'Semantic_Similarity': score4,
            'Faithfulness': score5,
            'Evaluation_Time': evaluation_time
        }

        # Persist this row immediately; PermissionError is retried inside the helper.
        await _append_row_to_excel(pd.DataFrame([result]), output_file, index + 1)

# Entry point: run the async evaluation to completion.
# NOTE(review): asyncio.run() cannot be called while another event loop is
# already running in the same thread; inside a notebook kernel this presumably
# relies on the loop having been patched to be re-entrant — confirm how this
# cell is meant to be executed.
if __name__ == "__main__":
    asyncio.run(main())

                response_format was transferred to model_kwargs.
                Please confirm that response_format is what you intended.
  if await self.run_code(code, result, async_=asy):
  evaluator_embeddings_wrapper = LangchainEmbeddingsWrapper(evaluator_embedding)
评估进度:   1%|          | 1/180 [00:40<2:00:20, 40.34s/it]

Row 1 saved to evaluation_results_1.xlsx


评估进度:   1%|          | 2/180 [01:16<1:51:45, 37.67s/it]

Row 2 saved to evaluation_results_1.xlsx


评估进度:   2%|▏         | 3/180 [01:30<1:19:03, 26.80s/it]

Row 3 saved to evaluation_results_1.xlsx


评估进度:   2%|▏         | 4/180 [02:02<1:25:22, 29.11s/it]

Row 4 saved to evaluation_results_1.xlsx


评估进度:   3%|▎         | 5/180 [02:22<1:15:19, 25.83s/it]

Row 5 saved to evaluation_results_1.xlsx


评估进度:   3%|▎         | 6/180 [02:38<1:05:08, 22.46s/it]

Row 6 saved to evaluation_results_1.xlsx


评估进度:   4%|▍         | 7/180 [03:29<1:31:36, 31.77s/it]

Row 7 saved to evaluation_results_1.xlsx


评估进度:   4%|▍         | 8/180 [03:50<1:21:35, 28.46s/it]

Row 8 saved to evaluation_results_1.xlsx


评估进度:   5%|▌         | 9/180 [04:14<1:16:38, 26.89s/it]

Row 9 saved to evaluation_results_1.xlsx


评估进度:   6%|▌         | 10/180 [04:28<1:04:48, 22.87s/it]

Row 10 saved to evaluation_results_1.xlsx


评估进度:   6%|▌         | 11/180 [04:47<1:01:18, 21.77s/it]

Row 11 saved to evaluation_results_1.xlsx


评估进度:   7%|▋         | 12/180 [05:18<1:09:14, 24.73s/it]

Row 12 saved to evaluation_results_1.xlsx


评估进度:   7%|▋         | 13/180 [05:49<1:13:59, 26.58s/it]

Row 13 saved to evaluation_results_1.xlsx


评估进度:   8%|▊         | 14/180 [06:17<1:14:33, 26.95s/it]

Row 14 saved to evaluation_results_1.xlsx


评估进度:   8%|▊         | 15/180 [06:58<1:25:33, 31.11s/it]

Row 15 saved to evaluation_results_1.xlsx


评估进度:   9%|▉         | 16/180 [07:23<1:19:57, 29.25s/it]

Row 16 saved to evaluation_results_1.xlsx


评估进度:   9%|▉         | 17/180 [07:46<1:14:30, 27.43s/it]

Row 17 saved to evaluation_results_1.xlsx


评估进度:  10%|█         | 18/180 [07:58<1:01:11, 22.66s/it]

Row 18 saved to evaluation_results_1.xlsx


评估进度:  11%|█         | 19/180 [08:32<1:10:38, 26.33s/it]

Row 19 saved to evaluation_results_1.xlsx


评估进度:  11%|█         | 20/180 [08:50<1:03:01, 23.64s/it]

Row 20 saved to evaluation_results_1.xlsx


评估进度:  12%|█▏        | 21/180 [09:15<1:03:36, 24.00s/it]

Row 21 saved to evaluation_results_1.xlsx


评估进度:  12%|█▏        | 22/180 [10:09<1:27:26, 33.20s/it]

Row 22 saved to evaluation_results_1.xlsx


评估进度:  13%|█▎        | 23/180 [10:56<1:37:13, 37.16s/it]

Row 23 saved to evaluation_results_1.xlsx


评估进度:  13%|█▎        | 24/180 [11:15<1:22:29, 31.73s/it]

Row 24 saved to evaluation_results_1.xlsx


评估进度:  14%|█▍        | 25/180 [11:34<1:12:38, 28.12s/it]

Row 25 saved to evaluation_results_1.xlsx


评估进度:  14%|█▍        | 26/180 [12:14<1:20:42, 31.44s/it]

Row 26 saved to evaluation_results_1.xlsx


评估进度:  15%|█▌        | 27/180 [12:31<1:09:33, 27.28s/it]

Row 27 saved to evaluation_results_1.xlsx


评估进度:  16%|█▌        | 28/180 [13:02<1:11:50, 28.36s/it]

Row 28 saved to evaluation_results_1.xlsx


评估进度:  16%|█▌        | 29/180 [13:53<1:28:21, 35.11s/it]

Row 29 saved to evaluation_results_1.xlsx


评估进度:  17%|█▋        | 30/180 [14:20<1:21:57, 32.78s/it]

Row 30 saved to evaluation_results_1.xlsx


评估进度:  17%|█▋        | 31/180 [15:42<1:57:54, 47.48s/it]

Row 31 saved to evaluation_results_1.xlsx


评估进度:  18%|█▊        | 32/180 [16:45<2:08:30, 52.09s/it]

Row 32 saved to evaluation_results_1.xlsx


评估进度:  18%|█▊        | 33/180 [17:02<1:42:06, 41.68s/it]

Row 33 saved to evaluation_results_1.xlsx


评估进度:  19%|█▉        | 34/180 [17:14<1:19:23, 32.63s/it]

Row 34 saved to evaluation_results_1.xlsx


评估进度:  19%|█▉        | 35/180 [17:29<1:06:20, 27.45s/it]

Row 35 saved to evaluation_results_1.xlsx


评估进度:  20%|██        | 36/180 [18:01<1:08:41, 28.62s/it]

Row 36 saved to evaluation_results_1.xlsx


评估进度:  21%|██        | 37/180 [18:38<1:14:24, 31.22s/it]

Row 37 saved to evaluation_results_1.xlsx


评估进度:  21%|██        | 38/180 [18:53<1:02:25, 26.38s/it]

Row 38 saved to evaluation_results_1.xlsx


评估进度:  22%|██▏       | 39/180 [19:08<53:55, 22.95s/it]  

Row 39 saved to evaluation_results_1.xlsx


评估进度:  22%|██▏       | 40/180 [19:33<54:56, 23.54s/it]

Row 40 saved to evaluation_results_1.xlsx


评估进度:  23%|██▎       | 41/180 [36:39<12:31:35, 324.43s/it]

Row 41 saved to evaluation_results_1.xlsx


评估进度:  23%|██▎       | 42/180 [37:00<8:56:33, 233.28s/it] 

Row 42 saved to evaluation_results_1.xlsx


评估进度:  24%|██▍       | 43/180 [37:17<6:24:43, 168.49s/it]

Row 43 saved to evaluation_results_1.xlsx


评估进度:  24%|██▍       | 44/180 [37:31<4:36:46, 122.11s/it]

Row 44 saved to evaluation_results_1.xlsx


评估进度:  25%|██▌       | 45/180 [38:42<4:00:15, 106.78s/it]

Row 45 saved to evaluation_results_1.xlsx


评估进度:  26%|██▌       | 46/180 [39:27<3:17:06, 88.26s/it] 

Row 46 saved to evaluation_results_1.xlsx


评估进度:  26%|██▌       | 47/180 [40:21<2:52:53, 78.00s/it]

Row 47 saved to evaluation_results_1.xlsx


评估进度:  27%|██▋       | 48/180 [41:18<2:37:48, 71.73s/it]

Row 48 saved to evaluation_results_1.xlsx


评估进度:  27%|██▋       | 49/180 [42:28<2:35:35, 71.27s/it]

Row 49 saved to evaluation_results_1.xlsx


评估进度:  28%|██▊       | 50/180 [42:43<1:57:40, 54.31s/it]

Row 50 saved to evaluation_results_1.xlsx


评估进度:  28%|██▊       | 51/180 [44:33<2:32:15, 70.82s/it]

Row 51 saved to evaluation_results_1.xlsx


评估进度:  29%|██▉       | 52/180 [44:48<1:55:38, 54.21s/it]

Row 52 saved to evaluation_results_1.xlsx


评估进度:  29%|██▉       | 53/180 [45:09<1:33:19, 44.09s/it]

Row 53 saved to evaluation_results_1.xlsx


评估进度:  30%|███       | 54/180 [45:48<1:29:30, 42.62s/it]

Row 54 saved to evaluation_results_1.xlsx


评估进度:  31%|███       | 55/180 [46:02<1:10:52, 34.02s/it]

Row 55 saved to evaluation_results_1.xlsx


评估进度:  31%|███       | 56/180 [46:18<59:27, 28.77s/it]  

Row 56 saved to evaluation_results_1.xlsx


评估进度:  32%|███▏      | 57/180 [46:42<56:02, 27.34s/it]

Row 57 saved to evaluation_results_1.xlsx


评估进度:  32%|███▏      | 58/180 [47:02<51:11, 25.18s/it]

Row 58 saved to evaluation_results_1.xlsx


评估进度:  33%|███▎      | 59/180 [47:24<48:23, 24.00s/it]

Row 59 saved to evaluation_results_1.xlsx


评估进度:  33%|███▎      | 60/180 [47:43<44:59, 22.50s/it]

Row 60 saved to evaluation_results_1.xlsx


评估进度:  34%|███▍      | 61/180 [47:59<41:07, 20.74s/it]

Row 61 saved to evaluation_results_1.xlsx


评估进度:  34%|███▍      | 62/180 [48:31<47:26, 24.12s/it]

Row 62 saved to evaluation_results_1.xlsx


评估进度:  35%|███▌      | 63/180 [48:47<42:18, 21.70s/it]

Row 63 saved to evaluation_results_1.xlsx


评估进度:  36%|███▌      | 64/180 [49:02<37:57, 19.63s/it]

Row 64 saved to evaluation_results_1.xlsx


评估进度:  36%|███▌      | 65/180 [49:20<36:24, 19.00s/it]

Row 65 saved to evaluation_results_1.xlsx


评估进度:  37%|███▋      | 66/180 [49:31<31:41, 16.68s/it]

Row 66 saved to evaluation_results_1.xlsx


评估进度:  37%|███▋      | 67/180 [51:33<1:30:52, 48.25s/it]

Row 67 saved to evaluation_results_1.xlsx


评估进度:  38%|███▊      | 68/180 [52:44<1:42:44, 55.04s/it]

Row 68 saved to evaluation_results_1.xlsx


评估进度:  38%|███▊      | 69/180 [53:42<1:43:56, 56.18s/it]

Row 69 saved to evaluation_results_1.xlsx


评估进度:  39%|███▉      | 70/180 [54:04<1:24:04, 45.86s/it]

Row 70 saved to evaluation_results_1.xlsx


评估进度:  39%|███▉      | 71/180 [54:25<1:09:33, 38.29s/it]

Row 71 saved to evaluation_results_1.xlsx


评估进度:  40%|████      | 72/180 [59:18<3:26:35, 114.77s/it]

Row 72 saved to evaluation_results_1.xlsx


评估进度:  41%|████      | 73/180 [1:01:21<3:28:50, 117.11s/it]

Row 73 saved to evaluation_results_1.xlsx


评估进度:  41%|████      | 74/180 [1:02:36<3:04:43, 104.56s/it]

Row 74 saved to evaluation_results_1.xlsx


评估进度:  42%|████▏     | 75/180 [1:04:34<3:09:52, 108.50s/it]

Row 75 saved to evaluation_results_1.xlsx


评估进度:  42%|████▏     | 76/180 [1:05:14<2:32:45, 88.13s/it] 

Row 76 saved to evaluation_results_1.xlsx


评估进度:  43%|████▎     | 77/180 [1:07:16<2:48:42, 98.28s/it]

Row 77 saved to evaluation_results_1.xlsx


评估进度:  43%|████▎     | 78/180 [1:07:44<2:11:08, 77.14s/it]

Row 78 saved to evaluation_results_1.xlsx


评估进度:  44%|████▍     | 79/180 [1:09:22<2:20:16, 83.33s/it]

Row 79 saved to evaluation_results_1.xlsx


评估进度:  44%|████▍     | 80/180 [1:10:40<2:16:29, 81.89s/it]

Row 80 saved to evaluation_results_1.xlsx


评估进度:  45%|████▌     | 81/180 [1:11:37<2:02:46, 74.41s/it]

Row 81 saved to evaluation_results_1.xlsx


评估进度:  46%|████▌     | 82/180 [1:12:40<1:55:52, 70.94s/it]

Row 82 saved to evaluation_results_1.xlsx


评估进度:  46%|████▌     | 83/180 [1:13:45<1:51:37, 69.04s/it]

Row 83 saved to evaluation_results_1.xlsx


评估进度:  47%|████▋     | 84/180 [1:15:42<2:13:25, 83.39s/it]

Row 84 saved to evaluation_results_1.xlsx


评估进度:  47%|████▋     | 85/180 [1:16:54<2:06:38, 79.99s/it]

Row 85 saved to evaluation_results_1.xlsx


评估进度:  48%|████▊     | 86/180 [1:19:26<2:39:20, 101.70s/it]

Row 86 saved to evaluation_results_1.xlsx


评估进度:  48%|████▊     | 87/180 [1:20:20<2:15:39, 87.52s/it] 

Row 87 saved to evaluation_results_1.xlsx


评估进度:  49%|████▉     | 88/180 [1:21:03<1:53:39, 74.12s/it]

Row 88 saved to evaluation_results_1.xlsx


评估进度:  49%|████▉     | 89/180 [1:22:47<2:05:53, 83.01s/it]

Row 89 saved to evaluation_results_1.xlsx


评估进度:  50%|█████     | 90/180 [1:24:50<2:22:30, 95.01s/it]

Row 90 saved to evaluation_results_1.xlsx


评估进度:  51%|█████     | 91/180 [1:26:43<2:28:57, 100.43s/it]

Row 91 saved to evaluation_results_1.xlsx


评估进度:  51%|█████     | 92/180 [1:28:47<2:37:29, 107.38s/it]

Row 92 saved to evaluation_results_1.xlsx


评估进度:  52%|█████▏    | 93/180 [1:30:22<2:30:14, 103.62s/it]

Row 93 saved to evaluation_results_1.xlsx


评估进度:  52%|█████▏    | 94/180 [1:32:02<2:27:03, 102.60s/it]

Row 94 saved to evaluation_results_1.xlsx


评估进度:  53%|█████▎    | 95/180 [1:34:14<2:37:50, 111.42s/it]

Row 95 saved to evaluation_results_1.xlsx


评估进度:  53%|█████▎    | 96/180 [1:35:29<2:20:38, 100.46s/it]

Row 96 saved to evaluation_results_1.xlsx


评估进度:  54%|█████▍    | 97/180 [1:37:04<2:16:57, 99.01s/it] 

Row 97 saved to evaluation_results_1.xlsx


评估进度:  54%|█████▍    | 98/180 [1:38:19<2:05:25, 91.78s/it]

Row 98 saved to evaluation_results_1.xlsx


评估进度:  55%|█████▌    | 99/180 [1:39:26<1:53:44, 84.25s/it]

Row 99 saved to evaluation_results_1.xlsx


评估进度:  56%|█████▌    | 100/180 [1:42:08<2:23:38, 107.74s/it]

Row 100 saved to evaluation_results_1.xlsx


评估进度:  56%|█████▌    | 101/180 [1:43:56<2:21:46, 107.68s/it]

Row 101 saved to evaluation_results_1.xlsx


评估进度:  57%|█████▋    | 102/180 [1:47:16<2:55:51, 135.28s/it]

Row 102 saved to evaluation_results_1.xlsx


评估进度:  57%|█████▋    | 103/180 [1:49:27<2:51:56, 133.98s/it]

Row 103 saved to evaluation_results_1.xlsx


评估进度:  58%|█████▊    | 104/180 [1:50:32<2:23:42, 113.45s/it]

Row 104 saved to evaluation_results_1.xlsx


评估进度:  58%|█████▊    | 105/180 [1:52:51<2:31:13, 120.98s/it]

Row 105 saved to evaluation_results_1.xlsx


评估进度:  59%|█████▉    | 106/180 [1:54:11<2:13:59, 108.65s/it]

Row 106 saved to evaluation_results_1.xlsx


评估进度:  59%|█████▉    | 107/180 [1:55:29<2:01:17, 99.69s/it] 

Row 107 saved to evaluation_results_1.xlsx


评估进度:  60%|██████    | 108/180 [1:57:14<2:01:32, 101.28s/it]

Row 108 saved to evaluation_results_1.xlsx


评估进度:  61%|██████    | 109/180 [1:59:24<2:09:43, 109.63s/it]

Row 109 saved to evaluation_results_1.xlsx


评估进度:  61%|██████    | 110/180 [2:02:03<2:25:19, 124.56s/it]

Row 110 saved to evaluation_results_1.xlsx


评估进度:  62%|██████▏   | 111/180 [2:03:39<2:13:21, 115.96s/it]

Row 111 saved to evaluation_results_1.xlsx


评估进度:  62%|██████▏   | 112/180 [2:04:55<1:57:56, 104.07s/it]

Row 112 saved to evaluation_results_1.xlsx


评估进度:  63%|██████▎   | 113/180 [2:07:15<2:08:05, 114.70s/it]

Row 113 saved to evaluation_results_1.xlsx


评估进度:  63%|██████▎   | 114/180 [2:08:39<1:56:05, 105.54s/it]

Row 114 saved to evaluation_results_1.xlsx


评估进度:  64%|██████▍   | 115/180 [2:09:55<1:44:38, 96.60s/it] 

Row 115 saved to evaluation_results_1.xlsx


评估进度:  64%|██████▍   | 116/180 [2:10:39<1:26:27, 81.05s/it]

Row 116 saved to evaluation_results_1.xlsx


评估进度:  65%|██████▌   | 117/180 [2:11:32<1:16:01, 72.40s/it]

Row 117 saved to evaluation_results_1.xlsx


评估进度:  66%|██████▌   | 118/180 [2:13:13<1:23:41, 81.00s/it]

Row 118 saved to evaluation_results_1.xlsx


评估进度:  66%|██████▌   | 119/180 [2:15:13<1:34:28, 92.92s/it]

Row 119 saved to evaluation_results_1.xlsx


评估进度:  67%|██████▋   | 120/180 [2:16:04<1:20:14, 80.25s/it]

Row 120 saved to evaluation_results_1.xlsx


评估进度:  67%|██████▋   | 121/180 [2:16:52<1:09:27, 70.63s/it]

Row 121 saved to evaluation_results_1.xlsx


评估进度:  68%|██████▊   | 122/180 [2:18:04<1:08:41, 71.05s/it]

Row 122 saved to evaluation_results_1.xlsx


评估进度:  68%|██████▊   | 123/180 [2:19:52<1:18:01, 82.12s/it]

Row 123 saved to evaluation_results_1.xlsx


评估进度:  69%|██████▉   | 124/180 [2:21:00<1:12:31, 77.71s/it]

Row 124 saved to evaluation_results_1.xlsx


评估进度:  69%|██████▉   | 125/180 [2:23:35<1:32:29, 100.90s/it]

Row 125 saved to evaluation_results_1.xlsx


评估进度:  70%|███████   | 126/180 [2:24:30<1:18:24, 87.13s/it] 

Row 126 saved to evaluation_results_1.xlsx


评估进度:  71%|███████   | 127/180 [2:24:53<1:00:08, 68.08s/it]

Row 127 saved to evaluation_results_1.xlsx


评估进度:  71%|███████   | 128/180 [2:26:18<1:03:21, 73.10s/it]

Row 128 saved to evaluation_results_1.xlsx


评估进度:  72%|███████▏  | 129/180 [2:26:34<47:39, 56.07s/it]  

Row 129 saved to evaluation_results_1.xlsx


评估进度:  72%|███████▏  | 130/180 [2:27:02<39:35, 47.50s/it]

Row 130 saved to evaluation_results_1.xlsx


评估进度:  73%|███████▎  | 131/180 [2:27:21<31:53, 39.05s/it]

Row 131 saved to evaluation_results_1.xlsx


评估进度:  73%|███████▎  | 132/180 [2:27:40<26:18, 32.88s/it]

Row 132 saved to evaluation_results_1.xlsx


评估进度:  74%|███████▍  | 133/180 [2:27:57<22:00, 28.09s/it]

Row 133 saved to evaluation_results_1.xlsx


评估进度:  74%|███████▍  | 134/180 [2:28:12<18:38, 24.31s/it]

Row 134 saved to evaluation_results_1.xlsx


评估进度:  75%|███████▌  | 135/180 [2:28:42<19:27, 25.95s/it]

Row 135 saved to evaluation_results_1.xlsx


评估进度:  76%|███████▌  | 136/180 [2:28:59<17:08, 23.37s/it]

Row 136 saved to evaluation_results_1.xlsx


评估进度:  76%|███████▌  | 137/180 [2:29:18<15:49, 22.08s/it]

Row 137 saved to evaluation_results_1.xlsx


评估进度:  77%|███████▋  | 138/180 [2:29:43<15:59, 22.83s/it]

Row 138 saved to evaluation_results_1.xlsx


评估进度:  77%|███████▋  | 139/180 [2:30:24<19:24, 28.40s/it]

Row 139 saved to evaluation_results_1.xlsx


评估进度:  78%|███████▊  | 140/180 [2:30:58<20:05, 30.13s/it]

Row 140 saved to evaluation_results_1.xlsx


评估进度:  78%|███████▊  | 141/180 [2:31:24<18:37, 28.65s/it]

Row 141 saved to evaluation_results_1.xlsx


评估进度:  79%|███████▉  | 142/180 [2:31:44<16:31, 26.08s/it]

Row 142 saved to evaluation_results_1.xlsx


评估进度:  79%|███████▉  | 143/180 [2:33:03<25:57, 42.10s/it]

Row 143 saved to evaluation_results_1.xlsx


评估进度:  80%|████████  | 144/180 [2:35:27<43:32, 72.56s/it]

Row 144 saved to evaluation_results_1.xlsx


评估进度:  81%|████████  | 145/180 [2:35:56<34:44, 59.57s/it]

Row 145 saved to evaluation_results_1.xlsx


评估进度:  81%|████████  | 146/180 [2:37:20<37:54, 66.90s/it]

Row 146 saved to evaluation_results_1.xlsx


评估进度:  82%|████████▏ | 147/180 [2:38:42<39:19, 71.49s/it]

Row 147 saved to evaluation_results_1.xlsx


评估进度:  82%|████████▏ | 148/180 [2:39:32<34:42, 65.08s/it]

Row 148 saved to evaluation_results_1.xlsx


评估进度:  83%|████████▎ | 149/180 [2:41:06<38:02, 73.62s/it]

Row 149 saved to evaluation_results_1.xlsx


评估进度:  83%|████████▎ | 150/180 [2:42:12<35:41, 71.38s/it]

Row 150 saved to evaluation_results_1.xlsx


评估进度:  84%|████████▍ | 151/180 [2:43:12<32:51, 67.99s/it]

Row 151 saved to evaluation_results_1.xlsx


评估进度:  84%|████████▍ | 152/180 [2:44:53<36:16, 77.73s/it]

Row 152 saved to evaluation_results_1.xlsx


评估进度:  85%|████████▌ | 153/180 [2:45:38<30:37, 68.07s/it]

Row 153 saved to evaluation_results_1.xlsx


评估进度:  86%|████████▌ | 154/180 [2:47:36<35:54, 82.85s/it]

Row 154 saved to evaluation_results_1.xlsx


评估进度:  86%|████████▌ | 155/180 [2:48:49<33:22, 80.11s/it]

Row 155 saved to evaluation_results_1.xlsx


评估进度:  87%|████████▋ | 156/180 [2:50:03<31:19, 78.32s/it]

Row 156 saved to evaluation_results_1.xlsx


评估进度:  87%|████████▋ | 157/180 [2:52:20<36:40, 95.67s/it]

Row 157 saved to evaluation_results_1.xlsx


评估进度:  88%|████████▊ | 158/180 [2:53:48<34:18, 93.58s/it]

Row 158 saved to evaluation_results_1.xlsx


评估进度:  88%|████████▊ | 159/180 [2:54:52<29:38, 84.70s/it]

Row 159 saved to evaluation_results_1.xlsx


评估进度:  89%|████████▉ | 160/180 [2:56:12<27:44, 83.22s/it]

Row 160 saved to evaluation_results_1.xlsx


评估进度:  89%|████████▉ | 161/180 [2:56:40<21:06, 66.68s/it]

Row 161 saved to evaluation_results_1.xlsx


评估进度:  90%|█████████ | 162/180 [2:57:06<16:20, 54.45s/it]

Row 162 saved to evaluation_results_1.xlsx


评估进度:  91%|█████████ | 163/180 [2:57:35<13:15, 46.80s/it]

Row 163 saved to evaluation_results_1.xlsx


评估进度:  91%|█████████ | 164/180 [2:58:08<11:20, 42.55s/it]

Row 164 saved to evaluation_results_1.xlsx


评估进度:  92%|█████████▏| 165/180 [3:00:38<18:45, 75.03s/it]

Row 165 saved to evaluation_results_1.xlsx


评估进度:  92%|█████████▏| 166/180 [3:00:53<13:17, 56.97s/it]

Row 166 saved to evaluation_results_1.xlsx


评估进度:  93%|█████████▎| 167/180 [3:01:12<09:51, 45.53s/it]

Row 167 saved to evaluation_results_1.xlsx


评估进度:  93%|█████████▎| 168/180 [3:01:32<07:36, 38.01s/it]

Row 168 saved to evaluation_results_1.xlsx


评估进度:  94%|█████████▍| 169/180 [3:01:51<05:53, 32.12s/it]

Row 169 saved to evaluation_results_1.xlsx


评估进度:  94%|█████████▍| 170/180 [3:02:09<04:39, 27.92s/it]

Row 170 saved to evaluation_results_1.xlsx


评估进度:  95%|█████████▌| 171/180 [3:03:02<05:18, 35.35s/it]

Row 171 saved to evaluation_results_1.xlsx


评估进度:  96%|█████████▌| 172/180 [3:03:36<04:39, 34.99s/it]

Row 172 saved to evaluation_results_1.xlsx


评估进度:  96%|█████████▌| 173/180 [3:04:01<03:43, 31.97s/it]

Row 173 saved to evaluation_results_1.xlsx


评估进度:  97%|█████████▋| 174/180 [3:04:25<02:57, 29.58s/it]

Row 174 saved to evaluation_results_1.xlsx


评估进度:  97%|█████████▋| 175/180 [3:04:44<02:12, 26.44s/it]

Row 175 saved to evaluation_results_1.xlsx


评估进度:  98%|█████████▊| 176/180 [3:05:49<02:32, 38.18s/it]

Row 176 saved to evaluation_results_1.xlsx


评估进度:  98%|█████████▊| 177/180 [3:06:31<01:57, 39.08s/it]

Row 177 saved to evaluation_results_1.xlsx


评估进度:  99%|█████████▉| 178/180 [3:07:30<01:30, 45.28s/it]

Row 178 saved to evaluation_results_1.xlsx


评估进度:  99%|█████████▉| 179/180 [3:08:05<00:42, 42.02s/it]

Row 179 saved to evaluation_results_1.xlsx


评估进度: 100%|██████████| 180/180 [3:08:31<00:00, 62.84s/it]

Row 180 saved to evaluation_results_1.xlsx



