### 初始化全局变量，导入包

In [3]:
import os
import sys
import json
from model import call_huoshan,call_openai
import pandas as pd
if "__file__" in globals():
    os.chdir(os.path.dirname(os.path.abspath(__file__)))

raw_data_path= os.path.join("raw_data")
scienceQA_path = os.path.join(raw_data_path, "ScienceQA")
sciKnowEval_path = os.path.join(raw_data_path, "SciKnowEval")

### 查看并读取sciQA数据

In [4]:
# Load the pre-processed ScienceQA rows into memory.
ScienceQA_processed = os.path.join(raw_data_path, "ScienceQA_processed.json")
# ScienceQA_processed_with_image = os.path.join(raw_data_path, "ScienceQA_processed_with_image.json")
# Read explicitly as UTF-8: this file is written back later with
# encoding="utf-8" and ensure_ascii=False, so relying on the platform default
# encoding would break on systems whose locale is not UTF-8.
with open(ScienceQA_processed, "r", encoding="utf-8") as f:
    sciQA_data = json.load(f)

In [5]:

# Folder that receives the images extracted from ScienceQA rows.
image_output_dir = os.path.join(scienceQA_path, "images")
# Create the folder up front; an already existing folder is not an error.
os.makedirs(image_output_dir, exist_ok=True)

def save_image(row, id):
    """Write the image bytes carried by *row* to ``image_output_dir``.

    Nothing is written unless ``row["image"]`` is a dict that has a non-empty
    ``"bytes"`` entry. The file is named ``image_<id>.png``; the ``.png``
    extension is assumed and is not derived from the data.

    Args:
        row: Mapping with an ``"image"`` entry.
        id: Identifier used to build the output file name.

    Returns:
        None. A failure while writing is printed and not raised.
    """
    image_field = row["image"]
    # Guard clauses: skip rows that carry no usable image payload.
    if not isinstance(image_field, dict) or "bytes" not in image_field:
        return
    payload = image_field["bytes"]
    if not payload:
        return

    target_path = os.path.join(image_output_dir, f"image_{id}.png")
    try:
        # Binary mode: the payload is written verbatim.
        with open(target_path, "wb") as sink:
            sink.write(payload)
    except Exception as e:
        print(f"Error saving image {target_path}: {e}")

#### 根据一个问题，以及不同的文件类型，构建传给模型的最终prompt

In [6]:


def sciQA_build_prompt(row):
    """Build the multiple-choice prompt sent to the model for one row.

    Each entry of ``row["choices"]`` is listed on its own line as
    ``(<index>): <text>`` with zero-based indices, and the model is told to
    reply with the index only. ``row["hint"]`` is included as optional
    reference input.

    Args:
        row: Mapping with ``"question"``, ``"choices"`` and ``"hint"`` entries.

    Returns:
        The prompt string. It starts with a newline and ends with a line of
        four spaces, and the hint line carries one trailing space.
    """
    question_text = row["question"]
    option_texts = row["choices"]
    hint_text = row["hint"]

    # Option text is inserted as-is; only the index label is added.
    numbered_choices = "\n".join(
        f"({position}): {text}" for position, text in enumerate(option_texts)
    )

    # A previous variant of this prompt (now disabled) asked the model to
    # judge whether the hint is required to answer the question, replying
    # only "yes", "no", or "null" (the latter when the hint is empty).
    prompt_lines = (
        "",
        "Review the question and the options provided below. Each option is clearly labeled with its numerical index.",
        "Select the option that best answers the question.",
        "Respond with ONLY the numerical index label of your chosen option.",
        "Your entire response must be a single integer. Do not include any other text or explanations.",
        "Input (Optional, for reference only, may be empty or not needed):",
        f"{hint_text} ",
        "Question:",
        f"{question_text}",
        "Options:",
        numbered_choices,
        "Answer Index:",
        "    ",
    )
    return "\n".join(prompt_lines)

In [7]:
import hashlib

def generate_md5(input_string):
    """Return the MD5 digest of *input_string* as a hexadecimal string.

    The text is encoded as UTF-8 before hashing, because ``hashlib`` works on
    bytes.
    """
    return hashlib.md5(input_string.encode('utf-8')).hexdigest()


#### 都是选择题，直接使用规则比对

In [8]:
def sciQA_rule_verifier(question, groundtruth, model_content):
    """Return True when the model's answer matches the ground-truth answer.

    Values that compare equal are always accepted. Otherwise both sides are
    converted to text and stripped of surrounding whitespace before comparing,
    so an integer ground truth such as ``1`` matches a model reply such as
    ``"1"`` or ``" 1\\n"``.

    Args:
        question: The question text. Unused by this rule; kept so the
            signature stays stable for callers.
        groundtruth: The expected answer.
        model_content: The answer produced by the model.

    Returns:
        bool: True on a match, False otherwise.
    """
    # A missing value only matches another missing value; without this guard
    # None would be stringified and match the literal text "None".
    if groundtruth is None or model_content is None:
        return groundtruth is None and model_content is None
    if groundtruth == model_content:
        return True
    return str(groundtruth).strip() == str(model_content).strip()

#### 处理sciQA数据：生成generation，调用verifier，整合成符合要求的最终dict格式

In [None]:
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
from threading import Lock
import numpy as np
# NOTE(review): this rebinds the imported ``Lock`` class name to a single lock
# instance, so ``Lock()`` can no longer be called once this line has run.
Lock = Lock()


def sciKnowEval_process_row(row):
    """Copy a non-empty ``metadata.solution`` into ``ground_truth.solution``.

    The row is modified in place. The model-generation, rule-verification and
    solve-rate steps are currently disabled (see the commented code below).

    Args:
        row: Mapping with ``"metadata"`` (containing ``"solution"``),
            ``"ground_truth"`` (containing ``"final_answer"``) and
            ``"generations"`` entries.

    Returns:
        None.

    Raises:
        KeyError: If any of the entries listed above is missing.
    """
    worked_solution = row["metadata"]["solution"]
    truth = row["ground_truth"]
    # Only the disabled pipeline below uses these two values. The lookups stay
    # so that a row missing either field still fails before it is modified.
    _final_answer = truth["final_answer"]
    _generations = row["generations"]

    if worked_solution == "":
        return
    truth["solution"] = worked_solution

    # Disabled generation / verification pipeline, kept for reference.
    # NOTE(review): it refers to a ``prompt`` name that is not defined in this
    # function, and ``range(0)`` performs no iterations.
    # for i in range(0): # number of model calls, tentatively 1
    #     generation={}
    #     generation["model"] = "DeepSeek-R1"
    #     reasoning_content, answer_content = call_huoshan(prompt,"r1")
    #     answer_content=answer_content.strip()
    #     generation["reasoning_content"] = reasoning_content
    #     generation["answer_content"] = answer_content
    #     # Verify the model content
    #     evaluation={}
    #     correctness = sciQA_rule_verifier(prompt, final_answer, answer_content)
    #
    #     evaluation["correctness"] = correctness
    #     evaluation["By"] = "mengpengyu"
    #     evaluation["Method"] = "Rule"
    #     evaluation["extra_tags"] = []
    #     generation["evaluation"] = evaluation
    #     generations.append(generation)

    # if len(generations) == 0:
    #     row["solve_rate"] = None
    # else:
    #     row["solve_rate"] = sum(1 for gen in generations if gen["evaluation"]["correctness"]) / len(generations)
    
    


In [10]:
import traceback
import pdb

# Run the per-row processing on a thread pool. Each future is mapped back to
# the index of its row so a failure can be reported against that row.
with ThreadPoolExecutor(max_workers=200) as executor:
    counter = 0
    futures = {executor.submit(sciKnowEval_process_row, row): index for index, row in enumerate(sciQA_data)}
    for future in as_completed(futures):
        index = futures[future]
        try:
            future.result()  # re-raises any exception raised in the worker
        except Exception as e:
            # Report the failing row and carry on with the remaining rows.
            print(f"Error processing row {index}: {e}")
            traceback.print_exc()
        else:
            counter += 1
            if counter % 10 == 0:
                print(f"Processed {counter} rows.")

# Write the results to the JSON files.
output_file = os.path.join(raw_data_path, "ScienceQA_processed.json")
output_file_with_image = os.path.join(raw_data_path, "ScienceQA_processed_with_image.json")
write_targets = (
    (output_file, f"Results written to {output_file}"),
    (output_file_with_image, f"Results with images written to {output_file_with_image}"),
)
# Tracks the file being written so the error message names the right one.
current_target = output_file
try:
    # Serialize once, before any file is opened for writing. output_file is
    # the same path the data was loaded from, so a serialization error must
    # not be allowed to leave that file truncated.
    serialized = json.dumps(sciQA_data, ensure_ascii=False, indent=4)
    for current_target, success_message in write_targets:
        with open(current_target, "w", encoding="utf-8") as f:
            f.write(serialized)
        print(success_message)
except Exception as e:
    print(f"Error writing to file {current_target}: {e}")
    # Drop into the debugger so the in-memory results can still be inspected.
    pdb.set_trace()


Processed 10 rows.
Processed 20 rows.
Processed 30 rows.
Processed 40 rows.
Processed 50 rows.
Processed 60 rows.
Processed 70 rows.
Processed 80 rows.
Processed 90 rows.
Processed 100 rows.
Processed 110 rows.
Processed 120 rows.
Processed 130 rows.
Processed 140 rows.
Processed 150 rows.
Processed 160 rows.
Processed 170 rows.
Processed 180 rows.
Processed 190 rows.
Processed 200 rows.
Processed 210 rows.
Processed 220 rows.
Processed 230 rows.
Processed 240 rows.
Processed 250 rows.
Processed 260 rows.
Processed 270 rows.
Processed 280 rows.
Processed 290 rows.
Processed 300 rows.
Processed 310 rows.
Processed 320 rows.
Processed 330 rows.
Processed 340 rows.
Processed 350 rows.
Processed 360 rows.
Processed 370 rows.
Processed 380 rows.
Processed 390 rows.
Processed 400 rows.
Processed 410 rows.
Processed 420 rows.
Processed 430 rows.
Processed 440 rows.
Processed 450 rows.
Processed 460 rows.
Processed 470 rows.
Processed 480 rows.
Processed 490 rows.
Processed 500 rows.
Processed