In [None]:
import requests
from langchain_openai import ChatOpenAI
import os
import json
import re
from collections import defaultdict

# --- Runtime configuration ---------------------------------------------------
# Never commit a real key in this cell: export OPENAI_API_KEY before starting
# the kernel. setdefault keeps a key that is already present in the environment
# instead of overwriting it with the empty placeholder.
os.environ.setdefault("OPENAI_API_KEY", "")
if not os.environ["OPENAI_API_KEY"]:
    print("警告: 未设置 OPENAI_API_KEY，模型调用可能会失败。")

# Non-default API endpoint for this experiment; ChatOpenAI is expected to pick
# it up from the environment (see the langchain-openai docs).
os.environ["OPENAI_API_BASE"] = "https://api.mixrai.com/v1"
llm = ChatOpenAI(model="gpt-4o")

# Exam files are read from data_folder; one result file per exam is written
# to output_folder, which is created on first run.
data_folder = 'data'
output_folder = 'results/12gpt-4o'
os.makedirs(output_folder, exist_ok=True)

In [4]:
def _extract_choice(raw_text):
    """Pull the option letter(s) out of a raw model reply.

    The reply is stripped and upper-cased. A run of letters A-D that is not
    attached to other Latin letters is preferred, so "The answer is B" gives
    "B" instead of the "A" inside "ANSWER". If no such stand-alone run exists,
    the first A-D run anywhere in the reply is used. If the reply contains no
    A-D letter at all, the stripped reply is returned unchanged.
    """
    cleaned = raw_text.strip()
    upper = cleaned.upper()
    standalone = re.search(r'(?<![A-Z])[A-D]+(?![A-Z])', upper)
    if standalone:
        return standalone.group(0)
    embedded = re.search(r'([A-D]+)', upper)
    if embedded:
        return embedded.group(1)
    return cleaned


def process_data_file(file_path):
    """Evaluate the model on one exam JSON file and save per-question results.

    The file must contain the top-level keys "title", "skip" (ids to ignore)
    and "questions" (a list of dicts with at least "id" and "type"). Questions
    whose id is listed in "skip", or that have no "answer" key, are not sent
    to the model. A question that fails (API error, missing "question" key,
    ...) is recorded with pred_answer "ERROR" and counted as wrong.

    Relies on the module-level names ``llm`` (object with ``invoke``) and
    ``output_folder``. Prints statistics to stdout and writes
    ``<output_folder>/<title>.json``.

    Args:
        file_path: Path of the JSON exam file to evaluate.

    Returns:
        None.

    Raises:
        KeyError: If a required top-level key, or a question's "id"/"type",
            is missing.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    questions = data["questions"]
    skip_ids = {str(item) for item in data["skip"]}
    file_name = data["title"]
    print(f"\n正在处理文件: {file_name}")

    def is_skipped(question):
        """Return True when the question id is listed in the file's skip list."""
        # skip_ids holds strings, so the id is stringified too; otherwise
        # integer ids would never match and nothing would be skipped.
        return str(question["id"]) in skip_ids

    # Count questions per type, ignoring skipped ones.
    type_counts = defaultdict(int)
    for question in questions:
        if not is_skipped(question):
            type_counts[question["type"]] += 1

    print("\n各题型题数统计 (剔除skip题):")
    for type_name, count in type_counts.items():
        print(f"{type_name}: {count}题")
    print(f"\n总题数 (剔除skip题): {sum(type_counts.values())}")
    print(f"跳过的题数: {len(skip_ids)}")

    def predict_answer(question_data):
        """Ask the model for the answer to one question and return the letter(s)."""
        question_text = question_data["question"]
        options = question_data.get("options")
        if options:  # question comes with options
            options_text = "\n".join([f"{k}、{v}" for k, v in options.items()])
            full_question = f"{question_text}\n{options_text}"
        else:
            full_question = question_text  # no options (missing or empty)

        prompt = f"""
        请根据题目内容选择最合适的答案。只需返回选项字母，不要包含其他任何内容！

        题目：
        {full_question}
        """
        response = llm.invoke(prompt)
        return _extract_choice(response.content)

    # Run the model over every eligible question.
    results = []
    correct = 0
    total_processed = 0
    type_stats = defaultdict(lambda: {'correct': 0, 'total': 0})

    for q in questions:
        if is_skipped(q) or "answer" not in q:
            continue

        total_processed += 1
        question_type = q["type"]
        type_stats[question_type]['total'] += 1

        # Fields shared by the success and the error record. .get() is used
        # for the optional ones so that building the error record can never
        # raise a second time and abort the whole file.
        record = {
            "id": q["id"],
            "type": question_type,
            "question": q.get("question", ""),
            "options": q.get("options"),
            "true_answer": q["answer"],
        }

        try:
            pred_answer = predict_answer(q)
        except Exception as e:
            print(f"Error processing question {q['id']}: {e}")
            record["pred_answer"] = "ERROR"
            record["is_correct"] = False
            record["error"] = str(e)
        else:
            is_correct = pred_answer == q["answer"]
            if is_correct:
                correct += 1
                type_stats[question_type]['correct'] += 1
            record["pred_answer"] = pred_answer
            record["is_correct"] = is_correct

        results.append(record)

    # Overall and per-type accuracy (0 when nothing was processed).
    accuracy = correct / total_processed if total_processed > 0 else 0
    type_accuracies = {}
    for type_name, stats in type_stats.items():
        type_total = stats['total']
        type_correct = stats['correct']
        type_accuracies[type_name] = {
            'accuracy': type_correct / type_total if type_total > 0 else 0,
            'correct': type_correct,
            'total': type_total
        }

    print(f"\n总体准确率(剔除skip题目): {accuracy:.2%} ({correct}/{total_processed})")
    print("\n各题型准确率:")
    for type_name, acc in type_accuracies.items():
        print(f"{type_name}: {acc['accuracy']:.2%} ({acc['correct']}/{acc['total']})")

    # Persist the summary together with every per-question record.
    output_file = os.path.join(output_folder, f"{file_name}.json")
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump({
            "title": data["title"],
            "skip": data["skip"],
            "overall_accuracy": accuracy,
            "total_processed_questions": total_processed,
            "correct_answers": correct,
            "skipped_questions": len(skip_ids),
            "type_accuracies": type_accuracies,
            "results": results
        }, f, ensure_ascii=False, indent=2)

    print(f"预测结果已保存到 {output_file}")

In [5]:
# Process every JSON file found directly inside data_folder.
# os.listdir returns names in arbitrary order, so they are sorted to keep the
# run log reproducible from one run to the next.
print(f"开始处理 {data_folder} 文件夹下的所有JSON文件...")
for filename in sorted(os.listdir(data_folder)):
    if not filename.endswith('.json'):
        continue
    file_path = os.path.join(data_folder, filename)
    try:
        process_data_file(file_path)
    except Exception as e:
        # Best effort: report the failing file and continue with the others.
        print(f"处理文件 {filename} 时出错: {e}")
print("\n所有文件处理完成！")


开始处理 data 文件夹下的所有JSON文件...

正在处理文件: 2024年国家公务员录用考试《行测》题（副省级网友回忆版）

各题型题数统计 (剔除skip题):
常识判断: 20题
表达理解: 30题
数量关系: 13题
判断推理: 29题

总题数 (剔除skip题): 92
跳过的题数: 43

总体准确率(剔除skip题目): 60.87% (56/92)

各题型准确率:
常识判断: 65.00% (13/20)
表达理解: 80.00% (24/30)
数量关系: 15.38% (2/13)
判断推理: 58.62% (17/29)
预测结果已保存到 results/12gpt-4o\2024年国家公务员录用考试《行测》题（副省级网友回忆版）.json

正在处理文件: 2025年国家公务员录用考试《行测》题（副省级网友回忆版）

各题型题数统计 (剔除skip题):
政治理论: 20题
常识判断: 15题
表达理解: 20题
数量关系: 14题
判断推理: 24题

总题数 (剔除skip题): 93
跳过的题数: 42

总体准确率(剔除skip题目): 68.82% (64/93)

各题型准确率:
政治理论: 80.00% (16/20)
常识判断: 80.00% (12/15)
表达理解: 55.00% (11/20)
数量关系: 28.57% (4/14)
判断推理: 87.50% (21/24)
预测结果已保存到 results/12gpt-4o\2025年国家公务员录用考试《行测》题（副省级网友回忆版）.json

正在处理文件: 2023年国家公务员录用考试《行测》题（副省级网友回忆版）

各题型题数统计 (剔除skip题):
常识判断: 20题
表达理解: 30题
数量关系: 14题
判断推理: 28题

总题数 (剔除skip题): 92
跳过的题数: 43

总体准确率(剔除skip题目): 64.13% (59/92)

各题型准确率:
常识判断: 80.00% (16/20)
表达理解: 56.67% (17/30)
数量关系: 42.86% (6/14)
判断推理: 71.43% (20/28)
预测结果已保存到 results/12gpt-4o\2023年国家公务员录用考试《行测》题（副省级网友回忆版）.json
