In [8]:
import json
import glob
import random

def shuffle_options(question):
    # 提取原始选项和答案
    original_options = question["options"]
    original_answer = question["answer"]
    
    # 将选项内容存入列表并打乱顺序
    option_values = list(original_options.values())
    random.shuffle(option_values)
    
    # 生成新的选项字典（保持A/B/C/D顺序，但内容随机）
    new_options = {
        "A": option_values[0],
        "B": option_values[1],
        "C": option_values[2],
        "D": option_values[3]
    }
    
    # 找到正确答案对应的新选项标签
    correct_value = original_options[original_answer]
    new_answer = [k for k, v in new_options.items() if v == correct_value][0]
    
    # 更新题目数据
    question["options"] = new_options
    question["answer"] = new_answer
    return question

def process_questions(input_files, output_dir):
    # 创建输出目录
    import os
    os.makedirs(output_dir, exist_ok=True)
    
    for file_path in input_files:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
            
            # 处理每个题目
            processed_questions = [shuffle_options(q) for q in data["questions"]]
            
            # 保持文件原始结构
            new_data = {
                "title": data["title"],
                "skip": data["skip"],
                "questions": processed_questions
            }
            
            # 保存到新文件
            output_path = os.path.join(output_dir, os.path.basename(file_path))
            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(new_data, f, ensure_ascii=False, indent=2)

if __name__ == "__main__":
    # 设置路径
    input_files = glob.glob('/kaggle/input/dataset111/guangdong.json')
    output_dir = "/kaggle/working/shuffled_questions"
    
    # 执行处理
    process_questions(input_files, output_dir)
    print(f"处理完成！输出文件保存在 {output_dir} 目录")

IndexError: list index out of range

In [9]:
import json
import os
import glob
import random

def validate_question(question):
    """验证题目结构是否合法"""
    required_keys = ["type", "id", "answer", "question", "options"]
    if not all(k in question for k in required_keys):
        return False
    
    options = question["options"]
    if len(options) != 4 or any(k not in options for k in ["A", "B", "C", "D"]):
        return False
    
    if question["answer"] not in ["A", "B", "C", "D"]:
        return False
    
    return True

def shuffle_options(question):
    """安全打乱选项顺序"""
    if not validate_question(question):
        print(f"跳过无效题目：ID {question.get('id')}")
        return question
    
    original_options = question["options"]
    original_answer = question["answer"]
    
    # 提取并打乱选项值
    option_values = list(original_options.values())
    random.shuffle(option_values)
    
    # 构建新选项字典
    new_options = {
        "A": option_values[0],
        "B": option_values[1],
        "C": option_values[2],
        "D": option_values[3]
    }
    
    # 查找正确答案
    try:
        correct_value = original_options[original_answer]
        new_answer = next(k for k, v in new_options.items() if v == correct_value)
    except (KeyError, StopIteration):
        print(f"答案匹配失败：ID {question['id']}，保留原答案")
        return question
    
    # 更新数据
    return {
        **question,
        "options": new_options,
        "answer": new_answer
    }

def process_single_file(input_path, output_dir):
    """处理单个文件"""
    try:
        with open(input_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except Exception as e:
        print(f"文件读取失败：{input_path} - {str(e)}")
        return

    # 处理题目
    processed = []
    error_count = 0
    for q in data.get("questions", []):
        try:
            processed.append(shuffle_options(q))
        except Exception as e:
            error_count += 1
            print(f"处理失败：ID {q.get('id')} - {str(e)}")
            processed.append(q)

    # 保持原始结构
    new_data = {
        "title": data.get("title", ""),
        "skip": data.get("skip", []),
        "questions": processed
    }

    # 保存文件
    output_path = os.path.join(output_dir, os.path.basename(input_path))
    try:
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(new_data, f, ensure_ascii=False, indent=2)
        print(f"成功处理：{input_path} → {output_path} (错误数：{error_count})")
    except Exception as e:
        print(f"文件保存失败：{output_path} - {str(e)}")

if __name__ == "__main__":
    # 配置路径
    input_files = glob.glob('/kaggle/input/dataset111/guangdong.json')  # 支持多文件匹配
    output_dir = "/kaggle/working/shuffled_questions"
    
    # 创建输出目录
    os.makedirs(output_dir, exist_ok=True)
    
    # 处理所有文件
    print(f"开始处理 {len(input_files)} 个文件...")
    for file_path in input_files:
        process_single_file(file_path, output_dir)
    
    print(f"处理完成！输出目录：{output_dir}")

开始处理 1 个文件...
跳过无效题目：ID 1
跳过无效题目：ID 2
跳过无效题目：ID 3
跳过无效题目：ID 4
跳过无效题目：ID 5
跳过无效题目：ID 11
跳过无效题目：ID 12
跳过无效题目：ID 13
跳过无效题目：ID 14
跳过无效题目：ID 15
成功处理：/kaggle/input/dataset111/guangdong.json → /kaggle/working/shuffled_questions/guangdong.json (错误数：0)
处理完成！输出目录：/kaggle/working/shuffled_questions
