In [4]:
import pandas as pd
import json
import os
from pathlib import Path

# Load the key -> encoding tables, one CSV per center (ZS = center1, JM = center2).
key_mappings_zs = pd.read_csv('../data_source/key_encoding_zs.csv')
key_mappings_jm = pd.read_csv('../data_source/key_encodings_jm.csv')

print("Key mappings for center1 (ZS) loaded:")
print(key_mappings_zs.head())
print("\nKey mappings for center2 (JM) loaded:")
print(key_mappings_jm.head())


def _build_key_lookup(frame):
    """Return a dict mapping each value of ``frame['key']`` (as str) to its ``encoding``."""
    # Keys are cast to str so that lookups can be done with string keys.
    string_keys = frame['key'].astype(str)
    return dict(zip(string_keys, frame['encoding']))


mapping_dict_zs = _build_key_lookup(key_mappings_zs)
mapping_dict_jm = _build_key_lookup(key_mappings_jm)

print(f"\nMapping dictionary for center1 created with {len(mapping_dict_zs)} entries")
print(f"Mapping dictionary for center2 created with {len(mapping_dict_jm)} entries")

# Show a few keys from each lookup to confirm they are strings.
print(f"\nSample keys from center1 mapping: {list(mapping_dict_zs)[:5]}")
print(f"Sample keys from center2 mapping: {list(mapping_dict_jm)[:5]}")

def update_json_keys(json_file_path, mapping_dict):
    """Rewrite the keys of a JSON file using ``mapping_dict`` and save a copy.

    Every dict key found at any nesting depth (including dicts nested inside
    lists) is looked up, as a string, in ``mapping_dict``. Keys with a match
    are replaced by the mapped value; keys without one are kept unchanged.
    The converted document is written below the ``updated/`` directory
    (relative to the current working directory), mirroring the input path.

    Args:
        json_file_path: Location of the source JSON file, given as ``str``
            or ``pathlib.Path``.
        mapping_dict: Mapping from string key to its replacement key.

    Returns:
        True when the file was read, converted and written; False when any
        step raised (the error is printed rather than propagated).
    """
    # Broad catch is intentional: this is the per-file boundary of a batch
    # run, and callers rely on the boolean result instead of exceptions.
    try:
        # Accept plain strings as well as Path objects.
        source_path = Path(json_file_path)

        with open(source_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        print(f"\nProcessing {json_file_path}")
        # The root of a JSON document may be a list; only dicts have keys to show.
        if isinstance(data, dict):
            print(f"Original keys sample: {list(data.keys())[:5]}")
            print(f"Original key types: {[type(k) for k in list(data.keys())[:5]]}")

        # Recursively rebuild the structure with translated keys.
        def update_keys(obj):
            if isinstance(obj, dict):
                new_dict = {}
                for key, value in obj.items():
                    # Match on the string form of the key.
                    key_str = str(key)
                    # Fall back to the original key when there is no mapping.
                    new_key = mapping_dict.get(key_str, key)
                    new_dict[new_key] = update_keys(value)

                    # Debug trace of every key visited.
                    if key_str in mapping_dict:
                        print(f"Mapped: {key_str} -> {new_key}")
                    else:
                        print(f"Not found in mapping: {key_str}")

                return new_dict
            elif isinstance(obj, list):
                return [update_keys(item) for item in obj]
            else:
                return obj

        updated_data = update_keys(data)

        if isinstance(updated_data, dict):
            print(f"Updated keys sample: {list(updated_data.keys())[:5]}")

        # Joining "updated" with an absolute path would discard "updated" and
        # point back at the input file, so strip the anchor first.
        relative_path = source_path
        if source_path.is_absolute():
            relative_path = source_path.relative_to(source_path.anchor)

        # Create the output directory, then write the converted copy.
        output_path = Path("updated") / relative_path
        output_path.parent.mkdir(parents=True, exist_ok=True)

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(updated_data, f, ensure_ascii=False, indent=2)

        return True
    except Exception as e:
        print(f"Error processing {json_file_path}: {e}")
        return False

# Convert every JSON file found directly inside the center1/ and center2/
# directories, pairing each directory with its own key lookup.
centers = [
    ('center1', mapping_dict_zs),
    ('center2', mapping_dict_jm),
]
total_processed = 0
total_success = 0

for center_name, mapping_dict in centers:
    center_path = Path(center_name)

    # Skip centers whose directory is absent.
    if not center_path.exists():
        print(f"Directory {center_name}/ not found")
        continue

    json_files = list(center_path.glob('*.json'))
    print(f"\nProcessing {len(json_files)} JSON files in {center_name}/")

    for json_file in json_files:
        total_processed += 1

        # update_json_keys reports failure through its return value.
        if not update_json_keys(json_file, mapping_dict):
            print(f"✗ Failed: {json_file}")
            continue

        total_success += 1
        print(f"✓ Updated: {json_file}")

print(f"\nSummary: {total_success}/{total_processed} files processed successfully")


Key mappings for center1 (ZS) loaded:
   encoding       key
0         1  31214485
1         2  31209905
2         3  31131693
3         4  31138640
4         5  31139858

Key mappings for center2 (JM) loaded:
       key  encoding
0  1011449         1
1  1012811         2
2  1013176         3
3  1013447         4
4  1014157         5

Mapping dictionary for center1 created with 146 entries
Mapping dictionary for center2 created with 53 entries

Sample keys from center1 mapping: ['31214485', '31209905', '31131693', '31138640', '31139858']
Sample keys from center2 mapping: ['1011449', '1012811', '1013176', '1013447', '1014157']

Processing 131 JSON files in center1/

Processing center1/qwen3_32B_divided_check1300_3.json
Original keys sample: ['31214485', '31209905', '31131693', '31138640', '31139858']
Original key types: [<class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>]
Mapped: 31214485 -> 1
Mapped: 31209905 -> 2
Mapped: 31131693 -> 3
Mapped: 31138640 -> 4
Mapped