In [None]:
import os
import json
import shutil
import re

def is_kanji(char):
    """
    Tell whether ``char`` is exactly one Kanji (CJK ideograph).

    Only strings consisting of a single code point in U+4E00-U+9FAF or
    U+3400-U+4DBF qualify; the empty string and multi-character strings
    yield False.

    :param char: String to test
    :return: True when ``char`` is a single Kanji, otherwise False
    """
    kanji_pattern = re.compile(r'[\u4e00-\u9faf\u3400-\u4dbf]')
    match = kanji_pattern.fullmatch(char)
    return match is not None

def classify_kanji(input_root, output_root):
    """
    Classify characters into Kanji and non-Kanji based on JSON files.

    Walks ``input_root`` recursively. Each ``*.json`` file is expected to hold
    a JSON object whose ``character`` entry is a string. The file is copied to
    ``<output_root>/<relative dir>/kanji`` or ``.../no_kanji`` depending on
    ``is_kanji(character)``. Image files sitting next to the JSON file with
    the same base name and a ``.png``, ``.jpg`` or ``.jpeg`` extension are
    copied alongside it.

    Files that cannot be decoded or parsed, that do not contain a JSON object,
    or whose ``character`` is missing, blank or not a string are reported on
    stdout and skipped.

    :param input_root: Root directory containing JSON files (original structure)
    :param output_root: Root directory for classified output
    :return: None
    """
    json_suffix = '.json'
    img_extensions = ('.png', '.jpg', '.jpeg')
    output_abs = os.path.abspath(output_root)

    for root, dirs, files in os.walk(input_root):
        # If the output tree lives inside input_root, never descend into it;
        # otherwise results of an earlier run would be classified again.
        dirs[:] = [
            d for d in dirs
            if os.path.abspath(os.path.join(root, d)) != output_abs
        ]

        # The relative location is the same for every file in this directory.
        relative_path = os.path.relpath(root, input_root)

        for filename in files:
            if not filename.endswith(json_suffix):
                continue

            json_path = os.path.join(root, filename)

            # Read JSON file
            try:
                with open(json_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except (json.JSONDecodeError, UnicodeDecodeError):
                print(f"Invalid JSON file: {json_path}")
                continue

            # Valid JSON may still be a list/number/etc., and 'character' may
            # hold a non-string value; treat both as invalid data, not a crash.
            char = data.get('character', '') if isinstance(data, dict) else ''
            char = char.strip() if isinstance(char, str) else ''
            if not char:
                print(f"Empty character or invalid data: {json_path}")
                continue

            # Determine classification
            category = 'kanji' if is_kanji(char) else 'no_kanji'

            # Build output path (preserve original structure, add category folder)
            output_dir = os.path.join(output_root, relative_path, category)
            os.makedirs(output_dir, exist_ok=True)

            # Copy JSON file to corresponding category folder
            shutil.copy2(json_path, os.path.join(output_dir, filename))

            # Optional: copy the matching image file (same base name, different
            # extension). Only the trailing '.json' of the file name is
            # swapped, so directory names containing '.json' are left intact.
            stem = filename[:-len(json_suffix)]
            for ext in img_extensions:
                img_path = os.path.join(root, stem + ext)
                if os.path.exists(img_path):
                    shutil.copy2(img_path, os.path.join(output_dir, stem + ext))

    print("Classification complete! Results saved in:", output_root)

# Example run: point both paths at real directories before executing.
input_root = "path/to/your/json_files"  # Directory tree holding the source JSON files
output_root = "path/to/output_classified"  # Directory tree that receives the classified copies
classify_kanji(input_root=input_root, output_root=output_root)

Classification complete! Results saved in: path/to/output_classified
