In [1]:
import os
import json
import csv
#import shutil

def _split_label(value):
    """Split a ``name|tag`` label into its first two ``|``-separated fields.

    Returns ``(value, 'null')`` when ``value`` contains no ``|``. Fields
    beyond the second are ignored, so a label with extra separators no
    longer breaks tuple unpacking.
    """
    if "|" not in value:
        return value, 'null'
    parts = value.split("|")
    return parts[0], parts[1]


def process_relationship(writer, filename, relationship):
    """Write one CSV row per (event, relation) pair of a relationship record.

    Args:
        writer: Object exposing ``writerow(list)`` (e.g. a ``csv.writer``).
        filename: Value written to the first column of every row.
        relationship: Dict holding the events under '事件' and the relation
            items under '關係列表' (preferred) or '關係'.

    Raises:
        KeyError: If a relation item lacks '主體', '客體' or '關係'.
    """
    event_list = relationship.get('事件', [])
    # A bare string would otherwise be iterated character by character,
    # producing one row per character; treat it as a single event.
    if isinstance(event_list, str):
        event_list = [event_list]

    relation_key = '關係列表' if '關係列表' in relationship else '關係'
    relation_items = relationship.get(relation_key, [])

    for event in event_list:
        for rel in relation_items:
            subject_name, subject_type = _split_label(rel['主體'])
            object_name, object_type = _split_label(rel['客體'])

            # Dict-valued originals are serialised to JSON text so they fit
            # in a single CSV cell; other values are written as they are.
            main_original = rel.get('主體original', 'null')
            if isinstance(main_original, dict):
                main_original = json.dumps(main_original, ensure_ascii=False)

            object_original = rel.get('客體original', 'null')
            if isinstance(object_original, dict):
                object_original = json.dumps(object_original, ensure_ascii=False)

            # The relation field may carry a second "|"-separated part that
            # goes into the P-items column.
            relation, p_items = _split_label(rel['關係'])

            # NOTE(review): the same 'human' / 'human_review' values fill both
            # the S_* and O_* human columns -- confirm whether separate
            # subject/object keys were intended.
            writer.writerow([
                filename, event,
                subject_name, subject_type,
                relation, p_items,
                object_name, object_type,
                rel.get('主體review', 'null'),
                main_original,
                rel.get('human', 'null'),
                rel.get('human_review', 'null'),
                rel.get('客體review', 'null'),
                object_original,
                rel.get('human', 'null'),
                rel.get('human_review', 'null')
            ])

def _iter_relationship_records(content):
    """Lazily yield the relationship records found in one file's parsed JSON.

    Recognised layouts:
      * dict: {"review": {"relationships": [...]}}   -> every entry, unfiltered
      * dict: {"review": [...]}                      -> filtered entries
      * dict: {"relationships": [...]}               -> filtered entries
      * list of dicts, each either
          {"review": {"relationships": [...]}}       -> every entry, unfiltered
          or itself a record with '事件' plus '關係列表' / '關係'
    "Filtered" means the entry contains '事件' and one of '關係列表' / '關係'.
    Any other layout yields nothing.
    """
    def _looks_like_record(candidate):
        return '事件' in candidate and ('關係列表' in candidate or '關係' in candidate)

    if isinstance(content, dict):
        if 'review' in content:
            review = content['review']
            if isinstance(review, dict) and 'relationships' in review:
                yield from review['relationships']
            elif isinstance(review, list):
                yield from (entry for entry in review if _looks_like_record(entry))
        elif 'relationships' in content and isinstance(content['relationships'], list):
            yield from (entry for entry in content['relationships'] if _looks_like_record(entry))
        return

    if not isinstance(content, list):
        return

    for item in content:
        if not isinstance(item, dict):
            continue
        if 'review' in item and isinstance(item['review'], dict) and 'relationships' in item['review']:
            yield from item['review']['relationships']
        elif _looks_like_record(item):
            yield item


def json_to_csv(json_data, csv_file):
    """Flatten the loaded JSON files into a single CSV file.

    Args:
        json_data: Iterable of dicts with keys "filename" and "data".
        csv_file: Path of the CSV to create; it is opened in write mode with
            the 'utf-8-sig' encoding.

    The header row is always written; each relationship record found in a
    file's data is then passed to ``process_relationship`` to emit its rows.
    """
    header = [
        "檔名", "事件", "主體", "主體類別", "關係", "P-items", "客體", "客體類別",
        "S_review", "S_original", "S_human_review", "S_human_original",
        "O_review", "O_original", "O_human_review", "O_human_original"
    ]

    with open(csv_file, mode='w', newline='', encoding='utf-8-sig') as csv_handle:
        writer = csv.writer(csv_handle)
        writer.writerow(header)

        for file_data in json_data:
            filename = file_data["filename"]
            content = file_data["data"]
            for record in _iter_relationship_records(content):
                process_relationship(writer, filename, record)

def read_all_json_files(directory):
    """Load every ``*.json`` file directly inside ``directory``.

    Args:
        directory: Path of the folder to scan (not recursive).

    Returns:
        A list of ``{"filename": <name>, "data": <parsed JSON>}`` dicts in
        sorted filename order. Files that cannot be opened or parsed are
        reported on stdout and skipped.

    Note: a disabled variant of this function also moved each processed file
    into a separate "done" directory.
    """
    json_files = []
    # os.listdir returns entries in arbitrary order; sort so the resulting
    # list (and the CSV built from it) is reproducible across runs/machines.
    for filename in sorted(os.listdir(directory)):
        if filename.endswith(".json"):
            file_path = os.path.join(directory, filename)
            try:
                # 'utf-8-sig' reads plain UTF-8 and also strips a leading BOM,
                # which json.load would otherwise reject.
                with open(file_path, 'r', encoding='utf-8-sig') as f:
                    json_data = json.load(f)
                    print(f"讀取檔案 {filename}")
                    json_files.append({"filename": filename, "data": json_data})

            except Exception as e:
                # Best effort: report the failure and continue with the rest.
                print(f"無法讀取 {filename}: {e}")

    return json_files

In [5]:
# Main flow: load every JSON file from the input directory and export one CSV.
directory_path = "./docs/output/4_llm_resolution/llama_ft_gemini_v3/"
# Alternative input directory (disabled):
#directory_path = "./docs/output/4_llm_resolution/llama__feature_gemini"
# Target for the disabled "move processed files" step:
#done_path = "./docs/output/4_llm_resolution/llama_v3/done"
csv_output_path = "./docs/output/4_llm_resolution/review_v3.csv"

json_data = read_all_json_files(directory_path)
if not json_data:
    # Nothing was loaded: no JSON files were found, or none could be read.
    print("未找到任何 JSON 檔案或無法讀取檔案")
else:
    json_to_csv(json_data, csv_output_path)


讀取檔案 228事件(20).json
讀取檔案 「友仔」是什麼？光復初期臺北地區非法組織調查報告告訴您(37).json
讀取檔案 「回首向來蕭瑟處，歸去，也無風雨也無晴」—民國38年國軍遷臺紀事(30).json
讀取檔案 「威海衛」租借地的收回(42).json
讀取檔案 「快速」發展的年代：麥克阿瑟公路通車一甲子(203).json
讀取檔案 「日暮鄉關何處是」─「留越國軍」的返台路(31).json
讀取檔案 「賽德克．巴萊」重現的霧社事件(17).json
讀取檔案 「醫者仁也‧仁者人也」─光復初期臺灣醫學教育(29).json
讀取檔案 ㄋㄟㄋㄟ補給站：美援牛奶的供應(127).json
讀取檔案 一紙命令，臺灣命運大不同─中國台灣省行政長官公署警備總司令部第一號令(1).json
讀取檔案 不用手機也可哈拉一整天─45年度公用電話擴充計畫(2).json
讀取檔案 不能少了你—臺灣光復後首次戶口清查(35).json
讀取檔案 世界人權日(18).json
