In [2]:
import os
import json
import requests
from rdflib import Graph, URIRef, Literal, RDF, Namespace, BNode
from rdflib.namespace import XSD, RDFS, OWL

# Namespace definitions used when building the RDF graphs
EX = Namespace("http://example.org/schema#")
WD = Namespace("http://www.wikidata.org/entity/")
WDT = Namespace("http://www.wikidata.org/prop/direct/")
DBR = Namespace("http://dbpedia.org/resource/")

# Entity classes (quantity, ratio, unit, currency, time) whose subjects/objects are
# emitted through create_literal_blank_node instead of being linked to a Wikidata entity.
LITERAL_CLASSES = {"數量", "比例", "單位", "貨幣", "時間"}

# Load a JSON file
def load_json(filename):
    """Read ``filename`` as UTF-8 text and return the parsed JSON document."""
    with open(filename, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

# Fetch an entity's label and description from Wikidata
def get_wikidata_info(qid, languages=("zh-tw", "zh"), timeout=10):
    """Look up the label and description of a Wikidata entity.

    The entity document is fetched from
    ``https://www.wikidata.org/wiki/Special:EntityData/{qid}.json``.

    Args:
        qid: Wikidata entity id used to build the URL and to select the entity
            inside the response. A falsy value skips the request entirely.
        languages: Language codes tried in order; the first one with a
            non-empty ``value`` wins. The default keeps the original
            ``zh-tw`` then ``zh`` preference.
        timeout: Seconds passed to ``requests.get`` so that a stalled
            connection cannot block the whole conversion.

    Returns:
        A ``(label, description)`` tuple. Either element is ``None`` when it
        is unavailable; ``(None, None)`` is returned for a missing ``qid``, a
        non-200 response, or any error (which is printed, not raised).
    """
    if not qid:
        # Nothing to look up; avoid requesting ".../None.json".
        return None, None

    def first_value(terms):
        """Return the first non-empty ``value`` among ``languages``, else None."""
        for code in languages:
            value = (terms.get(code) or {}).get('value')
            if value:
                return value
        return None

    url = f"https://www.wikidata.org/wiki/Special:EntityData/{qid}.json"
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Report the failure instead of silently returning nothing.
            print(f"Wikidata returned HTTP {response.status_code} for {qid}")
            return None, None
        entity = response.json()['entities'].get(qid, {})
        return (
            first_value(entity.get('labels', {})),
            first_value(entity.get('descriptions', {})),
        )
    except Exception as e:
        # Deliberately broad: a failed lookup is reported and treated as
        # "no information" so that one entity cannot abort the conversion.
        print(f"Error fetching Wikidata info for {qid}: {e}")
    return None, None

# Fetch a resource's label and comment from DBpedia
def get_dbpedia_info(uri, timeout=10):
    """Query the DBpedia SPARQL endpoint for a Chinese label and comment.

    Args:
        uri: IRI of the DBpedia resource; it is interpolated into the query
            as ``<uri>``. A falsy value skips the request.
        timeout: Seconds passed to ``requests.get`` so that a stalled
            connection cannot block the whole conversion.

    Returns:
        A ``(label, comment)`` tuple taken from the first result row.
        ``comment`` is ``None`` when the row has none; ``(None, None)`` is
        returned when there is no row, the response is not 200, ``uri`` is
        empty, or an error occurs (which is printed, not raised).
    """
    if not uri:
        # An empty IRI would yield the malformed query "<>".
        return None, None

    # The comment is filtered by language inside the OPTIONAL group: callers
    # tag the returned text as zh-TW, so a comment in an arbitrary language
    # must not be returned. The prefix is declared explicitly rather than
    # relying on the endpoint's predefined prefixes.
    sparql = f"""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?label ?comment WHERE {{
        <{uri}> rdfs:label ?label .
        FILTER (lang(?label) = 'zh' || lang(?label) = 'zh-tw')
        OPTIONAL {{
            <{uri}> rdfs:comment ?comment .
            FILTER (lang(?comment) = 'zh' || lang(?comment) = 'zh-tw')
        }}
    }}
    """
    endpoint = "https://dbpedia.org/sparql"
    try:
        response = requests.get(
            endpoint,
            params={'query': sparql, 'format': 'json'},
            timeout=timeout,
        )
        if response.status_code != 200:
            # Report the failure instead of silently returning nothing.
            print(f"DBpedia returned HTTP {response.status_code} for {uri}")
            return None, None
        results = response.json().get('results', {}).get('bindings', [])
        if results:
            first_row = results[0]
            label = first_row.get('label', {}).get('value')
            comment = first_row.get('comment', {}).get('value')
            return label, comment
    except Exception as e:
        # Deliberately broad: a failed lookup is reported and treated as
        # "no information" so that one entity cannot abort the conversion.
        print(f"Error fetching DBpedia info for {uri}: {e}")
    return None, None

# Add an entity's label and description to the RDF graph
def add_entity_info_to_graph(g, entity_uri, entity_id, dbpedia_uri=None):
    """Attach ``rdfs:label`` / ``rdfs:comment`` for an entity to ``g``.

    Wikidata is asked first via ``get_wikidata_info``. DBpedia is consulted
    through ``get_dbpedia_info`` only when the label or the description is
    still missing, and only the missing piece is taken from it, so an entity
    never receives a second label or comment from the fallback source.

    Args:
        g: Graph that receives the triples (only ``g.add`` is used).
        entity_uri: Node the label and comment are attached to.
        entity_id: Identifier passed to ``get_wikidata_info``.
        dbpedia_uri: Optional IRI of the matching DBpedia resource used for
            the fallback lookup. When omitted, the IRI is built as
            ``http://dbpedia.org/resource/{entity_id}`` as before.
            NOTE(review): DBpedia resource IRIs are normally derived from
            Wikipedia page titles rather than Wikidata ids, so callers that
            know the real DBpedia IRI should pass it here.
    """
    label, description = get_wikidata_info(entity_id)

    if not label or not description:
        fallback_uri = dbpedia_uri or f"http://dbpedia.org/resource/{entity_id}"
        fallback_label, fallback_description = get_dbpedia_info(fallback_uri)
        # Keep whatever Wikidata supplied; fill only the gaps.
        label = label or fallback_label
        description = description or fallback_description

    if label:
        g.add((entity_uri, RDFS.label, Literal(label, lang='zh-TW')))
    if description:
        g.add((entity_uri, RDFS.comment, Literal(description, lang='zh-TW')))

# Create a blank node that carries a literal value
def create_literal_blank_node(g, label, entity_type):
    """Add a typed blank node describing a literal-like entity and return it.

    The node receives three triples: ``rdf:value``, ``rdf:type``
    (``EX[entity_type]``) and ``rdfs:label`` (the original ``label``).

    ``rdf:value`` is an ``xsd:float`` when ``label`` can be read as a number:
      * a label containing "成" has that character removed and the remaining
        number divided by 10 (e.g. "3成" becomes 0.3);
      * a label made of digits with at most one "." is converted directly.
    Otherwise, or when the conversion fails, the label itself is stored as a
    ``zh-TW`` language-tagged literal.

    Args:
        g: Graph that receives the triples (only ``g.add`` is used).
        label: Text of the literal.
        entity_type: Key used to look up the node's class in ``EX``.

    Returns:
        The newly created blank node.
    """
    bnode = BNode()
    try:
        if "成" in label:
            value = Literal(float(label.replace("成", "")) / 10, datatype=XSD.float)
        elif label.replace(".", "", 1).isdigit():
            value = Literal(float(label), datatype=XSD.float)
        else:
            value = Literal(label, lang='zh-TW')
    except (TypeError, ValueError, AttributeError):
        # Only parse failures fall back to plain text: a non-string label
        # (TypeError / AttributeError) or a non-numeric remainder such as
        # "三成" (ValueError). Anything else is a real error and propagates.
        value = Literal(label, lang='zh-TW')
    g.add((bnode, RDF.value, value))
    g.add((bnode, RDF.type, EX[entity_type]))
    g.add((bnode, RDFS.label, Literal(label, lang='zh-TW')))
    return bnode

# Convert the JSON content to Turtle format
def _iri_component(text):
    """Percent-encode the characters that rdflib rejects when serializing an IRI.

    Only the characters ``<>" {}|\\^``` are rewritten, so text that was already
    usable inside an IRI (including non-ASCII text) is returned unchanged.
    """
    return "".join(
        f"%{ord(ch):02X}" if ch in '<>" {}|\\^`' else ch
        for ch in str(text)
    )


def _resolve_term(g, label, qid, dbpedia_uri, entity_type, described_qids):
    """Return the node that represents one side (subject or object) of a relationship.

    * a class listed in ``LITERAL_CLASSES`` becomes a blank node built by
      ``create_literal_blank_node``;
    * otherwise a QID becomes the ``WD`` entity, optionally linked to
      ``dbpedia_uri`` with ``owl:sameAs``;
    * otherwise the label is used as a ``zh-TW`` literal.

    ``described_qids`` records QIDs whose label/description lookup has already
    been attempted for this graph, so each entity is fetched at most once.
    """
    if entity_type in LITERAL_CLASSES:
        return create_literal_blank_node(g, label, entity_type)
    if qid:
        node = WD[qid]
        if qid not in described_qids:
            described_qids.add(qid)
            add_entity_info_to_graph(g, node, qid)
        if dbpedia_uri:
            g.add((node, OWL.sameAs, URIRef(dbpedia_uri)))
        return node
    return Literal(label, lang='zh-TW')


def convert_json_to_turtle(data, filename, output_dir):
    """Build an RDF graph from one parsed JSON document and save it as Turtle.

    For every entry in ``data['review']['relationships']`` and every event in
    the entry's ``'事件'`` list, an ``EX.Event`` node is created. Each item of
    the entry's ``'關係列表'`` is attached to that event as an ``EX.Triple``
    blank node with ``EX.subject``, ``EX.predicate`` and ``EX.object``.

    Args:
        data: Parsed JSON document with the structure described above.
        filename: Document name without extension; used in the event and
            document IRIs and as the stem of the output file.
        output_dir: Directory for ``{filename}.ttl``; created when missing.
    """
    g = Graph()
    for prefix, namespace in (("ex", EX), ("wd", WD), ("wdt", WDT), ("rdfs", RDFS), ("dbr", DBR)):
        g.bind(prefix, namespace)

    # File and event names are free text. Characters that rdflib cannot
    # serialize inside an IRI are escaped so that saving the graph does not fail.
    doc_base = f"http://example.org/doc/{_iri_component(filename)}"
    document_uri = URIRef(f"{doc_base}.json")

    # The same entity usually appears in many relationships and every
    # relationship is visited once per event; remember which QIDs have been
    # looked up so each one costs at most one round of HTTP requests.
    described_qids = set()

    for entry in data['review']['relationships']:
        for event in entry['事件']:
            event_uri = URIRef(f"{doc_base}#event_{_iri_component(event)}")
            g.add((event_uri, RDF.type, EX.Event))
            g.add((event_uri, EX.describedIn, document_uri))
            g.add((event_uri, RDFS.label, Literal(event, lang='zh-TW')))
            g.add((event_uri, RDFS.comment, Literal(f"事件：{event}", lang='zh-TW')))

            for relationship in entry['關係列表']:
                subj_label = relationship.get('主體')
                obj_label = relationship.get('客體')

                subj_node = _resolve_term(
                    g,
                    subj_label,
                    relationship.get('主體 QID'),
                    relationship.get('主體 DBpedia'),
                    relationship.get('主體類別'),
                    described_qids,
                )
                obj_node = _resolve_term(
                    g,
                    obj_label,
                    relationship.get('客體 QID'),
                    relationship.get('客體 DBpedia'),
                    relationship.get('客體類別'),
                    described_qids,
                )

                triple_node = BNode()
                g.add((triple_node, RDF.type, EX.Triple))
                g.add((triple_node, EX.subject, subj_node))

                predicate = relationship.get('p-items')
                if predicate:
                    g.add((triple_node, EX.predicate, WDT[predicate]))
                else:
                    # Do not mint a predicate IRI from a missing property id.
                    print(
                        f"Relationship without 'p-items' in {filename} "
                        f"(subject={subj_label!r}, object={obj_label!r}); predicate omitted"
                    )

                g.add((triple_node, EX.object, obj_node))
                g.add((event_uri, EX.hasTriple, triple_node))

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(output_dir, exist_ok=True)

    ttl_filename = os.path.join(output_dir, f"{filename}.ttl")
    g.serialize(destination=ttl_filename, format="turtle", encoding="utf-8")
    print(f"RDF Turtle file saved as {ttl_filename}")

# Convert every JSON file in a folder
def convert_json_folder_to_turtle(input_folder, output_folder):
    """Run ``convert_json_to_turtle`` on each ``*.json`` entry of ``input_folder``.

    Entries whose name does not end in ``.json`` are ignored. Each matching
    file is parsed with ``load_json`` and converted using its name without the
    ``.json`` suffix; the results are written to ``output_folder``.
    """
    suffix = ".json"
    for entry_name in os.listdir(input_folder):
        if not entry_name.endswith(suffix):
            continue
        document = load_json(os.path.join(input_folder, entry_name))
        convert_json_to_turtle(document, entry_name[:-len(suffix)], output_folder)


In [None]:

# Input and output folder paths
# NOTE(review): the saved cell output reports files under ./docs/output/9_rdf/event_0426,
# which does not match output_folder — it appears to come from an earlier run; confirm and re-run.
input_folder = "./docs/output/5_wiki/v7"  # folder containing the source JSON files
output_folder = "./docs/output/6_rdf/event"  # folder that receives the converted Turtle files

# Run the conversion
convert_json_folder_to_turtle(input_folder, output_folder)

RDF Turtle file saved as ./docs/output/9_rdf/event_0426\「友仔」是什麼？光復初期臺北地區非法組織調查報告告訴您(37).ttl
RDF Turtle file saved as ./docs/output/9_rdf/event_0426\「回首向來蕭瑟處，歸去，也無風雨也無晴」—民國38年國軍遷臺紀事(30).ttl
RDF Turtle file saved as ./docs/output/9_rdf/event_0426\「威海衛」租借地的收回(42).ttl
RDF Turtle file saved as ./docs/output/9_rdf/event_0426\「快速」發展的年代：麥克阿瑟公路通車一甲子(203).ttl
RDF Turtle file saved as ./docs/output/9_rdf/event_0426\「日暮鄉關何處是」─「留越國軍」的返台路(31).ttl
RDF Turtle file saved as ./docs/output/9_rdf/event_0426\「賽德克．巴萊」重現的霧社事件(17).ttl
RDF Turtle file saved as ./docs/output/9_rdf/event_0426\「醫者仁也‧仁者人也」─光復初期臺灣醫學教育(29).ttl
RDF Turtle file saved as ./docs/output/9_rdf/event_0426\ㄋㄟㄋㄟ補給站：美援牛奶的供應(127).ttl
RDF Turtle file saved as ./docs/output/9_rdf/event_0426\一紙命令，臺灣命運大不同─中國台灣省行政長官公署警備總司令部第一號令(1).ttl
RDF Turtle file saved as ./docs/output/9_rdf/event_0426\不用手機也可哈拉一整天─45年度公用電話擴充計畫(2).ttl
RDF Turtle file saved as ./docs/output/9_rdf/event_0426\不能少了你—臺灣光復後首次戶口清查(35).ttl
RDF Turtle file saved as ./docs/output/9_rdf/ev