In [1]:
import json

# 載入本地 hotpotqa JSON 檔
with open("hotpot_dev_distractor_v1.json", "r", encoding="utf-8") as f:
    dataset = json.load(f)

print(f"總共有 {len(dataset)} 筆資料")


總共有 7405 筆資料


In [2]:
import sqlite3

conn = sqlite3.connect("Dataset/feverous_wikiv1.db")
cur = conn.cursor()

# 看看有什麼 Table
cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
print(cur.fetchall())

# 看看 wiki 表格的欄位
cur.execute("PRAGMA table_info(wiki);")
print(cur.fetchall())


[('wiki',)]
[(0, 'id', '', 0, None, 1), (1, 'data', 'json', 0, None, 0)]


In [1]:
import sqlite3, json, pprint, textwrap

DB_PATH = "Dataset/feverous_wikiv1.db"   # ← 你的檔案路徑
conn = sqlite3.connect(DB_PATH)
cur  = conn.cursor()

# 看看前 5 個 page_id
for row in cur.execute("SELECT id FROM wiki LIMIT 5"):
    print(row[0])



! (Cláudia Pascoal album)
! (The Dismemberment Plan album)
! (The Song Formerly Known As)
! (Trippie Redd album)


In [2]:
page_id = "Wolfgang_Niedecken"           # 任選一個
row = cur.execute("SELECT data FROM wiki WHERE id = ?", (page_id,)).fetchone()
page_json = json.loads(row[0])

# 印出所有 key（就是 element_id）
print(list(page_json.keys())[:20])       # 只列前 20 個看看


TypeError: 'NoneType' object is not subscriptable

In [3]:
# 找所有包含 Wolfgang 的頁面
for (pid,) in cur.execute("SELECT id FROM wiki WHERE id LIKE 'Wolfgang %'"):
    print(pid)


Wolfgang (album)
Wolfgang (band)
Wolfgang (disambiguation)
Wolfgang (wrestler)
Wolfgang A. Tomé
Wolfgang Abel and Marco Furlan
Wolfgang Abendroth
Wolfgang Ablinger-Sperrhacke
Wolfgang Abraham
Wolfgang Achtner
Wolfgang Albers
Wolfgang Albers (police president)
Wolfgang Albers (politician)
Wolfgang Alexander Thomas-San-Galli
Wolfgang Altenburg
Wolfgang Altmann
Wolfgang Aly
Wolfgang Amadeus Mozart
Wolfgang Amadeus Mozart and Prague
Wolfgang Amadeus Mozart and the Catholic Church
Wolfgang Amadeus Mozart in popular culture
Wolfgang Amadeus Phoenix
Wolfgang April
Wolfgang Assbrock
Wolfgang Auderer
Wolfgang Auhagen
Wolfgang Bahro
Wolfgang Baldus
Wolfgang Bartels
Wolfgang Bartels (politician, born 1903)
Wolfgang Barthels
Wolfgang Bauer
Wolfgang Bauer (journalist)
Wolfgang Bauer (physicist)
Wolfgang Baumgart
Wolfgang Baur
Wolfgang Becker
Wolfgang Becker (editor)
Wolfgang Behringer
Wolfgang Beltracchi
Wolfgang Benkert
Wolfgang Benz
Wolfgang Bergold
Wolfgang Bernhard
Wolfgang Bernhard Fränkel
W

In [9]:
import re, sqlite3, json

DB_PATH = "Dataset/feverous_wikiv1.db"
conn    = sqlite3.connect(DB_PATH)
cur     = conn.cursor()

# ----------------  關鍵函式  ----------------
def split_content_id(cid: str):
    """
    解析 FEVEROUS content_id：
      Wolfgang_Niedecken_sentence_0   ->  (Wolfgang Niedecken, sentence_0)
      Wolfgang_Niedecken_cell_0_4_1   ->  (Wolfgang Niedecken, cell_0_4_1)
    """
    m = re.search(r'_(sentence|cell|header)_(.+)', cid)
    if not m:
        raise ValueError(f"Un-recognized content_id: {cid}")

    page_raw   = cid[:m.start()]                   # Wolfgang_Niedecken
    element_id = f"{m.group(1)}_{m.group(2)}"     # sentence_0 / cell_0_4_1 ...
    page_id    = page_raw.replace("_", " ")       # 換成空白 → DB 的主鍵

    return page_id, element_id


def fetch_content(cid: str, con=conn) -> str:
    # ① 拆 content_id
    page_id, element_id = split_content_id(cid)

    # ② 取整頁 JSON
    row = con.execute("SELECT data FROM wiki WHERE id = ?", (page_id,)).fetchone()
    if not row:
        return f"[找不到頁面 {page_id}]"

    data = json.loads(row[0])

    # ③ 先精確命中
    if element_id in data:
        return data[element_id]

    # ④ fallback：若是 cell → 回傳整格或第一句
    if element_id.startswith("cell_"):
        base = "_".join(element_id.split("_")[:3])          # cell_0_4
        for alt in (base, base + "_0"):                     # cell_0_4 → cell_0_4_0
            if alt in data:
                return data[alt]

    # ⑤ 最終找不到 → 明確標示缺失
    return f"[元素 {element_id} 在當前 Wiki dump 中不存在]"

# -------------------------------------------


# ▶︎ 測試三段 evidence
for cid in [
    "Wolfgang_Niedecken_sentence_0",
    "Wolfgang_Niedecken_cell_0_4_1",
    "Wolfgang_Niedecken_sentence_1",
]:
    print("=== ", cid)
    print(fetch_content(cid), "\n")


===  Wolfgang_Niedecken_sentence_0
Wolfgang Niedecken (German pronunciation: [[Standard_German|[ˈvɔlfɡaŋ ˈniːdɛkn̩]], Colognian pronunciation: [[Colognian|[ˈvolfjaŋ ˈnidɛkˑən]]; born 30 March 1951) is a German singer, musician, and visual artist. 

===  Wolfgang_Niedecken_cell_0_4_1
[元素 cell_0_4_1 在當前 Wiki dump 中不存在] 

===  Wolfgang_Niedecken_sentence_1
He founded the Kölsch speaking rock group [[BAP_(German_band)|BAP]] at the end of the 1970s. 



In [11]:
import asyncio
from tools.sqlite_search_testable import SQLiteSearchTestable

async def test_sqlite_search():
    tool = SQLiteSearchTestable(db_path="C:/Users/USER/Downloads/Test_Agent/Test_5/Dataset/feverous_wikiv1.db")
    await tool.setup()

    page_id = "Wolfgang_Niedecken"
    element_id = "Wolfgang Niedecken_sentence_0"

    result = await tool.search_feverous(page_id, element_id)
    print(f"搜尋結果：{result}")

    await tool.cleanup()

if __name__ == "__main__":
    asyncio.run(test_sqlite_search())


OperationalError: no such table: data

In [12]:
import sqlite3

class SQLiteSearchSimple:
    def __init__(self, db_path):
        self.db_path = db_path
        self.conn = None

    def connect(self):
        self.conn = sqlite3.connect(self.db_path)

    def close(self):
        if self.conn:
            self.conn.close()

    def search_feverous(self, page_id: str, element_id: str) -> str:
        # 解析 element_id
        parts = element_id.split("_")
        if len(parts) < 3:
            raise ValueError(f"Invalid element_id format: {element_id}")

        element_type = parts[1]
        number_id = "_".join(parts[2:])  # 有些是 0_1 這種
        table = ""
        id_col = ""
        text_col = "text"

        if element_type == "sentence":
            table = "sentences"
            id_col = "sentence_id"
        elif element_type == "cell" or element_type == "header_cell":
            table = "cells"
            id_col = "cell_id"
        else:
            raise ValueError(f"Unsupported element type: {element_type}")

        query = f"""
        SELECT {text_col} FROM {table}
        WHERE page_id = ? AND {id_col} = ?
        """

        cursor = self.conn.cursor()
        cursor.execute(query, (page_id, number_id))
        row = cursor.fetchone()
        if row:
            return row[0]
        else:
            return "NOT FOUND"

# 使用方法：
if __name__ == "__main__":
    db_path = "C:/Users/USER/Downloads/Test_Agent/Test_5/Dataset/feverous_wikiv1.db"
    tool = SQLiteSearchSimple(db_path)
    tool.connect()

    page_id = "Wolfgang Niedecken"
    element_id = "Wolfgang Niedecken_sentence_0"

    result = tool.search_feverous(page_id, element_id)
    print(f"搜尋結果：{result}")

    tool.close()


OperationalError: no such table: sentences

In [13]:
import sqlite3

def list_tables(db_path):
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = cursor.fetchall()
    conn.close()
    return [table[0] for table in tables]

# 測試
db_path = "C:/Users/USER/Downloads/Test_Agent/Test_5/Dataset/feverous_wikiv1.db"
tables = list_tables(db_path)
print("資料庫裡有以下表：")
for table in tables:
    print(f"- {table}")


資料庫裡有以下表：
- wiki


In [14]:
import sqlite3

def get_table_schema(db_path, table_name):
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    cursor.execute(f"PRAGMA table_info({table_name});")
    columns = cursor.fetchall()
    conn.close()
    return [(col[1], col[2]) for col in columns]

# 測試
db_path = "C:/Users/USER/Downloads/Test_Agent/Test_5/Dataset/feverous_wikiv1.db"
columns = get_table_schema(db_path, "wiki")
print(f"wiki 表的欄位有：")
for col_name, col_type in columns:
    print(f"- {col_name} ({col_type})")


wiki 表的欄位有：
- id ()
- data (json)


In [15]:
import sqlite3
import json

def get_one_example(db_path):
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    cursor.execute("SELECT id, data FROM wiki LIMIT 1;")
    row = cursor.fetchone()
    conn.close()
    if row:
        id_value, data_json = row
        parsed_data = json.loads(data_json)
        return id_value, parsed_data
    return None, None

# 測試
db_path = "C:/Users/USER/Downloads/Test_Agent/Test_5/Dataset/feverous_wikiv1.db"
id_value, parsed_data = get_one_example(db_path)
print(f"id: {id_value}")
print("data內容的一小部分：")
print(json.dumps(parsed_data, indent=2)[:1000])  # 只印出前1000字避免太長


id: Acuitzeramo, Michoacán
data內容的一小部分：
{
  "order": [
    "sentence_0",
    "list_0",
    "section_0",
    "sentence_1",
    "sentence_2",
    "sentence_3",
    "sentence_4",
    "sentence_5",
    "sentence_6",
    "sentence_7",
    "sentence_8",
    "sentence_9",
    "sentence_10",
    "sentence_11",
    "sentence_12",
    "sentence_13",
    "sentence_14",
    "sentence_15",
    "sentence_16",
    "sentence_17",
    "sentence_18",
    "sentence_19",
    "sentence_20",
    "sentence_21",
    "sentence_22",
    "sentence_23",
    "sentence_24",
    "sentence_25",
    "sentence_26",
    "sentence_27",
    "sentence_28",
    "sentence_29",
    "sentence_30",
    "sentence_31",
    "sentence_32",
    "sentence_33",
    "sentence_34",
    "sentence_35",
    "sentence_36",
    "sentence_37",
    "sentence_38",
    "sentence_39",
    "sentence_40",
    "sentence_41",
    "sentence_42",
    "sentence_43",
    "sentence_44",
    "section_1",
    "sentence_45",
    "sentence_46",
    "sentence

In [16]:
import sqlite3
import json

class SQLiteSearchJSON:
    def __init__(self, db_path):
        self.db_path = db_path
        self.conn = None

    def connect(self):
        self.conn = sqlite3.connect(self.db_path)

    def close(self):
        if self.conn:
            self.conn.close()

    def search_feverous(self, page_id: str, element_id: str) -> str:
        query = """
        SELECT data FROM wiki
        WHERE id = ?
        """

        cursor = self.conn.cursor()
        cursor.execute(query, (page_id,))
        row = cursor.fetchone()
        if not row:
            return "NOT FOUND"

        data_json = row[0]
        data = json.loads(data_json)

        # ⭐️ 這裡把 element_id 的 page_id 拿掉
        if not element_id.startswith(page_id):
            return "Invalid element_id for page_id"
        pure_element_id = element_id[len(page_id) + 1:]  # +1 是因為有一個 "_" 需要跳過

        # sentence, cell, header_cell
        if pure_element_id.startswith("sentence"):
            sentences = data.get("sentences", {})
            if pure_element_id in sentences:
                return sentences[pure_element_id].get("text", "NOT FOUND")
        elif pure_element_id.startswith("cell"):
            cells = data.get("cells", {})
            if pure_element_id in cells:
                return cells[pure_element_id].get("text", "NOT FOUND")
        elif pure_element_id.startswith("header_cell"):
            headers = data.get("headers", {})
            if pure_element_id in headers:
                return headers[pure_element_id].get("text", "NOT FOUND")

        return "NOT FOUND"

# 使用方法
if __name__ == "__main__":
    db_path = "C:/Users/USER/Downloads/Test_Agent/Test_5/Dataset/feverous_wikiv1.db"
    tool = SQLiteSearchJSON(db_path)
    tool.connect()

    page_id = "Acuitzeramo, Michoacán"
    element_id = "Acuitzeramo, Michoacán_sentence_0"

    result = tool.search_feverous(page_id, element_id)
    print(f"搜尋結果：{result}")

    tool.close()


搜尋結果：NOT FOUND


In [17]:
import sqlite3

def check_page_id(db_path, page_id):
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    cursor.execute("SELECT 1 FROM wiki WHERE id = ?", (page_id,))
    result = cursor.fetchone()
    conn.close()
    return result is not None

# 測試
db_path = "C:/Users/USER/Downloads/Test_Agent/Test_5/Dataset/feverous_wikiv1.db"
page_id = "Acuitzeramo, Michoacán"
print(check_page_id(db_path, page_id))  # 會印 True or False


True


In [18]:
import sqlite3
import json

class SQLiteSearchJSON:
    def __init__(self, db_path):
        self.db_path = db_path
        self.conn = None

    def connect(self):
        self.conn = sqlite3.connect(self.db_path)

    def close(self):
        if self.conn:
            self.conn.close()

    def search_feverous(self, page_id: str, element_id: str) -> str:
        print(f"🔍 正在查 page_id: {page_id}")
        query = """
        SELECT data FROM wiki
        WHERE id = ?
        """

        cursor = self.conn.cursor()
        cursor.execute(query, (page_id,))
        row = cursor.fetchone()
        if not row:
            print(f"⚠️ 找不到 page_id {page_id}！")
            return "NOT FOUND"

        data_json = row[0]
        data = json.loads(data_json)

        if not element_id.startswith(page_id):
            print(f"⚠️ element_id {element_id} 和 page_id {page_id} 不符！")
            return "Invalid element_id for page_id"

        pure_element_id = element_id[len(page_id) + 1:]
        print(f"🔍 查詢元素: {pure_element_id}")

        if pure_element_id.startswith("sentence"):
            sentences = data.get("sentences", {})
            print(f"✅ sentences 有 {len(sentences)} 條資料")
            if pure_element_id in sentences:
                print(f"✅ 找到句子 {pure_element_id}")
                return sentences[pure_element_id].get("text", "NOT FOUND")
            else:
                print(f"⚠️ 找不到句子 {pure_element_id}")
        elif pure_element_id.startswith("cell"):
            cells = data.get("cells", {})
            if pure_element_id in cells:
                return cells[pure_element_id].get("text", "NOT FOUND")
        elif pure_element_id.startswith("header_cell"):
            headers = data.get("headers", {})
            if pure_element_id in headers:
                return headers[pure_element_id].get("text", "NOT FOUND")

        return "NOT FOUND"

# 使用方法
if __name__ == "__main__":
    db_path = "C:/Users/USER/Downloads/Test_Agent/Test_5/Dataset/feverous_wikiv1.db"
    tool = SQLiteSearchJSON(db_path)
    tool.connect()

    page_id = "Acuitzeramo, Michoacán"
    element_id = "Acuitzeramo, Michoacán_sentence_0"

    result = tool.search_feverous(page_id, element_id)
    print(f"搜尋結果：{result}")

    tool.close()


🔍 正在查 page_id: Acuitzeramo, Michoacán
🔍 查詢元素: sentence_0
✅ sentences 有 0 條資料
⚠️ 找不到句子 sentence_0
搜尋結果：NOT FOUND


In [19]:
import sqlite3
import json

def get_data(db_path, page_id):
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    cursor.execute("SELECT data FROM wiki WHERE id = ?", (page_id,))
    row = cursor.fetchone()
    conn.close()
    if row:
        data = json.loads(row[0])
        return data
    return None

# 測試
db_path = "C:/Users/USER/Downloads/Test_Agent/Test_5/Dataset/feverous_wikiv1.db"
page_id = "Acuitzeramo, Michoacán"
data = get_data(db_path, page_id)
print(json.dumps(data, indent=2)[:2000])  # 印出前2000字，避免太長


{
  "order": [
    "sentence_0",
    "list_0",
    "section_0",
    "sentence_1",
    "sentence_2",
    "sentence_3",
    "sentence_4",
    "sentence_5",
    "sentence_6",
    "sentence_7",
    "sentence_8",
    "sentence_9",
    "sentence_10",
    "sentence_11",
    "sentence_12",
    "sentence_13",
    "sentence_14",
    "sentence_15",
    "sentence_16",
    "sentence_17",
    "sentence_18",
    "sentence_19",
    "sentence_20",
    "sentence_21",
    "sentence_22",
    "sentence_23",
    "sentence_24",
    "sentence_25",
    "sentence_26",
    "sentence_27",
    "sentence_28",
    "sentence_29",
    "sentence_30",
    "sentence_31",
    "sentence_32",
    "sentence_33",
    "sentence_34",
    "sentence_35",
    "sentence_36",
    "sentence_37",
    "sentence_38",
    "sentence_39",
    "sentence_40",
    "sentence_41",
    "sentence_42",
    "sentence_43",
    "sentence_44",
    "section_1",
    "sentence_45",
    "sentence_46",
    "sentence_47",
    "sentence_48",
    "sentence_49

In [20]:
import sqlite3
import json

class SQLiteSearchJSON:
    def __init__(self, db_path):
        self.db_path = db_path
        self.conn = None

    def connect(self):
        self.conn = sqlite3.connect(self.db_path)

    def close(self):
        if self.conn:
            self.conn.close()

    def search_feverous(self, page_id: str, element_id: str) -> str:
        print(f"🔍 正在查 page_id: {page_id}")
        query = """
        SELECT data FROM wiki
        WHERE id = ?
        """

        cursor = self.conn.cursor()
        cursor.execute(query, (page_id,))
        row = cursor.fetchone()
        if not row:
            print(f"⚠️ 找不到 page_id {page_id}！")
            return "NOT FOUND"

        data_json = row[0]
        data = json.loads(data_json)

        if not element_id.startswith(page_id):
            print(f"⚠️ element_id {element_id} 和 page_id {page_id} 不符！")
            return "Invalid element_id for page_id"

        pure_element_id = element_id[len(page_id) + 1:]
        print(f"🔍 查詢元素: {pure_element_id}")

        # 🆕 更新：直接找 data 的 key
        value = data.get(pure_element_id)
        if value:
            if isinstance(value, dict):
                return value.get("text", str(value))  # 如果是 dict 優先拿 text 欄位
            else:
                return value  # 直接是句子字串
        else:
            return "NOT FOUND"

# 使用方法
if __name__ == "__main__":
    db_path = "C:/Users/USER/Downloads/Test_Agent/Test_5/Dataset/feverous_wikiv1.db"
    tool = SQLiteSearchJSON(db_path)
    tool.connect()

    page_id = "Acuitzeramo, Michoacán"
    element_id = "Acuitzeramo, Michoacán_sentence_0"

    result = tool.search_feverous(page_id, element_id)
    print(f"搜尋結果：{result}")

    tool.close()


🔍 正在查 page_id: Acuitzeramo, Michoacán
🔍 查詢元素: sentence_0
搜尋結果：Acuitzeramo (Spanish pronunciation: [[Spanish|[akwitseˈɾamo]]) is a small town located in the municipality of Tlazazalca in the [[Mexico|Mexican]] [[Mexican_state|state]] of [[Michoacán|Michoacán]].


In [21]:
import asyncio
from tools.sqlite_search import SQLiteSearch

async def test_sqlite_tool():
    # 建立一個假的 app 和 kani
    class FakeApp:
        pass

    class FakeKani:
        pass

    # 用假的 app 和 kani 建立 tool
    tool = SQLiteSearch(app=FakeApp(), kani=FakeKani(), db_path="C:/Users/USER/Downloads/Test_Agent/Test_5/Dataset/feverous_wikiv1.db")
    await tool.setup()

    # 測試查詢
    page_id = "Acuitzeramo, Michoacán"
    element_id = "Acuitzeramo, Michoacán_sentence_0"

    result = await tool.search_feverous(page_id, element_id)
    print(f"搜尋結果：{result}")

    await tool.cleanup()

if __name__ == "__main__":
    asyncio.run(test_sqlite_tool())


OperationalError: no such table: data

In [22]:
import asyncio
import json
import os
from app import AutoAgentSystem
from tools.sqlite_search import SQLiteSearch

async def batch_test(dataset_path, output_path):
    # 初始化系統
    app = AutoAgentSystem(
        tool_configs={
            SQLiteSearch: {
                "always_include": True,
                "kwargs": {
                    "db_path": "C:/Users/USER/Downloads/Test_Agent/Test_5/Dataset/feverous_wikiv1.db"  # 你的DB路徑
                }
            }
        },
        root_has_tools=True
    )
    kani = await app.ensure_init()

    # 讀取你的 JSONL dataset
    with open(dataset_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    results = []

    for line in lines:
        sample = json.loads(line)
        claim_id = sample["id"]
        claim_text = sample["claim"]
        evidence = sample.get("evidence", {})
        label = sample.get("label", "NOT AVAILABLE")

        # 如果 evidence 不存在，跳過
        if not evidence:
            results.append({
                "id": claim_id,
                "claim": claim_text,
                "result": "No evidence provided",
                "label": label
            })
            continue

        page_elements = evidence.get("content", [])
        claim_results = []

        # 查詢 evidence 中每一個 element_id
        for element_id in page_elements:
            context = evidence.get("context", {}).get(element_id, [])
            if not context:
                claim_results.append(f"Element {element_id} no context")
                continue

            # context 第一個應該是 page_id
            page_id = context[0]

            # 用 tool 查詢
            tool = kani.get_tool(SQLiteSearch)
            result = await tool.search_feverous(page_id, element_id)
            claim_results.append({
                "page_id": page_id,
                "element_id": element_id,
                "result": result
            })

        results.append({
            "id": claim_id,
            "claim": claim_text,
            "evidences": claim_results,
            "label": label
        })

    # 把結果存檔
    with open(output_path, 'w', encoding='utf-8') as f_out:
        json.dump(results, f_out, indent=2, ensure_ascii=False)

    print(f"✅ 測試完成，結果已存到 {output_path}")

    await app.close()

if __name__ == "__main__":
    dataset_path = "C:/Users/USER/Downloads/Test_Agent/Test_5/Dataset/feverous_dev.jsonl"  # 你的 dataset 路徑
    output_path = "C:/Users/USER/Downloads/Test_Agent/Test_5/Dataset/test_results.json"
    asyncio.run(batch_test(dataset_path, output_path))


請輸入你的指令:  請幫我查詢 page_id 是 "Acuitzeramo, Michoacán"，element_id 是 "Acuitzeramo, Michoacán_sentence_0" 的內容。


AI:為了完成這項任務，我將會委派一位專家代理人來查找指定的內容。請稍等片刻，我將分配這項任務。

[✅ 任務指派] Agent: alpha
📄 被指派的任務：
Please search for the content associated with page_id 'Acuitzeramo, Michoacán' and element_id 'Acuitzeramo, Michoacán_sentence_0'. Retrieve the specific content from the database or any available resources.
----------------------------------------

AI:'alpha' is helping you with this request.
AI:


Task exception was never retrieved
future: <Task finished name='Task-150' coro=<Connection.run() done, defined at C:\Users\USER\anaconda3\envs\crew_env\Lib\site-packages\playwright\_impl\_connection.py:272> exception=NotImplementedError()>
Traceback (most recent call last):
  File "C:\Users\USER\anaconda3\envs\crew_env\Lib\asyncio\tasks.py", line 277, in __step
    result = coro.send(None)
             ^^^^^^^^^^^^^^^
  File "C:\Users\USER\anaconda3\envs\crew_env\Lib\site-packages\playwright\_impl\_connection.py", line 279, in run
    await self._transport.connect()
  File "C:\Users\USER\anaconda3\envs\crew_env\Lib\site-packages\playwright\_impl\_transport.py", line 133, in connect
    raise exc
  File "C:\Users\USER\anaconda3\envs\crew_env\Lib\site-packages\playwright\_impl\_transport.py", line 120, in connect
    self._proc = await asyncio.create_subprocess_exec(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\USER\anaconda3\envs\crew_env\Lib\asyncio\subproces

AI:alpha:📌 Acuitzeramo, Michoacán - Wikipedia
🔗 https://en.wikipedia.org/wiki/Acuitzeramo,_Michoacán
📝 Acuitzeramo (Spanish pronunciation: [akwitseˈɾamo]) is a small town located in the municipality of Tlazazalca in the Mexican state of Michoacán. External links [ edit ]

📌 Acuitzeramo, Michoacán
🔗 http://acuitzeramo.com/
📝 Acuitzeramo, Michoacán situado al noroeste de Purépero . es jefatura de Tenencia del municipio de Tlazazalca actualmente tiene dos vías de acceso: una que parte de Purépero, pasando por el Salto, Villa Mendoza, con una extensión de aproximadamente 10 Km.; la otra que va de Purépero. Tlazazalca, La Yerbabuena, San. Isidro y Acuitzeramo ...

📌 Acutzeramo Michoacan Mrxico - YouTube
🔗 https://www.youtube.com/watch?v=HYh2g6sUrnM
📝 Take a virtual tour of Acutzeramo in Michoacan, Mexico! Explore the vibrant culture, delicious cuisine, and stunning scenery of this beautiful Mexican town. ...❌ Failed to load the page via Playwright: It seems I couldn't directly access the Wi

請輸入你的指令:  exit


In [23]:
import os
import json

class WikipediaRetriever:
    def __init__(self, wiki_dir):
        self.wiki_dir = wiki_dir
        self.page_index = {}

    def build_index(self):
        print("Building index... (only indexing page titles and file positions)")
        # 先建立一個 page_name -> 檔案路徑的 mapping
        for filename in os.listdir(self.wiki_dir):
            if filename.endswith('.jsonl'):
                file_path = os.path.join(self.wiki_dir, filename)
                with open(file_path, 'r', encoding='utf-8') as f:
                    for line in f:
                        data = json.loads(line)
                        page_id = data['id']
                        self.page_index[page_id] = file_path
        print(f"Index built: {len(self.page_index)} pages indexed.")

    def get_page(self, page_id):
        if page_id not in self.page_index:
            print(f"Page {page_id} not found in index.")
            return None
        file_path = self.page_index[page_id]
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                data = json.loads(line)
                if data['id'] == page_id:
                    return data
        print(f"Page {page_id} not found in file {file_path}.")
        return None

    def get_sentence(self, page_id, sentence_id):
        page = self.get_page(page_id)
        if page is None:
            return None
        try:
            return page['text'][sentence_id]
        except IndexError:
            print(f"Sentence {sentence_id} not found in page {page_id}.")
            return None

# 使用方式
wiki_dir = r"C:\Users\USER\Downloads\Test_Agent\Test_5\Dataset\FEVER\wiki-pages"
retriever = WikipediaRetriever(wiki_dir)
retriever.build_index()

# 測試：找一個 page 和 sentence
page_id = "Oliver_Reed"
sentence_id = 0
sentence = retriever.get_sentence(page_id, sentence_id)
print("Retrieved sentence:", sentence)


Building index... (only indexing page titles and file positions)
Index built: 5416537 pages indexed.
Retrieved sentence: R
