In [None]:
import csv
import json
import re
import time
import random
import requests


def flatten_list(nested_list):
    """
    Recursively flatten an arbitrarily nested list into one flat list.

    Only ``list`` instances are expanded; every other element (tuples and
    strings included) is kept as-is, in its original left-to-right order.
    """
    flat = []
    for element in nested_list:
        if not isinstance(element, list):
            flat.append(element)
            continue
        # Nested list: splice its flattened contents in place.
        flat += flatten_list(element)
    return flat


# Currency marker ("NT$" or "$"), at most one whitespace character, then digit
# groups joined by single "," or "." separators. Requiring a digit at both
# ends keeps trailing sentence punctuation ("$100.") out of the match and
# rejects punctuation-only text such as "$,".
ENTRANCE_FEE_PATTERN = re.compile(r"(?:NT\$|\$)\s?\d+(?:[,.]\d+)*")


def parse_entrance_fee(fee_data):
    """
    Return the first entrance-fee marker found in ``fee_data``.

    ``fee_data`` may be a single string or an arbitrarily nested list of
    strings; non-string leaves are ignored. Lines are inspected in order and
    the first line that yields a result wins.

    Returns:
        "free" when a line contains "免費"; otherwise the matched price text
        such as "$100.00" or "NT$ 120"; ``None`` when ``fee_data`` is empty
        or no line carries a fee.
    """
    if not fee_data:
        return None

    # Normalise a bare string so the flattening step below treats it as one
    # line instead of iterating over its characters.
    if isinstance(fee_data, str):
        fee_data = [fee_data]

    for text_line in flatten_list(fee_data):
        if not isinstance(text_line, str):
            continue

        # "Free" takes precedence over a price appearing on the same line.
        if "免費" in text_line:
            return "free"

        match = ENTRANCE_FEE_PATTERN.search(text_line)
        if match:
            return match.group(0)  # e.g. "$100.00" or "NT$ 120"

    return None


def parse_opening_hours(data):
    """
    Extract opening hours from ``data[6][34][1]`` when that path exists.

    Each entry is expected to look like ``["星期一", ["10:00–18:00"]]``: a day
    name followed by a list whose first element is the hours text.

    Returns:
        A dict mapping day name to hours text, e.g.
        ``{"星期一": "10:00–18:00", ...}``. A day whose hours list is empty or
        missing maps to ``None``. Entries that are not a list starting with a
        string day name are skipped individually, so one malformed entry no
        longer discards the days that follow it. An empty dict is returned
        when the path is absent.
    """
    opening_dict = {}
    try:
        day_entries = data[6][34][1]
    except (IndexError, KeyError, TypeError):
        # KeyError covers a dict-shaped payload (e.g. a JSON error object).
        return opening_dict

    if not isinstance(day_entries, list):
        return opening_dict

    for entry in day_entries:
        if (
            not isinstance(entry, list)
            or len(entry) < 2
            or not isinstance(entry[0], str)
        ):
            continue
        slots = entry[1]
        has_hours = isinstance(slots, list) and bool(slots)
        opening_dict[entry[0]] = slots[0] if has_hours else None
    return opening_dict


def extract_item_labels(sub_items):
    """
    Collect the labels of all enabled entries in ``sub_items``.

    ``sub_items`` is expected to be a list of ``[label, flag]`` pairs such as
    ``[["無障礙入口", 1], ["無障礙洗手間", 1], ["無障礙停車場", 0]]``. The labels
    whose flag equals 1 are returned in order, e.g.
    ``["無障礙入口", "無障礙洗手間"]``. Anything that is not a list, and any
    entry that is not a list of at least two items with a string label, is
    ignored.
    """
    if not isinstance(sub_items, list):
        return []

    return [
        entry[0]
        for entry in sub_items
        if isinstance(entry, list)
        and len(entry) >= 2
        and isinstance(entry[0], str)
        and entry[1] == 1
    ]


def parse_all_tags(info_blocks):
    """
    Turn the tag section (``data[6][100][1]``) into a ``{tag: [values]}`` dict.

    Each block is read as ``[main_tag, <unused>, sub_items]``. For every
    sub-item that is a list with a string at index 1, that string is
    collected. Blocks that are too short, have no sub-items, or yield no
    string values are left out of the result. Shape of the result:

    {
      "無障礙程度": ["無障礙入口", "無障礙洗手間", "無障礙停車場"],
      "設施": ["洗手間", "餐廳", "Wi-Fi"],
      ...
    }

    Diagnostics are printed to stdout; the function itself never raises for
    a malformed block.
    """
    tags = {}

    if not (isinstance(info_blocks, list) and info_blocks):
        print("[DEBUG] info_blocks 為空或不是列表，無標籤可解析")
        return tags

    for position, group in enumerate(info_blocks):
        try:
            # A usable block has at least three slots: tag, <unused>, sub-items.
            if not (isinstance(group, list) and len(group) >= 3):
                continue

            heading, entries = group[0], group[2]

            if not (isinstance(entries, list) and entries):
                print(f"[DEBUG] 主標籤 `{heading}` 沒有對應的 sub_items，跳過")
                continue

            # Keep only the string found at index 1 of each well-formed entry.
            values = [
                entry[1]
                for entry in entries
                if isinstance(entry, list)
                and len(entry) > 1
                and isinstance(entry[1], str)
            ]

            if not values:
                print(f"[DEBUG] 主標籤 `{heading}` 沒有可用的值")
                continue

            tags[heading] = values

        except Exception as err:
            # e.g. an unhashable heading; report it and move on to the next block.
            print(f"[ERROR] 解析 info_blocks[{position}] 時出錯: {err}")

    return tags


def _dig(node, *path):
    """Follow ``path`` through nested containers; return None if any step is missing."""
    try:
        for key in path:
            node = node[key]
    except (IndexError, KeyError, TypeError):
        return None
    return node


def fetch_place_info(location_id: str, timeout: float = 30.0) -> dict:
    """
    Fetch the Google Maps place preview for ``location_id`` and parse it.

    Steps:
    1) Build the preview URL from ``location_id``.
    2) Sleep a random 2-7 seconds, then GET the URL.
    3) Strip the leading ")]}'" guard and decode the JSON payload.
    4) Read the fixed fields (gmp_location, class, address, summary_1,
       summary_2, opening_hours, entrance_fee, stay_time) by position.
    5) Parse ``data[6][100][1]`` dynamically into the "tags" dict instead of
       hard-coding fields such as "service_options" / "amenities".

    Args:
        location_id: Place identifier interpolated into the request URL.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A dict with the keys listed above plus "location_id" and "tags".
        Any field that cannot be located keeps its default (``None`` or an
        empty dict). A failed request or an undecodable body yields a dict
        containing only defaults; this function does not raise for those.
    """
    url_template = (
        "https://www.google.com.tw/maps/preview/place?"
        "authuser=0&hl=zh-TW&gl=tw&pb=!1m14!1s{location_id}!"
        "3m9!1m3!1d793.1301842274751!2d121.5650101!3d25.0347827!2m0!"
        "3m2!1i1745!2i337!4f13.1!4m2!3d25.03556637322376!"
        "4d121.5670895576477!13m41!2m2!1i408!2i240!3m2!2i10!5b1!"
        "7m33!1m3!1e1!2b0!3e3!1m3!1e2!2b1!3e2!1m3!1e2!2b0!3e3!"
        "1m3!1e8!2b0!3e3!1m3!1e10!2b0!3e3!1m3!1e10!2b1!3e2!"
        "1m3!1e10!2b0!3e4!1m3!1e9!2b1!3e2!2b1!9b0!14m4!"
        "1skxy7Z5SqHNmT1e8Pher14A4!3b1!7e81!15i10555!15m49!"
        "1m10!4e2!18m7!3b0!6b0!14b1!17b1!20b1!27m1!1b0!"
        "20e2!4b1!10m1!8e3!11m1!3e1!17b1!20m2!1e3!1e6!24b1!"
        "25b1!26b1!29b1!30m1!2b1!36b1!43b1!52b1!55b1!"
        "56m1!1b1!65m5!3m4!1m3!1m2!1i224!2i298!98m3!1b1!"
        "2b1!3b1!107m2!1m1!1e1!114m3!1b1!2m1!1b1!"
        "22m1!1e81!29m0!30m6!3b1!6m1!2b1!7m1!2b1!"
        "9b1!32b1!37i721&q=*&pf=t"
    )

    headers = {
        # Common browser headers
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7",
        # NOTE(review): this looks like a captured browser session cookie
        # (it embeds an IP address and a timestamp). It will presumably
        # expire and probably should not live in source control; consider
        # loading it from configuration or the environment.
        "Cookie": "AEC=AZ6Zc-U7FVfHwDUUSrlXxySu0nJOptgUBiArTbxv1taxhDyDr8g_e0dAhQI; NID=522=hb0nzpYIajoBCvbCPQnBzCEs5TWOkfSpt2uH8BWLkjFlcjDb2_R2Ig8wTr7rd4-tUbR-8-6-aswjBZFprSi_2S2zxlixd3PyL4BvRR7uJUifMS7SWbcLFQH97_xbCPryAnyrYR3Es8DQBGrpL3bWCXffWyegfQ_-Zz3T8C6p-EOx2YPhYPP42WZrhi1ZWSDqcJsdmrv-wK51zSS5BoV41ybRqO8Qhi0; GOOGLE_ABUSE_EXEMPTION=ID=b43ad8acb6c3f13e:TM=1740555454:C=r:IP=1.160.19.244-:S=Y9EOenciSVkUflcd8ncvZx0",
        # sec-ch-ua and sec-fetch-* client hints
        "sec-ch-prefers-color-scheme": "light",
        "sec-ch-ua": '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "sec-ch-ua-platform-version": '"10.0.0"',
        "sec-ch-ua-arch": '"x86"',
        "sec-ch-ua-bitness": '"64"',
        "sec-ch-ua-model": '""',
        "sec-ch-ua-wow64": "?0",
        "sec-fetch-dest": "document",
        "sec-fetch-mode": "navigate",
        "sec-fetch-site": "none",
        "sec-fetch-user": "?1",
        "service-worker-navigation-preload": "true",
        # Possibly Chrome-internal parameters
        "x-browser-channel": "stable",
        "x-browser-validation": "1nAW9Rb/M8Lkk97ILDg00FWYjns=",
        "x-browser-year": "2025",
        "X-Client-Data": "CJK2yQEIorbJAQipncoBCN2OywEIkqHLAQiJo8sBCIWgzQEIj9/OARiPzs0B",
        "x-browser-copyright": "Copyright 2025 Google LLC. All rights reserved.",
    }

    place_info = {
        "location_id": location_id,
        "gmp_location": None,
        "class": None,
        "address": None,
        "summary_1": None,
        "summary_2": None,
        "opening_hours": {},
        "entrance_fee": None,
        "stay_time": None,
        # Dynamic tags parsed from data[6][100][1]
        "tags": {},
    }

    url = url_template.format(location_id=location_id)

    # Random pause between requests to avoid triggering HTTP 429.
    time.sleep(random.uniform(2, 7))

    try:
        # Without a timeout a stalled connection would block the run forever.
        resp = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Treat a network failure like an empty payload so one bad request
        # does not abort a long batch run.
        print(f"[ERROR] location_id={location_id}, request failed: {exc}")
        return place_info
    print(f"[DEBUG] location_id={location_id}, status_code={resp.status_code}")

    # Strip the ")]}'" guard only where it is a prefix; removing it anywhere
    # in the body could corrupt string values that contain the same sequence.
    guard = ")]}'"
    text_data = resp.text.lstrip()
    if text_data.startswith(guard):
        text_data = text_data[len(guard):]

    try:
        data = json.loads(text_data)
    except json.JSONDecodeError:
        data = []
    # The positional lookups below assume a top-level list; any other JSON
    # value (e.g. an error object) is treated as "no data".
    if not isinstance(data, list):
        data = []

    # ---------- Fixed-position fields ----------
    place_info["gmp_location"] = _dig(data, 6, 11)
    place_info["class"] = _dig(data, 6, 13)
    place_info["address"] = _dig(data, 6, 2, 0)
    place_info["summary_1"] = _dig(data, 6, 32, 0, 1)
    place_info["summary_2"] = _dig(data, 6, 32, 1, 1)
    place_info["opening_hours"] = parse_opening_hours(data)

    try:
        place_info["entrance_fee"] = parse_entrance_fee(
            _dig(data, 6, 191, 11, 0, 2)
        )
    except (IndexError, TypeError):
        # The fee payload had an unexpected shape; keep the default None.
        pass

    place_info["stay_time"] = _dig(data, 6, 117, 0)

    # ---------- Dynamic tags: data[6][100][1] -> place_info["tags"] ----------
    place_info["tags"] = parse_all_tags(_dig(data, 6, 100, 1))

    return place_info


def main(input_path="tourism838_location_id.csv", output_path="gmap_data1.json"):
    """
    Scrape every location listed in a CSV file and dump the results as JSON.

    Args:
        input_path: CSV file with a "location_ID" column.
        output_path: Destination JSON file; it receives a list with one
            place-info dict per non-blank location ID, in input order.

    Rows whose "location_ID" is missing, empty, or whitespace-only are
    skipped.
    """
    results = []
    # "utf-8-sig" drops a leading byte-order mark if present; otherwise the
    # first header would read "\ufefflocation_ID" and every row would be
    # skipped. newline="" lets the csv module do its own newline handling.
    with open(input_path, "r", encoding="utf-8-sig", newline="") as f:
        for row in csv.DictReader(f):
            location_id = (row.get("location_ID") or "").strip()
            if not location_id:
                continue
            results.append(fetch_place_info(location_id))

    # Write the collected records as human-readable JSON.
    with open(output_path, "w", encoding="utf-8") as fw:
        json.dump(results, fw, ensure_ascii=False, indent=2)

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()