In [14]:
import requests
import json
import csv
import time
import pandas as pd
import os
import random


def remove_prefix(response_text):
    """Return ``response_text`` without a leading ")]}'" marker.

    Text that does not begin with the marker is returned unchanged.
    """
    marker = ")]}'"
    has_marker = response_text.startswith(marker)
    return response_text[len(marker):] if has_marker else response_text


def parse_response(response_text):
    """Decode a response body as JSON after dropping the ")]}'" prefix.

    Returns the decoded object. If anything goes wrong while decoding, the
    error is printed and None is returned instead.
    """
    payload = remove_prefix(response_text)
    try:
        return json.loads(payload)
    except Exception as err:
        print("JSON 解析錯誤:", err)
    return None


def extract_next_token(data):
    """
    Look up the token for the next request, stored at data[45][1].

    Returns None when that position cannot be read for any reason.
    """
    try:
        section = data[45]
        token = section[1]
    except Exception:
        token = None
    return token


def extract_location_data(data):
    """
    Collect (gmap_location, location_ID) pairs from the records in data[45][0].

    Only records whose entry[4][13][1][1] value contains "台北標準時間" are
    considered; gmap_location is read from entry[1][0][4] and location_ID from
    entry[1][0][2]. A record is kept only when both values are truthy.
    Returns a list of tuples, which is empty when nothing qualifies. Any
    unexpected error is printed and the pairs gathered so far are returned.
    """
    matches = []

    def _pick(record, *path):
        # Walk the nested indexes; any failure along the way yields None.
        try:
            node = record
            for key in path:
                node = node[key]
            return node
        except Exception:
            return None

    try:
        records = data[45][0]
        for record in records or ():
            # Skip records that lack the marker or cannot be inspected at all.
            try:
                is_taipei = "台北標準時間" in record[4][13][1][1]
            except Exception:
                is_taipei = False
            if not is_taipei:
                continue

            place = _pick(record, 1, 0, 4)
            place_id = _pick(record, 1, 0, 2)
            if place and place_id:
                matches.append((place, place_id))
    except Exception as e:
        print("擷取位置資料發生錯誤:", e)

    return matches


def main():
    """Crawl the paginated ``locationhistory/preview/mas`` endpoint per user.

    User ids are read from the ``user_id`` column of ``./user_list.csv``. For
    every user the endpoint is requested page by page, starting with an empty
    page token. Each record that ``extract_location_data`` returns is appended
    to ``./ja_ac_users_location.csv`` as a row of
    ``gmap_location, location_ID, count`` with ``count`` fixed at 1.

    Paging for one user stops when any of the following happens:
      * the HTTP status is not 200,
      * the request raises (including a timeout),
      * the body cannot be parsed as JSON,
      * five consecutive pages contain no matching record,
      * the response carries no next-page token.

    Environment:
        GMAP_COOKIE: optional value for the ``Cookie`` request header. Session
            cookies are credentials, so they are read from the environment
            instead of being stored in the notebook. When unset, requests are
            sent without a ``Cookie`` header and a warning is printed.
    """
    # Read the user ids to crawl from "user_list.csv".
    user_df = pd.read_csv("./user_list.csv")
    user_ids = user_df["user_id"].tolist()

    # # Read kei values from "KEI_list.csv" (currently disabled).
    # kei_df = pd.read_csv("./KEI_list.csv")
    # keis = kei_df["kei"].tolist()

    fieldnames = ["gmap_location", "location_ID", "count"]

    # (connect, read) timeouts in seconds; without them a stalled connection
    # would block the kernel until it is interrupted manually.
    request_timeout = (10, 30)

    # Output CSV: write the header row only when the file does not exist yet,
    # so repeated runs keep appending to the same file.
    output_csv = "./ja_ac_users_location.csv"
    if not os.path.exists(output_csv):
        with open(output_csv, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()

    base_url = (
        "https://www.google.com/locationhistory/preview/mas?"
        "authuser=0&hl=zh-TW&gl=tw&pb="
        "!1s{user_id}"
        "!2m3!1sRKmyZ6jiBuSjvr0PrcH2mA0"
        "!7e81!15i14416!6m2!4b1!7b1!9m0"
        "!17m28!1m6!1m2!1i0!2i0!2m2!1i530!2i306"
        "!1m6!1m2!1i1870!2i0!2m2!1i1920!2i306"
        "!1m6!1m2!1i0!2i0!2m2!1i1920!2i20"
        "!1m6!1m2!1i0!2i286!2m2!1i1920!2i306"
        "!18m9!1m3!1d46090.09471573684"
        "!2d121.5320757!3d25.0371489"
        "!2m0!3m2!1i1920!2i306"
        "!4f13.1!41m15!1i20!2m9!2b1!3b1!5b1!7b1"
        "!12m4!1b1!2b1!4m1!1e1"
        "!3s{token}"
        "!7m2!1m1!1e1"
    )

    headers = {
        # Common browser headers.
        # "Accept-Encoding" is deliberately not overridden: requests advertises
        # only the encodings it can actually decode, whereas forcing
        # "br, zstd" can yield an undecodable body when the optional
        # brotli/zstandard packages are not installed.
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "zh-TW,zh;q=0.9",
        # sec-ch-ua, sec-fetch-* and related client hints.
        "sec-ch-prefers-color-scheme": "dark",
        "sec-ch-ua": '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "sec-ch-ua-platform-version": '"10.0.0"',
        "sec-ch-ua-arch": '"x86"',
        "sec-ch-ua-bitness": '"64"',
        "sec-ch-ua-model": '""',
        "sec-ch-ua-wow64": "?0",
        "sec-fetch-dest": "document",
        "sec-fetch-mode": "navigate",
        "sec-fetch-site": "none",
        "sec-fetch-user": "?1",
        "service-worker-navigation-preload": "true",
        # Possibly Chrome-internal parameters.
        "x-browser-channel": "stable",
        "x-browser-validation": "1nAW9Rb/M8Lkk97ILDg00FWYjns=",
        "x-browser-year": "2025",
        "X-Client-Data": "CIu2yQEIpLbJAQipncoBCJ3iygEIlKHLAQiJo8sBCIWgzQEIxc3OAQj92s4BCMnczgEIi9/OAQjI4s4BCMrizgEY9MnNAQ==",
        "x-browser-copyright": "Copyright 2025 Google LLC. All rights reserved.",
    }

    # Session cookies are credentials and must never be committed with the
    # notebook; supply them through the GMAP_COOKIE environment variable.
    cookie = os.environ.get("GMAP_COOKIE", "").strip()
    if cookie:
        headers["Cookie"] = cookie
    else:
        print("未設定環境變數 GMAP_COOKIE，將以未登入狀態發送請求。")

    # Append the crawled rows.
    with open(output_csv, "a", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)

        for user_id in user_ids:
            # kei = random.choice(keis)
            token = ""  # an empty token requests the first page
            no_data_count = 0  # consecutive pages without '台北標準時間'

            while True:
                url = base_url.format(user_id=user_id, token=token)  # kei=kei,
                print(
                    f"[User: {user_id}] no_data_count={no_data_count} (連續無台北標準時間次數)"
                )
                print("Requesting URL:", url)

                try:
                    response = requests.get(
                        url, headers=headers, timeout=request_timeout
                    )
                    if response.status_code != 200:
                        print(f"Error {response.status_code} for URL: {url}")
                        break

                    data = parse_response(response.text)
                    if data is None:
                        break

                except Exception as e:
                    # Covers connection errors and timeouts alike.
                    print(f"Request 發生錯誤: {e}")
                    break

                # Collect every record on this page that mentions 台北標準時間.
                all_locations = extract_location_data(data)

                if all_locations:
                    for gmap_location, location_ID in all_locations:
                        writer.writerow(
                            {
                                "gmap_location": gmap_location,
                                "location_ID": location_ID,
                                "count": 1,
                            }
                        )
                    # Flush per page so rows survive an interrupted run.
                    f.flush()
                    no_data_count = 0
                else:
                    no_data_count += 1
                    # Give up on this user after 5 consecutive pages with no
                    # '台北標準時間' record.
                    if no_data_count >= 5:
                        print(f"連續 {no_data_count} 次無台北標準時間，結束爬取。")
                        break

                # Fetch the token needed for the next page request.
                next_token = extract_next_token(data)
                if not next_token:
                    print("沒有下一頁 token，結束該使用者爬取。")
                    break
                else:
                    token = next_token

                # Random pause between pages of the same user.
                time.sleep(random.uniform(3, 7))

            print(f"已完成 user {user_id} 的爬取流程\n")
            # Longer random pause before moving on to the next user.
            time.sleep(random.uniform(4, 10))

    print("所有資料已寫入", output_csv)


# Entry point: start the crawl only when this code runs as the main program
# (which includes executing the notebook cell), not when it is imported.
if __name__ == "__main__":
    main()

[User: 102670587917365187015] no_data_count=0 (連續無台北標準時間次數)
Requesting URL: https://www.google.com/locationhistory/preview/mas?authuser=0&hl=zh-TW&gl=tw&pb=!1s102670587917365187015!2m3!1sRKmyZ6jiBuSjvr0PrcH2mA0!7e81!15i14416!6m2!4b1!7b1!9m0!17m28!1m6!1m2!1i0!2i0!2m2!1i530!2i306!1m6!1m2!1i1870!2i0!2m2!1i1920!2i306!1m6!1m2!1i0!2i0!2m2!1i1920!2i20!1m6!1m2!1i0!2i286!2m2!1i1920!2i306!18m9!1m3!1d46090.09471573684!2d121.5320757!3d25.0371489!2m0!3m2!1i1920!2i306!4f13.1!41m15!1i20!2m9!2b1!3b1!5b1!7b1!12m4!1b1!2b1!4m1!1e1!3s!7m2!1m1!1e1
[User: 102670587917365187015] no_data_count=1 (連續無台北標準時間次數)
Requesting URL: https://www.google.com/locationhistory/preview/mas?authuser=0&hl=zh-TW&gl=tw&pb=!1s102670587917365187015!2m3!1sRKmyZ6jiBuSjvr0PrcH2mA0!7e81!15i14416!6m2!4b1!7b1!9m0!17m28!1m6!1m2!1i0!2i0!2m2!1i530!2i306!1m6!1m2!1i1870!2i0!2m2!1i1920!2i306!1m6!1m2!1i0!2i0!2m2!1i1920!2i20!1m6!1m2!1i0!2i286!2m2!1i1920!2i306!18m9!1m3!1d46090.09471573684!2d121.5320757!3d25.0371489!2m0!3m2!1i1920!2i306!4f13.1!4

KeyboardInterrupt: 

In [3]:
# One-off probe: request a single user's page with an empty page token and
# without the custom headers that main() sends, then show the response status.
url = "https://www.google.com/locationhistory/preview/mas?authuser=0&hl=zh-TW&gl=tw&pb=!1s102593444098371563490!2m3!1sRKmyZ6jiBuSjvr0PrcH2mA0!7e81!15i14416!6m2!4b1!7b1!9m0!17m28!1m6!1m2!1i0!2i0!2m2!1i530!2i306!1m6!1m2!1i1870!2i0!2m2!1i1920!2i306!1m6!1m2!1i0!2i0!2m2!1i1920!2i20!1m6!1m2!1i0!2i286!2m2!1i1920!2i306!18m9!1m3!1d46090.09471573684!2d121.5320757!3d25.0371489!2m0!3m2!1i1920!2i306!4f13.1!41m15!1i20!2m9!2b1!3b1!5b1!7b1!12m4!1b1!2b1!4m1!1e1!3s!7m2!1m1!1e1"
# Bound the wait so a stalled connection cannot block the kernel indefinitely.
response = requests.get(url, timeout=30)
print(response)
# Pause for a random 4-6 seconds after the request.
time.sleep(random.uniform(4, 6))

<Response [429]>


In [5]:
# Echo the probe URL; relies on `url` having been assigned in an earlier cell.
print(url)

https://www.google.com/locationhistory/preview/mas?authuser=0&hl=zh-TW&gl=tw&pb=!1s102593444098371563490!2m3!1sRKmyZ6jiBuSjvr0PrcH2mA0!7e81!15i14416!6m2!4b1!7b1!9m0!17m28!1m6!1m2!1i0!2i0!2m2!1i530!2i306!1m6!1m2!1i1870!2i0!2m2!1i1920!2i306!1m6!1m2!1i0!2i0!2m2!1i1920!2i20!1m6!1m2!1i0!2i286!2m2!1i1920!2i306!18m9!1m3!1d46090.09471573684!2d121.5320757!3d25.0371489!2m0!3m2!1i1920!2i306!4f13.1!41m15!1i20!2m9!2b1!3b1!5b1!7b1!12m4!1b1!2b1!4m1!1e1!3s!7m2!1m1!1e1
