# In [None]:
import requests
import json
import csv
from datetime import datetime, timezone
from time import sleep

# Input CSV listing the attractions to scrape; every row is read, not just the first.
input_csv = "test.csv"

# Seconds to wait on a single HTTP request before giving up; without a timeout
# a stalled connection would block the whole run.
REQUEST_TIMEOUT = 30

# Prefix removed from the response body before JSON decoding, when present.
RESPONSE_PREFIX = ")]}'"

# Order of the columns written to each output CSV row.
CSV_COLUMNS = (
    'user',
    'user_id',
    'review_id',
    'rating',
    'date',
    'comment',
    'language',
    'translated',
    'user_page',
)


def load_locations(csv_path):
    """Return ``(name, url_template)`` pairs read from *csv_path*.

    The name is taken from column index 2 and the URL template from column
    index 5 (the source notes this matches the table format of all.csv).
    Rows with fewer than six columns are skipped so that reading column 5
    can never raise ``IndexError``.
    """
    with open(csv_path, mode='r', encoding='utf-8', newline='') as file:
        return [(row[2], row[5]) for row in csv.reader(file) if len(row) > 5]


def decode_payload(raw_text):
    """Decode a response body into Python objects.

    Surrounding whitespace is stripped and a leading ``)]}'`` is removed
    before the remainder is parsed as JSON.

    Raises:
        json.JSONDecodeError: if the remaining text is not valid JSON.
    """
    body = raw_text.strip()
    if body.startswith(RESPONSE_PREFIX):
        body = body[len(RESPONSE_PREFIX):]
    return json.loads(body)


def _dig(node, *path):
    """Follow *path* through nested sequences, returning "" when it is absent.

    A missing index, a non-subscriptable intermediate value, or a final value
    of ``None`` all yield the empty string, so optional fields are always safe
    to treat as text.
    """
    try:
        for key in path:
            node = node[key]
    except (IndexError, TypeError):
        return ""
    return "" if node is None else node


def parse_review(entry):
    """Flatten one raw review entry into a dict keyed by ``CSV_COLUMNS``.

    The index paths are the ones the original script used; the upstream
    payload layout is not documented here, so they are kept verbatim.
    The comment, language and translated fields are optional and default
    to "".

    Raises:
        IndexError, TypeError: if a mandatory field is missing.
    """
    review = entry[0]
    meta = review[1]
    author = meta[4]
    body = review[2]

    # The raw value is divided by 1,000,000 and the quotient is read as Unix
    # seconds (so it is presumably in microseconds - confirm against payload).
    timestamp_seconds = meta[2] // 1000000
    formatted_date = datetime.fromtimestamp(
        timestamp_seconds, tz=timezone.utc
    ).strftime('%Y-%m-%d')

    return {
        'user': author[5][0],
        'user_id': author[5][3],
        'review_id': str(review[0]),
        'rating': str(body[0][0]),
        'date': formatted_date,
        'comment': _dig(body, 15, 0, 0),
        'language': _dig(body, 14, 0),
        'translated': _dig(body, 15, 1, 0),
        'user_page': author[2][0],
    }


def next_page_token(data):
    """Return the URL-escaped continuation token stored at ``data[1]``.

    ``=`` is replaced with ``%3D`` so the token can be spliced into the URL.
    Returns "" when there is no usable token (missing slot, non-string value,
    or empty string), which the caller treats as "no more pages".
    """
    try:
        token = data[1]
    except (IndexError, KeyError, TypeError):
        return ""
    if not isinstance(token, str):
        return ""
    return token.replace('=', '%3D')


def scrape_location(location, url_template):
    """Download every review page for one attraction and append rows to its CSV.

    Args:
        location: attraction name; used in log output and the output file name.
        url_template: request URL whose "2s" marker receives the page token.

    Returns:
        ``(total_reviews, reviews_with_comments)`` for this attraction.

    Raises:
        requests.RequestException: on network errors, timeouts or HTTP errors.
        json.JSONDecodeError: if a response body is not valid JSON.
        OSError: if the output file cannot be opened.
    """
    output_file = f'./CSV/6/reviews_output_{location}.csv'

    unique_ids = set()
    total_reviews = 0
    reviews_with_comments = 0

    google = ""  # page token ("2s" code); empty for the first page
    seen_tokens = {google}

    while True:
        # NOTE(review): this replaces every "2s" occurrence in the template,
        # not only the token slot - confirm the template contains it once.
        url = url_template.replace("2s", f"2s{google}")

        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        data = decode_payload(response.text)

        # A page without reviews may carry None (or nothing) in slot 2;
        # treat that as an empty page instead of crashing the whole run.
        try:
            reviews = data[2] or []
        except (IndexError, KeyError, TypeError):
            reviews = []

        with open(output_file, mode='a', encoding='utf-8', newline='') as file:
            writer = csv.writer(file)
            for index, entry in enumerate(reviews, start=1):
                # Best effort per review: one malformed entry is reported and
                # skipped rather than aborting the page.
                try:
                    record = parse_review(entry)
                    review_id = record['review_id']
                    # Only store each review ID once.
                    if review_id in unique_ids:
                        continue
                    writer.writerow([record[column] for column in CSV_COLUMNS])
                    unique_ids.add(review_id)
                    total_reviews += 1
                    comment = record['comment']
                    if isinstance(comment, str) and comment.strip():
                        reviews_with_comments += 1
                except Exception as e:
                    print(f"評論 {index}: 無評論或格式錯誤 ({e})")

            print(f"總共存入 {total_reviews} 筆評論")
            print(f"其中有 {reviews_with_comments} 筆評論有內文")

        # Stop when there is no next token, or when the server hands back a
        # token that was already requested (which would loop forever).
        token = next_page_token(data)
        if not token or token in seen_tokens:
            break
        seen_tokens.add(token)
        google = token

    return total_reviews, reviews_with_comments


if __name__ == "__main__":
    locations = load_locations(input_csv)

    # Process each attraction in turn.
    for location, url_template in locations:
        print(f"開始爬取景點：{location}")
        scrape_location(location, url_template)
        print(f"景點 {location} 爬取結束")
        print("=" * 50)

    print("所有景點爬取完成")
