In [None]:
# CSS selectors that identify a single review card (several page layouts).
_CARD_SELECTORS = (
    "[data-automation='reviewCard']",
    "div[data-test-target='review-card']",
    "div[data-test-target='HR_CC_CARD']",
)

# Candidate selectors for the review title, tried in order.
_TITLE_SELECTORS = (
    "[data-automation='reviewTitle']",
    "a[data-test-target='review-title']",
    "span[data-test-target='review-title']",
    "h3, h4",
)

# Candidate selectors for the "next page" control, tried in order.
_NEXT_SELECTORS = (
    "nav[aria-label='Pagination'] a[aria-label*='Next']",
    "a[aria-label='Next page']",
    "a[aria-label='Next']",
    "button[aria-label='Next']",
    "li[title='Next Page'] a",
    "a[data-page-number][aria-label*='Next']",
)


def _parse_count(text: str) -> Optional[int]:
    """Return the first integer in *text*, accepting ',' thousands separators."""
    match = re.search(r"\d[\d,]*", text)
    if match is None:
        return None
    return int(match.group(0).replace(",", ""))


def _first_inner_text(locator: Any) -> Optional[str]:
    """Return the stripped inner text of the first match, or None if absent or unreadable."""
    try:
        if locator.count():
            return locator.first.inner_text().strip()
    except Exception:
        # Best-effort field: the DOM can change between count() and the read.
        pass
    return None


def _first_count(locator: Any) -> Optional[int]:
    """Return the first integer found in the text of the first match, or None."""
    text = _first_inner_text(locator)
    return _parse_count(text) if text else None


def _prepare_reviews_page(page: Any) -> None:
    """Dismiss the consent banner, open the Reviews tab and disable auto-translation.

    Every step is optional: a control that is missing or fails to respond is
    skipped so the scrape can still proceed with whatever the page shows.
    """
    # Cookie/consent banner.
    try:
        consent = page.get_by_role("button", name=re.compile(r"(Accept|Agree|I agree|OK)", re.I))
        if consent.count() > 0 and consent.first.is_visible():
            consent.first.click()
            _rand_sleep()
    except Exception:
        pass

    # Switch to the Reviews tab (if it is not already the active one).
    try:
        reviews_tab = page.get_by_role("tab", name=re.compile(r"Reviews", re.I))
        if reviews_tab.count() > 0 and reviews_tab.first.is_visible():
            reviews_tab.first.click()
            page.wait_for_load_state("domcontentloaded")
            _rand_sleep()
    except Exception:
        pass

    # Turn off automatic translation ("Show original reviews").
    try:
        show_original = page.get_by_role("button", name=re.compile(r"(Show original reviews|Show original)", re.I))
        if show_original.count() > 0 and show_original.first.is_visible():
            show_original.first.click()
            _rand_sleep()
    except Exception:
        pass

    # Wait for review cards to appear; keep going even if none show up in time.
    try:
        page.wait_for_selector("[data-automation='reviewCard'], div[data-test-target='review-card']", timeout=20000)
    except PWTimeoutError:
        pass


def _expand_truncated_reviews(page: Any, limit: int = 20) -> None:
    """Click up to *limit* "Read more"-style expanders on the current page."""
    try:
        expanders = page.locator("text=/^(Read more|More|Show more|更多|もっと読む)$/i")
        for i in range(min(expanders.count(), limit)):
            try:
                # Short timeout: a stale or covered expander must not stall the page.
                expanders.nth(i).click(timeout=1000)
                _rand_sleep(0.2, 0.5)
            except Exception:
                pass
    except Exception:
        pass


def _extract_review(card: Any, page_url: str) -> Dict[str, Any]:
    """Read every available field from one review card and return it as a dict.

    Fields that cannot be located are left as None. The dict is produced by
    ``Review(...).to_dict()``; ``Review`` is defined elsewhere in this file.
    """
    # Title: first selector that yields non-empty text wins.
    title = None
    for sel in _TITLE_SELECTORS:
        candidate = _first_inner_text(card.locator(sel))
        if candidate is None:
            continue
        title = candidate
        if title:
            break

    # Body text: dedicated node first, then progressively broader fallbacks.
    text = None
    body_loc = card.locator("[data-automation='reviewText']")
    if body_loc.count():
        text = pick_longest_text(body_loc)
    if not text:
        text = pick_longest_text(card.locator(":scope span[lang]"))
    if not text:
        text = pick_longest_text(card.locator(":scope p, :scope q, :scope div"))

    # Rating: parsed from an aria-label that mentions "bubbles".
    rating = None
    try:
        rate_el = card.locator("[aria-label*='bubbles']").first
        if rate_el.count():
            label = rate_el.get_attribute("aria-label") or ""
            rating = extract_rating(label)
    except Exception:
        pass

    # Dates.
    written_date = _first_inner_text(card.locator("span:has-text('Written')"))
    travel_date = None
    try:
        exp = card.locator(":scope :text('Date of experience')")
        if exp.count():
            travel_date = exp.first.inner_text().strip()
        else:
            # Fall back to the first "Month YYYY" pattern anywhere in the card.
            blob = card.inner_text()
            m = re.search(r"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s+\d{4}", blob, re.I)
            if m:
                travel_date = m.group(0)
    except Exception:
        pass

    # Language (when a span[lang] is present).
    language = None
    try:
        lang_spans = card.locator("span[lang]")
        if lang_spans.count():
            language = lang_spans.first.get_attribute("lang")
    except Exception:
        pass

    # Author: dedicated node if present, otherwise the first link in the card.
    author = None
    try:
        name_loc = card.locator("[data-automation='memberName']")
        if name_loc.count():
            author = name_loc.first.inner_text().strip() or None
        else:
            links = card.get_by_role("link")
            if links.count():
                author = links.first.inner_text().strip() or None
    except Exception:
        pass

    # Reviewer location.
    location_txt = _first_inner_text(
        card.locator("[data-automation='reviewerLocation'], span[data-test-target='reviewer-location']")
    )

    # Contribution count / helpful votes.
    contribution_count = _first_count(card.locator(":scope span:has-text('contribution')"))
    helpful_votes = _first_count(card.locator(":scope span:has-text('helpful')"))

    return Review(
        title=title,
        text=text,
        rating=rating,
        travel_date=travel_date,
        written_date=written_date,
        language=language,
        author=author,
        location=location_txt,
        contribution_count=contribution_count,
        helpful_votes=helpful_votes,
        url=page_url,
    ).to_dict()


def _go_to_next_page(page: Any, visited_page_urls: set) -> bool:
    """Click a "next page" control; return True only if it led to an unvisited URL."""
    for sel in _NEXT_SELECTORS:
        try:
            loc = page.locator(sel)
            if not (loc.count() and loc.first.is_visible()):
                continue
            el = loc.first
            if not el.is_enabled():
                continue
            el.click()
            _rand_sleep(1.0, 2.0)
            page.wait_for_load_state("domcontentloaded")
            # Guard against pagination loops: landing on a known URL is not progress.
            if page.url not in visited_page_urls:
                return True
        except Exception:
            pass
    return False


def _scrape_reviews_blocking(url: str, max_pages: int, page_timeout_ms: int) -> List[Dict[str, Any]]:
    """Run the whole scrape with the Playwright Sync API in the calling thread."""
    reviews: List[Dict[str, Any]] = []

    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(headless=True, args=[
            "--no-sandbox",
            "--disable-setuid-sandbox",
            "--disable-dev-shm-usage",
        ])
        context = None
        try:
            context = browser.new_context(
                user_agent=(
                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                    "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
                ),
                locale="en-US",
                viewport={"width": 1366, "height": 900},
                extra_http_headers={"Accept-Language": "en-US,en;q=0.9"},
            )
            page = context.new_page()
            page.set_default_timeout(page_timeout_ms)

            # Open the page.
            page.goto(url, wait_until="domcontentloaded")
            _rand_sleep()
            _prepare_reviews_page(page)

            visited_page_urls = set()
            page_index = 1
            while page_index <= max_pages:
                visited_page_urls.add(page.url)
                _expand_truncated_reviews(page)

                cards = page.locator(", ".join(_CARD_SELECTORS))
                for i in range(cards.count()):
                    reviews.append(_extract_review(cards.nth(i), page.url))

                if not _go_to_next_page(page, visited_page_urls):
                    break
                page_index += 1
        finally:
            # Release the browser even when navigation or extraction raised.
            if context is not None:
                context.close()
            browser.close()

    return reviews


def scrape_tripadvisor_reviews(url: str, max_pages: int = 50, page_timeout_ms: int = 15000) -> List[Dict[str, Any]]:
    """Scrape reviews from a TripAdvisor listing page with headless Chromium.

    The page is opened, optional consent/tab/translation controls are clicked,
    and review cards are read page by page until *max_pages* is reached or no
    unvisited "next" page can be opened.

    The Playwright Sync API refuses to start inside a running asyncio event
    loop (for example in a Jupyter cell). In that situation the scrape is run
    in a worker thread and this call blocks until it finishes.

    Args:
        url: Address of the listing page to start from.
        max_pages: Upper bound on the number of result pages to read.
        page_timeout_ms: Default Playwright timeout, in milliseconds, applied
            to page operations.

    Returns:
        One dict per review card, as produced by ``Review(...).to_dict()``.
        Fields that could not be located are None.

    Raises:
        Exception: Errors from launching the browser or the initial navigation
            propagate to the caller; per-field extraction errors do not.
    """
    import asyncio
    from concurrent.futures import ThreadPoolExecutor

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No event loop is running in this thread: the sync API is usable here.
        return _scrape_reviews_blocking(url, max_pages, page_timeout_ms)

    # A loop is running in this thread; a fresh thread has none, so the sync
    # API can start there. The caller's loop is blocked while we wait.
    with ThreadPoolExecutor(max_workers=1) as pool:
        future = pool.submit(_scrape_reviews_blocking, url, max_pages, page_timeout_ms)
        return future.result()

In [1]:
# app.py
import re
import time
import random
from dataclasses import dataclass, asdict
from typing import Any, Dict, List, Optional

from flask import Flask, request, jsonify
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeoutError

# Flask application instance for this module.
app = Flask(__name__)

# ---------- Small helpers ----------

def _rand_sleep(a: float = 0.4, b: float = 1.2):
    """Block for a random number of seconds drawn uniformly between *a* and *b*."""
    delay_seconds = random.uniform(a, b)
    time.sleep(delay_seconds)

In [None]:
    # Exploratory run: open one TripAdvisor attraction page in headless Chromium
    # and dismiss the cookie/consent banner if one is shown.
    # NOTE(review): the output recorded after this cell reports that the
    # Playwright Sync API cannot be used inside a running asyncio loop, so this
    # cell fails as written when executed in that environment.
    url = "https://www.tripadvisor.com/Attraction_Review-g295415-d555731-Reviews-Kuang_Si_Falls-Luang_Prabang_Luang_Prabang_Province.html"
    with sync_playwright() as p:
        # Launch flags disable Chromium's sandbox and /dev/shm usage.
        browser = p.chromium.launch(headless=True, args=[
            "--no-sandbox",
            "--disable-setuid-sandbox",
            "--disable-dev-shm-usage",
        ])
        # Browser context presenting a desktop Chrome user agent with en-US settings.
        context = browser.new_context(
            user_agent=(
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
            ),
            locale="en-US",
            viewport={"width": 1366, "height": 900},
            extra_http_headers={"Accept-Language": "en-US,en;q=0.9"},
        )
        page = context.new_page()
        # Default timeout for page operations, in milliseconds.
        page.set_default_timeout(3000)

        # Open the page.
        page.goto(url, wait_until="domcontentloaded")
        _rand_sleep()

        # Cookie/consent banner: click the first visible matching button;
        # any failure is ignored so the run can continue.
        try:
            consent = page.get_by_role("button", name=re.compile(r"(Accept|Agree|I agree|OK)", re.I))
            if consent.count() > 0 and consent.first.is_visible():
                consent.first.click()
                _rand_sleep()
        except Exception:
            pass

Error: It looks like you are using Playwright Sync API inside the asyncio loop.
Please use the Async API instead.