In [1]:
API_KEYS = [
    "",
    "",
    "",
]


In [25]:
# -------------------- CONFIG --------------------

CSV_PATH = "destination_activities_generalised.csv"  # your CSV with: location, activity_1..activity_5

TOP_N_ACTIVITIES        = None   # None = all activity columns; or set e.g. 3
TARGET_REVIEWS_PER_QUERY = 100   # stop early once we reach this many reviews
MAX_REVIEWS_PER_QUERY    = 200   # hard cap per query

PAGE_SIZE               = 20     # max allowed by API v1
NEXT_PAGE_DELAY_SEC     = 2.0    # short pause before using nextPageToken
REQUESTS_PER_SECOND     = 2.0    # throttle all HTTP calls
DETAILS_SLEEP_SEC       = 0.15   # tiny pause between details
MAX_RETRIES             = 4

# -------------------- IMPORTS --------------------
import time, json, math, random, requests
import pandas as pd
from typing import List, Dict, Any, Optional, Tuple

# -------------------- BUILD QUERIES --------------------
def build_dict_from_csv(csv_path: str, top_n: Optional[int]) -> Dict[str, List[str]]:
    df = pd.read_csv(csv_path)
    cols = list(df.columns)
    if len(cols) < 6:
        raise ValueError("CSV must have at least 6 columns: location + 5 activities.")
    location_col = cols[0]
    activity_cols = [c for c in cols if c.lower().startswith("activity_")] or cols[1:6]

    acts_by_loc: Dict[str, List[str]] = {}
    for _, row in df.iterrows():
        loc = str(row[location_col]).strip()
        acts = []
        for c in activity_cols:
            v = row.get(c, None)
            if pd.notna(v):
                s = str(v).strip()
                if s: acts.append(s)
        if top_n is not None:
            acts = acts[:top_n]
        acts_by_loc[loc] = acts
    return acts_by_loc

def build_queries_from_dict(acts_by_loc: Dict[str, List[str]]) -> List[str]:
    return [f"{a} in {loc}" for loc, acts in acts_by_loc.items() for a in acts]

# -------------------- KEY ROTATION & THROTTLE --------------------
class KeyRotator:
    def __init__(self, keys: List[str]):
        if not keys: raise ValueError("Provide at least one API key.")
        self.keys = keys
        self.i = 0
    def next(self) -> str:
        k = self.keys[self.i]
        self.i = (self.i + 1) % len(self.keys)
        return k

rotator = KeyRotator(API_KEYS)
_last_request_ts = 0.0

def _throttle():
    global _last_request_ts
    interval = 1.0 / max(0.1, REQUESTS_PER_SECOND)
    now = time.time()
    sleep_for = max(0.0, _last_request_ts + interval - now)
    if sleep_for: time.sleep(sleep_for)
    _last_request_ts = time.time()

# -------------------- API v1 CALLS (your working pattern) --------------------
SEARCH_URL  = "https://places.googleapis.com/v1/places:searchText"
DETAILS_URL = "https://places.googleapis.com/v1/places/{place_id}"

def _post_json(url: str, headers: Dict[str,str], json_body: Dict[str,Any]) -> Dict[str,Any]:
    # basic retry/backoff on 429 / 5xx
    delay = 1.0
    for attempt in range(1, MAX_RETRIES+1):
        _throttle()
        r = requests.post(url, headers=headers, json=json_body, timeout=30)
        if r.status_code == 200:
            return r.json()
        if r.status_code in (429, 500, 502, 503, 504):
            time.sleep(delay); delay = min(delay*2, 20)
            continue
        # surface errors
        try:
            msg = r.json()
        except Exception:
            msg = r.text
        raise RuntimeError(f"POST {url} failed ({r.status_code}): {msg}")
    # last try
    _throttle()
    r = requests.post(url, headers=headers, json=json_body, timeout=30)
    try:
        return r.json()
    except Exception:
        raise RuntimeError(f"POST {url} failed after retries: {r.status_code}")

def _get_json(url: str, headers: Dict[str,str]) -> Dict[str,Any]:
    delay = 1.0
    for attempt in range(1, MAX_RETRIES+1):
        _throttle()
        r = requests.get(url, headers=headers, timeout=30)
        if r.status_code == 200:
            return r.json()
        if r.status_code in (429, 500, 502, 503, 504):
            time.sleep(delay); delay = min(delay*2, 20)
            continue
        try:
            msg = r.json()
        except Exception:
            msg = r.text
        raise RuntimeError(f"GET {url} failed ({r.status_code}): {msg}")
    _throttle()
    r = requests.get(url, headers=headers, timeout=30)
    try:
        return r.json()
    except Exception:
        raise RuntimeError(f"GET {url} failed after retries: {r.status_code}")

def places_search_text_v1(query: str, key: str, page_token: Optional[str] = None) -> Dict[str,Any]:
    headers = {
        "Content-Type": "application/json",
        "X-Goog-Api-Key": key,
        # Include nextPageToken in mask (as you did)
        "X-Goog-FieldMask": "places.id,places.displayName,places.formattedAddress,nextPageToken",
    }
    body = {
        "textQuery": query,
        "pageSize": PAGE_SIZE
    }
    if page_token:
        body["pageToken"] = page_token
    return _post_json(SEARCH_URL, headers, body)

def place_details_v1(place_id: str, key: str) -> Dict[str,Any]:
    """
    Places API (New) details request with an explicit field mask for review subfields.
    """
    headers = {
        "Content-Type": "application/json",
        "X-Goog-Api-Key": key,
        # Ask only for what we need – include nested review fields explicitly
        "X-Goog-FieldMask": (
            "id,displayName,formattedAddress,rating,userRatingCount,"
            "reviews.rating,"
            "reviews.text,"                       # {text, languageCode}
            "reviews.originalText,"               # {text, languageCode}
            "reviews.authorAttribution.displayName,"
            "reviews.publishTime,"
            "reviews.relativePublishTimeDescription"
        ),
    }
    url = DETAILS_URL.format(place_id=place_id)
    return _get_json(url, headers)

# -------------------- RESUMABLE IN-MEMORY STATE --------------------
def init_state_new(queries: List[str], prev_state: Optional[Dict[str,Any]] = None) -> Dict[str,Any]:
    state = prev_state.copy() if prev_state else {"queries": {}, "completed": []}
    for q in queries:
        if q not in state["queries"]:
            state["queries"][q] = {
                "nextPageToken": None,
                "places_done": {},          # place_id -> True
                "reviews_collected": 0,
                "done": False
            }
    return state

# -------------------- ONE QUERY (one page per pass) --------------------
def run_one_query_new(query: str, state: Dict[str,Any], verbose: bool = True) -> List[Dict[str,Any]]:
    q = state["queries"][query]
    rows: List[Dict[str,Any]] = []

    if q["done"] or q["reviews_collected"] >= MAX_REVIEWS_PER_QUERY:
        q["done"] = True
        if query not in state["completed"]: state["completed"].append(query)
        if verbose: print(f"[{query}] already done (collected={q['reviews_collected']}).")
        return rows

    if q["nextPageToken"]:
        time.sleep(NEXT_PAGE_DELAY_SEC)

    resp = places_search_text_v1(query, rotator.next(), page_token=q["nextPageToken"])
    places = resp.get("places", []) or []
    q["nextPageToken"] = resp.get("nextPageToken")

    if verbose:
        print(f"[{query}] places={len(places)}, nextPageToken={bool(q['nextPageToken'])}")

    # optional priority: most-rated first (if userRatingCount present)
    places.sort(key=lambda p: p.get("userRatingCount", 0), reverse=True)

    for p in places:
        if q["reviews_collected"] >= MAX_REVIEWS_PER_QUERY:
            q["done"] = True
            if query not in state["completed"]: state["completed"].append(query)
            break

        pid = p.get("id")
        if not pid or q["places_done"].get(pid):
            continue

        det = place_details_v1(pid, rotator.next())
        reviews = det.get("reviews", []) or []

        name = (det.get("displayName") or {}).get("text", p.get("displayName", {}).get("text",""))
        addr = det.get("formattedAddress", p.get("formattedAddress",""))
        rating = det.get("rating")
        count  = det.get("userRatingCount")

        remain = MAX_REVIEWS_PER_QUERY - q["reviews_collected"]
        to_take = reviews[:remain]

        for r in to_take:
            # --- SAFE extraction for text + language ---
            txt_obj = r.get("text")
            if isinstance(txt_obj, dict):
                review_text = (txt_obj.get("text") or "")
                lang_from_text = txt_obj.get("languageCode")
            else:
                review_text = (txt_obj or "")
                lang_from_text = None

            orig_obj = r.get("originalText")
            lang_from_orig = None
            if isinstance(orig_obj, dict):
                lang_from_orig = orig_obj.get("languageCode")
                # if you prefer original text over translated:
                if not review_text:
                    review_text = (orig_obj.get("text") or "")

            review_language = lang_from_text or lang_from_orig

            # Normalize text (only if it's a string)
            if isinstance(review_text, str):
                review_text = review_text.replace("\n", " ").strip()

            # split "activity in location"
            activity, location = None, None
            if " in " in query:
                parts = query.rsplit(" in ", 1)
                activity, location = parts[0], parts[1]


            rows.append({
                "query": query,
                "activity": activity,
                "location": location,
                "place_id": pid,
                "place_name": name,
                "formatted_address": addr,
                "rating": rating,
                "user_ratings_total": count,
                "review_author_name": ((r.get("authorAttribution") or {}).get("displayName")),
                "review_rating": r.get("rating"),
                "review_relative_time": r.get("relativePublishTimeDescription"),
                "review_text": review_text,
                "review_language": review_language,
                "publishTime": r.get("publishTime"),
            })

        q["reviews_collected"] += len(to_take)
        q["places_done"][pid] = True

        time.sleep(DETAILS_SLEEP_SEC)

        if q["reviews_collected"] >= TARGET_REVIEWS_PER_QUERY:
            q["done"] = True
            if query not in state["completed"]: state["completed"].append(query)
            break

    if not q["nextPageToken"]:
        q["done"] = True
        if query not in state["completed"]: state["completed"].append(query)

    if verbose and not rows:
        print(f"[{query}] no reviews appended this pass.")
    return rows

# -------------------- BATCH RUNNER --------------------
def run_all_queries_new(
    prev_state: Optional[Dict[str,Any]] = None,
    max_queries_per_run: Optional[int] = 50
) -> Tuple[pd.DataFrame, Dict[str,Any]]:
    acts_by_loc = build_dict_from_csv(CSV_PATH, TOP_N_ACTIVITIES)
    queries = build_queries_from_dict(acts_by_loc)
    state = init_state_new(queries, prev_state)

    pending = [q for q in queries if not state["queries"][q]["done"]]
    if max_queries_per_run is not None:
        pending = pending[:max_queries_per_run]

    new_rows: List[Dict[str,Any]] = []
    for idx, q in enumerate(pending, 1):
        print(f"[{idx}/{len(pending)}] {q}")
        new_rows.extend(run_one_query_new(q, state, verbose=True))

    df_batch = pd.DataFrame(new_rows, columns=[
        "query","activity","location",
        "place_id","place_name","formatted_address",
        "rating","user_ratings_total","review_author_name","review_rating",
        "review_relative_time","review_text","review_language","publishTime"
    ])

    return df_batch, state

#-------------- PROGRESS ----------------------

def summarize_progress(state: dict, target: int = 100) -> pd.DataFrame:
    """
    Build a DataFrame showing progress for every query in `state`.
    Assumes query format: "<activity> in <location>".
    """
    rows = []
    qmap = state.get("queries", {})
    for q, s in qmap.items():
        # try to split "activity in location" (robust to accidental extra " in ")
        activity, location = None, None
        if " in " in q:
            parts = q.rsplit(" in ", 1)
            activity = parts[0]
            location = parts[1]
        rows.append({
            "query": q,
            "location": location,
            "activity": activity,
            "reviews_collected": s.get("reviews_collected", 0),
            "done": bool(s.get("done", False)),
            "has_next_page": bool(s.get("nextPageToken")),
            "places_processed": len(s.get("places_done", {})),
            "remaining_to_target": max(0, target - int(s.get("reviews_collected", 0))),
        })
    df = pd.DataFrame(rows).sort_values(
        ["done", "remaining_to_target", "reviews_collected"],
        ascending=[True, True, False]
    ).reset_index(drop=True)

    return df


In [11]:
df_batch, state = run_all_queries_new(prev_state=None, max_queries_per_run=50)
df_batch.head()

[1/50] immersive experience in London
[immersive experience in London] places=20, nextPageToken=True
[2/50] theatre in London
[theatre in London] places=20, nextPageToken=True
[3/50] boat cruise in London
[boat cruise in London] places=20, nextPageToken=True
[4/50] museum in London
[museum in London] places=20, nextPageToken=True
[5/50] market in London
[market in London] places=20, nextPageToken=True
[6/50] boat cruise in Paris
[boat cruise in Paris] places=20, nextPageToken=True
[7/50] landmark in Paris
[landmark in Paris] places=20, nextPageToken=True
[8/50] museum in Paris
[museum in Paris] places=20, nextPageToken=True
[9/50] neighborhood in Paris
[neighborhood in Paris] places=2, nextPageToken=False
[neighborhood in Paris] no reviews appended this pass.
[10/50] park/garden in Paris
[park/garden in Paris] places=20, nextPageToken=True
[11/50] architecture in Barcelona
[architecture in Barcelona] places=20, nextPageToken=True
[12/50] water sport in Barcelona
[water sport in Barcelo

Unnamed: 0,query,place_id,place_name,formatted_address,rating,user_ratings_total,review_author_name,review_rating,review_relative_time,review_text,review_language,publishTime
0,immersive experience in London,ChIJW4IlshQDdkgR7CWekBzGqJI,The War of The Worlds: The Immersive Experience,"56 Leadenhall St, London EC3A 2BJ, UK",4.8,3088,paula-lou Salkeld (Pezzy),4,3 weeks ago,As massive war of the worlds fans we weren't s...,en,2025-08-04T12:00:55.142807609Z
1,immersive experience in London,ChIJW4IlshQDdkgR7CWekBzGqJI,The War of The Worlds: The Immersive Experience,"56 Leadenhall St, London EC3A 2BJ, UK",4.8,3088,Sarah Harleyquinn,5,4 months ago,Having always liked War Of the Worlds my partn...,en,2025-04-21T08:53:41.700071Z
2,immersive experience in London,ChIJW4IlshQDdkgR7CWekBzGqJI,The War of The Worlds: The Immersive Experience,"56 Leadenhall St, London EC3A 2BJ, UK",4.8,3088,Hannah Wild,5,4 months ago,"Wow, what a fun experience! I didn't know any...",en,2025-04-21T07:27:28.218635Z
3,immersive experience in London,ChIJW4IlshQDdkgR7CWekBzGqJI,The War of The Worlds: The Immersive Experience,"56 Leadenhall St, London EC3A 2BJ, UK",4.8,3088,Tim Oldland,5,a week ago,Absolutely fantastic experience! I’ve been a f...,en,2025-08-16T07:57:11.543664413Z
4,immersive experience in London,ChIJW4IlshQDdkgR7CWekBzGqJI,The War of The Worlds: The Immersive Experience,"56 Leadenhall St, London EC3A 2BJ, UK",4.8,3088,Velda Mazzara,5,5 months ago,"The experience was really good, could not faul...",en,2025-03-19T09:53:41.343420Z


In [7]:
df_batch.head(20)

Unnamed: 0,query,place_id,place_name,formatted_address,rating,user_ratings_total,review_author_name,review_rating,review_relative_time,review_text,review_language,publishTime
0,immersive experience in London,ChIJW4IlshQDdkgR7CWekBzGqJI,The War of The Worlds: The Immersive Experience,"56 Leadenhall St, London EC3A 2BJ, UK",4.8,3088,paula-lou Salkeld (Pezzy),4,3 weeks ago,As massive war of the worlds fans we weren't s...,en,2025-08-04T12:00:55.142807609Z
1,immersive experience in London,ChIJW4IlshQDdkgR7CWekBzGqJI,The War of The Worlds: The Immersive Experience,"56 Leadenhall St, London EC3A 2BJ, UK",4.8,3088,Sarah Harleyquinn,5,4 months ago,Having always liked War Of the Worlds my partn...,en,2025-04-21T08:53:41.700071Z
2,immersive experience in London,ChIJW4IlshQDdkgR7CWekBzGqJI,The War of The Worlds: The Immersive Experience,"56 Leadenhall St, London EC3A 2BJ, UK",4.8,3088,Hannah Wild,5,4 months ago,"Wow, what a fun experience! I didn't know any...",en,2025-04-21T07:27:28.218635Z
3,immersive experience in London,ChIJW4IlshQDdkgR7CWekBzGqJI,The War of The Worlds: The Immersive Experience,"56 Leadenhall St, London EC3A 2BJ, UK",4.8,3088,Tim Oldland,5,a week ago,Absolutely fantastic experience! I’ve been a f...,en,2025-08-16T07:57:11.543664413Z
4,immersive experience in London,ChIJW4IlshQDdkgR7CWekBzGqJI,The War of The Worlds: The Immersive Experience,"56 Leadenhall St, London EC3A 2BJ, UK",4.8,3088,Velda Mazzara,5,5 months ago,"The experience was really good, could not faul...",en,2025-03-19T09:53:41.343420Z
5,immersive experience in London,ChIJW0sjYeEFdkgRdwCty-McgJs,"Immersive Gamebox - Southbank, London","Arch 3, 83 Scoresby St, London SE1 0XN, UK",4.9,4532,Jez Neeks,5,a week ago,"This is good fun for couples, families, friend...",en,2025-08-17T18:07:52.134500844Z
6,immersive experience in London,ChIJW0sjYeEFdkgRdwCty-McgJs,"Immersive Gamebox - Southbank, London","Arch 3, 83 Scoresby St, London SE1 0XN, UK",4.9,4532,Helena Hughes,5,a week ago,Such a fun experience! Sam was so kind and hel...,en,2025-08-19T18:14:56.705732603Z
7,immersive experience in London,ChIJW0sjYeEFdkgRdwCty-McgJs,"Immersive Gamebox - Southbank, London","Arch 3, 83 Scoresby St, London SE1 0XN, UK",4.9,4532,Holly Berndsen,5,2 months ago,SO MUCH FUN! I booked this for my boyfriend’s ...,en,2025-06-27T02:42:24.617526914Z
8,immersive experience in London,ChIJW0sjYeEFdkgRdwCty-McgJs,"Immersive Gamebox - Southbank, London","Arch 3, 83 Scoresby St, London SE1 0XN, UK",4.9,4532,Jade Bonifacio,5,a month ago,"Pricey, but the kids really, really enjoyed th...",en,2025-07-23T17:17:23.086481040Z
9,immersive experience in London,ChIJW0sjYeEFdkgRdwCty-McgJs,"Immersive Gamebox - Southbank, London","Arch 3, 83 Scoresby St, London SE1 0XN, UK",4.9,4532,Harmz B,5,6 months ago,We visited Immersive Gamebox in Southbank for ...,en,2025-02-03T02:48:22.659003Z


In [9]:
df_batch.to_csv("/Users/benjaminluff/code/benluff303/swipe-sense/London_immersive")

In [15]:
len(df_batch)

3720

In [16]:
df_batch2, state = run_all_queries_new(prev_state=state, max_queries_per_run=100)

print_progress_summary(state, target=TARGET_REVIEWS_PER_QUERY)

[1/100] adventure sport in Barcelona
[adventure sport in Barcelona] places=20, nextPageToken=True
[2/100] shop in Venice
[shop in Venice] places=20, nextPageToken=True
[3/100] market in Athens
[market in Athens] places=20, nextPageToken=True
[4/100] extreme sport in Interlaken
[extreme sport in Interlaken] places=11, nextPageToken=False
[5/100] hiking in Interlaken
[hiking in Interlaken] places=20, nextPageToken=True
[6/100] boat cruise in Interlaken
[boat cruise in Interlaken] places=3, nextPageToken=False
[7/100] water sport in Interlaken
[water sport in Interlaken] places=20, nextPageToken=True
[8/100] workshop in Interlaken
[workshop in Interlaken] places=20, nextPageToken=True
[9/100] hiking in Dolomites
[hiking in Dolomites] places=9, nextPageToken=False
[10/100] lake in Dolomites
[lake in Dolomites] places=4, nextPageToken=False
[11/100] meadow in Dolomites
[meadow in Dolomites] places=1, nextPageToken=False
[12/100] museum in Dolomites
[museum in Dolomites] places=11, nextPageT

NameError: name 'print_progress_summary' is not defined

In [18]:
len(df_batch2)

6291

In [19]:
df_batch3, state = run_all_queries_new(prev_state=state, max_queries_per_run=100)

[1/100] hiking in Interlaken
[hiking in Interlaken] places=17, nextPageToken=False
[2/100] workshop in Interlaken
[workshop in Interlaken] places=3, nextPageToken=False
[3/100] hiking in Lofoten Islands
[hiking in Lofoten Islands] places=20, nextPageToken=True
[4/100] wildlife in Faroe Islands
[wildlife in Faroe Islands] places=16, nextPageToken=False
[5/100] hiking in Faroe Islands
[hiking in Faroe Islands] places=20, nextPageToken=True
[6/100] wildlife in Azores
[wildlife in Azores] places=20, nextPageToken=True
[7/100] hiking in Scottish Highlands
[hiking in Scottish Highlands] places=20, nextPageToken=True
[8/100] market in Tokyo
[market in Tokyo] places=20, nextPageToken=True
[9/100] viewpoint in Hong Kong
[viewpoint in Hong Kong] places=20, nextPageToken=True
[10/100] viewpoint in Singapore
[viewpoint in Singapore] places=20, nextPageToken=True
[11/100] historic site in Hanoi
[historic site in Hanoi] places=20, nextPageToken=True
[12/100] cooking class in Siem Reap
[cooking class

In [20]:
len(df_batch3)

5234

In [21]:
df_batch4, state = run_all_queries_new(prev_state=state, max_queries_per_run=100)

[1/100] boat cruise in Raja Ampat
[boat cruise in Raja Ampat] places=9, nextPageToken=False
[2/100] hiking in Raja Ampat
[hiking in Raja Ampat] places=20, nextPageToken=True
[3/100] village visit in Raja Ampat
[village visit in Raja Ampat] places=20, nextPageToken=True
[4/100] boat cruise in Palawan
[boat cruise in Palawan] places=20, nextPageToken=True
[5/100] night activity in Palawan
[night activity in Palawan] places=20, nextPageToken=True
[6/100] boat cruise in Komodo Island
[boat cruise in Komodo Island] places=3, nextPageToken=False
[7/100] camping in Ladakh
[camping in Ladakh] places=20, nextPageToken=True
[8/100] temple in Bagan
[temple in Bagan] places=20, nextPageToken=True
[9/100] island hopping in Boracay
[island hopping in Boracay] places=2, nextPageToken=False
[10/100] adventure sport in Boracay
[adventure sport in Boracay] places=5, nextPageToken=False
[11/100] neighborhood in Miami
[neighborhood in Miami] places=20, nextPageToken=True
[12/100] market in Miami
[market i

In [22]:
len(df_batch4)

5692

In [24]:
df_batch4.head()

Unnamed: 0,query,place_id,place_name,formatted_address,rating,user_ratings_total,review_author_name,review_rating,review_relative_time,review_text,review_language,publishTime
0,boat cruise in Raja Ampat,ChIJaeiH1qb8Qi0RV_i5xC7DwM4,Misool Resort,"Yellu, South Misool, Raja Ampat Regency, West ...",4.8,111,N T,5,a month ago,"This is a special place, and we decided to go ...",en,2025-07-20T07:53:28.076436006Z
1,boat cruise in Raja Ampat,ChIJaeiH1qb8Qi0RV_i5xC7DwM4,Misool Resort,"Yellu, South Misool, Raja Ampat Regency, West ...",4.8,111,Sandra Rodner,5,10 months ago,We visited Misool September 2024 and had an in...,en,2024-10-06T19:20:53.449974Z
2,boat cruise in Raja Ampat,ChIJaeiH1qb8Qi0RV_i5xC7DwM4,Misool Resort,"Yellu, South Misool, Raja Ampat Regency, West ...",4.8,111,Gintare,5,a year ago,Just came back from spending a week in Misool ...,en,2024-04-13T06:36:11.936176Z
3,boat cruise in Raja Ampat,ChIJaeiH1qb8Qi0RV_i5xC7DwM4,Misool Resort,"Yellu, South Misool, Raja Ampat Regency, West ...",4.8,111,mickisis1 (Bonnie S),4,a year ago,I would have rated the rooms five stars if the...,en,2023-11-29T12:27:35.173891Z
4,boat cruise in Raja Ampat,ChIJaeiH1qb8Qi0RV_i5xC7DwM4,Misool Resort,"Yellu, South Misool, Raja Ampat Regency, West ...",4.8,111,Ken & Steve Timmons-Kinholt,5,2 years ago,I can't say enough good things about this plac...,en,2023-03-12T14:14:50.823456Z


In [26]:
df_batch5, state = run_all_queries_new(prev_state=state, max_queries_per_run=100)

[1/100] bridge/walk in Vancouver
[bridge/walk in Vancouver] places=6, nextPageToken=False
[2/100] kayaking in Vancouver
[kayaking in Vancouver] places=20, nextPageToken=True
[3/100] wildlife safari in Alaska/Denali
[wildlife safari in Alaska/Denali] places=2, nextPageToken=False
[4/100] historic site in Honolulu/Oahu
[historic site in Honolulu/Oahu] places=20, nextPageToken=True
[5/100] adventure sport in Honolulu/Oahu
[adventure sport in Honolulu/Oahu] places=20, nextPageToken=True
[6/100] spa/wellness in Quebec City
[spa/wellness in Quebec City] places=20, nextPageToken=True
[7/100] adventure sport in Rio de Janeiro
[adventure sport in Rio de Janeiro] places=20, nextPageToken=True
[8/100] cultural activity in Havana
[cultural activity in Havana] places=1, nextPageToken=False
[9/100] museum in Havana
[museum in Havana] places=20, nextPageToken=True
[10/100] water sport in Cancún
[water sport in Cancún] places=11, nextPageToken=False
[11/100] historic site in Cancún
[historic site in C

In [28]:
len(df_batch5)

5254

In [29]:
df_batch5.head()

Unnamed: 0,query,activity,location,place_id,place_name,formatted_address,rating,user_ratings_total,review_author_name,review_rating,review_relative_time,review_text,review_language,publishTime
0,bridge/walk in Vancouver,bridge/walk,Vancouver,ChIJXeuJMwxuhlQRc-9kxg3cx0w,West Vancouver Centennial Seawalk,"2240 Bellevue Ave, West Vancouver, BC V7V 1C6,...",4.7,247,Harry Li,5,a year ago,I simply cannot find any word to describe this...,en,2024-06-09T02:56:58.261667Z
1,bridge/walk in Vancouver,bridge/walk,Vancouver,ChIJXeuJMwxuhlQRc-9kxg3cx0w,West Vancouver Centennial Seawalk,"2240 Bellevue Ave, West Vancouver, BC V7V 1C6,...",4.7,247,Eddy Sarkisov,5,11 months ago,This is such a beautiful and spacious pathway ...,en,2024-09-27T05:57:59.363850Z
2,bridge/walk in Vancouver,bridge/walk,Vancouver,ChIJXeuJMwxuhlQRc-9kxg3cx0w,West Vancouver Centennial Seawalk,"2240 Bellevue Ave, West Vancouver, BC V7V 1C6,...",4.7,247,Christian Johannsen,5,a year ago,Nice place to take a stroll along the ocean. P...,en,2024-05-01T14:58:31.586020Z
3,bridge/walk in Vancouver,bridge/walk,Vancouver,ChIJXeuJMwxuhlQRc-9kxg3cx0w,West Vancouver Centennial Seawalk,"2240 Bellevue Ave, West Vancouver, BC V7V 1C6,...",4.7,247,James Concepcion,5,7 years ago,My favorite place to walk in the world. It is ...,en,2018-06-06T01:31:49.509Z
4,bridge/walk in Vancouver,bridge/walk,Vancouver,ChIJXeuJMwxuhlQRc-9kxg3cx0w,West Vancouver Centennial Seawalk,"2240 Bellevue Ave, West Vancouver, BC V7V 1C6,...",4.7,247,Cara Black,5,2 months ago,Gorgeous view and places to sit! Highly recomm...,en,2025-06-19T17:44:57.125456083Z


In [30]:
df_batch5.review_text.iloc[0]

'I simply cannot find any word to describe this place. When you’re walking along the beach, you just couldn’t be upset, no matter what happened, you just feel so comfortable. Any random picture you take could be on the your home screen. The water temperature is OK to swim during the summer, there’s a lot of parking space at one end of the walk. You get to see the bridges Stanley Park, and always a lot of people boating, and big ships. Dogs are not permitted and so are bikes, but your dog could follow you just outside the fence if you trust them to do so. Nothing too special just a feeling of home.'

In [31]:
df_batch6, state = run_all_queries_new(prev_state=state, max_queries_per_run=100)

[1/100] historic site in Tulum
[historic site in Tulum] places=20, nextPageToken=True
[2/100] theme park in Punta Cana
[theme park in Punta Cana] places=20, nextPageToken=True
[3/100] hiking in Galápagos Islands
[hiking in Galápagos Islands] places=20, nextPageToken=True
[4/100] wildlife in Galápagos Islands
[wildlife in Galápagos Islands] places=5, nextPageToken=False
[5/100] horseback riding in Torres del Paine
[horseback riding in Torres del Paine] places=2, nextPageToken=False
[6/100] homestay in Lake Titicaca
[homestay in Lake Titicaca] places=20, nextPageToken=True
[7/100] historic site in Lake Titicaca
[historic site in Lake Titicaca] places=20, nextPageToken=True
[8/100] hiking in Lake Titicaca
[hiking in Lake Titicaca] places=20, nextPageToken=True
[9/100] boat ride in Bariloche
[boat ride in Bariloche] places=14, nextPageToken=False
[10/100] cultural site in Fernando de Noronha
[cultural site in Fernando de Noronha] places=20, nextPageToken=True
[11/100] market in San Juan
[m

In [32]:
df_batch6, state = run_all_queries_new(prev_state=state, max_queries_per_run=50)

[1/21] boat cruise in Okavango Delta
[boat cruise in Okavango Delta] places=19, nextPageToken=False
[2/21] desert tour in Namib Desert
[desert tour in Namib Desert] places=20, nextPageToken=True
[3/21] snorkeling in Seychelles
[snorkeling in Seychelles] places=20, nextPageToken=True
[4/21] water sport in Mauritius
[water sport in Mauritius] places=20, nextPageToken=True
[5/21] village visit in Mauritius


KeyboardInterrupt: 

In [33]:
len(df_batch6)

5676

In [34]:
df_batch6.head()

Unnamed: 0,query,activity,location,place_id,place_name,formatted_address,rating,user_ratings_total,review_author_name,review_rating,review_relative_time,review_text,review_language,publishTime
0,historic site in Tulum,historic site,Tulum,ChIJgTQhxTMoUI8R6NkHj0toHsk,Laguna de Kaan Luum,"77760 Tulum, Quintana Roo, Mexico",4.6,5419,Luiz Balbo,5,2 weeks ago,Absolutely stunning spot! Definitely a must-vi...,en,2025-08-12T12:58:59.650866975Z
1,historic site in Tulum,historic site,Tulum,ChIJgTQhxTMoUI8R6NkHj0toHsk,Laguna de Kaan Luum,"77760 Tulum, Quintana Roo, Mexico",4.6,5419,David Castañeda,5,a month ago,Laguna Kaan Luum is a stunning and peaceful la...,en,2025-07-16T20:15:39.022095397Z
2,historic site in Tulum,historic site,Tulum,ChIJgTQhxTMoUI8R6NkHj0toHsk,Laguna de Kaan Luum,"77760 Tulum, Quintana Roo, Mexico",4.6,5419,Dalila Rodriguez,5,a month ago,The lagoon is very kid-friendly—not deep at al...,en,2025-07-08T05:18:58.807980716Z
3,historic site in Tulum,historic site,Tulum,ChIJgTQhxTMoUI8R6NkHj0toHsk,Laguna de Kaan Luum,"77760 Tulum, Quintana Roo, Mexico",4.6,5419,Susana Simon,5,2 months ago,Great place to spend the day. You can bring yo...,en,2025-06-16T20:22:54.228641423Z
4,historic site in Tulum,historic site,Tulum,ChIJgTQhxTMoUI8R6NkHj0toHsk,Laguna de Kaan Luum,"77760 Tulum, Quintana Roo, Mexico",4.6,5419,valeriia u.,4,3 months ago,We arrived around 10 a.m. There were two of us...,en,2025-05-21T01:17:14.706070Z


In [35]:
df_batch7, state = run_all_queries_new(prev_state=state, max_queries_per_run=50)

[1/17] village visit in Mauritius
[village visit in Mauritius] places=20, nextPageToken=True
[2/17] historic site in Fes
[historic site in Fes] places=20, nextPageToken=True
[3/17] museum in Fes
[museum in Fes] places=20, nextPageToken=True
[4/17] landmark walk in Jerusalem
[landmark walk in Jerusalem] places=15, nextPageToken=False
[5/17] hiking in Petra
[hiking in Petra] places=20, nextPageToken=True
[6/17] landmark in Dubai
[landmark in Dubai] places=20, nextPageToken=True
[7/17] fishing in Musandam Fjords
[fishing in Musandam Fjords] places=5, nextPageToken=False
[8/17] snorkeling in Muscat
[snorkeling in Muscat] places=20, nextPageToken=True
[9/17] architecture in Tel Aviv
[architecture in Tel Aviv] places=19, nextPageToken=True
[10/17] old town tour in Tel Aviv
[old town tour in Tel Aviv] places=17, nextPageToken=True
[11/17] historic site in Sydney
[historic site in Sydney] places=20, nextPageToken=True
[12/17] biking in Queenstown
[biking in Queenstown] places=16, nextPageToken

In [36]:
df_batch6.tail()

Unnamed: 0,query,activity,location,place_id,place_name,formatted_address,rating,user_ratings_total,review_author_name,review_rating,review_relative_time,review_text,review_language,publishTime
5671,snorkeling in Cairns,snorkeling,Cairns,ChIJZSkWhAJkeGkRfFLN5DqVsIw,Spirit of Freedom - Dive Liveaboard,"319 Draper St, Parramatta Park QLD 4870, Austr...",4.8,104,Coco Tsai,5,5 months ago,Fantastic crew with amazing dives. First time ...,en,2025-03-17T11:51:16.666192Z
5672,snorkeling in Cairns,snorkeling,Cairns,ChIJZSkWhAJkeGkRfFLN5DqVsIw,Spirit of Freedom - Dive Liveaboard,"319 Draper St, Parramatta Park QLD 4870, Austr...",4.8,104,Rob McJannett,5,6 months ago,Incredible Crew & Unforgettable Diving! My 3-...,en,2025-02-28T00:41:58.962203Z
5673,snorkeling in Cairns,snorkeling,Cairns,ChIJZSkWhAJkeGkRfFLN5DqVsIw,Spirit of Freedom - Dive Liveaboard,"319 Draper St, Parramatta Park QLD 4870, Austr...",4.8,104,F S,5,6 months ago,The crew on the boat deserve 10 stars. I could...,en,2025-02-16T01:26:09.541078Z
5674,snorkeling in Cairns,snorkeling,Cairns,ChIJZSkWhAJkeGkRfFLN5DqVsIw,Spirit of Freedom - Dive Liveaboard,"319 Draper St, Parramatta Park QLD 4870, Austr...",4.8,104,Sean Flaherty,5,2 months ago,Unforgettable Dive Adventure – Spirit of Freed...,en,2025-06-01T12:48:31.585816Z
5675,snorkeling in Cairns,snorkeling,Cairns,ChIJZSkWhAJkeGkRfFLN5DqVsIw,Spirit of Freedom - Dive Liveaboard,"319 Draper St, Parramatta Park QLD 4870, Austr...",4.8,104,Piotr Ślatała,5,7 months ago,"Amazing experience. On the expensive side, but...",en,2025-01-06T00:22:22.631196Z
