# 1. Install Chrome Webdriver and dependencies in Colab

In [None]:
# Refresh the apt package index before installing anything.
!apt-get update
# Install the chromium-chromedriver package; -y answers the confirmation prompt.
!apt-get install -y chromium-chromedriver
# Copy the chromedriver binary into /usr/bin
# (presumably so Selenium can find it on PATH -- confirm on the current Colab image).
!cp /usr/lib/chromium-browser/chromedriver /usr/bin

# Install the Python packages imported by the cells below.
!pip install selenium pandas --quiet

# 2. Load dependencies

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from datetime import datetime
import pandas as pd
import time
import re

# 3. Define a function that extracts the place name from a URL

In [None]:
def get_place_name_from_url(url):
    """Return the place name embedded in a Google Maps URL.

    The name is the path segment that directly follows ``/place/``; every
    ``+`` in it is replaced with ``_``. Percent-escapes such as ``%26`` are
    left untouched. When the URL has no such segment, the fallback string
    ``'unknown_place'`` is returned.
    """
    found = re.search(r'/place/([^/]+)', url)
    if found is None:
        return 'unknown_place'

    segment = found.group(1)
    return '_'.join(segment.split('+'))

# 4. Define the function that scrapes reviews

In [None]:
def scrape_google_maps_reviews(place_url, max_wait=2, max_no_new_reviews=5):
    """Scrape review texts and star ratings from one Google Maps place page.

    Starts a headless Chromium session, opens ``place_url``, tries to get
    past the Google consent page, opens the reviews panel and keeps scrolling
    it until no additional reviews show up. Truncated reviews are expanded on
    every pass so that their full text can be read afterwards.

    The XPath selectors and the rating pattern rely on Indonesian UI labels
    ("Terima semua", "Ulasan", "Lihat lainnya", "bintang") and on specific CSS
    class names. NOTE(review): this presumably requires the page to be served
    in Indonesian (e.g. ``hl=id`` in the URL) -- other locales are untested.

    Args:
        place_url: URL of the Google Maps place to scrape.
        max_wait: Seconds to sleep after each scroll so new reviews can load.
        max_no_new_reviews: Number of consecutive scrolls that add no review
            after which scrolling stops.

    Returns:
        A pandas DataFrame with the columns ``rating`` (an int, or None when
        no rating could be parsed) and ``review`` (the stripped review text).
        Reviews without text are left out.

    Raises:
        Any exception raised while starting the browser, loading the page or
        locating the scrollable reviews container propagates to the caller.
        The browser is shut down first in every case.
    """
    # Set up Chrome options for headless operation in Colab
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.binary_location = "/usr/bin/chromium-browser"

    review_block_xpath = '//div[contains(@class, "jJc9Ad ")]'
    reviews = []
    ratings = []

    driver = webdriver.Chrome(options=chrome_options)
    # try/finally guarantees the browser process is released even when a
    # lookup below raises; otherwise every failed run leaks a Chromium.
    try:
        driver.get(place_url)
        time.sleep(5)  # Let the page load

        # Pass Google consent page
        if "consent.google.com" in driver.current_url or "/consent" in driver.current_url:
            print("Trying to pass Google consent page")
            try:
                WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//button[contains(@class, 'UywwFc-LgbsSe')]/span[text()='Terima semua']"))).click()
            except Exception:
                # Best effort: dump the page for debugging and carry on
                print(driver.page_source)

        # Wait for page to be ready
        while driver.execute_script("return document.readyState") != "complete":
            time.sleep(1)

        # Open the reviews panel (click the reviews button if present)
        try:
            all_reviews_button = driver.find_element(By.XPATH, '//button[contains(@aria-label, "Ulasan")]')
            all_reviews_button.click()
            time.sleep(3)
        except Exception:
            pass  # Button not found, maybe already in reviews

        # Wait for reviews container to load
        time.sleep(3)

        # Find the scrollable reviews container; the second selector is a
        # fallback and is allowed to raise if it does not match either.
        try:
            scrollable_div = driver.find_element(By.XPATH, '//div[@role="region" and @tabindex="0"]')
        except Exception:
            scrollable_div = driver.find_element(By.XPATH, '//div[contains(@class, "m6QErb DxyBCb kA9KIf dS8AEf XiKgde ")]')

        # Scroll until the number of review blocks stops growing, expanding
        # truncated reviews ("Lihat lainnya") along the way.
        last_review_count = 0
        no_new_reviews_count = 0

        while True:
            # Scroll down to load more reviews
            driver.execute_script("arguments[0].scrollTo(0, arguments[0].scrollHeight);", scrollable_div)
            time.sleep(max_wait)  # Wait for content to load

            # Click all "Lihat lainnya" buttons
            more_buttons = driver.find_elements(By.XPATH, '//button[contains(@aria-label, "Lihat lainnya")]')
            for btn in more_buttons:
                try:
                    driver.execute_script("arguments[0].click();", btn)
                    time.sleep(0.1)  # Small delay after clicking
                except Exception as e:
                    print(f"Could not click a 'Lihat lainnya' button: {e}")
                    continue

            # Check if new reviews have loaded
            current_review_count = len(driver.find_elements(By.XPATH, review_block_xpath))

            if current_review_count == last_review_count:
                no_new_reviews_count += 1
                if no_new_reviews_count >= max_no_new_reviews:
                    print(f"\nNo new reviews loaded after {max_no_new_reviews} scrolls. Exiting review expansion loop.")
                    break  # Exit if no new reviews loaded after several scrolls
            else:
                last_review_count = current_review_count
                no_new_reviews_count = 0  # Reset the counter if new reviews are found

            print(f"\rCurrent review count: {current_review_count}", end='', flush=True)

        # Extract reviews and ratings
        review_blocks = driver.find_elements(By.XPATH, review_block_xpath)
        for block in review_blocks:
            try:
                rating = block.find_element(By.XPATH, './/span[@role="img"]').get_attribute("aria-label")
                rating_val = re.search(r'(\d+)\s+bintang', rating)
                rating_num = int(rating_val.group(1)) if rating_val else None
            except Exception:
                # Missing element or missing aria-label: keep the review, drop the rating
                rating_num = None
            try:
                review_text = block.find_element(By.XPATH, './/span[contains(@class, "wiI7pd")]').text
            except Exception:
                review_text = ""

            if review_text.strip():  # Only keep reviews with text
                reviews.append(review_text.strip())
                ratings.append(rating_num)
    finally:
        driver.quit()

    print(f"Found {len(reviews)} reviews")
    return pd.DataFrame({'rating': ratings, 'review': reviews})

# 5. Run scraping

In [None]:
# List your Google Maps URLs here
place_urls = [
    "https://www.google.com/maps/place/Curug+Cipanas/@-6.8018806,107.5891477,18z/data=!4m8!3m7!1s0x2e68e199627b9451:0xb0da93f3fa8cce42!8m2!3d-6.8018806!4d107.5915295!9m1!1b1!16s%2Fg%2F11fy4jg1lf?hl=id&entry=ttu&g_ep=EgoyMDI1MDYxMS4wIKXMDSoASAFQAw%3D%3D",
    "https://www.google.com/maps/place/Curug+Maribaya/@-6.8301382,107.6523699,17z/data=!3m1!4b1!4m18!1m9!3m8!1s0x2e68e0a52b861003:0xa130fdfd96505b36!2sCurug+Maribaya!8m2!3d-6.8301382!4d107.6549448!9m1!1b1!16s%2Fg%2F1yg4dcvvc!3m7!1s0x2e68e0a52b861003:0xa130fdfd96505b36!8m2!3d-6.8301382!4d107.6549448!9m1!1b1!16s%2Fg%2F1yg4dcvvc?hl=id&entry=ttu&g_ep=EgoyMDI1MDYxMS4wIKXMDSoASAFQAw%3D%3D",
    "https://www.google.com/maps/place/Curug+Cinulang/@-6.9628607,107.8789675,17z/data=!4m8!3m7!1s0x2e68cff4f238b30d:0xc78ed4f54010c569!8m2!3d-6.9628607!4d107.8815424!9m1!1b1!16s%2Fg%2F11b_2n9x7x?hl=id&entry=ttu&g_ep=EgoyMDI1MDYxMS4wIKXMDSoASAFQAw%3D%3D",
    "https://www.google.com/maps/place/Curug+Tilu+Leuwi+Opat/@-6.7907749,107.5794346,17z/data=!4m8!3m7!1s0x2e68e18e83c36405:0x72275e219144f8f3!8m2!3d-6.7907749!4d107.5820095!9m1!1b1!16s%2Fg%2F11bw4wr_1j?hl=id&entry=ttu&g_ep=EgoyMDI1MDYxMS4wIKXMDSoASAFQAw%3D%3D",
    "https://www.google.com/maps/place/Curug+Layung+%26+Camping+Ground/@-6.7768125,107.5752376,17z/data=!4m8!3m7!1s0x2e68e142f2d3bf0d:0x5765d9ceb0f35ae7!8m2!3d-6.7768125!4d107.5778125!9m1!1b1!16s%2Fg%2F11hs54sggh?hl=id&entry=ttu&g_ep=EgoyMDI1MDYxMS4wIKXMDSoASAFQAw%3D%3D",
    "https://www.google.com/maps/place/Wisata+Air+Curug+Tilu+Rancabali/@-7.1497411,107.3733276,17z/data=!4m8!3m7!1s0x2e688b84894f55b7:0x1991b48c122c9e87!8m2!3d-7.1497411!4d107.3759025!9m1!1b1!16s%2Fg%2F11c2p7qrnk?hl=id&entry=ttu&g_ep=EgoyMDI1MDYxMS4wIKXMDSoASAFQAw%3D%3D",
    "https://www.google.com/maps/place/Curug+Dago/@-6.8655225,107.6156129,17z/data=!4m8!3m7!1s0x2e68e6de2e06f0f1:0xe61f70cd002fbbb3!8m2!3d-6.8655225!4d107.6181878!9m1!1b1!16s%2Fg%2F122jdw0f?hl=id&entry=ttu&g_ep=EgoyMDI1MDYxMS4wIKXMDSoASAFQAw%3D%3D",
    "https://www.google.com/maps/place/Curug+Malela/@-7.0182384,107.1969147,15z/data=!4m8!3m7!1s0x2e6859502d1faae5:0x637a7f90a74e56e2!8m2!3d-7.0182386!4d107.2072145!9m1!1b1!16s%2Fg%2F1pzvnk9v5?hl=id&entry=ttu&g_ep=EgoyMDI1MDYxMS4wIKXMDSoASAFQAw%3D%3D",
    "https://www.google.com/maps/place/Curug+Pelangi/@-6.7987011,107.5734603,17z/data=!4m8!3m7!1s0x2e68e1f7e3b09c73:0xf289999555b21387!8m2!3d-6.7987011!4d107.5760352!9m1!1b1!16s%2Fg%2F11shns5ttw?hl=id&entry=ttu&g_ep=EgoyMDI1MDYxMS4wIKXMDSoASAFQAw%3D%3D",
    "https://www.google.com/maps/place/Curug+Cilengkrang/@-6.8906021,107.7282513,17z/data=!4m8!3m7!1s0x2e68dcf100ccd99f:0x8566c419ce523b48!8m2!3d-6.8906021!4d107.7308262!9m1!1b1!16s%2Fg%2F11bx2hlg3n?hl=id&entry=ttu&g_ep=EgoyMDI1MDYxMS4wIKXMDSoASAFQAw%3D%3D"
]

# Scrape each place in turn and write one timestamped CSV per place.
# A failure on one URL is reported and skipped so that the remaining
# places are still processed.
failed_urls = []
for url in place_urls:
    print(f"Processing: {url}")
    place_name = get_place_name_from_url(url)
    # Timestamp is taken before scraping, so it marks the start of the run
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    try:
        df = scrape_google_maps_reviews(url)
    except Exception as exc:
        # Top-level boundary of the batch: log the error and move on
        print(f"Failed to scrape {url}: {exc}")
        failed_urls.append(url)
        continue
    filename = f"{place_name}_reviews_{timestamp}.csv"
    df.to_csv(filename, index=False)
    print(f"Saved: {filename}")

if failed_urls:
    print(f"{len(failed_urls)} URL(s) could not be scraped:")
    for failed_url in failed_urls:
        print(f"  {failed_url}")

print("DONE. Check the files in your Colab workspace.")