# Google Maps Collector — **Indonesia Grid** — SerpAPI

Flow Kerja:
- Grid Indonesia (ll="@lat,lng,zoom")
- `GLOBAL_CAP=250` → batas total hasil unik per query
- Satu query: **"Tempat Wisata Indonesia"**
- Di akhir pipeline: dedup `place_id` lalu `.head(250)` sebagai pagar akhir

## 0) Setup & Install

In [1]:
!pip -q install serpapi pandas python-dotenv tqdm SQLAlchemy

## 1) Import, ENV, Inisialisasi

In [2]:
import os, time, math, re
import pandas as pd
from dotenv import load_dotenv
from tqdm import tqdm
from sqlalchemy import create_engine
import serpapi

load_dotenv()
API_KEY = os.getenv("API_KEY", "6ad6f6217e2c6686e97a58d05be7f93ba7e364d9fc9d3f04152ccff767967333").strip()
if not API_KEY:
    print("Peringatan: API_KEY (SerpAPI) belum ada di .env")
else:
    print("API_KEY terdeteksi ✔")
client = serpapi.Client(api_key=API_KEY)

API_KEY terdeteksi ✔


## 2) Grid Indonesia & Utilitas

In [3]:
ID_BBOX = {"lat_min": -11.0, "lat_max": 6.5, "lng_min": 95.0, "lng_max": 141.0}

def deg_step_from_km(km: float, at_lat_deg: float = 0.0) -> tuple[float,float]:
    lat_step = km / 111.0
    lng_step = km / (111.0 * max(math.cos(math.radians(at_lat_deg)), 1e-6))
    return lat_step, lng_step

def generate_id_grid(step_km: float = 400.0) -> list[tuple[float,float]]:
    centers = []
    lat = ID_BBOX["lat_min"]
    while lat <= ID_BBOX["lat_max"] + 1e-9:
        lat_step, _ = deg_step_from_km(step_km, at_lat_deg=lat)
        _, lng_step = deg_step_from_km(step_km, at_lat_deg=lat)
        lng = ID_BBOX["lng_min"]
        while lng <= ID_BBOX["lng_max"] + 1e-9:
            centers.append((round(lat,6), round(lng,6)))
            lng += lng_step
        lat += lat_step
    return centers

def ll_from_latlng_zoom(lat: float, lng: float, zoom: int = 8) -> str:
    return f"@{lat},{lng},{zoom}z"

## 3) Extract — Scraper SerpAPI dengan dukungan Grid Indonesia

In [4]:
def scrape_web_grid(query: str,
                    grid_centers: list[tuple[float,float]],
                    zoom: int = 8,
                    per_ll_cap: int = 20,
                    global_cap: int = 250,
                    lang: str = "id",
                    country: str = "id",
                    sleep_req: float = 5.0,
                    sleep_between_ll: float = 2.0,
                    get_details: bool = True,
                    sleep_details: float = 0.8) -> pd.DataFrame:
    def _ambil_deskripsi_dari_detail(detail: dict) -> str | None:
        if not isinstance(detail, dict):
            return None

        ed = detail.get("editorial_summary")
        if isinstance(ed, dict):
            for k in ("overview", "description", "editorial_summary"):
                v = ed.get(k)
                if isinstance(v, str) and v.strip():
                    return v.strip()

        v = detail.get("description")
        if isinstance(v, str) and v.strip():
            return v.strip()

        about = detail.get("about")
        vals = []
        if isinstance(about, dict):
            for _, vv in about.items():
                if isinstance(vv, str) and vv.strip():
                    vals.append(vv.strip())
                elif isinstance(vv, list):
                    for it in vv:
                        if isinstance(it, str) and it.strip():
                            vals.append(it.strip())
        elif isinstance(about, list):
            for it in about:
                if isinstance(it, dict):
                    vv = it.get("value") or it.get("title")
                    if isinstance(vv, str) and vv.strip():
                        vals.append(vv.strip())
                elif isinstance(it, str) and it.strip():
                    vals.append(it.strip())
        if vals:
            return "; ".join(vals)

        return None

    def _fetch_place_details_serpapi(place_id: str, data_id: str | None, lang="id", country="id") -> dict:
        params = {"engine": "google_maps_place", "hl": lang, "gl": country}
        if data_id:
            params["data_id"] = data_id
        else:
            params["place_id"] = place_id
        try:
            return client.search(params) or {}
        except Exception:
            return {}

    data = []
    seen = set()
    total_target = global_cap
    pbar = tqdm(total=total_target, desc=f"{query} (ID-grid)")

    for (lat, lng) in grid_centers:
        if len(data) >= total_target:
            break
        ll = ll_from_latlng_zoom(lat, lng, zoom=zoom)
        start = 0
        got_here = 0

        while got_here < per_ll_cap and len(data) < total_target:
            params = {
                "engine": "google_maps",
                "type": "search",
                "q": query,
                "hl": lang,
                "gl": country,
                "start": start,
                "ll": ll,
            }
            results = client.search(params)
            places = results.get("local_results", []) or []
            if not places:
                break

            for place in places:
                pid = place.get("place_id")
                if not pid or pid in seen:
                    continue

                gps = place.get("gps_coordinates") or {}
                lat_place = gps.get("latitude")
                lng_place = gps.get("longitude")

                deskripsi = None
                data_id = place.get("data_id")
                if get_details:
                    detail = _fetch_place_details_serpapi(pid, data_id, lang=lang, country=country)
                    deskripsi = _ambil_deskripsi_dari_detail(detail)
                    time.sleep(sleep_details)

                row = {
                    "nama_tempat": place.get("title"),
                    "rating": place.get("rating"),
                    "jumlah_ulasan": place.get("reviews"),
                    "alamat": place.get("address"),
                    "kategori": place.get("type"),
                    "lat": lat_place,
                    "lng": lng_place,
                    "thumbnail": place.get("thumbnail"),
                    "link": f"https://www.google.com/maps/place/?q=place_id:{pid}",
                    "place_id": pid,
                    "data_id": data_id,
                    "deskripsi": deskripsi,
                    "grid_lat": lat,
                    "grid_lng": lng,
                    "grid_zoom": zoom
                }
                data.append(row)
                seen.add(pid)
                got_here += 1
                pbar.update(1)
                if got_here >= per_ll_cap or len(data) >= total_target:
                    break

            start += len(places)
            time.sleep(sleep_req)

        time.sleep(sleep_between_ll)

    pbar.close()
    df = pd.DataFrame(data).drop_duplicates(subset=["place_id"]).reset_index(drop=True)
    return df

## 4) Helper: Simpan CSV

In [5]:
def save_category_csv(df: pd.DataFrame, query: str, suffix="ID") -> str:
    import re
    kategori = query.replace("Indonesia","").strip()
    safe_kategori = re.sub(r"[^\w\s-]+","", kategori).strip().replace(" ","_")
    out_name = f"data_{safe_kategori or 'noname'}_{suffix}.csv"
    df.to_csv(out_name, index=False, encoding="utf-8")
    print(f"CSV → {out_name} | rows={len(df)}")
    return out_name

## 5) Transform & Load (opsional)

In [6]:
def transform_data(dataframe, path='gambar_data/', nama_file='transformed.csv', description_csv='data/description.csv'):
    import os
    df = dataframe.copy()
    if "nama_tempat" not in df.columns:
        raise ValueError("Kolom 'nama_tempat' tidak ada.")
    df["gambar"] = None

    name_to_idx = {}
    for idx, row in df.iterrows():
        name = str(row["nama_tempat"]).strip()
        name_to_idx.setdefault(name, []).append(idx)

    if os.path.isdir(path):
        for root, dirs, files in os.walk(path):
            folder_name = root.replace(path, "").strip(os.sep)
            if not folder_name:
                continue
            if folder_name in name_to_idx:
                file_paths = [os.path.join(root, f) for f in files]
                for idx in name_to_idx[folder_name]:
                    df.at[idx, "gambar"] = file_paths.copy()

    df.dropna(subset=["gambar"], inplace=True)
    df.drop(columns=["Unnamed: 0", "kategori"], inplace=True, errors="ignore")

    if os.path.isfile(description_csv):
        description = pd.read_csv(description_csv)
        add_description = description.merge(df, on="nama_tempat", how="left")
        add_description.dropna(inplace=True)
        add_description.drop_duplicates(subset=["nama_tempat"], inplace=True)
        add_description.to_csv(nama_file, index=False)
        print(f"Transform → {nama_file} | rows={len(add_description)}")
        return add_description
    else:
        df.drop_duplicates(subset=["nama_tempat"], inplace=True)
        df.to_csv(nama_file, index=False)
        print(f"Transform (tanpa deskripsi) → {nama_file} | rows={len(df)}")
        return df

def to_db(dataframe, db_url=None, table_name="products", if_exists="append"):
    db_url = db_url or os.getenv("DB_URL","").strip()
    if not db_url:
        print("Lewati load: DB_URL tidak di-set")
        return None
    try:
        engine = create_engine(db_url)
        with engine.begin() as con:
            dataframe.to_sql(table_name, con=con, if_exists=if_exists, index=False)
        print(f"Loaded → {table_name} rows={len(dataframe)}")
    except Exception as e:
        print("DB load error:", e)
        return None

## 6) Pipeline Target 250 tempat

In [7]:
if __name__ == "__main__":
    STEP_KM = 400.0
    ZOOM = 8
    grid = generate_id_grid(STEP_KM)
    print("Jumlah titik grid:", len(grid))

    PER_LL_CAP = 20
    GLOBAL_CAP = 250 # total tempat wisata/sesuai limit free plan serapi 250 search

    queries = ["Tempat Wisata Indonesia"]

    csv_paths = []
    for q in queries:
        dfq = scrape_web_grid(q, grid, zoom=ZOOM, per_ll_cap=PER_LL_CAP, global_cap=GLOBAL_CAP, sleep_req=5.0, sleep_between_ll=2.0)
        dfq = dfq.drop_duplicates(subset=["place_id"]).head(GLOBAL_CAP)
        p = save_category_csv(dfq, q, suffix="ID")
        csv_paths.append(p)

    frames = [pd.read_csv(p) for p in csv_paths if os.path.isfile(p)]
    all_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    all_df = all_df.drop_duplicates(subset=["place_id"]).head(GLOBAL_CAP)
    all_df.to_csv("all_data_indonesia_250.csv", index=False)
    print("all_data_indonesia_250.csv tersimpan | rows=", len(all_df))

    transformed = transform_data(all_df, path="gambar_data/", nama_file="transformed_indonesia_250.csv", description_csv="data/description.csv")
    to_db(transformed)

Jumlah titik grid: 65


Tempat Wisata Indonesia (ID-grid):   0%|          | 0/250 [00:00<?, ?it/s]

HTTPError: 429 Client Error: Too Many Requests for url: https://serpapi.com/search?engine=google_maps&type=search&q=Tempat+Wisata+Indonesia&hl=id&gl=id&start=0&ll=%40-11.0%2C95.0%2C8z&api_key=6ad6f6217e2c6686e97a58d05be7f93ba7e364d9fc9d3f04152ccff767967333