In [1]:
import os, csv, time, json, math
import hashlib  # required by review_key(); was missing, so a fresh kernel raised NameError

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import certifi
from dotenv import load_dotenv

In [3]:
# Print the interpreter version string so the environment used for this run
# is recorded in the cell output.
import sys
print(sys.version)

3.12.3 | packaged by conda-forge | (main, Apr 15 2024, 18:20:11) [MSC v.1938 64 bit (AMD64)]


In [5]:
# Configuration cell: credentials, search area, scraping limits and output path.
# load_dotenv() comes from python-dotenv; presumably it loads a local .env file
# into the process environment before the key is read below.
load_dotenv()
# os.environ[...] raises KeyError when SERPAPI_KEY is not set.
# Both names hold the same value; the functions visible in this notebook read SERPAPI_KEY only.
API_KEY = os.environ["SERPAPI_KEY"]
SERPAPI_KEY = os.environ["SERPAPI_KEY"]
CENTER_LAT, CENTER_LNG = 41.0430, 29.0086     # Beşiktaş center
QUERIES = ["restoran"]                         # optionally: ["restoran","cafe"]
TOP_N = 15                                     # number of top-ranked places to scrape (10–15)
MAX_REVIEW_PAGES_PER_PLACE = 15                # each page is ~10-20 reviews; more pages use more quota/cost
REVIEWS_SORT = "qualityScore"                  # "qualityScore" | "newestFirst" | "ratingHigh" | "ratingLow"
CSV_PATH = "besiktas_reviews_serpapi_part_full.csv"
SLEEP_BETWEEN_PAGES = 1.3              # seconds passed to time.sleep() between review-page requests
BASE = "https://serpapi.com/search"

In [7]:
def make_session():
    """Create a ``requests.Session`` configured for the HTTPS calls in this notebook.

    The session mounts an ``HTTPAdapter`` on ``https://`` whose retry policy
    allows up to 6 attempts (total / connect / read) for GET requests, with a
    backoff factor of 0.7, and retries on status 429, 500, 502, 503 and 504.
    ``raise_on_status=False`` is set so that, per urllib3's ``Retry``
    documentation, an exhausted status retry hands back the last response
    instead of raising.

    Returns:
        requests.Session: the session, with a browser-like ``User-Agent`` and
        a ``Connection: close`` header applied to every request.
    """
    retry_policy = Retry(
        total=6,
        connect=6,
        read=6,
        backoff_factor=0.7,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=["GET"],
        raise_on_status=False,
    )
    default_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "Connection": "close",
    }

    session = requests.Session()
    # Only HTTPS traffic gets the retrying adapter; other schemes keep the defaults.
    session.mount("https://", HTTPAdapter(max_retries=retry_policy))
    session.headers.update(default_headers)
    return session

def serpapi_get(params: dict) -> dict:
    """Send one GET request to the SerpApi endpoint and return the decoded JSON.

    Args:
        params: Query parameters for the request. The dict is copied, never
            mutated; the ``api_key`` entry is added to the copy.

    Returns:
        dict: The JSON body of the response.

    Raises:
        ValueError: If ``SERPAPI_KEY`` is empty or still the placeholder value.
        requests.HTTPError: If the response status is 400 or higher. The
            message carries the status, reason, request URL and response body,
            with the API key masked.
    """
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not SERPAPI_KEY or SERPAPI_KEY == "BURAYA_SERPAPI_KEYINI_YAZ":
        raise ValueError("SERPAPI_KEY boş veya placeholder!")

    query = dict(params)
    query["api_key"] = SERPAPI_KEY
    with make_session() as s:
        r = s.get(BASE, params=query, timeout=40, verify=certifi.where())
        if r.status_code >= 400:
            # r.url contains the api_key query parameter; callers print this
            # exception, so mask the secret before it reaches logs or outputs.
            safe_url = r.url.replace(SERPAPI_KEY, "***")
            raise requests.HTTPError(f"{r.status_code} {r.reason}: {safe_url}\n{r.text}")
        try:
            return r.json()
        except Exception:
            # Fall back to decoding the text body directly.
            return json.loads(r.text)

def popularity_score(item: dict) -> float:
    """Rank a place by the product of its rating and its review count.

    Args:
        item: A place record; the ``"rating"`` and ``"reviews"`` entries are
            read, and a missing or falsy entry counts as 0.

    Returns:
        float: ``rating * reviews``, or ``0.0`` when either value cannot be
        converted to a float (or ``item`` cannot be read at all).
    """
    try:
        rating, review_count = (
            float(item.get(field) or 0) for field in ("rating", "reviews")
        )
    except Exception:
        # Any failure while reading or converting yields a neutral score.
        return 0.0
    return rating * review_count

def maps_search_once(query: str, lat: float, lng: float, limit: int = 80) -> list:
    """Run a single ``google_maps`` search through ``serpapi_get``.

    Args:
        query: Search text sent as ``q``.
        lat: Latitude placed into the ``ll`` parameter.
        lng: Longitude placed into the ``ll`` parameter.
        limit: Maximum number of results to return.

    Returns:
        list: At most ``limit`` entries from the response's ``local_results``;
        an empty list when that key is missing or empty.
    """
    response = serpapi_get(dict(
        engine="google_maps",
        type="search",
        q=query,
        # "@lat,lng,14z" — the trailing 14z is presumably the map zoom level; confirm against SerpApi docs.
        ll=f"@{lat},{lng},14z",
        hl="tr",
        gl="tr",
    ))
    places = response.get("local_results") or []
    return places[:limit]

def maps_reviews_all(*, data_id: str = None, place_id: str = None,
                     max_pages: int = 10, sort_by: str = "qualityScore") -> list:
    """Collect reviews for one place by paging through ``google_maps_reviews``.

    Args:
        data_id: Place identifier; used when truthy.
        place_id: Alternative identifier, used only when ``data_id`` is falsy.
        max_pages: Page budget. The first page is always requested, so a value
            below 1 still results in one request.
        sort_by: Value sent as the ``sort_by`` request parameter.

    Returns:
        list: The ``reviews`` entries of every fetched page, in fetch order.
        If a page after the first fails, the reviews gathered so far are
        returned instead of being discarded.

    Raises:
        ValueError: If neither ``data_id`` nor ``place_id`` is given.
        Exception: Whatever ``serpapi_get`` raises when the first page fails.
    """
    if data_id:
        ident = {"data_id": data_id}
    elif place_id:
        ident = {"place_id": place_id}
    else:
        raise ValueError("data_id veya place_id gerekli")

    out, token, page = [], None, 0
    while True:
        page += 1
        params = {
            "engine": "google_maps_reviews",
            "hl": "tr",
            "sort_by": sort_by,
            **ident
        }
        if token:
            params["next_page_token"] = token

        try:
            data = serpapi_get(params)
        except Exception as e:
            if page == 1:
                # Nothing collected yet: let the caller handle the failure.
                raise
            # Earlier pages already consumed quota; keep them rather than
            # throwing everything away because a later page failed.
            print(f"    ! Sayfa {page} alınamadı; önceki sayfalardan {len(out)} yorum korunuyor: {e}")
            break

        rows = data.get("reviews", []) or []
        out.extend(rows)
        token = (data.get("serpapi_pagination") or {}).get("next_page_token")
        print(f"    ▹ Sayfa {page}: {len(rows)} yorum")
        if not token or page >= max_pages:
            break
        # Pause between pages; skipped after the final page.
        time.sleep(SLEEP_BETWEEN_PAGES)
    return out

def ensure_csv_header(path: str):
    """Write the CSV header row to ``path`` unless the file already has content.

    A file that exists with a non-zero size is left untouched; a missing or
    empty file is (re)created containing only the header
    ``review,name,address,longitude,latitude``.

    Args:
        path: Location of the CSV file.
    """
    if os.path.exists(path) and os.path.getsize(path) != 0:
        return

    columns = ["review", "name", "address", "longitude", "latitude"]
    with open(path, "w", newline="", encoding="utf-8") as handle:
        csv.DictWriter(handle, fieldnames=columns).writeheader()

def review_key(name: str, address: str, review: str) -> str:
    """Build a stable deduplication key for one review.

    Each part is stripped and lower-cased, the parts are joined with ``|``
    and the SHA-256 hex digest of the UTF-8 encoding is returned.

    Missing values (``None``, other falsy values, float NaN) count as an empty
    string, and any other non-string value is converted with ``str()``.
    Values read back from a CSV (e.g. NaN for an empty cell, or a number for
    numeric-looking text) therefore no longer raise ``AttributeError``.

    Args:
        name: Place name.
        address: Place address.
        review: Review text.

    Returns:
        str: 64-character hexadecimal SHA-256 digest.
    """
    def _norm(value) -> str:
        # NaN is the only float that differs from itself; treat it as "no value".
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return str(value or "").strip().lower()

    base = "|".join(_norm(part) for part in (name, address, review))
    return hashlib.sha256(base.encode("utf-8", errors="ignore")).hexdigest()

def load_seen_keys(path: str) -> set:
    """Load the deduplication keys of every review already stored in the CSV.

    The file is read with the standard ``csv`` module, so every cell comes
    back as the text that was written: an empty address stays ``""`` and
    numeric-looking text stays text. The keys therefore match the ones
    computed while writing.

    Args:
        path: Location of the CSV file.

    Returns:
        set: ``review_key(name, address, review)`` for each data row. Empty
        when the file does not exist or lacks the ``name``, ``address`` or
        ``review`` column. If reading fails part-way, a warning is printed and
        the keys collected so far are returned.
    """
    seen = set()
    if not os.path.exists(path):
        return seen
    try:
        # newline="" lets the csv module handle line endings inside quoted fields.
        with open(path, "r", newline="", encoding="utf-8", errors="replace") as f:
            reader = csv.DictReader(f)
            if not {"name", "address", "review"}.issubset(reader.fieldnames or ()):
                return seen
            for row in reader:
                # Short rows yield None for missing cells; normalise to "".
                seen.add(review_key(
                    row.get("name") or "",
                    row.get("address") or "",
                    row.get("review") or "",
                ))
    except (OSError, csv.Error) as e:
        # Best effort: report the problem instead of silently dropping dedup.
        print(f"[!] {path} okunamadı; bilinen kayıt listesi eksik olabilir: {e}")
    return seen

def main():
    """Scrape reviews for the top-ranked places and append them to ``CSV_PATH``.

    Steps:
      0. Make sure the CSV has a header and load the keys of rows already in it.
      1. Run one maps search per entry in ``QUERIES`` around the configured centre.
      2. De-duplicate places (by ``data_id``, ``place_id`` or ``link``), keeping
         the highest ``popularity_score`` per key, and keep the best ``TOP_N``.
      3. For each place fetch its reviews and append the unseen, non-empty
         ones to the CSV, flushing to disk after every place.

    A failure while fetching one place's reviews is printed and that place is
    skipped; the run continues with the next place.
    """
    columns = ["review", "name", "address", "longitude", "latitude"]

    # 0) CSV header (append strategy) and the keys of the existing rows
    ensure_csv_header(CSV_PATH)
    known_keys = load_seen_keys(CSV_PATH)
    print(f"[i] Mevcut CSV: {CSV_PATH} | bilinen kayıt: {len(known_keys)}")

    # 1) Collect candidate places
    candidates = []
    for query in QUERIES:
        candidates += maps_search_once(query, CENTER_LAT, CENTER_LNG, limit=80)

    # 2) De-duplicate, then rank
    best_by_key = {}
    for candidate in candidates:
        place_key = candidate.get("data_id") or candidate.get("place_id") or candidate.get("link")
        if not place_key:
            continue
        current = best_by_key.get(place_key)
        if current is None or popularity_score(candidate) > popularity_score(current):
            best_by_key[place_key] = candidate

    top_places = sorted(best_by_key.values(), key=popularity_score, reverse=True)[:TOP_N]
    place_count = len(top_places)
    print(f"[i] Hedef mekan sayısı: {place_count} (TOP_N={TOP_N})")

    # 3) Process the places one by one and APPEND to the CSV
    total_written = 0
    with open(CSV_PATH, "a", newline="", encoding="utf-8") as out_file:
        writer = csv.DictWriter(out_file, fieldnames=columns)

        for position, place in enumerate(top_places, start=1):
            name = place.get("title") or ""
            address = place.get("address") or place.get("full_address") or ""
            gps = place.get("gps_coordinates") or {}
            lat, lng = gps.get("latitude", ""), gps.get("longitude", "")

            # Prefer data_id; otherwise pass place_id (possibly None) through.
            data_id = place.get("data_id")
            if data_id:
                lookup = {"data_id": data_id}
            else:
                lookup = {"place_id": place.get("place_id")}
            print(f"\n[{position}/{place_count}] {name} | {address}")

            try:
                reviews = maps_reviews_all(
                    max_pages=MAX_REVIEW_PAGES_PER_PLACE,
                    sort_by=REVIEWS_SORT,
                    **lookup
                )
            except Exception as err:
                print("   ! Yorum çekme hatası:", err)
                continue

            written_here = 0
            for review in reviews:
                raw_text = review.get("description") or review.get("snippet") or ""
                text = raw_text.replace("\n", " ").strip()
                if not text:
                    continue
                digest = review_key(name, address, text)
                if digest not in known_keys:
                    writer.writerow(dict(zip(columns, (text, name, address, lng, lat))))
                    known_keys.add(digest)
                    written_here += 1
                    total_written += 1

            print(f"    ✓ Yazılan yorum: {written_here} (toplam={total_written})")
            # flush to disk after every place
            out_file.flush()
            os.fsync(out_file.fileno())

    print(f"\n[bitti] CSV: {CSV_PATH} | Toplam yeni satır: {total_written}")


In [9]:
# Run the whole pipeline: performs live SerpApi requests (uses quota) and
# appends new reviews to CSV_PATH.
main()

[i] Mevcut CSV: besiktas_reviews_serpapi_part_full.csv | bilinen kayıt: 0
[i] Hedef mekan sayısı: 15 (TOP_N=15)

[1/15] Le Vapeur Magique - TDİ Karaköy İskelesi | Kemankeş Karamustafa Paşa, Rıhtım Cd. No:2/3, 34110 Beyoğlu/İstanbul, Türkiye
    ▹ Sayfa 1: 8 yorum
    ▹ Sayfa 2: 10 yorum
    ▹ Sayfa 3: 10 yorum
    ▹ Sayfa 4: 10 yorum
    ▹ Sayfa 5: 10 yorum
    ▹ Sayfa 6: 10 yorum
    ▹ Sayfa 7: 10 yorum
    ▹ Sayfa 8: 10 yorum
    ▹ Sayfa 9: 10 yorum
    ▹ Sayfa 10: 10 yorum
    ▹ Sayfa 11: 10 yorum
    ▹ Sayfa 12: 10 yorum
    ▹ Sayfa 13: 10 yorum
    ▹ Sayfa 14: 10 yorum
    ▹ Sayfa 15: 10 yorum
    ✓ Yazılan yorum: 148 (toplam=148)

[2/15] Meat Moot Istanbul Taksim | Şehit Muhtar, İmam Adnan Sk. No: 5, 34010 Beyoğlu/İstanbul, Türkiye
    ▹ Sayfa 1: 8 yorum
    ▹ Sayfa 2: 10 yorum
    ▹ Sayfa 3: 10 yorum
    ▹ Sayfa 4: 10 yorum
    ▹ Sayfa 5: 10 yorum
    ▹ Sayfa 6: 10 yorum
    ▹ Sayfa 7: 10 yorum
    ▹ Sayfa 8: 10 yorum
    ▹ Sayfa 9: 10 yorum
    ▹ Sayfa 10: 10 yorum
    ▹ Sayfa 