In [34]:
import time
import re
import pandas as pd
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from requests_html import HTMLSession

In [29]:
# Яндекс

def fetch_yandex_reviews(place_id: str,
                         headless: bool = True,
                         wait_scroll: float = 2.0) -> pd.DataFrame:
    """Scrape the reviews of one organisation from Yandex Maps using Chrome.

    Opens ``https://yandex.ru/maps/org/<place_id>/reviews``, scrolls the window
    to the bottom repeatedly until the document height stops changing, clicks
    every ``span.spoiler-view__button`` element, and then reads one record per
    ``div.business-review-view__info`` element.

    Args:
        place_id: Organisation identifier interpolated into the reviews URL.
        headless: When true, Chrome is started with ``--headless``.
        wait_scroll: Seconds to sleep after each scroll before the page height
            is measured again.

    Returns:
        DataFrame with the columns ``author``, ``date``, ``text``, ``rating``
        and ``source`` (always ``"yandex"``). The columns are present even
        when no review element was found.

    Raises:
        Any exception raised while starting Chrome, loading the page or running
        the scroll scripts propagates to the caller; the browser is shut down
        first.
    """
    opts = Options()
    if headless:
        opts.add_argument("--headless")
    opts.add_argument("--window-size=1600,1000")
    opts.add_argument("--disable-gpu")
    opts.add_argument("--no-sandbox")
    opts.add_argument("--disable-dev-shm-usage")

    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=opts
    )

    # Everything after the browser starts runs under try/finally so that a
    # failed page load or script call cannot leave a Chrome process behind.
    try:
        driver.get(f"https://yandex.ru/maps/org/{place_id}/reviews")
        time.sleep(3)  # fixed pause before the first height measurement

        # Scroll until two consecutive measurements of the page height agree.
        prev_h = driver.execute_script("return document.body.scrollHeight;")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(wait_scroll)
            new_h = driver.execute_script("return document.body.scrollHeight;")
            if new_h == prev_h:
                break
            prev_h = new_h

        # NOTE(review): presumably these are the "read more" toggles of
        # truncated reviews -- confirm against the live markup.
        for btn in driver.find_elements(By.CSS_SELECTOR, "span.spoiler-view__button"):
            try:
                # Clicked through JavaScript rather than WebElement.click().
                driver.execute_script("arguments[0].click();", btn)
                time.sleep(0.1)
            except Exception:
                # Best effort: one button that cannot be clicked must not
                # abort the scrape. Ctrl+C / SystemExit still propagate.
                continue

        infos = driver.find_elements(By.CSS_SELECTOR, "div.business-review-view__info")
        rows = [_parse_yandex_review(info) for info in infos]
    finally:
        driver.quit()

    return pd.DataFrame(rows, columns=["author", "date", "text", "rating", "source"])


def _parse_yandex_review(info) -> dict:
    """Build one review record from a ``div.business-review-view__info`` element.

    Every field is read independently and best-effort: when its lookup or
    conversion fails, the field falls back to ``None`` (``""`` for the text)
    instead of dropping the whole review.
    """
    # Author: first line of the author container's text.
    try:
        author = info.find_element(
            By.CSS_SELECTOR,
            "div.business-review-view__author-container"
        ).text.splitlines()[0].strip()
    except Exception:
        author = None

    # Date: kept as the displayed string, not parsed.
    try:
        date = info.find_element(
            By.CSS_SELECTOR,
            "span.business-review-view__date > span"
        ).text.strip()
    except Exception:
        date = None

    # Rating: the meta tag's content goes through float() first, so a value
    # such as "5.0" is accepted, then it is truncated to int.
    try:
        rating_meta = info.find_element(
            By.CSS_SELECTOR,
            "meta[itemprop='ratingValue']"
        )
        rating = int(float(rating_meta.get_attribute("content")))
    except Exception:
        rating = None

    # Review body: whitespace runs (including newlines) collapse to one space.
    try:
        raw = info.find_element(
            By.XPATH,
            ".//div[@itemprop='reviewBody']"
        ).text
        text = re.sub(r"\s+", " ", raw).strip()
    except Exception:
        text = ""

    return {
        "author": author,
        "date":   date,
        "text":   text,
        "rating": rating,
        "source": "yandex"
    }


In [42]:
# 2GIS

def fetch_2gis_reviews(business_id: str, api_key: str, page_size=50) -> pd.DataFrame:
    """Download all reviews of one branch from the 2GIS public reviews API.

    Requests ``/2.0/branches/<business_id>/reviews`` page by page with the
    ``limit`` / ``offset`` query parameters until a page comes back with no
    reviews.

    Args:
        business_id: Branch identifier interpolated into the endpoint URL.
        api_key: Value sent as the ``key`` query parameter.
        page_size: Value sent as the ``limit`` query parameter.

    Returns:
        DataFrame with the columns ``author``, ``date``, ``text``, ``rating``
        and ``source`` (always ``"2gis"``). The columns are present even when
        the API returned no reviews.

    Raises:
        requests.HTTPError: A response had an error status
            (``raise_for_status``).
        requests.RequestException: Connection problems or the 10 second
            timeout.
    """
    url = "https://public-api.reviews.2gis.com/2.0/branches/{}/reviews".format(business_id)
    offset = 0
    rows = []

    while True:
        resp = requests.get(url,
                            params={
                                "key": api_key,
                                "locale": "ru_RU",
                                "limit": page_size,
                                "offset": offset
                            },
                            timeout=10)
        resp.raise_for_status()
        # "reviews" may be absent or null; both mean there is nothing more.
        data = resp.json().get("reviews") or []
        if not data:
            break

        for it in data:
            # `or` fallbacks (rather than .get defaults) also cover keys that
            # are present but null, which would break .strip() and slicing.
            user = it.get("user") or {}
            rows.append({
                "author": (user.get("name") or "").strip(),
                # First 10 characters of the timestamp string
                # (presumably the YYYY-MM-DD part -- confirm the API format).
                "date":   (it.get("created_at") or "")[:10],
                "text":   (it.get("text") or "").strip(),
                "rating": it.get("rating"),
                "source": "2gis"
            })

        # Advance by what was actually received: if the server returns fewer
        # items than `limit`, stepping by page_size would skip reviews.
        offset += len(data)

    return pd.DataFrame(rows, columns=["author", "date", "text", "rating", "source"])

In [52]:
# Google Maps

def fetch_google_reviews(url: str, headless_render=True) -> pd.DataFrame:
    """Collect reviews from a Google Maps page through ``requests_html``.

    Fetches ``url``, optionally renders the page's JavaScript, and reads one
    record per ``div.section-review-content`` element.

    NOTE(review): the ``section-review-*`` selectors are taken as-is from the
    original code; whether current Google Maps markup still uses them is not
    verifiable from here.

    Args:
        url: Address of the Google Maps page to load.
        headless_render: When true, ``r.html.render(sleep=5, keep_page=True)``
            is called before the page is queried.

    Returns:
        DataFrame with the columns ``author``, ``date``, ``text``, ``rating``
        and ``source`` (always ``"google"``). The columns are present even
        when no review element matched. A review whose author or date element
        is missing gets ``None`` for that field; a missing text becomes ``""``.

    Raises:
        Any exception raised while fetching or rendering the page propagates
        to the caller; the session is closed first.
    """
    def _text_of(block, selector):
        # find(..., first=True) yields None when nothing matches, so guard
        # before touching .text instead of aborting the whole scrape.
        node = block.find(selector, first=True)
        return None if node is None else node.text

    session = HTMLSession()
    # try/finally so the session is closed even when get() or render() raises.
    try:
        r = session.get(url)
        if headless_render:
            r.html.render(sleep=5, keep_page=True)

        rows = []
        for b in r.html.find("div.section-review-content"):
            # NOTE(review): the rating is the number of matched
            # `span.section-review-stars` elements; if the page uses a single
            # star container per review this is always 1 -- confirm.
            rating = len(b.find("span.section-review-stars"))
            body = _text_of(b, "span.section-review-text") or ""
            rows.append({
                "author": _text_of(b, "div.section-review-title"),
                "date":   _text_of(b, "span.section-review-publish-date"),
                # Collapse whitespace runs to single spaces and trim the ends.
                "text":   re.sub(r"\s+", " ", body).strip(),
                "rating": rating,
                "source": "google"
            })
    finally:
        session.close()

    return pd.DataFrame(rows, columns=["author", "date", "text", "rating", "source"])


In [None]:
if __name__ == "__main__":
    # Driver cell: scrape each source and save the result as a CSV file.
    # The ID / API-key string literals below appear to be placeholders
    # (roughly "company_id" / "your_api") -- replace them before running.
    # encoding="utf-8-sig" writes a UTF-8 byte-order mark at the start of the
    # file (presumably so spreadsheet tools detect the encoding -- confirm).

    # Yandex (headless=False, so Chrome is started without --headless)
    df_y = fetch_yandex_reviews(place_id="айди_компании", headless=False)
    df_y.to_csv("yandex_cardex.csv", index=False, encoding="utf-8-sig")

    # 2GIS
    df_2 = fetch_2gis_reviews(business_id="айди_компании", api_key="ваш_апи")
    df_2.to_csv("2gis_gasprom.csv", index=False, encoding="utf-8-sig")

    # Google
    # Does not work :( -- the call below is left disabled.
    # NOTE(review): the disabled call passes `headless=False`, but
    # fetch_google_reviews takes `headless_render`; as written it would raise
    # TypeError if re-enabled.
#     url = "https://www.google.com/maps/..."
#     df = fetch_google_reviews(url, headless=False)
#     print(df.shape)
#     df.to_csv("google_reviews.csv", index=False, encoding="utf-8-sig")

    