In [1]:
# ================================
# 1. IMPORT LIBRARY
# ================================
import re
import json
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta

# ================================
# 2. HEADER UNTUK REQUEST
# ================================
HEADERS = {
    'accept': '*/*',
    'content-type': 'application/json',
    'origin': 'https://www.tokopedia.com',
    'sec-ch-ua': '"Not)A;Brand";v="99", "Google Chrome";v="127", "Chromium";v="127"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
    'x-source': 'tokopedia-lite',
    'x-tkpd-lite-service': 'zeus',
}

# ================================
# 3. AMBIL SHOP ID DARI NAMA TOKO
# ================================
def get_shop_id(shop_domain):
    """
    Cari ShopID berdasarkan nama domain toko e.g : 'wardah-official'
    """
    url = "https://gql.tokopedia.com/graphql/ShopInfoCore"
    data = [{
        "operationName": "ShopInfoCore",
        "variables": {
            "id": 0,
            "domain": shop_domain
        },
        "query": """
        query ShopInfoCore($id: Int!, $domain: String) {
          shopInfoByID(input: {shopIDs: [$id], fields: ["core"], domain: $domain}) {
            result {
              shopCore {
                shopID
                name
              }
            }
          }
        }
        """
    }]
    response = requests.post(url, headers=HEADERS, json=data)
    try:
        return response.json()[0]['data']['shopInfoByID']['result'][0]['shopCore']['shopID']
    except:
        return 0

# ================================
# 4. AMBIL KOMENTAR / REVIEW DARI API
# ================================
def get_reviews_data(shop_id, page=1, limit=10, sort_by="", filter_by="rating=1,2,3,4,5"):
    """
    Mengambil review dari API Tokopedia (per halaman)
    """
    url = 'https://gql.tokopedia.com/graphql/ReviewList'
    data = [{
        "operationName": "ReviewList",
        "variables": {
            "shopID": shop_id,
            "page": page,
            "limit": limit,
            "sortBy": sort_by,
            "filterBy": filter_by
        },
        "query": """
        query ReviewList($shopID: String!, $limit: Int!, $page: Int!, $filterBy: String, $sortBy: String) {
            productrevGetShopReviewReadingList(shopID: $shopID, limit: $limit, page: $page, filterBy: $filterBy, sortBy: $sortBy) {
                list {
                    reviewID
                    rating
                    reviewText
                    reviewTime
                    reviewerName
                    replyText
                    replyTime
                    product {
                        productName
                        productPageURL
                    }
                }
                hasNext
                totalReviews
                shopName
            }
        }
        """
    }]
    response = requests.post(url, headers=HEADERS, json=data)
    return response.json()

# ================================
# 5. SCRAP REVIEW 1 TAHUN TERAKHIR
# ================================
def get_recent_reviews(shop_id, min_reviews=3000, max_reviews=25000, limit_per_page=50):
    """
    Mengambil komentar dalam 1 tahun terakhir
    """
    all_reviews = []
    page = 1
    one_year_ago = datetime.now() - timedelta(days=365)

    while len(all_reviews) < max_reviews:
        result = get_reviews_data(shop_id, page=page, limit=limit_per_page)
        try:
            reviews = result['data']['productrevGetShopReviewReadingList']['list']
            has_next = result['data']['productrevGetShopReviewReadingList']['hasNext']
        except (KeyError, TypeError):
            print("❌ Gagal memproses response.")
            break

        for review in reviews:
            try:
                review_time = datetime.fromtimestamp(review['reviewTime'])
            except (KeyError, TypeError, ValueError):
                continue

            if review_time >= one_year_ago:
                all_reviews.append(review)
            else:
                has_next = False
                break

        if not has_next:
            break

        page += 1

    if len(all_reviews) < min_reviews:
        print(f"❌ Hanya {len(all_reviews)} review ditemukan dalam 1 tahun terakhir (kurang dari {min_reviews}).")
    else:
        print(f"✅ Total review terkumpul: {len(all_reviews)}")

    return all_reviews

# ================================
# 6. CONTOH PENGGUNAAN
# ================================
# Ganti 'wardah-official' dengan nama toko lain sesuai kebutuhan
shop_domain = "wardah-official"
shopID = get_shop_id(shop_domain)
reviews = get_recent_reviews(shopID)


❌ Gagal memproses response.
❌ Hanya 0 review ditemukan dalam 1 tahun terakhir (kurang dari 3000).
