In [10]:
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Retry strategy
retry_strategy = Retry(
    total=5,
    backoff_factor=1,
    status_forcelist=[429, 500, 502, 503, 504],
)

adapter = HTTPAdapter(max_retries=retry_strategy)
http = requests.Session()
http.mount("https://", adapter)
http.mount("http://", adapter)


In [11]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# DataFrame to store product information
promo_sku = pd.DataFrame(columns=["productName", "basePrice", "finalPrice", "discountPercent"])

# Headers for the request
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:131.0) Gecko/20100101 Firefox/131.0',
    'Accept': '*/*',
    'Accept-Language': 'en-US,en;q=0.5',
    'Referer': 'https://www.tokopedia.com/unilever/',
    'X-Tkpd-Lite-Service': 'zeus',
    'X-Version': '1227cf6',
    'content-type': 'application/json',
    'X-Device': 'default_v3',
    'X-Source': 'tokopedia-lite',
    'Origin': 'https://www.tokopedia.com',
    'Connection': 'keep-alive'
}

def convert_price(price_str):
    return int(price_str.replace('Rp', '').replace('.', '').strip())

def parse_product_card(product_card):
    try:
        # Product name
        name_tag = product_card.find('span', class_='_0T8-iGxMpV6NEsYEhwkqEg==')
        product_name = name_tag.text.strip() if name_tag else ""

        # Final price
        final_price_tag = product_card.find('div', class_='_67d6E1xDKIzw+i2D2L0tjw==')
        final_price = convert_price(final_price_tag.text) if final_price_tag else 0

        # Original/base price
        base_price_tag = product_card.find('span', class_='q6wH9+Ht7LxnxrEgD22BCQ==')
        base_price = convert_price(base_price_tag.text) if base_price_tag else final_price

        # Discount percent
        discount_tag = product_card.find('span', class_='vRrrC5GSv6FRRkbCqM7QcQ==')
        discount_percent = float(discount_tag.text.replace("%", "").strip()) if discount_tag else 0

        return [product_name, base_price, final_price, discount_percent]
    except Exception as e:
        print(f"Failed to parse product card: {e}")
        return None

def get_product_data(promo_sku):
    products_data = []
    etalase_links = set()

    try:
        # Step 1: Crawl general product pages
        for i in range(200):  # Adjust page range as needed
            url = f'https://www.tokopedia.com/unilever/product/page/{i}?perpage=10'
            response = http.post(url, headers=headers, verify=False)
            soup = BeautifulSoup(response.content, 'html.parser')
            
            # Check if there are no products on the page
            empty_msg = soup.find('h5', class_='css-1e3cf11-unf-heading e1qvo2ff5')
            if empty_msg and "Toko ini belum memiliki produk" in empty_msg.text:
                print(f"Page {i}: No more products.")
                break

            product_cards = soup.find_all('div', class_='css-79elbk')
            for product_card in product_cards:
                product_info = parse_product_card(product_card)
                if product_info:
                    products_data.append(product_info)

            # Collect etalase links from sidebar menu (only once)
            if not etalase_links:
                sidebar_menu = soup.find('ul', class_='css-17mrx6g')
                if sidebar_menu:
                    for a in sidebar_menu.find_all('a', href=True):
                        href = a['href']
                        if href.startswith('/unilever/etalase/'):
                            etalase_links.add("https://www.tokopedia.com" + href)

            time.sleep(30)

        # Step 2: Crawl all products under each etalase
        for etalase_url in etalase_links:
            print(f"Scraping etalase: {etalase_url}")
            for i in range(100):  # Adjust page range per etalase
                page_url = f"{etalase_url}/page/{i}?perpage=10"
                response = http.post(page_url, headers=headers, verify=False)
                soup = BeautifulSoup(response.content, 'html.parser')
                product_cards = soup.find_all('div', class_='css-79elbk')
                
                # Check if there are no products on the page
                empty_msg = soup.find('h5', class_='css-1e3cf11-unf-heading e1qvo2ff5')
                if empty_msg and "Toko ini belum memiliki produk" in empty_msg.text:
                    print(f"{etalase_url} Page {i}: No more products.")
                    break
                
                for product_card in product_cards:
                    product_info = parse_product_card(product_card)
                    if product_info:
                        products_data.append(product_info)

                time.sleep(30)

        # Combine with existing DataFrame
        new_df = pd.DataFrame(products_data, columns=["productName", "basePrice", "finalPrice", "discountPercent"])
        promo_sku = pd.concat([promo_sku, new_df], ignore_index=True)
        promo_sku.drop_duplicates(subset=["productName"], inplace=True)

    except Exception as e:
        print(f"Error: {e}")

    return promo_sku




In [12]:

# Execute the function and get the product data
promo_sku = get_product_data(promo_sku)

# Display the DataFrame
print(promo_sku)



Page 78: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/lifebuoy-bodywash




https://www.tokopedia.com/unilever/etalase/lifebuoy-bodywash Page 5: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/simple




https://www.tokopedia.com/unilever/etalase/simple Page 3: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/perawatan-rambut




https://www.tokopedia.com/unilever/etalase/perawatan-rambut Page 2: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/jaminan-harga-termurah




https://www.tokopedia.com/unilever/etalase/jaminan-harga-termurah Page 3: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/st-ives




https://www.tokopedia.com/unilever/etalase/st-ives Page 0: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/lux




https://www.tokopedia.com/unilever/etalase/lux Page 2: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/produk-baru-unilever




https://www.tokopedia.com/unilever/etalase/produk-baru-unilever Page 2: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/perawatan-wajah




https://www.tokopedia.com/unilever/etalase/perawatan-wajah Page 6: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/rinso-matic




https://www.tokopedia.com/unilever/etalase/rinso-matic Page 4: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/kebutuhan-bumbu-dapur-dan-minuman




https://www.tokopedia.com/unilever/etalase/kebutuhan-bumbu-dapur-dan-minuman Page 0: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/molto-konsentrat




https://www.tokopedia.com/unilever/etalase/molto-konsentrat Page 5: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/vaseline




https://www.tokopedia.com/unilever/etalase/vaseline Page 1: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/sari-murni




https://www.tokopedia.com/unilever/etalase/sari-murni Page 0: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/bango




https://www.tokopedia.com/unilever/etalase/bango Page 1: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/vixal




https://www.tokopedia.com/unilever/etalase/vixal Page 3: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/lifebuoy-shampoo




https://www.tokopedia.com/unilever/etalase/lifebuoy-shampoo Page 0: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/sariwangi




https://www.tokopedia.com/unilever/etalase/sariwangi Page 1: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/dove-hair




https://www.tokopedia.com/unilever/etalase/dove-hair Page 1: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/beli-banyak-lebih-hemat




https://www.tokopedia.com/unilever/etalase/beli-banyak-lebih-hemat Page 3: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/proteksi-higienies




https://www.tokopedia.com/unilever/etalase/proteksi-higienies Page 2: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/dove




https://www.tokopedia.com/unilever/etalase/dove Page 1: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/molto




https://www.tokopedia.com/unilever/etalase/molto Page 7: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/pepsodent




https://www.tokopedia.com/unilever/etalase/pepsodent Page 2: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/buavita




https://www.tokopedia.com/unilever/etalase/buavita Page 0: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/citra




https://www.tokopedia.com/unilever/etalase/citra Page 2: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/royco




https://www.tokopedia.com/unilever/etalase/royco Page 1: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/zwitsal




https://www.tokopedia.com/unilever/etalase/zwitsal Page 0: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/glow-and-lovely




https://www.tokopedia.com/unilever/etalase/glow-and-lovely Page 2: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/perawatan-tubuh




https://www.tokopedia.com/unilever/etalase/perawatan-tubuh Page 2: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/superpell




https://www.tokopedia.com/unilever/etalase/superpell Page 4: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/tresemme




https://www.tokopedia.com/unilever/etalase/tresemme Page 0: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/molto-pewangi




https://www.tokopedia.com/unilever/etalase/molto-pewangi Page 3: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/sunsilk




https://www.tokopedia.com/unilever/etalase/sunsilk Page 1: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/wipol




https://www.tokopedia.com/unilever/etalase/wipol Page 4: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/molto-trika




https://www.tokopedia.com/unilever/etalase/molto-trika Page 3: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/perawatan-pria




https://www.tokopedia.com/unilever/etalase/perawatan-pria Page 0: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/clear




https://www.tokopedia.com/unilever/etalase/clear Page 0: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/rexona




https://www.tokopedia.com/unilever/etalase/rexona Page 2: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/paket-berhadiah-unilever




https://www.tokopedia.com/unilever/etalase/paket-berhadiah-unilever Page 3: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/baby-dove




https://www.tokopedia.com/unilever/etalase/baby-dove Page 0: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/international-brands




https://www.tokopedia.com/unilever/etalase/international-brands Page 2: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/ponds-men




https://www.tokopedia.com/unilever/etalase/ponds-men Page 0: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/axe




https://www.tokopedia.com/unilever/etalase/axe Page 1: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/baru-superpell-sakura




https://www.tokopedia.com/unilever/etalase/baru-superpell-sakura Page 2: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/sunlight-biocare




https://www.tokopedia.com/unilever/etalase/sunlight-biocare Page 2: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/ponds-biome




https://www.tokopedia.com/unilever/etalase/ponds-biome Page 3: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/tmt_24421




https://www.tokopedia.com/unilever/etalase/tmt_24421 Page 2: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/closeup




https://www.tokopedia.com/unilever/etalase/closeup Page 0: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/sunlight




https://www.tokopedia.com/unilever/etalase/sunlight Page 7: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/lifebuoy-pencuci-piring




https://www.tokopedia.com/unilever/etalase/lifebuoy-pencuci-piring Page 4: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/beli-1-gratis-1




https://www.tokopedia.com/unilever/etalase/beli-1-gratis-1 Page 2: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/rinso-cuci-kucek




https://www.tokopedia.com/unilever/etalase/rinso-cuci-kucek Page 10: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/keperluan-bayi




https://www.tokopedia.com/unilever/etalase/keperluan-bayi Page 5: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/kebersihan-rumah




https://www.tokopedia.com/unilever/etalase/kebersihan-rumah Page 25: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/cif




https://www.tokopedia.com/unilever/etalase/cif Page 2: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/sold?sort=7




Scraping etalase: https://www.tokopedia.com/unilever/etalase/kebutuhan-bisnis




https://www.tokopedia.com/unilever/etalase/kebutuhan-bisnis Page 2: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/ponds




https://www.tokopedia.com/unilever/etalase/ponds Page 8: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/best-pick




https://www.tokopedia.com/unilever/etalase/best-pick Page 9: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/pepsodent-x-fifa




https://www.tokopedia.com/unilever/etalase/pepsodent-x-fifa Page 0: No more products.
Scraping etalase: https://www.tokopedia.com/unilever/etalase/beauty-kilat




https://www.tokopedia.com/unilever/etalase/beauty-kilat Page 2: No more products.
                                            productName basePrice finalPrice  \
0                                                               0          0   
1     Rinso Washing Machine Cleaner 125gr isi 6 FREE...    139800     139800   
2     [FLASH SALE] [Buy 4 FREE 2] Molto Trika Pelici...     27800      18400   
3     Sunlight Sabun Cuci Piring Extra Korean Strawb...    125400      65100   
4     Sunlight Sabun Cuci Piring Extra Korean Strawb...    104500      54300   
...                                                 ...       ...        ...   
2144                  Pond'S Age Miracle Whip Cream 20G    114900      54000   
2149  POND'S MICELLAR WATER BRIGHTENING ROSE MAKEUP ...     42900      18000   
2150  Ponds Vitamin Micellar Water (Makeup Remover) ...     92900      35000   
2151  POND'S AGE MIRACLE SERUM WAJAH YOUTHFUL GLOW 3...    277800     164700   
2176  Sunlight Sabun Cuci Piring Jeruk

  promo_sku = pd.concat([promo_sku, new_df], ignore_index=True)


In [13]:
from datetime import datetime

file_name = f"../tokopedia/TOKOPEDIA_{datetime.now().strftime('%y%m%d')}.xlsx"
promo_sku.to_excel(file_name,index=False)