In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# DataFrame to store product information
promo_sku = pd.DataFrame(columns=["productName", "basePrice", "finalPrice", "discountPercent"])

# Headers for the request
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:131.0) Gecko/20100101 Firefox/131.0',
    'Accept': '*/*',
    'Accept-Language': 'en-US,en;q=0.5',
    'Referer': 'https://www.tokopedia.com/unilever/',
    'X-Tkpd-Lite-Service': 'zeus',
    'X-Version': '1227cf6',
    'content-type': 'application/json',
    'X-Device': 'default_v3',
    'X-Source': 'tokopedia-lite',
    'Origin': 'https://www.tokopedia.com',
    'Connection': 'keep-alive'
}

def convert_price(price_str):
    return int(price_str.replace('Rp', '').replace('.', '').strip())

def get_product_data(promo_sku):
    # List to store individual product data
    products_data = []
    try:
        for i in range(80):
            response = requests.post(f'https://www.tokopedia.com/unilever/product/page/{i}', headers=headers)
            soup = BeautifulSoup(response.content, 'html.parser')
            
            # Find the product cards using the testid
            product_cards = soup.find_all('div', {'data-testid': 'master-product-card'})
            
            for product_card in product_cards:
                # Extracting product details if the product card exists
                if product_card:
                    # Extract the product name
                    product_name = product_card.find('div', {'data-testid': 'linkProductName'}).text
                    
                    # Extract the product price (final price)
                    product_price = product_card.find('div', {'data-testid': 'linkProductPrice'}).text
                    final_price = convert_price(product_price)
                    
                    # Extract the discount (if available)
                    product_discount = product_card.find('div', {'data-testid': 'lblProductDiscount'})
                    discount_percent = float(product_discount.text.replace("%", "")) if product_discount else 0
                    
                    # Extract the original price (if available)
                    original_price = product_card.find('div', {'data-testid': 'lblProductSlashPrice'})
                    base_price = convert_price(original_price.text) if original_price else final_price
                    
                    # Append the data to the list
                    products_data.append([product_name, base_price, final_price, discount_percent])
            
            # Insert into the DataFrame and remove duplicates
            promo_sku = pd.concat([promo_sku, pd.DataFrame(products_data, columns=["productName", "basePrice", "finalPrice", "discountPercent"])], ignore_index=True)
            promo_sku.drop_duplicates(subset=["productName"], inplace=True)

            # Wait before the next request to avoid getting blocked
            time.sleep(1)
    except Exception as e:
        print(f"Error: {e}")
    return promo_sku



In [4]:

# Execute the function and get the product data
promo_sku = get_product_data(promo_sku)

# Display the DataFrame
print(promo_sku)

  promo_sku = pd.concat([promo_sku, pd.DataFrame(products_data, columns=["productName", "basePrice", "finalPrice", "discountPercent"])], ignore_index=True)


                                           productName basePrice finalPrice  \
0                    Citra Pearly Glow Uv Lotion 120Ml     17900      12900   
1                Closeup White Fresh Charcoal Mint 95g     35900      25600   
2                FREE Molto Trika Japanese Peach 300ml   1000000    1000000   
3                  FREE Molto Trika Floral Bliss 300ml   1000000    1000000   
4                FREE Rinso Kapsul Lavender Fresh 126g   1000000    1000000   
..                                                 ...       ...        ...   
745  Lux Body Wash Sabun Mandi Cair Pump Soft Rose ...     86800      81500   
746  Vixal Pembersih Porselen Kuat Harum 780ml Free...     59700      33500   
747  Dove Hair Tonic Intensive Leave On Treatment 5...     79800      57000   
748  Lifebuoy Sabun Cair Shiso & Sandalwood 900ml i...    180800     102300   
749  Molto All-In-1 Pink Pewangi & Pelembut 720Ml F...    150700      68900   

     discountPercent  
0               28.0  
1    

In [5]:
from datetime import datetime

file_name = f"../tokopedia/TOKOPEDIA_{datetime.now().strftime('%y%m%d')}.xlsx"
promo_sku.to_excel(file_name,index=False)