# Tokopedia

In [1]:
pip install selenium webdriver-manager

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import NoSuchElementException

# --- CONFIGURATION ---
# Tokopedia search-results URL that get_product_links() opens first.
BASE_URL = "https://www.tokopedia.com/search?fcity=101&st=product&q=Kota%20Solok"
DELAY_PAGE_LOAD = 5        # Seconds to sleep after opening the search page
DELAY_REQUEST_INTERVAL = 2.0 # Seconds to sleep after each product page

def setup_driver():
    """Build and return the Chrome WebDriver used by the scraper.

    The browser is started maximized, with Blink's ``AutomationControlled``
    feature disabled and a fixed desktop Chrome user-agent string. The
    chromedriver binary is resolved through webdriver-manager.
    """
    opts = Options()
    for flag in (
        "--disable-blink-features=AutomationControlled",
        "--start-maximized",
        # Common desktop user agent so requests look like a regular browser.
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    ):
        opts.add_argument(flag)

    chromedriver = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=chromedriver, options=opts)

def deep_scroll(driver):
    """Scroll the current page to the bottom until no more content appears.

    Each round scrolls down in 500 px steps until the viewport reaches the
    bottom of the document, waits for additional products to load, and then
    compares the document height with the previous round. When the height
    has stopped growing, the 'Muat Lebih Banyak' (load more) button is looked
    up and clicked; scrolling ends once that button is absent or clicking it
    fails.

    Args:
        driver: Selenium WebDriver that already shows the page to scroll.
    """
    print("[*] Memulai Deep Scrolling (Mohon tunggu, ini butuh waktu)...")

    last_height = driver.execute_script("return document.body.scrollHeight")

    while True:
        # 1. Step down 500 px at a time so content has time to render.
        while True:
            offset_before = driver.execute_script("return window.pageYOffset")
            driver.execute_script("window.scrollBy(0, 500);")
            time.sleep(1.5)  # Give images time to render

            offset_after = driver.execute_script("return window.pageYOffset")
            viewport_height = driver.execute_script("return window.innerHeight")
            doc_height = driver.execute_script("return document.body.scrollHeight")

            # Scroll offsets can be fractional while scrollHeight is rounded,
            # so allow 1 px of slack when testing for "bottom reached".
            reached_bottom = offset_after + viewport_height >= doc_height - 1
            # If the page did not move at all there is nothing left to scroll
            # (or scrolling is blocked); stop instead of looping forever.
            stalled = offset_after <= offset_before
            if reached_bottom or stalled:
                break

        # 2. Wait for Tokopedia's AJAX loading.
        print("    -> Menunggu loading produk tambahan...")
        time.sleep(4)

        new_height = driver.execute_script("return document.body.scrollHeight")

        if new_height != last_height:
            # The page grew, so go around again and scroll through the new part.
            last_height = new_height
            continue

        # 3. Height unchanged: look for the 'Muat Lebih Banyak' button.
        print("[?] Tinggi halaman tidak berubah. Mencari tombol 'Muat Lebih Banyak'...")

        try:
            # The XPath requires BOTH the 'css-1turmok-unf-btn' class fragment
            # AND a descendant span whose text contains 'Muat Lebih Banyak'.
            # find_element raises NoSuchElementException when nothing matches.
            tombol_muat = driver.find_element(By.XPATH, "//button[contains(@class, 'css-1turmok-unf-btn') and .//span[contains(text(), 'Muat Lebih Banyak')]]")

            print("[+] Tombol DITEMUKAN! Mengklik tombol...")
            # Click through JavaScript rather than WebElement.click().
            driver.execute_script("arguments[0].click();", tombol_muat)

            # Give the page time to load after the click.
            time.sleep(5)

            # Use the post-click height as the new baseline, then keep scrolling.
            last_height = driver.execute_script("return document.body.scrollHeight")
            print("    -> Lanjut scrolling...")
        except NoSuchElementException:
            # No button left: the result list is exhausted.
            print("[*] Tombol tidak ditemukan. Halaman benar-benar sudah mentok.")
            break
        except Exception as e:
            print(f"[!] Gagal klik tombol: {e}")
            break

def get_product_links(driver, url):
    """Open a search page, load all results, and collect the product URLs.

    Args:
        driver: Selenium WebDriver used to load the page.
        url: Search-results URL to open.

    Returns:
        List of unique hrefs, in the order their product images appear on the
        page. Only hrefs containing ``tokopedia.com`` are kept, and hrefs
        containing ``ta.tokopedia.com`` are dropped (presumably ad/tracking
        redirects -- confirm).
    """
    print(f"[*] Membuka search page: {url}")
    driver.get(url)
    time.sleep(DELAY_PAGE_LOAD)

    # Load every result before collecting links.
    deep_scroll(driver)

    links = []
    try:
        # Every product card is located through its product image.
        images = driver.find_elements(By.XPATH, '//img[@alt="product-image"]')
        print(f"[*] Total elemen gambar terdeteksi: {len(images)}")

        for img in images:
            try:
                parent_anchor = img.find_element(By.XPATH, "./ancestor::a")
                href = parent_anchor.get_attribute("href")
            except Exception:
                # Best effort: skip an image without a usable enclosing link.
                # ``Exception`` (not a bare except) keeps Ctrl-C working.
                continue

            if href and "tokopedia.com" in href and "ta.tokopedia.com" not in href:
                links.append(href)
    except Exception as e:
        print(f"[!] Error get links: {e}")

    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # result is deterministic and follows the page order (a set would not).
    unique_links = list(dict.fromkeys(links))
    print(f"[*] Total Link Unik & Valid ditemukan: {len(unique_links)}")
    return unique_links

def _element_text(driver, css_selector, default="-"):
    """Return the text of the first element matching *css_selector*, or *default*."""
    try:
        return driver.find_element(By.CSS_SELECTOR, css_selector).text
    except Exception:
        # Best-effort field: any lookup failure yields the placeholder.
        # ``Exception`` (not a bare except) lets Ctrl-C interrupt the run.
        return default


def scrape_details(driver, links):
    """Visit every product link and extract the shop name and shipping city.

    Args:
        driver: Selenium WebDriver used to open each product page.
        links: Iterable of product page URLs; it must support ``len()``.

    Returns:
        List of dicts with the keys ``"Nama Usaha"``, ``"Kota"`` and
        ``"Link Produk"``. A field that cannot be read is stored as ``"-"``.
        A link whose processing raises is reported and left out of the result.
    """
    data_hasil = []
    total_link = len(links)

    print(f"\n[*] Mulai scraping {total_link} produk...")

    for i, link in enumerate(links):
        print(f"[{i+1}/{total_link}] Memproses data...")

        try:
            driver.get(link)
            # Short fixed wait for the product page; raise it on slow connections.
            time.sleep(2)

            # 1. Shop name
            nama_usaha = _element_text(driver, "h2.css-1ceqk3d-unf-heading")

            # 2. City: strip the "Dikirim dari" prefix from the heading text.
            kota_raw = _element_text(driver, "h2.css-793nib-unf-heading")
            kota = kota_raw.replace("Dikirim dari", "").strip()

            data_hasil.append({
                "Nama Usaha": nama_usaha,
                "Kota": kota,
                "Link Produk": link
            })

            print(f"    > {nama_usaha} | {kota}")

        except Exception as e:
            print(f"    [!] Gagal pada link ini: {e}")

        # Pause between products regardless of success or failure.
        time.sleep(DELAY_REQUEST_INTERVAL)

    return data_hasil

# --- MAIN EXECUTION ---
# Runs the full Tokopedia pipeline: start the browser, collect product links
# from BASE_URL, scrape each product page, and print the result table.
if __name__ == "__main__":
    driver = setup_driver()

    try:
        # Phase 1: gather the product URLs from the search page.
        product_links = get_product_links(driver, BASE_URL)

        if product_links:
            # Phase 2: open every product page and extract its details.
            hasil_scraping = scrape_details(driver, product_links)

            # NOTE: df is only assigned on this branch; when no links are
            # found, df keeps whatever value it had before (or stays undefined).
            df = pd.DataFrame(hasil_scraping)

            print("\n" + "="*40)
            print("        HASIL SCRAPING         ")
            print("="*40)
            print(df)



        else:
            print("[!] Tidak ada link yang ditemukan. Cek koneksi atau selector.")

    finally:
        # Always close the browser, even when scraping raised.
        driver.quit()

[*] Membuka search page: https://www.tokopedia.com/search?fcity=101&st=product&q=Kota%20Solok
[*] Memulai Deep Scrolling (Mohon tunggu, ini butuh waktu)...
    -> Menunggu loading produk tambahan...
    -> Menunggu loading produk tambahan...
[?] Tinggi halaman tidak berubah. Mencari tombol 'Muat Lebih Banyak'...
[+] Tombol DITEMUKAN! Mengklik tombol...
    -> Lanjut scrolling...
    -> Menunggu loading produk tambahan...
[?] Tinggi halaman tidak berubah. Mencari tombol 'Muat Lebih Banyak'...
[*] Tombol tidak ditemukan. Halaman benar-benar sudah mentok.
[*] Total elemen gambar terdeteksi: 28
[*] Total Link Unik & Valid ditemukan: 28

[*] Mulai scraping 28 produk...
[1/28] Memproses data...
    > Rempah Khas Minang | Kota Solok
[2/28] Memproses data...
    > Rempah Khas Minang | Kota Solok
[3/28] Memproses data...
    > Rempah Khas Minang | Kota Solok
[4/28] Memproses data...
    > Rempah Khas Minang | Kota Solok
[5/28] Memproses data...
    > dapurmama tara | Kota Solok
[6/28] Memproses

In [3]:
df

Unnamed: 0,Nama Usaha,Kota,Link Produk
0,Rempah Khas Minang,Kota Solok,https://www.tokopedia.com/rempah-khas-minang/b...
1,Rempah Khas Minang,Kota Solok,https://www.tokopedia.com/rempah-khas-minang/b...
2,Rempah Khas Minang,Kota Solok,https://www.tokopedia.com/rempah-khas-minang/b...
3,Rempah Khas Minang,Kota Solok,https://www.tokopedia.com/rempah-khas-minang/b...
4,dapurmama tara,Kota Solok,https://www.tokopedia.com/dapurmama-tara/bawan...
5,Nan lamak,Kota Solok,https://www.tokopedia.com/nan-lamak/karak-kali...
6,putra bumbu minang,Kota Solok,https://www.tokopedia.com/putra-bumbu-minang/p...
7,Rempah Khas Minang,Kota Solok,https://www.tokopedia.com/rempah-khas-minang/b...
8,Nan lamak,Kota Solok,https://www.tokopedia.com/nan-lamak/randang-da...
9,Rempah Khas Minang,Kota Solok,https://www.tokopedia.com/rempah-khas-minang/p...


In [7]:
# INPUT BASE URL
# Ask for the Tokopedia search URL to scrape. Surrounding whitespace from a
# pasted value is removed, and an empty answer falls back to the example URL
# instead of leaving BASE_URL as an unusable empty string.
DEFAULT_BASE_URL = "https://www.tokopedia.com/search?fcity=101&st=product&q=Kota%20Solok"

BASE_URL = input(f"ex: {DEFAULT_BASE_URL}").strip() or DEFAULT_BASE_URL

In [8]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import NoSuchElementException

# --- CONFIGURATION ---
# BASE_URL must already exist (it is assigned by the input() cell above); this
# self-assignment leaves it unchanged and raises NameError if it is missing.
BASE_URL = BASE_URL
DELAY_PAGE_LOAD = 10 # was 5; seconds to sleep after opening the search page
DELAY_REQUEST_INTERVAL = 2.0 # Seconds to sleep after each product page

def setup_driver():
    """Build and return the Chrome WebDriver used by the scraper.

    The browser is started maximized, with Blink's ``AutomationControlled``
    feature disabled and a fixed desktop Chrome user-agent string. The
    chromedriver binary is resolved through webdriver-manager.
    """
    opts = Options()
    for flag in (
        "--disable-blink-features=AutomationControlled",
        "--start-maximized",
        # Common desktop user agent so requests look like a regular browser.
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    ):
        opts.add_argument(flag)

    chromedriver = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=chromedriver, options=opts)

def deep_scroll(driver):
    """Scroll the current page to the bottom until no more content appears.

    Each round scrolls down in 500 px steps until the viewport reaches the
    bottom of the document, waits for additional products to load, and then
    compares the document height with the previous round. When the height
    has stopped growing, the 'Muat Lebih Banyak' (load more) button is looked
    up and clicked; scrolling ends once that button is absent or clicking it
    fails.

    Args:
        driver: Selenium WebDriver that already shows the page to scroll.
    """
    print("[*] Memulai Deep Scrolling (Mohon tunggu, ini butuh waktu)...")

    last_height = driver.execute_script("return document.body.scrollHeight")

    while True:
        # 1. Step down 500 px at a time so content has time to render.
        while True:
            offset_before = driver.execute_script("return window.pageYOffset")
            driver.execute_script("window.scrollBy(0, 500);")
            time.sleep(1.5)  # Give images time to render

            offset_after = driver.execute_script("return window.pageYOffset")
            viewport_height = driver.execute_script("return window.innerHeight")
            doc_height = driver.execute_script("return document.body.scrollHeight")

            # Scroll offsets can be fractional while scrollHeight is rounded,
            # so allow 1 px of slack when testing for "bottom reached".
            reached_bottom = offset_after + viewport_height >= doc_height - 1
            # If the page did not move at all there is nothing left to scroll
            # (or scrolling is blocked); stop instead of looping forever.
            stalled = offset_after <= offset_before
            if reached_bottom or stalled:
                break

        # 2. Wait for Tokopedia's AJAX loading.
        print("    -> Menunggu loading produk tambahan...")
        time.sleep(4)

        new_height = driver.execute_script("return document.body.scrollHeight")

        if new_height != last_height:
            # The page grew, so go around again and scroll through the new part.
            last_height = new_height
            continue

        # 3. Height unchanged: look for the 'Muat Lebih Banyak' button.
        print("[?] Tinggi halaman tidak berubah. Mencari tombol 'Muat Lebih Banyak'...")

        try:
            # The XPath requires BOTH the 'css-1turmok-unf-btn' class fragment
            # AND a descendant span whose text contains 'Muat Lebih Banyak'.
            # find_element raises NoSuchElementException when nothing matches.
            tombol_muat = driver.find_element(By.XPATH, "//button[contains(@class, 'css-1turmok-unf-btn') and .//span[contains(text(), 'Muat Lebih Banyak')]]")

            print("[+] Tombol DITEMUKAN! Mengklik tombol...")
            # Click through JavaScript rather than WebElement.click().
            driver.execute_script("arguments[0].click();", tombol_muat)

            # Give the page time to load after the click.
            time.sleep(5)

            # Use the post-click height as the new baseline, then keep scrolling.
            last_height = driver.execute_script("return document.body.scrollHeight")
            print("    -> Lanjut scrolling...")
        except NoSuchElementException:
            # No button left: the result list is exhausted.
            print("[*] Tombol tidak ditemukan. Halaman benar-benar sudah mentok.")
            break
        except Exception as e:
            print(f"[!] Gagal klik tombol: {e}")
            break

def get_product_links(driver, url):
    """Open a search page, load all results, and collect the product URLs.

    Args:
        driver: Selenium WebDriver used to load the page.
        url: Search-results URL to open.

    Returns:
        List of unique hrefs, in the order their product images appear on the
        page. Only hrefs containing ``tokopedia.com`` are kept, and hrefs
        containing ``ta.tokopedia.com`` are dropped (presumably ad/tracking
        redirects -- confirm).
    """
    print(f"[*] Membuka search page: {url}")
    driver.get(url)
    time.sleep(DELAY_PAGE_LOAD)

    # Load every result before collecting links.
    deep_scroll(driver)

    links = []
    try:
        # Every product card is located through its product image.
        images = driver.find_elements(By.XPATH, '//img[@alt="product-image"]')
        print(f"[*] Total elemen gambar terdeteksi: {len(images)}")

        for img in images:
            try:
                parent_anchor = img.find_element(By.XPATH, "./ancestor::a")
                href = parent_anchor.get_attribute("href")
            except Exception:
                # Best effort: skip an image without a usable enclosing link.
                # ``Exception`` (not a bare except) keeps Ctrl-C working.
                continue

            if href and "tokopedia.com" in href and "ta.tokopedia.com" not in href:
                links.append(href)
    except Exception as e:
        print(f"[!] Error get links: {e}")

    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # result is deterministic and follows the page order (a set would not).
    unique_links = list(dict.fromkeys(links))
    print(f"[*] Total Link Unik & Valid ditemukan: {len(unique_links)}")
    return unique_links

def _element_text(driver, css_selector, default="-"):
    """Return the text of the first element matching *css_selector*, or *default*."""
    try:
        return driver.find_element(By.CSS_SELECTOR, css_selector).text
    except Exception:
        # Best-effort field: any lookup failure yields the placeholder.
        # ``Exception`` (not a bare except) lets Ctrl-C interrupt the run.
        return default


def scrape_details(driver, links):
    """Visit every product link and extract the shop name and shipping city.

    Args:
        driver: Selenium WebDriver used to open each product page.
        links: Iterable of product page URLs; it must support ``len()``.

    Returns:
        List of dicts with the keys ``"Nama Usaha"``, ``"Kota"`` and
        ``"Link Produk"``. A field that cannot be read is stored as ``"-"``.
        A link whose processing raises is reported and left out of the result.
    """
    data_hasil = []
    total_link = len(links)

    print(f"\n[*] Mulai scraping {total_link} produk...")

    for i, link in enumerate(links):
        print(f"[{i+1}/{total_link}] Memproses data...")

        try:
            driver.get(link)
            # Short fixed wait for the product page; raise it on slow connections.
            time.sleep(2)

            # 1. Shop name
            nama_usaha = _element_text(driver, "h2.css-1ceqk3d-unf-heading")

            # 2. City: strip the "Dikirim dari" prefix from the heading text.
            kota_raw = _element_text(driver, "h2.css-793nib-unf-heading")
            kota = kota_raw.replace("Dikirim dari", "").strip()

            data_hasil.append({
                "Nama Usaha": nama_usaha,
                "Kota": kota,
                "Link Produk": link
            })

            print(f"    > {nama_usaha} | {kota}")

        except Exception as e:
            print(f"    [!] Gagal pada link ini: {e}")

        # Pause between products regardless of success or failure.
        time.sleep(DELAY_REQUEST_INTERVAL)

    return data_hasil

# --- MAIN EXECUTION ---
# Runs the full Tokopedia pipeline: start the browser, collect product links
# from BASE_URL, scrape each product page, and print the result table.
if __name__ == "__main__":
    driver = setup_driver()

    try:
        # Phase 1: gather the product URLs from the search page.
        product_links = get_product_links(driver, BASE_URL)

        if product_links:
            # Phase 2: open every product page and extract its details.
            hasil_scraping = scrape_details(driver, product_links)

            # NOTE: df is only assigned on this branch; when no links are
            # found, df keeps whatever value it had before (or stays undefined).
            df = pd.DataFrame(hasil_scraping)

            print("\n" + "="*40)
            print("        HASIL SCRAPING         ")
            print("="*40)
            print(df)



        else:
            print("[!] Tidak ada link yang ditemukan. Cek koneksi atau selector.")

    finally:
        # Always close the browser, even when scraping raised.
        driver.quit()

[*] Membuka search page: https://www.tokopedia.com/search?fcity=101&st=product&q=Kota%20Solok
[*] Memulai Deep Scrolling (Mohon tunggu, ini butuh waktu)...
    -> Menunggu loading produk tambahan...
    -> Menunggu loading produk tambahan...
[?] Tinggi halaman tidak berubah. Mencari tombol 'Muat Lebih Banyak'...
[*] Tombol tidak ditemukan. Halaman benar-benar sudah mentok.
[*] Total elemen gambar terdeteksi: 28
[*] Total Link Unik & Valid ditemukan: 28

[*] Mulai scraping 28 produk...
[1/28] Memproses data...
    > Nan lamak | Kota Solok
[2/28] Memproses data...
    > dapurmama tara | Kota Solok
[3/28] Memproses data...
    > Rempah Khas Minang | Kota Solok
[4/28] Memproses data...
    > putra bumbu minang | Kota Solok
[5/28] Memproses data...
    > Rempah Khas Minang | Kota Solok
[6/28] Memproses data...
    > dapurmama tara | Kota Solok
[7/28] Memproses data...
    > Rempah Khas Minang | Kota Solok
[8/28] Memproses data...
    > Rempah Khas Minang | Kota Solok
[9/28] Memproses data..

In [9]:
df

Unnamed: 0,Nama Usaha,Kota,Link Produk
0,Nan lamak,Kota Solok,https://www.tokopedia.com/nan-lamak/kopi-bitan...
1,dapurmama tara,Kota Solok,https://www.tokopedia.com/dapurmama-tara/kerup...
2,Rempah Khas Minang,Kota Solok,https://www.tokopedia.com/rempah-khas-minang/b...
3,putra bumbu minang,Kota Solok,https://www.tokopedia.com/putra-bumbu-minang/p...
4,Rempah Khas Minang,Kota Solok,https://www.tokopedia.com/rempah-khas-minang/b...
5,dapurmama tara,Kota Solok,https://www.tokopedia.com/dapurmama-tara/bawan...
6,Rempah Khas Minang,Kota Solok,https://www.tokopedia.com/rempah-khas-minang/b...
7,Rempah Khas Minang,Kota Solok,https://www.tokopedia.com/rempah-khas-minang/b...
8,Nan lamak,Kota Solok,https://www.tokopedia.com/nan-lamak/randang-da...
9,Nan lamak,Kota Solok,https://www.tokopedia.com/nan-lamak/karak-kali...


# SHOPEE

In [12]:
pip install selenium beautifulsoup4 pandas

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import os
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# --- CONFIGURATION ---
# Credentials are read from the environment (SHOPEE_PHONE / SHOPEE_PASSWORD)
# so they do not have to be typed into the source. The previous literal
# values remain as fallbacks to keep existing runs working.
# NOTE(review): consider removing the hard-coded phone number once the
# environment variables are in use.
PHONE = os.environ.get("SHOPEE_PHONE", "085185493315")
PASSWORD = os.environ.get("SHOPEE_PASSWORD", "")  # empty unless SHOPEE_PASSWORD is set
BASE_URL = "https://shopee.co.id"
# The search page number is substituted into the "{}" placeholder.
SEARCH_URL_TEMPLATE = "https://shopee.co.id/search?keyword=solok&page={}"
PAGES_TO_SCRAPE = 2  # Number of search pages to visit (kept small for testing)

def setup_driver():
    """Create and return the Chrome WebDriver used for the Shopee session.

    Chrome is launched maximized with a fixed desktop user agent, the
    ``AutomationControlled`` Blink feature disabled, the ``enable-automation``
    switch excluded and the automation extension turned off. After start-up a
    script redefines ``navigator.webdriver`` to return ``undefined``.
    """
    opts = webdriver.ChromeOptions()

    experimental = {
        "excludeSwitches": ["enable-automation"],
        'useAutomationExtension': False,
    }
    for key, value in experimental.items():
        opts.add_experimental_option(key, value)

    for flag in (
        "--disable-blink-features=AutomationControlled",
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
        "--start-maximized",
    ):
        opts.add_argument(flag)

    browser = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=opts,
    )

    # Hide navigator.webdriver.
    # NOTE(review): execute_script runs against the document loaded at this
    # moment; confirm whether the override is still in place after driver.get().
    browser.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
    return browser

def login_shopee(driver):
    """Submit the Shopee login form and wait for the login page to be left.

    Opens ``{BASE_URL}/buyer/login``, types PHONE and PASSWORD, and clicks the
    login button. It then waits up to 20 seconds for the browser URL to stop
    containing ``"login"``, so a puzzle/CAPTCHA can be solved by hand in that
    time. Failures are reported on stdout; nothing is raised for a failed or
    timed-out login.

    Args:
        driver: Selenium WebDriver to log in with.
    """
    print("[INFO] Membuka halaman login...")
    driver.get(f"{BASE_URL}/buyer/login")
    wait = WebDriverWait(driver, 20)

    try:
        # 1. Enter the phone number.
        user_field = wait.until(EC.presence_of_element_located((By.NAME, "loginKey")))
        user_field.send_keys(PHONE)
        time.sleep(1.5)  # Pause so the typing looks human

        # 2. Enter the password.
        pass_field = driver.find_element(By.NAME, "password")
        pass_field.send_keys(PASSWORD)
        time.sleep(1.5)

        # 3. Click the login button; either label ('Log in' / 'Login') matches.
        login_btn = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Log in') or contains(text(), 'Login')]")))
        login_btn.click()
        print("[INFO] Tombol Login diklik.")
    except Exception as e:
        print(f"[ERROR] Gagal login: {e}")
        # The form was never submitted, so there is nothing to wait for.
        return

    # --- VERIFICATION STAGE ---
    print("[PENTING] Jika muncul Puzzle/CAPTCHA, selesaikan secara manual dalam 20 detik!")

    # Wait until the URL changes away from the login page. This returns as
    # soon as the login succeeds and gives up after the 20 s the message
    # promises, instead of sleeping for a fixed 10 s.
    try:
        wait.until(lambda d: "login" not in d.current_url)
    except TimeoutException:
        print("[WARN] Masih di halaman login setelah 20 detik.")

def _shopee_text(driver, by, selector, default="N/A"):
    """Return the text of the first element found via (*by*, *selector*), or *default*."""
    try:
        return driver.find_element(by, selector).text
    except Exception:
        # Best-effort field: any lookup failure yields the placeholder.
        # ``Exception`` (not a bare except) lets Ctrl-C interrupt the run.
        return default


def scrape_data(driver, max_per_page=5):
    """Scrape shop name and shipping city from Shopee search results.

    Visits PAGES_TO_SCRAPE search pages (page numbers start at 0), collects
    the product links on each, and opens up to *max_per_page* of them.

    Args:
        driver: Selenium WebDriver used for all page loads.
        max_per_page: Maximum number of products opened per search page.
            Defaults to 5 (the testing limit); ``None`` removes the limit.

    Returns:
        List of dicts with the keys ``"Toko"``, ``"Kota"`` and ``"Link"``.
        A field that cannot be read is stored as ``"N/A"``; a product whose
        page fails to load is reported and skipped.
    """
    all_data = []
    for page in range(PAGES_TO_SCRAPE):
        url = SEARCH_URL_TEMPLATE.format(page)
        print(f"[INFO] Scraping halaman {page}...")
        driver.get(url)
        time.sleep(5)

        # Scroll down gradually so lazily loaded items appear.
        for _ in range(3):
            driver.execute_script("window.scrollBy(0, 1000);")
            time.sleep(2)

        # Collect product links: read each href once, drop empty values and
        # duplicates, and keep page order. The strings are extracted before
        # navigating away, since the elements become invalid afterwards.
        anchors = driver.find_elements(By.CSS_SELECTOR, "a[data-sqe='link']")
        hrefs = (anchor.get_attribute('href') for anchor in anchors)
        product_urls = list(dict.fromkeys(href for href in hrefs if href))

        for link in product_urls[:max_per_page]:
            try:
                driver.get(link)
                time.sleep(3)

                # Shop name
                toko = _shopee_text(driver, By.CSS_SELECTOR, ".V6S7ne, .fV3TIn")

                # City: the div following the 'Dikirim Dari' label.
                kota = _shopee_text(driver, By.XPATH, "//div[text()='Dikirim Dari']/following-sibling::div")

                print(f" > {toko} | {kota}")
                all_data.append({"Toko": toko, "Kota": kota, "Link": link})
            except Exception as e:
                # Report the failure instead of skipping the product silently.
                print(f"[WARN] Gagal memproses {link}: {e}")
                continue
    return all_data

# Runs the Shopee pipeline: start the browser, log in, scrape the search
# results, and write them to data_shopee_alber.csv.
if __name__ == "__main__":
    driver = setup_driver()
    try:
        login_shopee(driver)
        # If the browser is still on the login page, pause for manual interaction.
        if "login" in driver.current_url:
            print("[INFO] Masih di halaman login. Menunggu verifikasi manual...")
            time.sleep(15)

        hasil = scrape_data(driver)
        if hasil:
            # Save only when at least one row was scraped; index=False leaves
            # the DataFrame index out of the CSV.
            pd.DataFrame(hasil).to_csv("data_shopee_alber.csv", index=False)
            print("[DONE] Data berhasil disimpan.")
    finally:
        # Always close the browser, even when scraping raised.
        driver.quit()

[INFO] Membuka halaman login...
[ERROR] Gagal login: Message: 
Stacktrace:
Symbols not available. Dumping unresolved backtrace:
	0x485093
	0x4850d4
	0x27b490
	0x2c58da
	0x2c5b7b
	0x307b42
	0x2e8204
	0x305341
	0x2e7f56
	0x2b96c9
	0x2ba484
	0x6d7e34
	0x6d30c9
	0x6f0add
	0x49db38
	0x4a58ad
	0x48d848
	0x48da12
	0x4775fa
	0x74dc5d49
	0x76f9d83b
	0x76f9d7c1

[INFO] Masih di halaman login. Menunggu verifikasi manual...
[INFO] Scraping halaman 0...
[INFO] Scraping halaman 1...
