In [1]:
!pip install selenium webdriver-manager pandas




In [6]:
import time  # Adds delays to allow the webpage to load properly

import pandas as pd  # Used for handling and saving data in a structured format
from selenium import webdriver  # Import the Selenium WebDriver
from selenium.common.exceptions import WebDriverException  # Base class of Selenium lookup/interaction errors
from selenium.webdriver.chrome.options import Options  # Allows setting Chrome options
from selenium.webdriver.chrome.service import Service  # For managing ChromeDriver
from selenium.webdriver.common.by import By  # Helps in locating elements
from webdriver_manager.chrome import ChromeDriverManager  # Automatically installs the latest ChromeDriver

def _text_or_default(parent, xpath, default):
    """Return the stripped text of the first element under ``parent`` matching ``xpath``.

    Args:
        parent: A Selenium WebElement to search within.
        xpath: XPath expression, evaluated relative to ``parent``.
        default: Value returned when the element cannot be found or read.

    Returns:
        The element's stripped text, or ``default`` if Selenium raises while
        locating or reading it (so one incomplete review does not abort the run).
    """
    try:
        return parent.find_element(By.XPATH, xpath).text.strip()
    except WebDriverException:
        # Only Selenium errors are treated as "field missing"; KeyboardInterrupt
        # and genuine programming errors still propagate.
        return default


# Set up Chrome options to run in headless mode (no browser window)
options = Options()
options.add_argument("--headless")  # Run without opening a browser window
options.add_argument("--disable-gpu")  # Disable GPU acceleration
options.add_argument("--no-sandbox")  # Disable Chrome's sandbox (often needed in containers)

# Install and initialize ChromeDriver
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

# URL of the product page on Lazada
url = "https://www.lazada.com.my/products/sambal-nyet-berapi-ori-dari-khairulaming-i2623278934-s11857920384.html"

# List to store the scraped reviews (one [reviewer, date, content] row per review)
reviews_data = []
max_pages = 5  # Number of pages to scrape
output_path = "lazada_sambal_nyet_reviews.csv"

# Everything that talks to the browser lives inside try/finally so the headless
# Chrome process is always shut down, even on an error or a kernel interrupt.
try:
    driver.get(url)  # Open the webpage
    time.sleep(5)  # Wait for the page to load completely

    for page in range(1, max_pages + 1):
        print(f"Scraping Page {page}...")

        # Scroll down to make sure all reviews are loaded
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)  # Give time for new content to load

        # Find all review elements on the page
        reviews = driver.find_elements(By.CLASS_NAME, "item")

        # Extract review details for each review found, substituting a
        # placeholder for any field that is missing
        for review in reviews:
            reviewer = _text_or_default(review, ".//div[@class='middle']/span", "No Name")
            date = _text_or_default(review, ".//span[@class='title right']", "No Date")
            content = _text_or_default(review, ".//div[@class='content']", "No Review Content")
            reviews_data.append([reviewer, date, content])

        print(f"Page {page} scraped. Total reviews: {len(reviews_data)}")

        # The last requested page has been scraped: do not click through to a
        # page that will never be read (saves a page load and ~7 s of sleeps).
        if page == max_pages:
            break

        # Try to navigate to the next page
        try:
            next_page_xpath = f"//button[contains(@class, 'next-pagination-item') and text()='{page + 1}']"

            # Find the pagination area and scroll to it
            pagination_area = driver.find_element(By.CLASS_NAME, "next-pagination-list")
            driver.execute_script("arguments[0].scrollIntoView();", pagination_area)
            time.sleep(2)  # Give time for elements to load

            # Locate and click the next page button
            next_page = driver.find_element(By.XPATH, next_page_xpath)
            driver.execute_script("arguments[0].click();", next_page)  # Use JavaScript to click
            time.sleep(5)  # Wait for the next page to load
            print(f"Moved to Page {page + 1}")
        except Exception as e:
            print(f"Next page not found. Stopping scrape. Error: {e}")
            break  # Exit the loop if there's an issue navigating to the next page
finally:
    # Close the browser once scraping is done (or has failed)
    driver.quit()

# Save the scraped data into a CSV file
df = pd.DataFrame(reviews_data, columns=["Reviewer Name", "Review Date", "Review Content"])
df.to_csv(output_path, index=False, encoding="utf-8")

print(f"Scraping complete. {len(reviews_data)} reviews saved to {output_path}")


Scraping Page 1...
Page 1 scraped. Total reviews: 5
Moved to Page 2
Scraping Page 2...
Page 2 scraped. Total reviews: 10
Moved to Page 3
Scraping Page 3...
Page 3 scraped. Total reviews: 15
Moved to Page 4
Scraping Page 4...
Page 4 scraped. Total reviews: 20
Moved to Page 5
Scraping Page 5...
Page 5 scraped. Total reviews: 25
Moved to Page 6
Scraping complete. 25 reviews saved to lazada_sambal_nyet_reviews.csv
