In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

# Product page whose reviews are going to be scraped
url = "https://www.lazada.com.my/products/ugreen-braided-mfi-lightning-usb-a-to-lightning-cable-iphone-fast-charging-cable-for-iphoneipad-i12749949-s10763212739.html?scm=1007.17760.398138.0&pvid=b0c9f18f-0d7a-470d-ade2-c109c0d171ea&search=flashsale&spm=a2o4k.homepage.FlashSale.d_12749949"

# Location of the local chromedriver binary (raw string keeps the Windows
# backslashes literal) and the fixed pause used after navigating.
CHROMEDRIVER_PATH = r'C:\Users\User\Text Analytics\chromedriver-win64\chromedriver.exe'
PAGE_LOAD_WAIT_SECONDS = 5

# Start Chrome through that driver binary and maximise the window
service = Service(CHROMEDRIVER_PATH)
driver = webdriver.Chrome(service=service)
driver.maximize_window()

# Navigate to the product page, then pause so the page has time to load
driver.get(url)
time.sleep(PAGE_LOAD_WAIT_SECONDS)

# Helper that jumps the browser viewport to the end of the document
def scroll_to_bottom():
    """Scroll the current page to its bottom edge and pause for three seconds.

    Uses the module-level ``driver``. The pause is a fixed sleep intended to
    give dynamically loaded content (the reviews) time to appear; nothing is
    checked or returned.
    """
    jump_to_end_js = "window.scrollTo(0, document.body.scrollHeight);"
    driver.execute_script(jump_to_end_js)

    settle_seconds = 3  # fixed wait for newly loaded reviews
    time.sleep(settle_seconds)

# Helper: turn one review container element into a row of text fields
def _parse_review(review):
    """Return ``[reviewer_name, review_date, review_content]`` for one review.

    The reviewer name is required (a missing element raises, so the caller
    can skip the entry); date and content fall back to placeholder strings.
    """
    # Reviewer name: first <span> inside the element with class "middle"
    reviewer_name = (
        review.find_element(By.CLASS_NAME, "middle")
        .find_element(By.TAG_NAME, "span")
        .text.strip()
    )

    # find_elements returns an empty list instead of raising when absent
    date_element = review.find_elements(By.CLASS_NAME, "title")
    review_date = date_element[0].text.strip() if date_element else "Date not found"

    content_element = review.find_elements(By.CLASS_NAME, "content")
    review_content = content_element[0].text.strip() if content_element else "No review text"

    return [reviewer_name, review_date, review_content]


# Helper: advance the review list to the next page, if there is one
def _go_to_next_page():
    """Click the enabled "Next" pagination button.

    Returns:
        True if a button was clicked, False if no enabled "Next" button exists.
    """
    # Local import keeps this block self-contained; selenium is already a
    # dependency of this file.
    from selenium.common.exceptions import WebDriverException

    # Match "next" as a standalone CSS class. A substring test on the class
    # attribute is always true here, because "next-pagination-item" itself
    # contains "next", so the first pagination button would be picked.
    # NOTE(review): assumes the Next arrow carries a separate "next" class;
    # verify against the live page markup.
    candidates = driver.find_elements(
        By.CSS_SELECTOR, "button.next-pagination-item.next"
    )
    # A disabled button means we are on the last page; clicking it would only
    # cause the same reviews to be scraped again.
    next_button = next((btn for btn in candidates if btn.is_enabled()), None)
    if next_button is None:
        return False

    # Scroll the "Next" button into view before clicking
    driver.execute_script("arguments[0].scrollIntoView();", next_button)
    time.sleep(1)

    # Native click first; fall back to a JavaScript click if Selenium refuses
    # (e.g. another element overlaps the button).
    try:
        next_button.click()
    except WebDriverException:
        driver.execute_script("arguments[0].click();", next_button)

    time.sleep(3)  # Allow time for the next page to load
    return True


# Function to extract reviews from the Lazada page
def extract_reviews(max_pages=5):
    """Extract reviews from the Lazada product page.

    Operates on the module-level ``driver``, which must already be showing
    the product page. Each page is scrolled to the bottom, every element with
    class ``item`` is parsed as a review, and the "Next" pagination button is
    clicked to move on.

    Args:
        max_pages: Maximum number of review pages to scrape. Defaults to 5.

    Returns:
        A list of ``[reviewer_name, review_date, review_content]`` lists.
        Scraping stops early (returning what was collected so far) when a
        page fails to load, pagination fails, or there is no further page.
    """
    reviews = []  # List to store extracted reviews

    for page in range(1, max_pages + 1):
        print(f"Scraping page {page}...")

        scroll_to_bottom()  # Scroll down to load all content dynamically

        try:
            # Wait (up to 10 s) for at least one review element to be present
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "item"))
            )
            review_elements = driver.find_elements(By.CLASS_NAME, "item")
        except Exception as e:
            print(f"Error while processing page {page}: {e}")
            break  # Exit the loop if an error occurs during page processing

        for review in review_elements:
            try:
                reviews.append(_parse_review(review))
            except Exception as e:
                # Best effort: one malformed entry must not abort the page
                print(f"Skipping a review due to an error: {e}")

        # Nothing left to scrape after the final page, so do not paginate
        if page == max_pages:
            break

        try:
            if not _go_to_next_page():
                print("No 'Next' button found. Ending pagination.")
                break
        except Exception as e:
            print(f"Pagination error: {e}")
            break  # Stop if there's an issue with pagination

    return reviews


# Extract reviews: run the scraper against the page currently loaded in the
# browser and keep the collected rows for the saving/printing steps below.
reviews = extract_reviews()

# Write the scraped review rows out as a CSV file
def save_reviews_to_csv(reviews, filename="lazada_reviews.csv"):
    """Write *reviews* to *filename* as UTF-8 CSV and print a confirmation.

    Args:
        reviews: Sequence of three-item rows (name, date, content).
        filename: Destination path; defaults to ``lazada_reviews.csv``.
    """
    column_names = ["Reviewer Name", "Review Date", "Review Content"]
    table = pd.DataFrame(data=reviews, columns=column_names)

    # index=False leaves the DataFrame's row index out of the file
    table.to_csv(filename, encoding="utf-8", index=False)

    print(f"Reviews saved to {filename}")

# Save the results, show where they were written, and echo them to the
# console. The browser is shut down in ``finally`` so a failure while saving
# or printing does not leave a Chrome/chromedriver process running.
import os

try:
    # Save to CSV
    save_reviews_to_csv(reviews)

    # The CSV is written relative to the current working directory
    print("Working directory:", os.getcwd())

    for review in reviews:
        print(review)
finally:
    # Close the WebDriver
    driver.quit()


ModuleNotFoundError: No module named 'selenium'