Using Selenium to Scrape Data from a Dynamic, JavaScript-Rendered Website in Chrome

In [1]:
import csv
import os
import time

from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# ---------------------------------------------------------------------------
# Scraper configuration
# ---------------------------------------------------------------------------
LISTING_URL = "https://terrywhitechemmart.com.au/shop/products/skin-care"
OUTPUT_CSV = "products.csv"
MAX_PRODUCTS = 10     # only the first N distinct product links are visited
WAIT_TIMEOUT_S = 20   # upper bound (seconds) for every explicit wait
PAGE_SETTLE_S = 2     # fixed pause after navigating to a product page
PANEL_EXPAND_S = 1    # fixed pause after clicking the Ingredients panel
CSV_FIELDS = [
    "Product_ID",
    "Brand",
    "Product Name",
    "Description",
    "Image URL",
    "Price",
    "Ingredients",
]


def read_field(wait, css_selector, attribute=None):
    """Read one value from the first element matching ``css_selector``.

    Args:
        wait: ``WebDriverWait`` bound to the active driver.
        css_selector: CSS selector of the element to wait for.
        attribute: Attribute name to read; ``None`` reads the element's text.

    Returns:
        The attribute value or text, or ``None`` when the element does not
        appear within the wait timeout or cannot be read.
    """
    try:
        element = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, css_selector))
        )
        return element.text if attribute is None else element.get_attribute(attribute)
    except WebDriverException:
        # Best effort: a missing field must not abort the whole scrape.
        # Only Selenium errors are swallowed, so Ctrl-C still interrupts.
        return None


def read_ingredients(wait):
    """Expand the 'Ingredients' panel and return its text, or ``None``.

    Returns ``None`` when the panel is absent, cannot be clicked, or its
    content does not appear within the wait timeout.
    """
    try:
        panel_title = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, "//mat-panel-title[contains(text(),'Ingredients')]")
            )
        )
        panel_title.click()
        time.sleep(PANEL_EXPAND_S)  # wait for panel to expand

        panel_bodies = wait.until(
            EC.presence_of_all_elements_located(
                (By.CSS_SELECTOR, "div.mat-expansion-panel-body small.expand-panel-content")
            )
        )
        # NOTE(review): index 3 presumably corresponds to the Ingredients panel
        # in the page's panel order - confirm against the live markup.
        # With fewer than four panel bodies the first one is used instead.
        chosen = panel_bodies[3] if len(panel_bodies) >= 4 else panel_bodies[0]
        return chosen.text
    except WebDriverException:
        return None


def scrape_product(driver, wait, link):
    """Open one product page and return its scraped fields as a dict.

    Args:
        driver: Active Selenium WebDriver.
        wait: ``WebDriverWait`` bound to ``driver``.
        link: URL of the product page.

    Returns:
        Dict keyed by ``CSV_FIELDS``; fields that could not be read are ``None``.
    """
    driver.get(link)
    time.sleep(PAGE_SETTLE_S)  # short pause to ensure page loads

    return {
        "Product_ID": link,  # product page URL, used as the unique identifier
        "Brand": read_field(wait, "div[itemprop='brand'] meta[itemprop='name']", "content"),
        "Product Name": read_field(wait, "h1"),  # text of the page heading
        "Description": read_field(wait, "meta[itemprop='description']", "content"),
        "Image URL": read_field(wait, "link[itemprop='image']", "href"),
        "Price": read_field(wait, "meta[itemprop='price']", "content"),
        "Ingredients": read_ingredients(wait),
    }


# Launch Chrome
driver = webdriver.Chrome()
wait = WebDriverWait(driver, WAIT_TIMEOUT_S)
all_products = []

try:
    # Navigation happens inside the try so the browser is closed even if it fails.
    driver.get(LISTING_URL)

    # Wait for the product list container
    product_list = wait.until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "div.product-list"))
    )

    # The product cards may render after the container does, so wait until at
    # least one link is present instead of reading the container immediately.
    try:
        anchors = wait.until(
            lambda _driver: product_list.find_elements(
                By.CSS_SELECTOR, "mat-card.product-carousel-item-card a"
            )
        )
    except TimeoutException:
        anchors = []

    # Drop anchors without an href and repeated links (order preserved), then
    # limit to the first MAX_PRODUCTS products.
    hrefs = [anchor.get_attribute("href") for anchor in anchors]
    product_links = list(dict.fromkeys(href for href in hrefs if href))[:MAX_PRODUCTS]

    if not product_links:
        print("No product links found on the listing page.")

    # Loop through each product
    for link in product_links:
        product_data = scrape_product(driver, wait, link)
        all_products.append(product_data)

        # Print progress
        print(f"Scraped: {product_data['Product Name']}")

finally:
    driver.quit()

# After scraping all products, save to CSV. The file is appended to so results
# accumulate across runs, but products already present (same Product_ID) are
# skipped so that re-running this cell does not duplicate rows.
has_rows = os.path.isfile(OUTPUT_CSV) and os.path.getsize(OUTPUT_CSV) > 0
already_saved = set()
if has_rows:
    with open(OUTPUT_CSV, newline="", encoding="utf-8") as f:
        already_saved = {row.get("Product_ID") for row in csv.DictReader(f)}

new_rows = [row for row in all_products if row["Product_ID"] not in already_saved]
if new_rows:
    with open(OUTPUT_CSV, "a", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=CSV_FIELDS)
        if not has_rows:
            # Also covers an existing but empty file, which would otherwise
            # never receive a header row.
            writer.writeheader()
        writer.writerows(new_rows)


Scraped: 24 Daily Facial Cleansing Wipes Gentle 25 wipes^
Scraped: 24 Daily Facial Cleansing Wipes Moisturising 25 wipes^
Scraped: 24 Daily Pawpaw Ointment 25g^
Scraped: A bit Hippy Cleanser 500ml^
Scraped: A bit Hippy Conditioner 500ml^
Scraped: A bit Hippy Face Cream 65g^
Scraped: A bit Hippy Face Mist 100ml^
Scraped: A bit Hippy Face Oil 25ml^
Scraped: A bit Hippy Moisturiser 200g^
Scraped: A bit Hippy Oil Cleanser 100ml^


Clean And Group by Ingredients

In [2]:
import pandas as pd

# Read the original CSV
df = pd.read_csv("products.csv")

# The scraper appends to products.csv, so re-running it can leave repeated
# rows; keep one row per product before aggregating.
products = df.drop_duplicates(subset="Product_ID")


def join_values(values, sep="; "):
    """Join a group's values into one string.

    Missing values are rendered as empty strings rather than skipped, so the
    n-th entry of every joined column still refers to the same product.
    Converting through ``str`` also avoids the ``TypeError`` that
    ``str.join`` raises on NaN.
    """
    return sep.join("" if pd.isna(value) else str(value) for value in values)


# Group by Ingredients and combine the product details of each group.
# Note: groupby excludes rows whose Ingredients value is missing (pandas'
# default dropna=True), so products without scraped ingredients are omitted.
grouped_df = (
    products.groupby("Ingredients")
    .agg({
        "Product Name": join_values,
        "Brand": join_values,
        "Price": join_values,
        "Description": join_values,
        "Image URL": join_values,
    })
    .reset_index()
)

# Save to a new CSV (does not overwrite the original)
grouped_df.to_csv("products_grouped_by_ingredients.csv", index=False, encoding="utf-8")

print("Grouped CSV saved as 'products_grouped_by_ingredients.csv'")


Grouped CSV saved as 'products_grouped_by_ingredients.csv'


Finally, Group the Products Into the Format Specified by the Assessment Requirements

In [None]:
import string

# Read CSV
df = pd.read_csv("products.csv")

# The scraper appends to products.csv, so re-running it can leave repeated
# rows; keep one row per product so names are not listed twice in a group.
products = df.drop_duplicates(subset="Product_ID")

# Group by Ingredients and combine the product names of each group.
# Missing names are skipped: str.join raises TypeError on NaN.
# Note: groupby excludes rows whose Ingredients value is missing (pandas'
# default dropna=True).
grouped = (
    products.groupby("Ingredients")["Product Name"]
    .agg(lambda names: ", ".join(names.dropna().astype(str)))
    .reset_index()
)

# Assign group labels A, B, C ...; beyond 26 groups fall back to Group_<n>
letters = list(string.ascii_uppercase)
grouped['Group'] = [
    letters[i] if i < len(letters) else f'Group_{i+1}'
    for i in range(len(grouped))
]

# Reorder columns: Group | Ingredients | Product Names
grouped = grouped[['Group', 'Ingredients', 'Product Name']]

# Save to new CSV
grouped.to_csv("grouped ingredient table.csv", index=False, encoding="utf-8")

print("Grouped CSV saved as 'grouped ingredient table.csv'")


Grouped CSV saved as 'grouped ingredient table.csv'
