In [1]:
# Importing necessary libraries
import os  # Handling file and directory operations
import requests  # Sending HTTP requests to web pages
import pandas as pd  # Handling tabular data
from bs4 import BeautifulSoup  # Parsing HTML content
from tqdm import tqdm  # Displaying progress bars
from urllib.parse import urljoin  # Joining relative URLs with base
from PIL import Image  # Opening and saving image files
from io import BytesIO  # Handling image bytes in memory
import time  # Adding delays between requests
import spacy  # Performing NLP for filename shortening
import re  # Applying regex for filename cleaning
import random  # Generating random numbers for delays

In [2]:
# Mapping every product category to its Amazon UK search-results URL.
# The search keyword is the category name in lower case with spaces written as "+".
search_urls = {
    category: "https://www.amazon.co.uk/s?k=" + category.lower().replace(" ", "+")
    for category in (
        "Clothing",
        "Footwear",
        "Accessories",
        "Furniture",
        "Food Items",
        "Household Essentials",
        "Personal Care",
        "Lighting",
        "Stationery",
        "Grocery",
    )
}

In [None]:
# Browser-like request headers attached to every HTTP call to reduce the risk of being blocked
HEADERS = dict(
    [
        (
            "User-Agent",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/110.0.0.0 Safari/537.36",
        ),
    ]
)

In [4]:
# Collecting product links by paginating through Amazon search results
def get_product_links(search_url, required_products=100, max_pages=50, timeout=15):
    """Collect product URLs from paginated Amazon search results.

    Args:
        search_url: Search-results URL; the page number is appended as a
            ``page`` query parameter.
        required_products: Maximum number of product URLs to return.
        max_pages: Upper bound on the number of result pages requested, so the
            loop always terminates even if the site keeps serving pages that
            contain only already-seen products.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A list of at most ``required_products`` absolute product URLs, in the
        order they were first seen. Links that point at the same product
        (same ASIN) through different tracking/sponsored URLs are returned
        only once, keeping the first URL encountered.
    """
    from urllib.parse import unquote  # local import: only needed for ASIN extraction

    def _product_key(url):
        # Sponsored links carry the real product path percent-encoded inside a
        # query parameter, so the URL is decoded before looking for the ASIN.
        match = re.search(r"/(?:dp|gp/product)/([A-Z0-9]{10})(?=[/?&#]|$)", unquote(url))
        return match.group(1) if match else url

    # dict keeps insertion order, so the result is deterministic (a set is not)
    product_urls = {}
    separator = "&" if "?" in search_url else "?"
    current_page = 1  # Starting from page 1

    while len(product_urls) < required_products and current_page <= max_pages:

        # Creating paginated URL by appending page number
        paginated_url = f"{search_url}{separator}page={current_page}"
        print(f"🔄 Fetching page {current_page}: {paginated_url}")

        # Sending request to Amazon with headers; a network failure ends pagination
        # but still returns whatever was collected so far
        try:
            response = requests.get(paginated_url, headers=HEADERS, timeout=timeout)
        except requests.RequestException as error:
            print(f"❌ Failed to fetch page {current_page}: {error}")
            break

        # Checking if response is successful
        if response.status_code != 200:
            print(f"❌ Failed to fetch page {current_page}")
            break

        # Parsing HTML content and finding all product links on the current page
        soup = BeautifulSoup(response.content, "html.parser")
        links = soup.find_all("a", class_="a-link-normal s-no-outline")

        # Breaking the loop if no links are found (i.e., last page)
        if not links:
            print("⚠️ No more product links found.")
            break

        for link in links:
            # Stopping if required number of URLs is reached
            if len(product_urls) >= required_products:
                break
            href = link.get("href")
            if href:
                # Constructing full product URL; first URL seen for a product wins
                full_url = urljoin("https://www.amazon.co.uk", href)
                product_urls.setdefault(_product_key(full_url), full_url)

        current_page += 1  # Moving to next page

        # Adding delay to avoid rate-limiting, but only if another page will be requested
        if len(product_urls) < required_products and current_page <= max_pages:
            time.sleep(random.uniform(1.5, 3.0))

    # Returning only the required number of product URLs
    return list(product_urls.values())[:required_products]

In [5]:
# Scraping title, price, image URL and description from a product page
def scrape_product_details(product_url, timeout=15):
    """Fetch one product page and extract its main details.

    Args:
        product_url: Absolute URL of the product page.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict with the keys ``"Title"``, ``"Price"``, ``"Product URL"``,
        ``"Image URL"`` and ``"Description"``, or ``None`` when the page could
        not be fetched (network error or non-200 status). Fields that cannot be
        located fall back to ``"Unknown"``, ``"N/A"``, ``None`` and
        ``"No description available"`` respectively.
    """
    # Sending request to product page; a network failure skips this product
    # instead of aborting the whole scraping run
    try:
        response = requests.get(product_url, headers=HEADERS, timeout=timeout)
    except requests.RequestException as error:
        print(f"❌ Failed to fetch product page: {product_url} ({error})")
        return None

    # Returning if response fails
    if response.status_code != 200:
        print(f"❌ Failed to fetch product page: {product_url}")
        return None

    # Parsing the HTML content
    soup = BeautifulSoup(response.content, "html.parser")

    # Extracting the product title
    title_element = soup.find("span", attrs={"id": "productTitle"})
    title = title_element.text.strip() if title_element else "Unknown"

    # Extracting the product price
    price = "N/A"
    price_element = soup.find("div", attrs={"class": "a-section a-spacing-none aok-align-center aok-relative"})
    if price_element:
        price_span = price_element.find("span", attrs={"class": "aok-offscreen"})
        # get_text() also works when the span contains nested markup, where .string is None
        price_text = price_span.get_text(strip=True) if price_span else ""
        if price_text:
            price = price_text
        else:
            print(f"⚠️ Price not found for {title}, using default 'N/A'.")

    # Extracting one product image URL, preferring the high-resolution variant
    image_tag = soup.find("img", class_="a-dynamic-image")
    image_url = (image_tag.get("data-old-hires") or image_tag.get("src")) if image_tag else None

    # Extracting the product description from the bullet list (two known class variants)
    description = "No description available"
    description_list = (
        soup.find("ul", class_="a-unordered-list a-vertical a-spacing-mini")
        or soup.find("ul", class_="a-unordered-list a-vertical a-spacing-small")
    )
    if description_list:
        bullet_texts = (item.text.strip() for item in description_list.find_all("span", class_="a-list-item"))
        joined = " ".join(text for text in bullet_texts if text)
        # Keeping the default when the list has no usable text instead of storing ""
        if joined:
            description = joined

    # Returning collected product details
    return {
        "Title": title,
        "Price": price,
        "Product URL": product_url,
        "Image URL": image_url,
        "Description": description
    }

In [6]:
# Loading spaCy NLP model for smart filename shortening.
# Loaded once at module level; smart_shorten_name() calls this `nlp` object on each product name.
# NOTE(review): presumably the "en_core_web_sm" model package must already be installed
# in the kernel's environment for this call to succeed — confirm.
nlp = spacy.load("en_core_web_sm")

# Creating short filenames using extracted keywords from product name
def smart_shorten_name(product_name, max_words=5):
    """Condense a product name into at most ``max_words`` underscore-joined keywords.

    Keywords are picked from the part-of-speech tags produced by the
    module-level ``nlp`` pipeline: nouns and proper nouns first, adjectives if
    there are none, and finally the first three whitespace-separated words of
    the name. If the joined result is blank, the original name is returned
    with spaces replaced by underscores.
    """
    parsed = nlp(product_name)

    # Trying the part-of-speech groups in priority order and keeping the first non-empty one
    selected = []
    for wanted_tags in ({"NOUN", "PROPN"}, {"ADJ"}):
        selected = [tok.text for tok in parsed if tok.pos_ in wanted_tags]
        if selected:
            break

    # Last resort: the leading three words of the raw name
    if not selected:
        selected = product_name.split()[:3]

    candidate = "_".join(selected[:max_words])
    if candidate.strip():
        return candidate
    return product_name.replace(" ", "_")

In [7]:
# Saving one image per product in a category-specific folder with serial numbers
def save_image(category, product_name, image_url, serial_number, timeout=15):
    """Download a product image and store it as a JPEG in the category folder.

    Args:
        category: Category name; used as the sub-folder of ``product_images``.
        product_name: Product name; shortened with ``smart_shorten_name`` and
            stripped of everything except letters, digits and underscores to
            form the filename.
        image_url: URL of the image to download.
        serial_number: Integer prefix of the filename (zero-padded to 2 digits).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The path of the saved file, or ``None`` when no valid filename could be
        derived or the download/decoding/saving failed (best-effort: failures
        are printed, not raised).
    """
    # Creating category folder if not already present
    category_folder = os.path.join("product_images", category)
    os.makedirs(category_folder, exist_ok=True)

    try:
        # Generating a cleaned, NLP-based short filename before any network
        # traffic, so nothing is downloaded for a product that cannot be named
        cleaned_name = smart_shorten_name(product_name)
        cleaned_name = re.sub(r'[^a-zA-Z0-9_]', '', cleaned_name)  # Removing special characters

        # Skipping if filename is empty
        if not cleaned_name.strip():
            print(f"⚠️ Skipping product '{product_name}' (no valid filename generated).")
            return None

        # Downloading the image from the URL
        response = requests.get(image_url, headers=HEADERS, timeout=timeout)
        response.raise_for_status()

        # Opening image in memory
        image = Image.open(BytesIO(response.content))

        # Converting every mode other than RGB/greyscale (palette, alpha, 16-bit, CMYK, ...)
        # to RGB so the JPEG writer accepts it
        if image.mode not in ("RGB", "L"):
            image = image.convert("RGB")

        # Formatting filename with serial number
        image_filename = f"{serial_number:02d}_{cleaned_name}.jpg"
        image_path = os.path.join(category_folder, image_filename)
        image.save(image_path, "JPEG")
        print(f"Saved: {image_path}")
        return image_path

    except Exception as e:
        # Deliberately broad: one bad image must not stop the scraping run
        print(f"❌ Failed to download image: {e}")
        return None

In [8]:
# Scraping 100 unique products for each category and saving images.
# The run is resumable: categories already present in product_data.csv are skipped,
# and the CSV is rewritten after every finished category.
PRODUCTS_PER_CATEGORY = 100  # Unique products kept per category
LINKS_PER_CATEGORY = 150  # Links collected per category; the surplus absorbs duplicates and failed pages
columns = ["Category", "Title", "Price", "Product URL", "Image URL", "Saved Image", "Description"]

# Loading already-saved CSV if it exists
existing_categories = set()

if os.path.exists("product_data.csv"):
    # keep_default_na=False: with the default NA parsing the "N/A" price placeholder
    # is read back as NaN and the next save would write it out as an empty field
    existing_df = pd.read_csv("product_data.csv", keep_default_na=False)
    existing_categories = set(existing_df['Category'].unique())
    all_products = existing_df.to_dict(orient="records")
    print(f"🔁 Resuming scraping. Skipping already-scraped categories: {existing_categories}")
else:
    all_products = []  # Initializing final product list

# Iterating through all categories
for category, search_url in search_urls.items():
    if category in existing_categories:
        print(f"⏭️ Skipping {category} (already scraped)")
        continue

    print(f"\n🔍 Scraping category: {category}")

    # Collecting up to LINKS_PER_CATEGORY product links via pagination
    product_links = get_product_links(search_url, required_products=LINKS_PER_CATEGORY)
    valid_products = []
    unique_product_titles = set()

    # Looping through collected product URLs
    for product_url in tqdm(product_links, desc=f"Scraping {category} Products"):
        if len(valid_products) >= PRODUCTS_PER_CATEGORY:
            break

        # Scraping details from product page
        product_data = scrape_product_details(product_url)

        # Skipping pages that could not be scraped or that have no image
        if not (product_data and product_data["Image URL"]):
            continue

        # Generating NLP-shortened name
        shortened_name = smart_shorten_name(product_data["Title"])

        # Skipping duplicates based on shortened name
        if shortened_name in unique_product_titles:
            print(f"⚠️ Skipping duplicate product: {shortened_name}")
            continue

        # Saving the product image; the serial number is the 1-based position in this category
        serial_number = len(valid_products) + 1
        saved_image = save_image(category, shortened_name, product_data["Image URL"], serial_number)

        # Adding product to the list if image is saved successfully
        if saved_image:
            product_data["Title"] = shortened_name  # The CSV stores the shortened title
            product_data["Saved Image"] = saved_image
            product_data["Category"] = category

            valid_products.append(product_data)
            unique_product_titles.add(shortened_name)

    # Adding all valid products from this category to the master list
    all_products.extend(valid_products)
    print(f"✅ Collected {len(valid_products)} unique products for {category}")

    # Saving to CSV after each category. Passing `columns` to the constructor
    # (instead of indexing afterwards) also works when nothing has been collected yet.
    df_partial = pd.DataFrame(all_products, columns=columns)
    df_partial.to_csv("product_data.csv", index=False)
    print(f"✅ Saved {len(df_partial)} total products so far to product_data.csv")

    time.sleep(5)  # Adding delay between categories

🔁 Resuming scraping. Skipping already-scraped categories: {'Lighting', 'Clothing', 'Footwear', 'Furniture', 'Food Items', 'Stationery', 'Accessories', 'Household Essentials', 'Personal Care'}
⏭️ Skipping Clothing (already scraped)
⏭️ Skipping Footwear (already scraped)
⏭️ Skipping Accessories (already scraped)
⏭️ Skipping Furniture (already scraped)
⏭️ Skipping Food Items (already scraped)
⏭️ Skipping Household Essentials (already scraped)
⏭️ Skipping Personal Care (already scraped)
⏭️ Skipping Lighting (already scraped)
⏭️ Skipping Stationery (already scraped)

🔍 Scraping category: Grocery
🔄 Fetching page 1: https://www.amazon.co.uk/s?k=grocery&page=1
🔄 Fetching page 2: https://www.amazon.co.uk/s?k=grocery&page=2
🔄 Fetching page 3: https://www.amazon.co.uk/s?k=grocery&page=3


Scraping Grocery Products:   1%|          | 1/150 [00:05<12:35,  5.07s/it]

Saved: product_images\Grocery\01_Mr_Muscle_Drain_Unblocker_Sink.jpg


Scraping Grocery Products:   1%|▏         | 2/150 [00:08<10:12,  4.14s/it]

Saved: product_images\Grocery\02_La_Tazza_Doro_Coffee_Beans.jpg


Scraping Grocery Products:   2%|▏         | 3/150 [00:13<11:00,  4.49s/it]

Saved: product_images\Grocery\03_Amazon_Gold_Instant_Coffee_Medium.jpg


Scraping Grocery Products:   3%|▎         | 4/150 [00:17<10:48,  4.44s/it]

Saved: product_images\Grocery\04_Amazon_Espresso_Crema_Light_Roast.jpg


Scraping Grocery Products:   3%|▎         | 5/150 [00:22<10:46,  4.46s/it]

Saved: product_images\Grocery\05_Amazon_English_Mustard.jpg


Scraping Grocery Products:   4%|▍         | 6/150 [00:27<11:19,  4.72s/it]

Saved: product_images\Grocery\06_Heinz_Tomato_Ketchup_g_Pack.jpg


Scraping Grocery Products:   5%|▍         | 7/150 [00:31<10:48,  4.53s/it]

Saved: product_images\Grocery\07_Persil_Bio_Laundry_Washing_Liquid.jpg


Scraping Grocery Products:   5%|▌         | 8/150 [00:36<10:39,  4.50s/it]

Saved: product_images\Grocery\08_BARR_Pack_American_Cream_Soda.jpg


Scraping Grocery Products:   6%|▌         | 9/150 [00:39<09:28,  4.03s/it]

Saved: product_images\Grocery\09_Bananas_pack.jpg


Scraping Grocery Products:   7%|▋         | 10/150 [00:42<09:14,  3.96s/it]

Saved: product_images\Grocery\10_Foldable_Shopping_Trolley_Thermal_Insulation.jpg


Scraping Grocery Products:   7%|▋         | 11/150 [00:47<09:29,  4.10s/it]

Saved: product_images\Grocery\11_MANUKA_DOCTOR_MGO_Manuka_Honey.jpg


Scraping Grocery Products:   8%|▊         | 12/150 [00:52<10:03,  4.37s/it]

Saved: product_images\Grocery\12_Ben_s_Original_Plant_Powered.jpg


Scraping Grocery Products:   9%|▊         | 13/150 [00:55<09:07,  4.00s/it]

Saved: product_images\Grocery\13_Morrisons_Chicken_Ham_Hock_Leek.jpg


Scraping Grocery Products:   9%|▉         | 14/150 [00:59<09:01,  3.98s/it]

Saved: product_images\Grocery\14_Dettol_Antibacterial_Cleaning_Wipes_Summer.jpg


Scraping Grocery Products:  10%|█         | 15/150 [01:03<09:07,  4.05s/it]

Saved: product_images\Grocery\15_Amazon_Blueberry_Jam.jpg


Scraping Grocery Products:  11%|█         | 16/150 [01:07<09:02,  4.05s/it]

Saved: product_images\Grocery\16_Weight_Takeaway_Favourites_Chicken_Tikka.jpg


Scraping Grocery Products:  11%|█▏        | 17/150 [01:12<09:29,  4.29s/it]

Saved: product_images\Grocery\17_Finish_Ultimate_Infinity_Shine_Dishwasher.jpg


Scraping Grocery Products:  12%|█▏        | 18/150 [01:16<08:57,  4.07s/it]

Saved: product_images\Grocery\18_Amazon_Squeezy_Naturally_Sweet_Honey.jpg


Scraping Grocery Products:  13%|█▎        | 19/150 [01:20<09:18,  4.26s/it]

Saved: product_images\Grocery\19_Nutella_Hazelnut_Chocolate_Spread_Jar.jpg


Scraping Grocery Products:  13%|█▎        | 20/150 [01:25<09:30,  4.39s/it]

Saved: product_images\Grocery\20_Andrex_Fresh_Sensitive_Hygiene_Wipes.jpg


Scraping Grocery Products:  14%|█▍        | 21/150 [01:29<09:03,  4.21s/it]

Saved: product_images\Grocery\21_Amazon_Tagliatelle.jpg


Scraping Grocery Products:  15%|█▍        | 22/150 [01:33<08:53,  4.17s/it]

Saved: product_images\Grocery\22_Amazon_Halves_Fruit_Juice_g.jpg


Scraping Grocery Products:  15%|█▌        | 23/150 [01:36<08:27,  4.00s/it]

⚠️ Skipping duplicate product: Dettol_Antibacterial_Cleaning_Wipes_Summer


Scraping Grocery Products:  16%|█▌        | 24/150 [01:41<08:45,  4.17s/it]

Saved: product_images\Grocery\23_Nature_Valley_Crunchy_Oats_Honey.jpg


Scraping Grocery Products:  17%|█▋        | 25/150 [01:46<09:03,  4.35s/it]

Saved: product_images\Grocery\24_NESCAF_Dolce_Gusto_Flat_White.jpg


Scraping Grocery Products:  17%|█▋        | 26/150 [01:49<08:25,  4.08s/it]

⚠️ Skipping duplicate product: Weight_Takeaway_Favourites_Chicken_Tikka


Scraping Grocery Products:  18%|█▊        | 27/150 [01:53<08:19,  4.06s/it]

Saved: product_images\Grocery\25_Hellmann_Professional_5L.jpg


Scraping Grocery Products:  19%|█▊        | 28/150 [01:57<08:17,  4.08s/it]

Saved: product_images\Grocery\26_Batchelors_Super_Noodles_Chicken_Mushroom.jpg


Scraping Grocery Products:  19%|█▉        | 29/150 [02:01<08:00,  3.97s/it]

Saved: product_images\Grocery\27_Olly_Pretzel_Thins_New_Variety.jpg


Scraping Grocery Products:  20%|██        | 30/150 [02:05<08:00,  4.01s/it]

Saved: product_images\Grocery\28_Weetabix_Wholegrain_Biscuits_Pack.jpg


Scraping Grocery Products:  21%|██        | 31/150 [02:09<08:00,  4.04s/it]

Saved: product_images\Grocery\29_itsu_Sriracha_Flavour_Seaweed_Thins.jpg


Scraping Grocery Products:  21%|██▏       | 32/150 [02:14<08:33,  4.35s/it]

Saved: product_images\Grocery\30_Finish_Ultimate_Plus_Infinity_Shine.jpg


Scraping Grocery Products:  22%|██▏       | 33/150 [02:17<07:41,  3.94s/it]

⚠️ Skipping duplicate product: La_Tazza_D'oro_Coffee_Beans


Scraping Grocery Products:  23%|██▎       | 34/150 [02:21<07:34,  3.92s/it]

Saved: product_images\Grocery\31_R_Whites_Premium_Lemonade_2L.jpg


Scraping Grocery Products:  23%|██▎       | 35/150 [02:26<07:49,  4.09s/it]

Saved: product_images\Grocery\32_Crap__Twice_Long_Toilet.jpg


Scraping Grocery Products:  24%|██▍       | 36/150 [02:30<07:36,  4.00s/it]

Saved: product_images\Grocery\33_Peroni_Nastro_Azzurro_Lager_330ml.jpg


Scraping Grocery Products:  25%|██▍       | 37/150 [02:33<07:03,  3.75s/it]

⚠️ Skipping duplicate product: La_Tazza_D'oro_Coffee_Beans


Scraping Grocery Products:  25%|██▌       | 38/150 [02:37<07:20,  3.93s/it]

Saved: product_images\Grocery\34_Dr_Beckmann_Carpet_Stain_Remover.jpg


Scraping Grocery Products:  26%|██▌       | 39/150 [02:40<06:46,  3.67s/it]

Saved: product_images\Grocery\35_Morrisons_Market_Street_British_Chicken.jpg


Scraping Grocery Products:  27%|██▋       | 40/150 [02:44<07:05,  3.87s/it]

Saved: product_images\Grocery\36_Vita_Coco_Original_Coconut_Water.jpg


Scraping Grocery Products:  27%|██▋       | 41/150 [02:48<06:41,  3.69s/it]

⚠️ Skipping duplicate product: La_Tazza_D'oro_Coffee_Beans


Scraping Grocery Products:  28%|██▊       | 42/150 [02:51<06:33,  3.65s/it]

Saved: product_images\Grocery\37_Panana_Foldable_Shopping_Trolley_Cart.jpg


Scraping Grocery Products:  29%|██▊       | 43/150 [02:55<06:18,  3.54s/it]

Saved: product_images\Grocery\38_Morrisons_Free_Range_Eggs_count.jpg


Scraping Grocery Products:  29%|██▉       | 44/150 [02:57<05:53,  3.34s/it]

Saved: product_images\Grocery\39_Morrisons_Stonebaked_BBQ_Chicken_g.jpg


Scraping Grocery Products:  30%|███       | 45/150 [03:01<06:13,  3.56s/it]

Saved: product_images\Grocery\40_Amazon_Beef_Gravy_Granules_g.jpg


Scraping Grocery Products:  31%|███       | 46/150 [03:05<06:23,  3.68s/it]

Saved: product_images\Grocery\41_Maynards_Bassetts_Sports_Mix_g.jpg


Scraping Grocery Products:  31%|███▏      | 47/150 [03:10<06:39,  3.88s/it]

Saved: product_images\Grocery\42_Kenco_Decaff_Instant_Coffee_Refill.jpg


Scraping Grocery Products:  32%|███▏      | 48/150 [03:13<06:23,  3.76s/it]

Saved: product_images\Grocery\43_Princes_Tuna_Chunks_Spring_Water.jpg


Scraping Grocery Products:  33%|███▎      | 49/150 [03:18<06:39,  3.96s/it]

Saved: product_images\Grocery\44_J2O_Fruit_Juice_Orange_Passion.jpg


Scraping Grocery Products:  33%|███▎      | 50/150 [03:22<06:44,  4.05s/it]

Saved: product_images\Grocery\45_Dettol_Washing_Machine_Cleaner.jpg


Scraping Grocery Products:  34%|███▍      | 51/150 [03:26<06:28,  3.92s/it]

Saved: product_images\Grocery\46_White_Potatoes_kg.jpg


Scraping Grocery Products:  35%|███▍      | 52/150 [03:30<06:43,  4.12s/it]

Saved: product_images\Grocery\47_WASABI_O_Premium_Fusion_Mayonnaise.jpg


Scraping Grocery Products:  35%|███▌      | 53/150 [03:34<06:37,  4.10s/it]

⚠️ Skipping duplicate product: Weight_Takeaway_Favourites_Chicken_Tikka


Scraping Grocery Products:  36%|███▌      | 54/150 [03:38<06:27,  4.04s/it]

Saved: product_images\Grocery\48_Barista_Edition_Pack_Long_Life.jpg


Scraping Grocery Products:  37%|███▋      | 55/150 [03:43<06:48,  4.30s/it]

Saved: product_images\Grocery\49_SlimFast_Ready_Shake_Meal_Replacement.jpg


Scraping Grocery Products:  37%|███▋      | 56/150 [03:47<06:42,  4.29s/it]

⚠️ Skipping duplicate product: Kenco_Decaff_Instant_Coffee_Refill


Scraping Grocery Products:  38%|███▊      | 57/150 [03:51<06:28,  4.18s/it]

⚠️ Skipping duplicate product: Dettol_Antibacterial_Cleaning_Wipes_Summer


Scraping Grocery Products:  39%|███▊      | 58/150 [03:55<06:21,  4.14s/it]

Saved: product_images\Grocery\50_Heinz_Classic_Cream_Chicken_Soup.jpg


Scraping Grocery Products:  39%|███▉      | 59/150 [04:00<06:30,  4.29s/it]

Saved: product_images\Grocery\51_Tetley_Tea_Bags.jpg


Scraping Grocery Products:  40%|████      | 60/150 [04:04<06:20,  4.23s/it]

⚠️ Skipping duplicate product: Nutella_Hazelnut_Chocolate_Spread_Jar


Scraping Grocery Products:  41%|████      | 61/150 [04:08<06:10,  4.16s/it]

Saved: product_images\Grocery\52_Amazon_Fresh_Fabric_Conditioner_washes.jpg


Scraping Grocery Products:  41%|████▏     | 62/150 [04:11<05:44,  3.92s/it]

Saved: product_images\Grocery\53_Morrisons_Large_Free_Range_Eggs.jpg


Scraping Grocery Products:  42%|████▏     | 63/150 [04:15<05:37,  3.88s/it]

Saved: product_images\Grocery\54_Pepsi_Max_No_Sugar_Cola.jpg


Scraping Grocery Products:  43%|████▎     | 64/150 [04:20<06:03,  4.22s/it]

Saved: product_images\Grocery\55_Carpatica_Pure_Natural_Mineral_Water.jpg


Scraping Grocery Products:  43%|████▎     | 65/150 [04:25<06:03,  4.27s/it]

Saved: product_images\Grocery\56_Amazon_Dijon_Mustard_g_Spicy.jpg


Scraping Grocery Products:  44%|████▍     | 66/150 [04:29<05:53,  4.21s/it]

Saved: product_images\Grocery\57_Nature_Valley_Protein_Peanut_Chocolate.jpg


Scraping Grocery Products:  45%|████▍     | 67/150 [04:31<05:15,  3.80s/it]

Saved: product_images\Grocery\58_Morrisons_Beef.jpg


Scraping Grocery Products:  45%|████▌     | 68/150 [04:35<05:11,  3.80s/it]

⚠️ Skipping duplicate product: Weight_Takeaway_Favourites_Chicken_Tikka


Scraping Grocery Products:  46%|████▌     | 69/150 [04:40<05:36,  4.15s/it]

Saved: product_images\Grocery\59_Andrex_Gentle_Clean_Toilet_Rolls.jpg


Scraping Grocery Products:  47%|████▋     | 70/150 [04:44<05:26,  4.08s/it]

Saved: product_images\Grocery\60_Ritz_Original_Cracker_Box_Classic.jpg


Scraping Grocery Products:  47%|████▋     | 71/150 [04:48<05:19,  4.05s/it]

Saved: product_images\Grocery\61_Ko_Lee_Instant_Noodles_Chicken.jpg


Scraping Grocery Products:  48%|████▊     | 72/150 [04:52<05:10,  3.98s/it]

Saved: product_images\Grocery\62_GoGo_squeeZ_Multifruit_Fruit_Smoothie.jpg


Scraping Grocery Products:  49%|████▊     | 73/150 [04:56<05:06,  3.98s/it]

Saved: product_images\Grocery\63_Heinz_Baked_Beanz_g_Pack.jpg


Scraping Grocery Products:  49%|████▉     | 74/150 [05:01<05:28,  4.32s/it]

Saved: product_images\Grocery\64_Amazon_ULTRA_Kitchen_Roll_Extra.jpg


Scraping Grocery Products:  50%|█████     | 75/150 [05:05<05:25,  4.34s/it]

Saved: product_images\Grocery\65_Happy_Snack_Company_Roasted_Fava.jpg


Scraping Grocery Products:  51%|█████     | 76/150 [05:09<05:11,  4.21s/it]

Saved: product_images\Grocery\66_CLIF_BAR_Nut_Butter_Bar.jpg


Scraping Grocery Products:  51%|█████▏    | 77/150 [05:14<05:08,  4.22s/it]

Saved: product_images\Grocery\67_IV_Hydration_Sachets__Lemon.jpg


Scraping Grocery Products:  52%|█████▏    | 78/150 [05:17<04:53,  4.08s/it]

Saved: product_images\Grocery\68_Amazon_Chicken_Gravy_Granules_g.jpg


Scraping Grocery Products:  53%|█████▎    | 79/150 [05:22<04:54,  4.14s/it]

Saved: product_images\Grocery\69_Andrex_Family_Soft_Toilet_Paper.jpg


Scraping Grocery Products:  53%|█████▎    | 80/150 [05:26<04:48,  4.12s/it]

Saved: product_images\Grocery\70_Amazon_Gentle_Moist_Toilet_Tissues.jpg


Scraping Grocery Products:  54%|█████▍    | 81/150 [05:29<04:30,  3.92s/it]

Saved: product_images\Grocery\71_Lucozade_Energy_Zero_Sugar_Drink.jpg


Scraping Grocery Products:  55%|█████▍    | 82/150 [05:33<04:23,  3.88s/it]

Saved: product_images\Grocery\72_Amazon_MSC_Wild_Pacific_Pink.jpg


Scraping Grocery Products:  55%|█████▌    | 83/150 [05:37<04:25,  3.96s/it]

Saved: product_images\Grocery\73_Dettol_Antibacterial_Washing_Machine_Cleaner.jpg


Scraping Grocery Products:  56%|█████▌    | 84/150 [05:41<04:15,  3.88s/it]

Saved: product_images\Grocery\74_Robinsons_Double_Strength_Orange_Pineapple.jpg


Scraping Grocery Products:  57%|█████▋    | 85/150 [05:46<04:44,  4.37s/it]

Saved: product_images\Grocery\75_Old_El_Paso_Chili_Seasoning.jpg


Scraping Grocery Products:  57%|█████▋    | 86/150 [05:51<04:41,  4.41s/it]

Saved: product_images\Grocery\76_Complete_Clean_Toilet_Roll_Rolls.jpg


Scraping Grocery Products:  58%|█████▊    | 87/150 [05:55<04:26,  4.24s/it]

⚠️ Skipping duplicate product: Andrex_Fresh_Sensitive_Hygiene_Wipes


Scraping Grocery Products:  59%|█████▊    | 88/150 [05:59<04:18,  4.18s/it]

⚠️ Skipping duplicate product: Kenco_Decaff_Instant_Coffee_Refill


Scraping Grocery Products:  59%|█████▉    | 89/150 [06:03<04:11,  4.12s/it]

Saved: product_images\Grocery\77_Heinz_Cream_Tomato_Soup_g.jpg


Scraping Grocery Products:  60%|██████    | 90/150 [06:06<03:51,  3.86s/it]

Saved: product_images\Grocery\78_Morrisons_Multi_Kitchen_Towel_Pack.jpg


Scraping Grocery Products:  61%|██████    | 91/150 [06:10<03:49,  3.90s/it]

Saved: product_images\Grocery\79_Amazon_Tomato_Puree.jpg


Scraping Grocery Products:  61%|██████▏   | 92/150 [06:14<03:57,  4.09s/it]

Saved: product_images\Grocery\80_Barista_Coffee_Co_Medium_Roast.jpg


Scraping Grocery Products:  62%|██████▏   | 93/150 [06:17<03:33,  3.74s/it]

Saved: product_images\Grocery\81_Whole_Cucumber.jpg


Scraping Grocery Products:  63%|██████▎   | 94/150 [06:21<03:31,  3.78s/it]

Saved: product_images\Grocery\82_Colgate_Extra_Clean_Medium_Manual.jpg


Scraping Grocery Products:  63%|██████▎   | 95/150 [06:25<03:27,  3.77s/it]

Saved: product_images\Grocery\83_Robinsons_Double_Strength_Apple_Blackcurrant.jpg


Scraping Grocery Products:  64%|██████▍   | 96/150 [06:28<03:09,  3.50s/it]

Saved: product_images\Grocery\84_Morrisons_Teddy_Shapes_g.jpg


Scraping Grocery Products:  65%|██████▍   | 97/150 [06:32<03:13,  3.65s/it]

Saved: product_images\Grocery\85_Amazon_Basil_Pesto_Green_Pesto.jpg


Scraping Grocery Products:  65%|██████▌   | 98/150 [06:35<03:10,  3.66s/it]

⚠️ Skipping duplicate product: Finish_Ultimate_Plus_Infinity_Shine


Scraping Grocery Products:  66%|██████▌   | 99/150 [06:39<03:11,  3.75s/it]

Saved: product_images\Grocery\86_Amazon_Soy_Sauce_Dark150ml_Asian.jpg


Scraping Grocery Products:  67%|██████▋   | 100/150 [06:44<03:16,  3.94s/it]

Saved: product_images\Grocery\87_Heinz_Baked_Beanz_Snap_Pots.jpg


Scraping Grocery Products:  67%|██████▋   | 101/150 [06:48<03:15,  3.99s/it]

Saved: product_images\Grocery\88_Werther_Original_Caramel_Popcorn_Gourmet.jpg


Scraping Grocery Products:  68%|██████▊   | 102/150 [06:52<03:07,  3.90s/it]

Saved: product_images\Grocery\89_LOVE_CORN_Limited_Edition_Minions.jpg


Scraping Grocery Products:  69%|██████▊   | 103/150 [06:55<03:00,  3.85s/it]

Saved: product_images\Grocery\90_Amazon_Fruit_Mix_Syrup.jpg


Scraping Grocery Products:  69%|██████▉   | 104/150 [07:00<03:07,  4.08s/it]

Saved: product_images\Grocery\91_Amazon_Sunflower_Oil_1L.jpg


Scraping Grocery Products:  70%|███████   | 105/150 [07:04<03:09,  4.21s/it]

Saved: product_images\Grocery\92_Clif_Bars_Energy_Bar_Nutritional.jpg


Scraping Grocery Products:  71%|███████   | 106/150 [07:09<03:15,  4.45s/it]

Saved: product_images\Grocery\93_Surf_Tropical_Lily_Washing_Powder.jpg


Scraping Grocery Products:  71%|███████▏  | 107/150 [07:13<03:05,  4.30s/it]

Saved: product_images\Grocery\94_Amazon_Baby_Carrots_Water.jpg


Scraping Grocery Products:  72%|███████▏  | 108/150 [07:18<03:06,  4.44s/it]

Saved: product_images\Grocery\95_TRIP_Mindful_Blends_Pack_Sparkling.jpg


Scraping Grocery Products:  73%|███████▎  | 109/150 [07:22<02:49,  4.12s/it]

Saved: product_images\Grocery\96_Domestos_Citrus_Fresh_Thick_Bleach.jpg


Scraping Grocery Products:  73%|███████▎  | 110/150 [07:26<02:44,  4.10s/it]

⚠️ Skipping duplicate product: Ben_’s_Original_Plant_Powered


Scraping Grocery Products:  74%|███████▍  | 111/150 [07:30<02:39,  4.08s/it]

Saved: product_images\Grocery\97_Heinz_Good_Mayonnaise_g.jpg


Scraping Grocery Products:  75%|███████▍  | 112/150 [07:34<02:39,  4.19s/it]

Saved: product_images\Grocery\98_Listerine_Total_Care_Milder_Taste.jpg


Scraping Grocery Products:  75%|███████▌  | 113/150 [07:38<02:36,  4.23s/it]

Saved: product_images\Grocery\99_Super_Stix_Ube_Flavour_Wafer.jpg


Scraping Grocery Products:  76%|███████▌  | 114/150 [07:43<02:26,  4.06s/it]

Saved: product_images\Grocery\100_Nestle_Pure_Life_Spring_Water.jpg
✅ Collected 100 unique products for Grocery





✅ Saved 1000 total products so far to product_data.csv
