In [6]:
pip install selenium selenium-wire selenium-stealth undetected-chromedriver webdriver-manager

Collecting selenium-wire
  Downloading selenium_wire-5.1.0-py3-none-any.whl.metadata (49 kB)
Collecting selenium-stealth
  Downloading selenium_stealth-1.0.6-py3-none-any.whl.metadata (6.4 kB)
Collecting blinker>=1.4 (from selenium-wire)
  Downloading blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)
Collecting brotli>=1.0.9 (from selenium-wire)
  Downloading Brotli-1.1.0-cp313-cp313-macosx_10_13_universal2.whl.metadata (5.5 kB)
Collecting kaitaistruct>=0.7 (from selenium-wire)
  Downloading kaitaistruct-0.11-py2.py3-none-any.whl.metadata (2.9 kB)
Collecting pyasn1>=0.3.1 (from selenium-wire)
  Downloading pyasn1-0.6.1-py3-none-any.whl.metadata (8.4 kB)
Collecting pyOpenSSL>=22.0.0 (from selenium-wire)
  Downloading pyopenssl-25.3.0-py3-none-any.whl.metadata (17 kB)
Collecting zstandard>=0.14.1 (from selenium-wire)
  Downloading zstandard-0.25.0-cp313-cp313-macosx_11_0_arm64.whl.metadata (3.3 kB)
Collecting h2>=4.0 (from selenium-wire)
  Downloading h2-4.3.0-py3-none-any.whl.metadata (5

In [3]:
import random
import time
from selenium_stealth import stealth
from selenium import webdriver
# --- 1. User-Agent Setup ---
# Pool of desktop Chrome user-agent strings; one is chosen at random per session.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36",
]
random_user_agent = random.choice(USER_AGENTS)

# The platform reported to pages must agree with the OS named in the user
# agent; a Macintosh user agent combined with "Win32" is an inconsistent fingerprint.
navigator_platform = "MacIntel" if "Macintosh" in random_user_agent else "Win32"

# --- 2. Chrome Options Setup ---
# NOTE: no proxy is configured, so requests leave from this machine's own IP.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument(f"--user-agent={random_user_agent}")
chrome_options.add_argument("--start-maximized")
# chrome_options.add_argument("--headless") # Commented out to see the browser

# --lang expects a single locale; the Accept-Language preference list is a
# browser preference, not a command-line value.
chrome_options.add_argument("--lang=en-US")
chrome_options.add_experimental_option("prefs", {"intl.accept_languages": "en-US,en"})

# Basic anti-detection options
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option('useAutomationExtension', False)

# Start a plain Selenium Chrome session with the options above.
driver = webdriver.Chrome(options=chrome_options)

# --- 3. Stealth Setup ---
stealth(driver,
        languages=["en-US", "en"],
        vendor="Google Inc.",
        platform=navigator_platform,
        webgl_vendor="Intel Inc.",
        renderer="Intel Iris OpenGL Engine",
        fix_hairline=True,
        )

# --- 4. Report the configuration ---
print("Scraper is running with a stealth configuration!")
print(f"Using User-Agent: {random_user_agent}")

Scraper is running with a stealth configuration!
Using IP from proxy and User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36


In [4]:
import json
import random
import time
from urllib.parse import quote

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select, WebDriverWait

In [8]:
#### Refactored store scraping functions with try/except error handling ####

def get_filtered_products(products_details, word_to_search):
    """
    Keep only the products whose name mentions the search term.

    The comparison is a case-insensitive substring match of
    ``word_to_search`` against each product's ``"name"`` value.

    Args:
        products_details: iterable of product dicts, each with a "name" key.
        word_to_search: the term the user searched for.

    Returns:
        A new list with the matching product dicts, in their original order.
    """
    needle = word_to_search.lower()
    return [item for item in products_details if needle in item["name"].lower()]


def scrape_al_fateh(driver, word_to_search, wait_time=10):
    """
    Scrape the Al-Fatah search results page for products matching a term.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        word_to_search: search term; it is percent-encoded before being placed
            in the URL and is also used for the relevance filter.
        wait_time: seconds to wait for the product cards to be present.

    Returns:
        list of dicts with keys "store", "name", "link" and "price" for the
        products whose name contains the search term, or an empty list if
        loading or parsing the page fails.
    """
    store_name = "Al-Fateh"
    try:
        # Encode the term so spaces, '&', '#', '/' etc. cannot corrupt the URL.
        search_url = f"https://alfatah.pk/search?q={quote(word_to_search, safe='')}"

        driver.get(search_url)
        wait = WebDriverWait(driver, wait_time)

        product_cards = wait.until(EC.presence_of_all_elements_located(
            (By.CSS_SELECTOR, ".col-6.col-sm-4.col-md-3.col-lg-2")
        ))

        products_details = []
        for product in product_cards:
            try:
                # The title anchor carries both the product name and its link.
                a_element = product.find_element(By.CSS_SELECTOR, "a[class='product-title-ellipsis']")
                product_link = a_element.get_attribute("href")
                product_name = a_element.text
                product_price = product.find_element(By.CLASS_NAME, "product-price").text
                products_details.append({
                    "store": store_name,
                    "name": product_name,
                    "link": product_link,
                    "price": product_price
                })
            except Exception as e:
                # One malformed card must not abort the whole store.
                print(f"[{store_name}] Error extracting product: {str(e)}")

        # Filter for relevance
        filtered_products = get_filtered_products(products_details, word_to_search)

        print(f"[{store_name}] Found {len(filtered_products)} relevant products")
        return filtered_products

    except Exception as e:
        print(f"[{store_name}] Error during scraping: {str(e)}")
        return []


def scrape_metro(driver, word_to_search, wait_time=10):
    """
    Scrape the Metro Online search results page for product names and prices.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        word_to_search: search term; it is percent-encoded before being placed
            in the URL path and query string, and is also used for the
            relevance filter.
        wait_time: seconds to wait for the product cards to be present.

    Returns:
        list of dicts with keys "store", "name", "link" and "price" for the
        products whose name contains the search term, or an empty list if
        loading or parsing the page fails.
    """
    store_name = "Metro"
    try:
        # The term appears in the path and in the query; encode it once so a
        # space, '/', '?' or '&' in the term cannot change the URL structure.
        encoded_term = quote(word_to_search, safe="")
        search_url = f"https://www.metro-online.pk/search/{encoded_term}?searchText={encoded_term}"

        driver.get(search_url)
        wait = WebDriverWait(driver, wait_time)

        # Get product cards
        product_cards = wait.until(EC.presence_of_all_elements_located(
            (By.CLASS_NAME, "CategoryGrid_product_card__FUMXW")
        ))

        products_details = []
        for product_card in product_cards:
            try:
                product_link = product_card.find_element(By.TAG_NAME, "a").get_attribute("href")
                name = product_card.find_element(By.CLASS_NAME, "CategoryGrid_product_name__3nYsN").text
                price = product_card.find_element(By.CLASS_NAME, "CategoryGrid_product_price__Svf8T").text
                products_details.append({
                    "store": store_name,
                    "name": name,
                    "link": product_link,
                    "price": price
                })
            except Exception as e:
                # One malformed card must not abort the whole store.
                print(f"[{store_name}] Error extracting product details: {str(e)}")

        # Filter for relevance
        filtered_products = get_filtered_products(products_details, word_to_search)

        print(f"[{store_name}] Found {len(filtered_products)} products")
        return filtered_products

    except Exception as e:
        print(f"[{store_name}] Error during scraping: {str(e)}")
        return []


def scrape_jalalsons(driver, word_to_search, wait_time=10):
    """
    Scrape the Jalal Sons shop search page for product names and prices.

    Before reading the results the function closes the banner and picks a
    random enabled delivery branch, when those elements show up within
    ``wait_time``.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        word_to_search: search term; it is percent-encoded before being placed
            in the URL and is also used for the relevance filter.
        wait_time: seconds each explicit wait (banner, branch dropdown,
            product cards) may take.

    Returns:
        list of dicts with keys "store", "name", "link" and "price" for the
        products whose name contains the search term, or an empty list if
        loading or parsing the page fails.
    """
    store_name = "Jalal Sons"
    try:
        # Encode the term so spaces, '&', '#', '/' etc. cannot corrupt the URL.
        search_url = f"https://jalalsons.com.pk/shop?query={quote(word_to_search, safe='')}"

        driver.get(search_url)
        wait = WebDriverWait(driver, wait_time)

        # Close banner if present
        try:
            banner_close_button = wait.until(EC.element_to_be_clickable(
                (By.CSS_SELECTOR, ".cursor-pointer.ms-auto")
            ))
            banner_close_button.click()
        except TimeoutException:
            print(f"[{store_name}] No banner appeared")

        # Select location from dropdown
        try:
            location_dropdown = wait.until(EC.presence_of_element_located(
                (By.ID, "selectDeliveryBranch")
            ))
            select_object = Select(location_dropdown)

            # Skip disabled entries and the empty-valued placeholder option.
            enabled_options = [
                opt for opt in select_object.options
                if opt.is_enabled() and opt.get_attribute('value') != ""
            ]

            if enabled_options:
                random_option = random.choice(enabled_options)
                select_object.select_by_visible_text(random_option.text)

                try:
                    submit_button = driver.find_element(By.CLASS_NAME, "current_loc_pop_btn")
                    submit_button.click()
                except Exception as e:
                    print(f"[{store_name}] No button to confirm location selection: {str(e)}")
        except TimeoutException:
            print(f"[{store_name}] No location box appeared")
        except Exception as e:
            # Best effort: report what went wrong instead of hiding it, and
            # let KeyboardInterrupt/SystemExit propagate.
            print(f"[{store_name}] Could not select a location: {str(e)}")

        # Get products
        product_cards = wait.until(EC.presence_of_all_elements_located(
            (By.CLASS_NAME, "single_product_theme")
        ))

        products_details = []
        for product_card in product_cards:
            try:
                product_link = product_card.find_element(By.TAG_NAME, "a").get_attribute("href")
                name = product_card.find_element(By.CLASS_NAME, "product_name_theme").text

                # Currency and amount live in separate elements; join them.
                currency = product_card.find_element(By.CLASS_NAME, "item-currency").text
                value = product_card.find_element(By.CLASS_NAME, "price-value").text
                price = f"{currency} {value.strip()}"

                products_details.append({
                    "store": store_name,
                    "name": name,
                    "link": product_link,
                    "price": price
                })
            except Exception as e:
                # One malformed card must not abort the whole store.
                print(f"[{store_name}] Error extracting product details: {str(e)}")

        # Filter for relevance
        filtered_products = get_filtered_products(products_details, word_to_search)
        print(f"[{store_name}] Found {len(filtered_products)} products")
        return filtered_products

    except Exception as e:
        print(f"[{store_name}] Error during scraping: {str(e)}")
        return []


def scrape_carrefour(driver, word_to_search, wait_time=10):
    """
    Scrape Carrefour search results by visiting every linked page.

    The search page is only used to collect links; name and price are read
    from each linked page after navigating to it.

    Args:
        driver: Selenium WebDriver used to load and query the pages.
        word_to_search: search term; it is percent-encoded before being placed
            in the URL and is also used for the relevance filter.
        wait_time: seconds to wait for the results container to be present.

    Returns:
        list of dicts with keys "store", "name", "link" and "price" for the
        products whose name contains the search term, or an empty list if
        loading or parsing the search page fails.
    """
    store_name = "Carrefour"
    try:
        # Encode the term so spaces, '&', '#', '/' etc. cannot corrupt the URL.
        search_url = f"https://www.carrefour.pk/mafpak/en/search?keyword={quote(word_to_search, safe='')}"

        driver.get(search_url)
        wait = WebDriverWait(driver, wait_time)

        results_container = wait.until(EC.presence_of_element_located(
            (By.XPATH, "/html/body/div[1]/main/div/div[2]/div[2]/div/div[2]/div/div")
        ))

        # Read every href before navigating away from the search page.
        hrefs = [
            anchor.get_attribute("href")
            for anchor in results_container.find_elements(By.TAG_NAME, "a")
        ]
        # dict.fromkeys drops duplicates while keeping page order, so the
        # visit order and the resulting rows are the same on every run.
        product_links = list(dict.fromkeys(href for href in hrefs if href))

        products_details = []
        for link in product_links:
            try:
                driver.get(link)
                name = driver.find_element(
                    By.XPATH, "/html/body/div[1]/main/div/div[3]/div/div[2]/h1"
                ).text
                price = driver.find_element(
                    By.XPATH, "/html/body/div[1]/main/div/div[3]/div/div[3]/div[1]/div[1]"
                ).text

                products_details.append({
                    "store": store_name,
                    "name": name,
                    "link": link,
                    "price": price
                })
            except Exception as e:
                # A link without the expected layout is skipped.
                print(f"[{store_name}] Error extracting product details: {str(e)}")

        # Filter for relevance
        filtered_products = get_filtered_products(products_details, word_to_search)
        print(f"[{store_name}] Found {len(filtered_products)} products")
        return filtered_products

    except Exception as e:
        print(f"[{store_name}] Error during scraping: {str(e)}")
        return []


def scrape_imtiaz(driver, word_to_search, wait_time=5):
    """
    Scrape the Imtiaz shop search page for product names and prices.

    If the location dialog shows up within ``wait_time`` an area is chosen
    with keyboard navigation and the dialog is submitted before the product
    cards are read.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        word_to_search: search term; it is percent-encoded before being placed
            in the URL and is also used for the relevance filter.
        wait_time: seconds each explicit wait (location dialog, product
            cards) may take.

    Returns:
        list of dicts with keys "store", "name", "price" and "link" for the
        products whose name contains the search term, or an empty list if no
        products appear or scraping fails.
    """
    store_name = "Imtiaz"
    try:
        # Encode the term so spaces, '&', '#', '/' etc. cannot corrupt the URL.
        search_url = f"https://shop.imtiaz.com.pk/search?q={quote(word_to_search, safe='')}"
        driver.get(search_url)
        wait = WebDriverWait(driver, wait_time)

        # Select location
        try:
            area = wait.until(EC.presence_of_element_located(
                (By.XPATH, "/html/body/div[2]/div[3]/div/div/div/div/div[3]/div[3]/div/div/input")
            ))
            # Keyboard sequence sent to the area input: ENTER, DOWN twice, ENTER.
            area.send_keys(Keys.ENTER)
            area.send_keys(Keys.DOWN)
            area.send_keys(Keys.DOWN)
            area.send_keys(Keys.ENTER)

            submit_button = wait.until(EC.presence_of_element_located(
                (By.XPATH, "/html/body/div[2]/div[3]/div/div/div/div/div[3]/button")
            ))
            submit_button.click()
        except TimeoutException:
            print(f"[{store_name}] No location box appeared")

        # Wait for product containers
        try:
            products = wait.until(EC.presence_of_all_elements_located(
                (By.CSS_SELECTOR, ".hazle-product-item_product_item__FSm1N")
            ))
        except TimeoutException:
            print(f"[{store_name}] No products found")
            return []

        products_details = []
        for product in products:
            try:
                name = product.find_element(By.CLASS_NAME,
                    "hazle-product-item_product_item_title__wK9IT").text
                price = product.find_element(By.CLASS_NAME,
                    "hazle-product-item_product_item_price_label__ET_we").text
                try:
                    link = product.find_element(By.TAG_NAME, "a").get_attribute("href")
                except NoSuchElementException:
                    link = ""  # if no link found
                products_details.append({
                    "store": store_name,
                    "name": name,
                    "price": price,
                    "link": link
                })
            except Exception as e:
                # One malformed card must not abort the whole store.
                print(f"[{store_name}] Error extracting product: {str(e)}")

        # Filter for relevance, as the other store scrapers do.
        filtered_products = get_filtered_products(products_details, word_to_search)
        print(f"[{store_name}] Found {len(filtered_products)} products")
        return filtered_products

    except Exception as e:
        print(f"[{store_name}] Error during scraping: {str(e)}")
        return []


# ===== MAIN SCRAPING FUNCTION =====
def scrape_all_stores(driver, word_to_search):
    """
    Run every store scraper for one search term and merge their results.

    Each scraper is called with the shared ``driver``; a scraper that raises
    is reported and skipped so the remaining stores are still processed.

    Returns: one flat list holding the products returned by all scrapers,
    in the order the stores are visited.
    """
    rule = "=" * 60

    print(f"\n{rule}")
    print(f"Starting scraping for: '{word_to_search}'")
    print(f"{rule}\n")

    # Display label -> scraper function, visited in insertion order.
    scrapers_by_store = {
        "Al-Fateh": scrape_al_fateh,
        "Metro": scrape_metro,
        "Jalal Sons": scrape_jalalsons,
        "Carrefour": scrape_carrefour,
        "Imtiaz": scrape_imtiaz,
    }

    collected = []
    for label, scrape in scrapers_by_store.items():
        print(f"\n[SCRAPING {label.upper()}]")
        try:
            store_products = scrape(driver, word_to_search)
            collected.extend(store_products)
        except Exception as err:
            print(f"FATAL ERROR for {label}: {str(err)}")

    print(f"\n{rule}")
    print("Scraping Complete!")
    print(f"Total products collected: {len(collected)}")
    print(f"{rule}\n")

    return collected



In [1]:
import pandas as pd

def save_products_to_csv(products_list, filename="all_products.csv"):
    """
    Write scraped products to a CSV file.

    Only the 'store', 'name' and 'price' fields of each product dict are
    exported (as "Store", "Product Name" and "Price"); any other field, such
    as 'link', is left out. Nothing is written when the list is empty.
    """
    if not products_list:
        print("No products to save!")
        return

    # Source key -> CSV header, in output column order.
    header_for = {"store": "Store", "name": "Product Name", "price": "Price"}

    table = (
        pd.DataFrame(products_list)
        .loc[:, list(header_for)]
        .rename(columns=header_for)
    )

    # utf-8-sig prepends a BOM to the file.
    table.to_csv(filename, index=False, encoding="utf-8-sig")
    print(f"✅ Saved {len(table)} products to '{filename}'")

In [9]:

# Run every store scraper for a single search term.
all_products = scrape_all_stores(driver, word_to_search="pepsi")

# Write the combined results to a CSV file.
save_products_to_csv(all_products, filename="products_combined.csv")


Starting scraping for: 'pepsi'


[SCRAPING AL-FATEH]
[Al-Fateh] Found 13 relevant products

[SCRAPING METRO]
[Metro] Found 3 products

[SCRAPING JALAL SONS]
[Jalal Sons] No banner appeared
[Jalal Sons] No location box appeared
[Jalal Sons] Found 6 products

[SCRAPING CARREFOUR]
[Carrefour] Found 15 products

[SCRAPING IMTIAZ]
[Imtiaz] No location box appeared
[Imtiaz] Found 6 products

Scraping Complete!
Total products collected: 43

✅ Saved 43 products to 'products_combined.csv'


In [55]:
import os
# Print the kernel's current working directory, which is where relative
# output paths such as the CSV filename are resolved.
print(os.getcwd())

/Users/mkbs/Library/Containers/net.whatsapp.WhatsApp/Data/tmp/documents/A750022F-709E-4DA9-93F0-520BABAD89D3
