In [1]:
import random
import time
from selenium_stealth import stealth
from selenium import webdriver
# --- 1. User-Agent Setup ---
# Desktop Chrome user-agent strings; one is chosen at random each time this cell runs.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36",
]
random_user_agent = random.choice(USER_AGENTS)

# Keep navigator.platform consistent with the operating system the chosen
# user agent advertises: a Macintosh user agent paired with "Win32" is a
# contradictory fingerprint.
stealth_platform = "MacIntel" if "Macintosh" in random_user_agent else "Win32"

# --- 2. Proxy Setup ---
# No proxy is configured: the driver below is plain Selenium and connects directly.
# The template is kept for a possible selenium-wire setup, which would pass it
# as `seleniumwire_options=proxy_options` when creating the driver.
#proxy_options = {
#    'proxy': {
#        'no_proxy': 'localhost,127.0.0.1' # Bypasses proxy for local traffic
#    }
#}

# --- 3. Chrome Options Setup ---
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument(f"--user-agent={random_user_agent}")
chrome_options.add_argument("--start-maximized")
# chrome_options.add_argument("--headless") # Commented out to see the browser
chrome_options.add_argument("--lang=en-US,en;q=0.9")

# Basic anti-detection options
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option('useAutomationExtension', False)

# Load only DOM content
#chrome_options.page_load_strategy = 'eager'

# Initialize the Chrome driver (plain Selenium, no selenium-wire options)
driver = webdriver.Chrome(options=chrome_options)

# --- 4. Stealth Setup ---
stealth(
    driver,
    languages=["en-US", "en"],
    vendor="Google Inc.",
    platform=stealth_platform,
    webgl_vendor="Intel Inc.",
    renderer="Intel Iris OpenGL Engine",
    fix_hairline=True,
)

# --- 5. Run the Scraper ---
print("Scraper is running with a stealth configuration!")
# NOTE(review): the message below mentions a proxy, but no proxy is configured in this cell.
print(f"Using IP from proxy and User-Agent: {random_user_agent}")

Scraper is running with a stealth configuration!
Using IP from proxy and User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36


In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import time
import json
from selenium.webdriver.common.keys import Keys

In [4]:
#### Refactored Store Scraping Functions with Try-Catch ####

def get_filtered_products(products_details, word_to_search):
    """
    Keep only the products whose name contains the search term.

    The check is a case-insensitive substring match against each product's
    "name" entry.
    Returns: list of matching products, in their original order
    """
    needle = word_to_search.lower()
    return [item for item in products_details if needle in item["name"].lower()]


def scrape_al_fateh(driver, word_to_search, wait_time=10):
    """
    Scrape the Al-Fatah search results page for products.

    Args:
        driver: Selenium WebDriver used to load the page.
        word_to_search: Search term; it is percent-encoded before being
            placed in the URL.
        wait_time: Seconds to wait for the product cards to appear.

    Returns: list of relevant products (dicts with "store", "name", "link"
    and "price"), or empty list on error
    """
    # Local import keeps the function self-contained within the notebook.
    from urllib.parse import quote

    store_name = "Al-Fateh"
    try:
        # Encode the term so spaces and reserved characters (&, #, ?) cannot
        # corrupt the query string.
        encoded_term = quote(word_to_search, safe="")
        search_url = f"https://alfatah.pk/search?q={encoded_term}"

        driver.get(search_url)
        wait = WebDriverWait(driver, wait_time)

        product_cards = wait.until(EC.presence_of_all_elements_located(
            (By.CSS_SELECTOR, ".col-6.col-sm-4.col-md-3.col-lg-2")
        ))

        products_details = []
        for product in product_cards:
            try:
                # The title anchor carries both the product name and its link.
                a_element = product.find_element(By.CSS_SELECTOR, "a[class='product-title-ellipsis']")
                product_link = a_element.get_attribute("href")
                product_name = a_element.text
                product_price = product.find_element(By.CLASS_NAME, "product-price").text
                products_details.append({
                    "store": store_name,
                    "name": product_name,
                    "link": product_link,
                    "price": product_price
                })
            except Exception as e:
                # A card that lacks the expected elements is skipped, not fatal.
                print(f"[{store_name}] Error extracting product: {str(e)}")
                continue

        # Filter for relevance
        filtered_products = get_filtered_products(products_details, word_to_search)

        print(f"[{store_name}] Found {len(filtered_products)} relevant products")
        return filtered_products

    except Exception as e:
        print(f"[{store_name}] Error during scraping: {str(e)}")
        return []


def scrape_metro(driver, word_to_search, wait_time=10):
    """
    Scrape the Metro search results page for products with name and price.

    Args:
        driver: Selenium WebDriver used to load the page.
        word_to_search: Search term; it is percent-encoded before being
            placed in the URL path and query string.
        wait_time: Seconds to wait for the product cards to appear.

    Returns: list of products with store name, or empty list on error
    """
    # Local import keeps the function self-contained within the notebook.
    from urllib.parse import quote

    store_name = "Metro"
    try:
        encoded_term = quote(word_to_search, safe="")
        search_url = f"https://www.metro-online.pk/search/{encoded_term}?searchText={encoded_term}"

        # The search term is carried by the URL itself, so the results page is
        # requested directly and the site's search box is not used.
        driver.get(search_url)
        wait = WebDriverWait(driver, wait_time)

        # Get product cards
        product_cards = wait.until(EC.presence_of_all_elements_located(
            (By.CLASS_NAME, "CategoryGrid_product_card__FUMXW")
        ))

        products_details = []
        for product_card in product_cards:
            try:
                product_link = product_card.find_element(By.TAG_NAME, "a").get_attribute("href")
                name = product_card.find_element(By.CLASS_NAME, "CategoryGrid_product_name__3nYsN").text
                price = product_card.find_element(By.CLASS_NAME, "CategoryGrid_product_price__Svf8T").text
                products_details.append({
                    "store": store_name,
                    "name": name,
                    "link": product_link,
                    "price": price
                })
            except Exception as e:
                # A card that lacks the expected elements is skipped, not fatal.
                print(f"[{store_name}] Error extracting product details: {str(e)}")
                continue

        # Filter for relevance
        filtered_products = get_filtered_products(products_details, word_to_search)

        print(f"[{store_name}] Found {len(filtered_products)} products")
        return filtered_products

    except Exception as e:
        print(f"[{store_name}] Error during scraping: {str(e)}")
        return []


def scrape_jalalsons(driver, word_to_search, wait_time=10):
    """
    Scrape the Jalal Sons search results page for products with name and price.

    Before reading the results, the function dismisses the banner (if one
    becomes clickable) and picks a random enabled delivery branch (if the
    branch dropdown is present).

    Args:
        driver: Selenium WebDriver used to load the page.
        word_to_search: Search term; it is percent-encoded before being
            placed in the URL.
        wait_time: Seconds each explicit wait may take.

    Returns: list of products with store name, or empty list on error
    """
    # Local import keeps the function self-contained within the notebook.
    from urllib.parse import quote

    store_name = "Jalal Sons"
    try:
        encoded_term = quote(word_to_search, safe="")
        search_url = f"https://jalalsons.com.pk/shop?query={encoded_term}"

        driver.get(search_url)
        wait = WebDriverWait(driver, wait_time)

        # Close banner if present
        try:
            banner_close_button = wait.until(EC.element_to_be_clickable(
                (By.CSS_SELECTOR, ".cursor-pointer.ms-auto")
            ))
            banner_close_button.click()
        except TimeoutException:
            print(f"[{store_name}] No banner appeared")

        # Select location from dropdown
        try:
            # Imported here because the notebook's import cells do not bring in Select.
            from selenium.webdriver.support.ui import Select
            location_dropdown = wait.until(EC.presence_of_element_located(
                (By.ID, "selectDeliveryBranch")
            ))
            select_object = Select(location_dropdown)

            # Options with an empty value are skipped (presumably placeholders).
            enabled_options = [
                opt for opt in select_object.options
                if opt.is_enabled() and opt.get_attribute('value') != ""
            ]

            if enabled_options:
                random_option = random.choice(enabled_options)
                select_object.select_by_visible_text(random_option.text)

                try:
                    submit_button = driver.find_element(By.CLASS_NAME, "current_loc_pop_btn")
                    submit_button.click()
                except Exception as e:
                    print(f"[{store_name}] No button to confirm location selection: {str(e)}")
        except TimeoutException:
            print(f"[{store_name}] No location box appeared")
        except Exception as e:
            # Best effort: report what actually went wrong and keep scraping.
            print(f"[{store_name}] Could not select a delivery location: {str(e)}")

        # Get products
        product_cards = wait.until(EC.presence_of_all_elements_located(
            (By.CLASS_NAME, "single_product_theme")
        ))

        products_details = []
        for product_card in product_cards:
            try:
                product_link = product_card.find_element(By.TAG_NAME, "a").get_attribute("href")
                name = product_card.find_element(By.CLASS_NAME, "product_name_theme").text
                currency = product_card.find_element(By.CLASS_NAME, "item-currency").text
                price = product_card.find_element(By.CLASS_NAME, "price-value").text
                products_details.append({
                    "store": store_name,
                    "name": name,
                    "link": product_link,
                    "price": f"{currency} {price}"
                })
            except Exception as e:
                # A card that lacks the expected elements is skipped, not fatal.
                print(f"[{store_name}] Error extracting product details: {str(e)}")
                continue

        # Filter for relevance
        filtered_products = get_filtered_products(products_details, word_to_search)
        print(f"[{store_name}] Found {len(filtered_products)} products")
        return filtered_products

    except Exception as e:
        print(f"[{store_name}] Error during scraping: {str(e)}")
        return []


def scrape_carrefour(driver, word_to_search, wait_time=10):
    """
    Scrape Carrefour for products with name and price.

    The search results container only yields links, so every linked page is
    opened in turn and the name and price are read from that page.

    Args:
        driver: Selenium WebDriver used to load the pages.
        word_to_search: Search term; it is percent-encoded before being
            placed in the URL.
        wait_time: Seconds to wait for the results container to appear.

    Returns: list of products with store name, or empty list on error
    """
    # Local import keeps the function self-contained within the notebook.
    from urllib.parse import quote

    store_name = "Carrefour"
    try:
        encoded_term = quote(word_to_search, safe="")
        search_url = f"https://www.carrefour.pk/mafpak/en/search?keyword={encoded_term}"

        driver.get(search_url)
        wait = WebDriverWait(driver, wait_time)

        results_container = wait.until(EC.presence_of_element_located(
            (By.XPATH, "/html/body/div[1]/main/div/div[2]/div[2]/div/div[2]/div/div")
        ))

        # Collect every href before navigating away (the anchors belong to the
        # results page). dict.fromkeys removes duplicates while keeping page
        # order, so the visit order and the result order are the same on
        # every run.
        hrefs = [
            anchor.get_attribute("href")
            for anchor in results_container.find_elements(By.TAG_NAME, "a")
        ]
        product_links = list(dict.fromkeys(href for href in hrefs if href))

        products_details = []
        for link in product_links:
            try:
                driver.get(link)
                name = driver.find_element(
                    By.XPATH, "/html/body/div[1]/main/div/div[3]/div/div[2]/h1"
                ).text
                price = driver.find_element(
                    By.XPATH, "/html/body/div[1]/main/div/div[3]/div/div[3]/div[1]/div[1]"
                ).text

                products_details.append({
                    "store": store_name,
                    "name": name,
                    "link": link,
                    "price": price
                })
            except Exception as e:
                # Pages without the expected name/price elements are skipped.
                print(f"[{store_name}] Error extracting product details: {str(e)}")
                continue

        # Filter for relevance
        filtered_products = get_filtered_products(products_details, word_to_search)
        print(f"[{store_name}] Found {len(filtered_products)} products")
        return filtered_products

    except Exception as e:
        print(f"[{store_name}] Error during scraping: {str(e)}")
        return []


def scrape_imtiaz(driver, word_to_search, wait_time=2):
    """
    Scrape the Imtiaz search results page for products.

    Each record stores the whole visible text of a product card as "name";
    "price" is left empty because the card is not parsed further. Unlike the
    other store scrapers, the results are not passed through the relevance
    filter.

    Args:
        driver: Selenium WebDriver used to load the page.
        word_to_search: Search term; it is percent-encoded before being
            placed in the URL.
        wait_time: Seconds each explicit wait may take.

    Returns: list of products with store name (dicts with "store", "name",
    "link" and "price"), or empty list on error
    """
    # Local import keeps the function self-contained within the notebook.
    from urllib.parse import quote

    store_name = "Imtiaz"
    try:
        encoded_term = quote(word_to_search, safe="")
        search_url = f"https://shop.imtiaz.com.pk/search?q={encoded_term}"

        driver.get(search_url)
        wait = WebDriverWait(driver, wait_time)

        # Select location
        try:
            area = wait.until(EC.presence_of_element_located(
                (By.XPATH, "/html/body/div[2]/div[3]/div/div/div/div/div[3]/div[3]/div/div/input")
            ))
            # Keyboard-driven selection in the area input: ENTER, DOWN twice, ENTER.
            area.send_keys(Keys.ENTER)
            area.send_keys(Keys.DOWN)
            area.send_keys(Keys.DOWN)
            area.send_keys(Keys.ENTER)

            submit_button = wait.until(EC.presence_of_element_located(
                (By.XPATH, "/html/body/div[2]/div[3]/div/div/div/div/div[3]/button")
            ))
            submit_button.click()
        except TimeoutException:
            print(f"[{store_name}] No location box appeared")

        # Get products
        try:
            products = wait.until(EC.presence_of_all_elements_located(
                (By.CSS_SELECTOR, ".hazle-product-item_product_item__FSm1N.MuiBox-root.blink-style-5bkk4b")
            ))

            products_details = []
            for product in products:
                try:
                    # Note: Adjust selectors based on actual HTML structure
                    product_info = product.text
                    if not product_info:
                        continue

                    # Best-effort link so the record has the same keys as the
                    # other stores' records.
                    # NOTE(review): assumes the card contains (or is) an <a>
                    # element - confirm against the live page.
                    try:
                        anchors = product.find_elements(By.TAG_NAME, "a")
                        link_holder = anchors[0] if anchors else product
                        product_link = link_holder.get_attribute("href") or ""
                    except Exception:
                        product_link = ""

                    products_details.append({
                        "store": store_name,
                        "name": product_info,
                        "link": product_link,
                        "price": ""
                    })
                except Exception as e:
                    print(f"[{store_name}] Error extracting product info: {str(e)}")
                    continue

            print(f"[{store_name}] Found {len(products_details)} products")
            return products_details

        except TimeoutException:
            print(f"[{store_name}] No products found")
            return []

    except Exception as e:
        print(f"[{store_name}] Error during scraping: {str(e)}")
        return []


# ===== MAIN SCRAPING FUNCTION =====
def scrape_all_stores(driver, word_to_search):
    """
    Run every store scraper in turn and merge what they return.

    A scraper that raises is reported and skipped, so one failing store does
    not prevent the remaining stores from being scraped.
    Returns: list of all products from all stores with store names
    """
    separator = "=" * 60
    collected = []

    print(f"\n{separator}")
    print(f"Starting scraping for: '{word_to_search}'")
    print(f"{separator}\n")

    # Display label -> scraper, visited in this order
    scrapers_by_store = {
        "Al-Fateh": scrape_al_fateh,
        "Metro": scrape_metro,
        "Jalal Sons": scrape_jalalsons,
        "Carrefour": scrape_carrefour,
        "Imtiaz": scrape_imtiaz,
    }

    for label, scrape in scrapers_by_store.items():
        print(f"\n[SCRAPING {label.upper()}]")
        try:
            collected.extend(scrape(driver, word_to_search))
        except Exception as err:
            print(f"FATAL ERROR for {label}: {str(err)}")

    print(f"\n{separator}")
    print("Scraping Complete!")
    print(f"Total products collected: {len(collected)}")
    print(f"{separator}\n")

    return collected


# ===== USAGE EXAMPLE =====
# Uncomment below to run the scraper
#word_to_search = "pepsi"
#all_collected_products = scrape_all_stores(driver, word_to_search)
#print(all_collected_products)
#with open(f"{word_to_search}_products.json", "w") as f:
    #json.dump(all_collected_products, f, indent=4)

In [None]:
def infinite_scroll(driver, pause_range=(2, 5), max_scrolls=None):
    """
    Scroll to the bottom of the page repeatedly to load all products.

    After each scroll the function sleeps for a random time and re-reads
    document.body.scrollHeight; it stops as soon as the height no longer
    changes.

    Args:
        driver: Selenium WebDriver showing the page to scroll.
        pause_range: (min, max) seconds for the random pause after each scroll.
        max_scrolls: Optional upper bound on the number of scrolls, so a page
            that keeps growing cannot loop forever. None (the default) means
            no limit.

    Returns: None
    """
    last_height = driver.execute_script("return document.body.scrollHeight")
    scrolls_done = 0
    while max_scrolls is None or scrolls_done < max_scrolls:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        scrolls_done += 1
        # Give lazily loaded content time to be appended before measuring again.
        time.sleep(random.uniform(*pause_range))
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

In [None]:
from pprint import pprint
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import time
import json

# Imported here because the notebook's import cells do not bring these in.
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import Select

# Crawl every Jalal Sons category listed in the navigation dropdown and save
# all products found to "<STORE_NAME>-products.json".
all_products = []
STORE_NAME = "jalal-sons"          # used for the output file name
JALAL_SONS_URL = "https://jalalsons.com.pk/"
store_name = "Jalal Sons"          # label stored in each product record
PRODUCT_CARD_LOCATOR = (By.CLASS_NAME, "single_product_theme")

try:
    # The category crawl needs no search term, so start from the home page
    # instead of a search URL built from a variable that may not exist.
    driver.get(JALAL_SONS_URL)
    wait = WebDriverWait(driver, 10)

    # Close banner if present
    try:
        banner_close_button = wait.until(EC.element_to_be_clickable(
            (By.CSS_SELECTOR, ".cursor-pointer.ms-auto")
        ))
        banner_close_button.click()
    except TimeoutException:
        print(f"[{store_name}] No banner appeared")

    # Select location from dropdown
    try:
        location_dropdown = wait.until(EC.presence_of_element_located(
            (By.ID, "selectDeliveryBranch")
        ))
        select_object = Select(location_dropdown)

        # Options with an empty value are skipped (presumably placeholders).
        enabled_options = [
            opt for opt in select_object.options
            if opt.is_enabled() and opt.get_attribute('value') != ""
        ]

        if enabled_options:
            random_option = random.choice(enabled_options)
            select_object.select_by_visible_text(random_option.text)

            try:
                submit_button = driver.find_element(By.CLASS_NAME, "current_loc_pop_btn")
                submit_button.click()
            except Exception as e:
                print(f"[{store_name}] No button to confirm location selection: {str(e)}")
    except TimeoutException:
        print(f"[{store_name}] No location box appeared")
    except Exception as e:
        # Best effort: report what actually went wrong and keep going.
        print(f"[{store_name}] Could not select a delivery location: {str(e)}")

    # Hover over the navigation entry so its dropdown of categories opens
    category_menu = driver.find_element(By.XPATH, "/html/body/header[3]/div/nav/div[1]/ul/li[8]")
    ActionChains(driver).move_to_element(category_menu).perform()

    # presence_of_all_elements_located returns a LIST of containers, so the
    # anchors are gathered from each container in turn.
    category_containers = wait.until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, "drop-sub-links"))
    )

    # href -> category name; the dict keeps page order and drops duplicate links.
    # The names are read now because the anchors belong to the current page.
    category_links = {}
    for container in category_containers:
        for anchor in container.find_elements(By.TAG_NAME, "a"):
            href = anchor.get_attribute("href")
            if not href or href in category_links:
                continue
            # .text is empty for anchors that are not displayed; fall back to
            # the raw text content in that case.
            label = anchor.text.strip() or (anchor.get_attribute("textContent") or "").strip()
            category_links[href] = label

    for category_link, category_name in category_links.items():
        time.sleep(random.uniform(2, 7))
        # Open the category page
        driver.get(category_link)

        # Get products; a category without product cards is skipped instead of
        # aborting the whole crawl.
        try:
            wait.until(EC.presence_of_all_elements_located(PRODUCT_CARD_LOCATOR))
            infinite_scroll(driver)
            product_cards = wait.until(EC.presence_of_all_elements_located(PRODUCT_CARD_LOCATOR))
        except TimeoutException:
            print(f"[{store_name}] Category '{category_name}': no products appeared, skipping")
            continue

        for product_card in product_cards:
            try:
                product_link = product_card.find_element(By.TAG_NAME, "a").get_attribute("href")
                name = product_card.find_element(By.CLASS_NAME, "product_name_theme").text.strip()
                currency = product_card.find_element(By.CLASS_NAME, "item-currency").text.strip()
                price = product_card.find_element(By.CLASS_NAME, "price-value").text.strip()
                all_products.append({
                    "store": store_name,
                    "name": name,
                    "product_link": product_link,
                    "price": f"{currency} {price}",
                    "category": category_name,
                })
            except Exception as e:
                # A card that lacks the expected elements is skipped, not fatal.
                print(f"[{store_name}] Error extracting product details: {str(e)}")
                continue

        print(f"Category '{category_name}': Found {len(product_cards)} products")

except Exception as e:
    print(f"An error occurred: {e}")
    driver.save_screenshot("error_screenshot.png")

finally:
    # Save to JSON even after an error, so products collected so far are kept
    with open(f"{STORE_NAME}-products.json", "w") as f:
        json.dump(all_products, f, indent=4)
    # The driver is left open for the following cells.
    #driver.quit()

In [None]:
# Run the Jalal Sons search scraper for "pepsi" with the driver created in the
# first cell; as the cell's last expression, the returned list is displayed.
scrape_jalalsons(driver, "pepsi")

In [3]:
import pandas as pd
# Load the scraped Al-Fateh catalogue from disk
df = pd.read_json("al-fateh-products.json")

# Show ten randomly chosen rows
display(df.sample(n=10))

# Number of products per category
print(df.groupby("category").size())

Unnamed: 0,store,category,name,price,product_link
6106,al-fateh,Ice Cream,MOVENPICK ICE CREAM VANILLA DREAM 500ML,"Rs.4,345",https://alfatah.pk/products/movenpick-ice-crea...
1330,al-fateh,Frozen Items,PEETZA HOUR CHICKITA 570 GM,"Rs.2,395",https://alfatah.pk/products/peetza-hour-chicki...
1433,al-fateh,Frozen Fries,MANO SALWA CHICKEN FRIES 425 GM,Rs.399,https://alfatah.pk/products/mano-salwa-chicken...
5541,al-fateh,Jams,SHEZAN JAM MIXED FRUIT 370 GM,Rs.275,https://alfatah.pk/products/shezan-jam-mixed-f...
1712,al-fateh,Rice Products,GUARD ULTIMATE BASMATI RICE 5 KG,"Rs.2,695",https://alfatah.pk/products/guard-ultimate-bas...
5470,al-fateh,Honey,YOUNGS NATURAL HONEY BEE HIVES 500 GM,Rs.895,https://alfatah.pk/products/youngs-natural-hon...
2304,al-fateh,Biscuits,FLAIR KUNAFA BROWNIES BAR 50GM,Rs.415,https://alfatah.pk/products/flair-kunafa-brown...
4464,al-fateh,Coffee,NESCAFE CLASSIC COFFE SACHET 50 GM,Rs.695,https://alfatah.pk/products/nescafe-classic-co...
5145,al-fateh,Baking Items,RAFHAN VANILLA ICE CREAM POWDER 275 GM,Rs.210,https://alfatah.pk/products/rafhan-vanilla-ice...
5065,al-fateh,Baking Accessories,COOKIE PRESS ICING SET TAILL,"Rs.1,395",https://alfatah.pk/products/cookie-press-icing...


category
Baby food                   161
Baking Accessories          126
Baking Chocolates            24
Baking Items                182
Biscuits                    622
Bread                        54
Butter                       45
Candies & Bubble Gums       459
Cereals                     176
Cheese                      153
Chips & Nimko               368
Chocolates                  301
Coffee                      122
Dairy Creams                 20
Drinking Powders             48
Drinking water               49
Dry Fruits & Dates          177
Eggs                          9
Flavoured Milk               27
Flour                        60
Frozen Fries                 28
Frozen Items                538
Ghee                         42
Honey                        78
Ice Cream                    82
Imported Drinks & Juices    193
Jams                         68
Liquid Tin Milk               9
Local Drinks                319
Margarine                    14
Mayo & Spreads              163