In [1]:
import random
import time
from selenium_stealth import stealth
from selenium import webdriver
# --- 1. User-Agent Setup ---
# Pool of desktop Chrome user agents; one is picked at random per kernel session.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36",
]
random_user_agent = random.choice(USER_AGENTS)

# The platform handed to selenium-stealth has to agree with the OS advertised in
# the User-Agent; a Macintosh UA combined with a "Win32" platform is an easy
# inconsistency for bot detection to spot.
stealth_platform = "MacIntel" if "Macintosh" in random_user_agent else "Win32"

# --- 2. Proxy Setup ---
# No proxy is active: the driver created below is plain Selenium and connects
# directly. The commented-out dict is a leftover selenium-wire configuration kept
# for reference; it would have to be passed as `seleniumwire_options` to a
# selenium-wire driver to have any effect.
#proxy_options = {
#    'proxy': {
#        'no_proxy': 'localhost,127.0.0.1' # Bypasses proxy for local traffic
#    }
#}

# --- 3. Chrome Options Setup ---
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument(f"--user-agent={random_user_agent}")
chrome_options.add_argument("--start-maximized")
# chrome_options.add_argument("--headless") # Commented out to see the browser
chrome_options.add_argument("--lang=en-US,en;q=0.9")

# Basic anti-detection options
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option('useAutomationExtension', False)

# 'eager' lets driver.get() return once the DOM is ready instead of waiting for
# every image/stylesheet; the scraping cells use explicit waits for the elements
# they need.
chrome_options.page_load_strategy = 'eager'

# Initialize a regular Selenium Chrome driver (not selenium-wire).
driver = webdriver.Chrome(
    options=chrome_options,
    #seleniumwire_options=proxy_options
)

# --- 4. Stealth Setup ---
stealth(driver,
        languages=["en-US", "en"],
        vendor="Google Inc.",
        platform=stealth_platform,
        webgl_vendor="Intel Inc.",
        renderer="Intel Iris OpenGL Engine",
        fix_hairline=True,
        )

# --- 5. Run the Scraper ---
print("Scraper is running with a stealth configuration!")
# Report the real network setup: nothing above routes traffic through a proxy.
print(f"Proxy: none (direct connection) | User-Agent: {random_user_agent}")

Scraper is running with a stealth configuration!
Using IP from proxy and User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36


In [31]:
def infinite_scroll(driver, max_scrolls=None, pause_range=(2, 5)):
    """Scroll to the bottom of the page repeatedly until it stops growing.

    After each scroll the function pauses for a random duration and re-reads
    ``document.body.scrollHeight``; it stops as soon as the height is unchanged
    from the previous reading.

    Args:
        driver: Object exposing ``execute_script(script)`` (a Selenium WebDriver).
        max_scrolls: Upper bound on the number of scrolls. ``None`` (default)
            keeps the original unbounded behaviour; pass an integer to guard
            against pages that never stop growing.
        pause_range: ``(low, high)`` bounds, in seconds, for the random pause
            after each scroll. Defaults to the original 2-5 seconds.

    Returns:
        None.
    """
    low, high = pause_range
    last_height = driver.execute_script("return document.body.scrollHeight")
    scrolls_done = 0
    while max_scrolls is None or scrolls_done < max_scrolls:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        scrolls_done += 1
        # Give lazily loaded content time to arrive before measuring again.
        time.sleep(random.uniform(low, high))
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

In [7]:
from pprint import pprint
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import time
import json

# Al-Fateh: collect every product of every grocery category into
# `all_products` and dump the list to "<STORE_NAME>-products.json".
all_products = []
STORE_NAME = "al-fateh"
AL_FATEH_GROCERY_URL = "https://alfatah.pk/pages/grocery-foods"
# CSS selector matching each product card on a category page.
PRODUCT_CARD_SELECTOR = ".col-6.col-sm-6.col-md-4.col-lg-2"


try:
    # Get to the grocery foods page
    driver.get(AL_FATEH_GROCERY_URL)

    # Wait for all category boxes to load
    wait = WebDriverWait(driver, 10)
    category_boxes = wait.until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, "box"))
    )
    print("Al-Fateh Grocery page loaded successfully.")

    # Read link and label up front: the elements go stale once we navigate away.
    category_links = [
        box.find_element(By.TAG_NAME, "a").get_attribute("href")
        for box in category_boxes
    ]
    # The raw box text carries trailing whitespace/newlines; strip it once here
    # so both the log line and the stored records use the clean name.
    category_names = [box.text.strip() for box in category_boxes]

    for category_link, category_name in zip(category_links, category_names):
        try:
            # Random pause between category requests
            time.sleep(random.uniform(2, 7))
            driver.get(category_link)

            # Wait for the first cards, scroll to load the rest, then re-query
            # so the list contains everything that was lazily loaded.
            wait.until(EC.presence_of_all_elements_located(
                (By.CSS_SELECTOR, PRODUCT_CARD_SELECTOR)
            ))
            infinite_scroll(driver)
            products = wait.until(EC.presence_of_all_elements_located(
                (By.CSS_SELECTOR, PRODUCT_CARD_SELECTOR)
            ))
        except TimeoutException:
            # A category without product cards must not abort the whole run.
            print(f"[{STORE_NAME}] Timed out loading category '{category_name}', skipping")
            continue

        for product in products:
            try:
                price = product.find_element(By.CLASS_NAME, "product-price").text.strip()
                name = product.find_element(By.CLASS_NAME, "product-title-ellipsis").text.strip()
                product_link = product.find_element(By.TAG_NAME, "a").get_attribute("href")
            except NoSuchElementException as e:
                # Skip a card that lacks one of the fields instead of losing
                # every remaining product and category.
                print(f"[{STORE_NAME}] Skipping product with a missing field: {e}")
                continue

            all_products.append({
                "store": STORE_NAME,
                "category": category_name,
                "name": name,
                "price": price,
                "product_link": product_link
            })

        print(f"Category '{category_name}': Found {len(products)} products")

except Exception as e:
    print(f"An error occurred: {e}")
    driver.save_screenshot("error_screenshot.png")

finally:
    # Save whatever was collected, even after a failure part-way through.
    with open(f"{STORE_NAME}-products.json", "w") as f:
        json.dump(all_products, f, indent=4)
    #driver.quit()

Al-Fateh Grocery page loaded successfully.
Category 'Dry Fruits & Dates
  ': Found 177 products
Category 'Teas
  ': Found 164 products
Category 'Local Drinks
  ': Found 319 products
Category 'Milk
  ': Found 36 products
Category 'Baby food
  ': Found 161 products
Category 'Flavoured Milk
  ': Found 27 products
Category 'Frozen Items
  ': Found 538 products
Category 'Frozen Fries
  ': Found 28 products
Category 'Oils
  ': Found 104 products
Category 'Ghee
  ': Found 42 products
Category 'Flour
  ': Found 60 products
Category 'Sugar
  ': Found 9 products
Category 'Salt
  ': Found 41 products
Category 'Rice Products
  ': Found 64 products
Category 'Chips & Nimko
  ': Found 368 products
Category 'Biscuits
  ': Found 622 products
Category 'Chocolates
  ': Found 301 products
Category 'Sauces & Soups
  ': Found 275 products
Category 'Mayo & Spreads
  ': Found 163 products
Category 'Noodles & Pasta
  ': Found 203 products
Category 'Candies & Bubble Gums
  ': Found 459 products
Category 'Cereal

In [10]:
from pprint import pprint
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import time
import json

# Jalal Sons: dismiss the banner, pick a delivery branch, read the category
# links from the navigation dropdown, then scrape every category page into
# `all_products` and dump it to "<STORE_NAME>-products.json".
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import Select

all_products = []
STORE_NAME = "jalal-sons"   # used for the output file name
store_name = "Jalal Sons"   # used in the records and in log lines
JALAL_SONS_URL = "https://jalalsons.com.pk/"
JALALSONS_GROCERY_URL = JALAL_SONS_URL  # kept as an alias of the single URL constant

try:
    driver.get(JALALSONS_GROCERY_URL)
    wait = WebDriverWait(driver, 20)

    # Close banner if present
    try:
        banner_close_button = wait.until(EC.element_to_be_clickable(
            (By.CSS_SELECTOR, ".cursor-pointer.ms-auto")
        ))
        banner_close_button.click()
    except TimeoutException:
        print(f"[{store_name}] No banner appeared")

    # Select location from dropdown
    try:
        location_dropdown = wait.until(EC.presence_of_element_located(
            (By.ID, "selectDeliveryBranch")
        ))
        select_object = Select(location_dropdown)

        # Only options that are enabled and carry a value are real branches.
        enabled_options = [
            opt for opt in select_object.options
            if opt.is_enabled() and opt.get_attribute('value') != ""
        ]

        if enabled_options:
            random_option = random.choice(enabled_options)
            select_object.select_by_visible_text(random_option.text)

            try:
                submit_button = driver.find_element(By.CLASS_NAME, "current_loc_pop_btn")
                submit_button.click()
            except Exception as e:
                print(f"[{store_name}] No button to confirm location selection: {str(e)}")
    except TimeoutException:
        print(f"[{store_name}] No location box appeared")
    except Exception as e:
        # Still best-effort, but report the real failure instead of claiming
        # that the location box never showed up.
        print(f"[{store_name}] Could not select a delivery location: {str(e)}")

    # Hover over the categories menu entry so its dropdown is rendered
    categories_menu = driver.find_element(By.XPATH, "/html/body/header[3]/div/nav/div[1]/ul/li[8]")
    actions = ActionChains(driver)
    actions.move_to_element(categories_menu)
    actions.perform()

    wait.until(
        EC.presence_of_element_located((By.CLASS_NAME, "dropdown-content"))
    )
    categories_box = categories_menu.find_element(By.CLASS_NAME, "dropdown-content")
    category_boxes = categories_box.find_elements(By.TAG_NAME, "a")
    category_boxes_links = [box.get_attribute("href") for box in category_boxes]
    # textContent is read instead of .text, which is empty for links that are
    # not currently displayed.
    categories = [box.get_attribute("textContent").strip() for box in category_boxes]

    print(categories)

    for category_link, CATEGORY_NAME in zip(category_boxes_links, categories):
        try:
            # Random pause between category requests
            time.sleep(random.uniform(2, 7))
            driver.get(category_link)

            # Wait for the first cards, scroll to load the rest, then re-query.
            wait.until(EC.presence_of_all_elements_located(
                (By.CLASS_NAME, "single_product_theme")
            ))
            infinite_scroll(driver)
            product_cards = wait.until(EC.presence_of_all_elements_located(
                (By.CLASS_NAME, "single_product_theme")
            ))

            for product_card in product_cards:
                try:
                    product_link = product_card.find_element(By.TAG_NAME, "a").get_attribute("href")
                    name = product_card.find_element(By.CLASS_NAME, "product_name_theme").text.strip()
                    currency = product_card.find_element(By.CLASS_NAME, "item-currency").text.strip()
                    price = product_card.find_element(By.CLASS_NAME, "price-value").text.strip()
                    all_products.append({
                        "store": store_name,
                        "name": name,
                        "product_link": product_link,
                        "price": f"{currency} {price}",
                        "category": CATEGORY_NAME,
                    })
                except Exception as e:
                    print(f"[{store_name}] Error extracting product details: {str(e)}")
                    continue

            print(f"Category '{CATEGORY_NAME}': Found {len(product_cards)} products")

        except Exception as e:
            print(f"[{store_name}] Error extracting category {str(e)}")
            continue
except Exception as e:
    print(f"An error occurred: {e}")
    driver.save_screenshot("error_screenshot.png")

finally:
    # Save whatever was collected, even after a failure part-way through.
    with open(f"{STORE_NAME}-products.json", "w") as f:
        json.dump(all_products, f, indent=4)
    #driver.quit()

[Jalal Sons] No banner appeared
[Jalal Sons] No location box appeared
['Butter', 'Baby Milk And Food', 'Beverages', 'Biscuits , Crisps And Snacks', 'Baking Goods', 'Canned Foods And Milks', 'Cereal, Jams And Spreads', 'Confectionery And Chocolates', 'Cleaning Products', 'Cosmetics', 'Diapers And Pampers', 'Dry Fruits And Dates', 'Fresh Milk And Eggs', 'Frozen Foods', 'Flour, Rice And Pulses', 'Ghee And Oil', 'Ketchup, Sauce And Mayo', 'Mineral Water', 'Other Food Items', 'Personal Hygiene', 'Pet Food', 'Powder Milk', 'Spices And Miscellaneous', 'Tea And Coffee', 'Toiletries', 'Tissues And Sanitary', 'Yoghurt, Butter, Cream And Cheese']
Category 'Butter': Found 25 products
Category 'Baby Milk And Food': Found 27 products
Category 'Beverages': Found 134 products
Category 'Biscuits , Crisps And Snacks': Found 154 products
Category 'Baking Goods': Found 15 products
Category 'Canned Foods And Milks': Found 38 products
Category 'Cereal, Jams And Spreads': Found 80 products
Category 'Confecti

In [49]:
from selenium.webdriver.common.keys import Keys
# Imtiaz: pick a delivery area, walk category -> sub-category -> paginated
# product listing, and dump everything to "<store_name>-products.json".
store_name = "Imtiaz"
IMTIAZ_GROCERY_URL = "https://shop.imtiaz.com.pk/"
# Created before the try so the `finally` below always writes this run's list,
# never a leftover `all_products` from another cell, even if the first
# driver.get() fails.
all_products = []

try:
    driver.get(IMTIAZ_GROCERY_URL)
    wait = WebDriverWait(driver, 10)

    # Select location
    try:
        area = wait.until(EC.presence_of_element_located(
            (By.XPATH, "/html/body/div[2]/div[3]/div/div/div/div/div[3]/div[3]/div/div/input")
        ))
        # Open the area list, move two entries down and confirm that entry.
        area.send_keys(Keys.ENTER)
        area.send_keys(Keys.DOWN)
        area.send_keys(Keys.DOWN)
        area.send_keys(Keys.ENTER)

        submit_button = wait.until(EC.presence_of_element_located(
            (By.XPATH, "/html/body/div[2]/div[3]/div/div/div/div/div[3]/button")
        ))
        submit_button.click()
    except TimeoutException:
        print(f"[{store_name}] No location box appeared")

    # Get products
    try:
        categories_tag = driver.find_element(By.XPATH, "/html/body/div[1]/div[2]/div/div[1]/div[3]/div[1]")
        categories_tag.click()
        categories_list = wait.until(EC.presence_of_element_located(
            (By.XPATH, "/html/body/div[1]/div[2]/div/div[1]/div[3]/div[3]/ul")
        ))
        category_links = [
            anchor.get_attribute("href")
            for anchor in categories_list.find_elements(By.TAG_NAME, "a")
        ]

        print(category_links)

        for link in category_links:
            try:
                driver.get(link)

                sub_categories_container = wait.until(EC.element_to_be_clickable(
                    (By.XPATH, "/html/body/div[1]/div[2]/div/div[2]/div/div")
                ))

                # Read links and names before navigating away from this page.
                sub_categories = sub_categories_container.find_elements(By.TAG_NAME, "a")
                sub_categories_links = [sub_category.get_attribute("href") for sub_category in sub_categories]
                sub_categories_names = [sub_category.get_attribute("textContent").strip() for sub_category in sub_categories]

                for sub_category_link, sub_category_name in zip(sub_categories_links, sub_categories_names):
                    driver.get(sub_category_link)
                    # None on the first page, so the url_changes() wait below
                    # passes immediately; afterwards it holds the URL we were
                    # on before clicking "Next".
                    current_url = None
                    # Keep clicking "Next" until it is disabled or missing
                    while True:
                        try:
                            wait.until(EC.url_changes(current_url))
                            wait.until(EC.presence_of_all_elements_located(
                                (By.CLASS_NAME, "hazle-product-item_product_item__FSm1N")
                            ))
                            infinite_scroll(driver)
                            products = wait.until(EC.presence_of_all_elements_located(
                                (By.CLASS_NAME, "hazle-product-item_product_item__FSm1N")
                            ))

                            for product in products:
                                try:
                                    product_text_container = product.find_element(By.CLASS_NAME, "hazle-product-item_product_item_text_container__Apuq1")

                                    product_name = product_text_container.find_element(By.CLASS_NAME, "hazle-product-item_product_item_description__ejRDa").text.strip()
                                    product_price = product_text_container.find_element(By.CLASS_NAME, "hazle-product-item_product_item_price_label__ET_we").text.strip()

                                    # The product URL is built from the card's id attribute.
                                    product_link_id = product.get_attribute("id")
                                    product_link = f"https://shop.imtiaz.com.pk/product/{product_link_id}"

                                    all_products.append({
                                        "store": store_name,
                                        "name": product_name,
                                        "product_link": product_link,
                                        "price": product_price,
                                        "category": sub_category_name,
                                    })
                                except Exception as e:
                                    print(f"[{store_name}] Error extracting product info: {str(e)}")
                                    continue

                            print(f"Sub-Category '{sub_category_name}': Found {len(products)} products")

                            button = driver.find_element(By.XPATH, "//button[normalize-space()='Next']")

                            if button.get_attribute("disabled"):
                                print("Next button is disabled")
                                break

                            current_url = driver.current_url
                            button.click()
                        except NoSuchElementException:
                            print("Next button not found, assuming last page")
                            break
                        except TimeoutException:
                            # An empty or slow listing only ends this
                            # sub-category; previously the timeout escaped to the
                            # category handler and skipped every remaining
                            # sub-category of the category.
                            print(f"[{store_name}] Timed out loading products for sub-category '{sub_category_name}'")
                            break

            except Exception as e:
                print(f"[{store_name}] Error extracting sub-category {str(e)}")
                continue

    except TimeoutException:
        print(f"[{store_name}] No products found")

except Exception as e:
    print(f"[Imtiaz] Error during scraping: {str(e)}")

finally:
    # Save whatever was collected, even after a failure part-way through.
    with open(f"{store_name}-products.json", "w") as f:
        json.dump(all_products, f, indent=4)
    #driver.quit()

[Imtiaz] No location box appeared
['https://shop.imtiaz.com.pk/catalog/fresh-4093', 'https://shop.imtiaz.com.pk/catalog/bakery-4108', 'https://shop.imtiaz.com.pk/catalog/snacks--confectionary-4087', 'https://shop.imtiaz.com.pk/catalog/beverages-4089', 'https://shop.imtiaz.com.pk/catalog/tea--coffee-4091', 'https://shop.imtiaz.com.pk/catalog/edible-grocery-4085', 'https://shop.imtiaz.com.pk/catalog/dairy-4095', 'https://shop.imtiaz.com.pk/catalog/frozen-4104', 'https://shop.imtiaz.com.pk/catalog/baby-world-4107', 'https://shop.imtiaz.com.pk/catalog/health--beauty-4101', 'https://shop.imtiaz.com.pk/catalog/home-care-4097', 'https://shop.imtiaz.com.pk/catalog/pet-essentials-4103', 'https://shop.imtiaz.com.pk/catalog/pharmacy-4099']
[Imtiaz] Error extracting sub-category Message: 

Sub-Category 'Biscuits & Wafers': Found 2 products
Next button not found, assuming last page
[Imtiaz] Error extracting sub-category Message: 

Sub-Category 'Carbonated Soft Drinks': Found 13 products
Next button

In [None]:


# Metro: open the site menu, hover over each grocery category box and print
# the text of the level-three category entries found inside it.
try:
    store_name = "Metro"
    METRO_GROCERY_URL = "https://www.metro-online.pk/"

    driver.get(METRO_GROCERY_URL)

    # Random pause before touching the page
    time.sleep(random.uniform(2, 7))

    wait = WebDriverWait(driver, 10)

    # Open the menu via the three-bars icon
    menu_icon_locator = (By.CLASS_NAME, "NewDesktopNav_menu_outlined_icon_container__8Lrtz")
    wait.until(EC.presence_of_element_located(menu_icon_locator)).click()

    # Container holding the grocery category boxes
    grocery_container_locator = (By.CLASS_NAME, "CategoryListingWeb_catergory_listing_Grocery_container__qM57p")
    grocery_container = wait.until(EC.presence_of_element_located(grocery_container_locator))

    boxes = grocery_container.find_elements(
        By.CLASS_NAME, "CategoryListingWeb_category_listing_container__wJJOm"
    )

    for box in boxes:
        # Hover over the box before reading its level-three entries
        ActionChains(driver).move_to_element(box).perform()

        level_three_items = box.find_elements(
            By.CLASS_NAME, "CategoryListingWeb_category_expanded_level_three_item__jjFUX"
        )
        for item in level_three_items:
            print(item.text)

except Exception as e:
    print(f"[Metro] Error during scraping: {str(e)}")


[Metro] Error during scraping: HTTPConnectionPool(host='localhost', port=60032): Max retries exceeded with url: /session/0cd247ed90d21f662793eedcfc6ebf82/url (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001DA903DE580>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))


In [None]:
# Get product cards
# NOTE(review): this cell is a fragment. It relies on `wait` and `store_name`
# left in the kernel by an earlier cell (presumably the Metro cell) and on the
# driver already showing a product grid - confirm before running it.
# The body used to be indented under nothing, which made the cell fail with an
# IndentationError; it is now valid top-level code.
product_cards = wait.until(EC.presence_of_all_elements_located(
    (By.CLASS_NAME, "CategoryGrid_product_card__FUMXW")
))

products_details = []

for product_card in product_cards:
    try:
        product_link = product_card.find_element(By.TAG_NAME, "a").get_attribute("href")
        name = product_card.find_element(By.CLASS_NAME, "CategoryGrid_product_name__3nYsN").text
        price = product_card.find_element(By.CLASS_NAME, "CategoryGrid_product_price__Svf8T").text
        products_details.append({
            "store": store_name,
            "name": name,
            # Same key as the records built by the other store scrapers.
            "product_link": product_link,
            "price": price
        })
    except Exception as e:
        print(f"[{store_name}] Error extracting product details: {str(e)}")
        continue
        
        
        # Filter for relevance

In [50]:
import pandas as pd
# Load the Imtiaz JSON dump into a DataFrame and write it back out as CSV
# without the index column.
df = pd.read_json(path_or_buf="Imtiaz-products.json")
df.to_csv(path_or_buf="Imtiaz-products.csv", index=False)





In [12]:
# Spot-check ten random rows of `df`.
# NOTE(review): the saved output below shows Jalal Sons rows and an
# "Unnamed: 0" column, so it was produced from a different `df` than the
# Imtiaz frame loaded in the In [50] cell - re-run to refresh it.
df.sample(10)

Unnamed: 0,store,name,product_link,price,category
140,Jalal Sons,Nestle Peach Juice Bottle(1 L),https://jalalsons.com.pk/product/nestle-peach-...,Rs 359,Beverages
457,Jalal Sons,Nestle Gold Cornflakes 375Gm,https://jalalsons.com.pk/product/nestle-gold-c...,"Rs 1,048","Cereal, Jams And Spreads"
1397,Jalal Sons,Tapal Danedar Pouch Bag 900Gm,https://jalalsons.com.pk/product/tapal-danedar...,"Rs 1,674",Tea And Coffee
1624,Jalal Sons,Adam Butter 100G(100 G),https://jalalsons.com.pk/product/adam-butter-1...,Rs 344,"Yoghurt, Butter, Cream And Cheese"
533,Jalal Sons,Ferrero Rocher T3 37.5Gm,https://jalalsons.com.pk/product/ferrero-roche...,Rs 594,Confectionery And Chocolates
862,Jalal Sons,Dawn Burger Patties 992GM,https://jalalsons.com.pk/product/dawn-burger-p...,"Rs 1,344",Frozen Foods
866,Jalal Sons,Dawn Plain Paratha 20Pcs,https://jalalsons.com.pk/product/dawn-plain-pa...,Rs 938,Frozen Foods
745,Jalal Sons,Surf Excel Top Load 1KG(1 Kg),https://jalalsons.com.pk/product/surf-excel-to...,Rs 699,Cleaning Products
263,Jalal Sons,Lays Yogurt And Herb 72Gm,https://jalalsons.com.pk/product/lays-yogurt-a...,Rs 60,"Biscuits , Crisps And Snacks"
946,Jalal Sons,K&N Croquettes 1KG(1 Kg),https://jalalsons.com.pk/product/kn-croquettes...,"Rs 1,384",Frozen Foods


In [None]:
# Shut down the browser session created in the first cell. Once this has run,
# the scraping cells need a fresh `driver` (re-run the setup cell) before they
# can be executed again.
driver.quit()