In [2]:
import time
import random
import requests
from bs4 import BeautifulSoup

In [4]:
# Request headers passed to requests.get() by fetch(): a desktop-Chrome
# User-Agent string plus an English Accept-Language preference.
HEADERS = {
    "User-Agent": " ".join([
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
    ]),
    "Accept-Language": "en-US,en;q=0.9",
}

In [8]:
def fetch(url, delay=(1,2)):
    """Download ``url`` and return the response body as text.

    The request is sent with the module-level ``HEADERS`` and a 15 second
    timeout. ``raise_for_status()`` is called on the response, so an HTTP
    error status raises instead of returning a body.

    After a successful response the function sleeps for a random duration
    drawn with ``random.uniform(*delay)`` before returning.
    """
    response = requests.get(url, timeout=15, headers=HEADERS)
    response.raise_for_status()

    # Randomised pause between the ``delay`` bounds before handing back the body.
    pause_seconds = random.uniform(*delay)
    time.sleep(pause_seconds)

    return response.text

In [12]:
def parse_product_list(html):
    """Extract product-like entries from a listing page's HTML.

    Three strategies are tried in order, and the first one that yields
    anything wins:

    1. ``<a title=...>`` anchors whose ``href`` contains ``"/product/"`` or
       ``"daraz"`` -> ``{"title": ..., "url": ...}``
    2. ``<img alt=...>`` images with a non-empty ``alt`` ->
       ``{"title": ..., "image": ...}`` (``src``, falling back to ``data-src``)
    3. ``<h2>`` / ``<h3>`` headings longer than 3 characters ->
       ``{"title": ...}``

    Args:
        html: Page markup as a string.

    Returns:
        A list of dicts; empty when nothing matched. Never ``None``.
    """
    soup = BeautifulSoup(html, "html.parser")
    products = []

    # Strategy 1: titled anchors that look like product links.
    for a in soup.select("a[title]"):
        title = a.get("title")
        href = a.get("href")
        # Parentheses matter: without them `and` binds tighter than `or`, so
        # `"daraz" in href` was evaluated even when href was None (TypeError).
        if title and href and ("/product/" in href or "daraz" in href):
            products.append({"title": title.strip(), "url": href})

    # The fallbacks run once, after the whole anchor scan, not per anchor.
    # Strategy 2: image alt texts.
    if not products:
        for img in soup.select("img[alt]"):
            alt = (img.get("alt") or "").strip()
            src = img.get("src") or img.get("data-src")
            if alt:
                products.append({"title": alt, "image": src})

    # Strategy 3: section headings.
    if not products:
        for tag in soup.select("h2, h3"):
            text = tag.get_text(strip=True)
            if len(text) > 3:
                products.append({"title": text})

    return products
                

In [32]:
# Try the static-HTML approach end to end: download the listing page, parse
# it, and report what (if anything) was extracted.
url = "https://www.daraz.pk/smartphones/"  # example tag page (may change)
html = fetch(url)
products = parse_product_list(html)

print(type(products))
if products:
    # Show the count, then at most the first 30 entries.
    print(len(products))
    for p in products[:30]:
        print(p)
else:
    print("No products found!")
    print("⚠️ parse_product_list() returned None or an empty list.")

<class 'list'>
No products found!
⚠️ parse_product_list() returned None or an empty list.


In [34]:
# Request the listing page with default headers; as the cell's last
# expression, the returned Response object is displayed.
requests.get("https://www.daraz.pk/smartphones/")

<Response [200]>

In [36]:
import requests

# Fetch the listing page and show the status code followed by the first
# 1000 characters of the body, to see what the server sends before any
# JavaScript runs.
url = "https://www.daraz.pk/smartphones/"
r = requests.get(url)
print(r.status_code, r.text[:1000], sep="\n")

200

<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <meta name="data-spm" content="a2a0e" />
    <meta http-equiv="x-ua-compatible" content="ie=edge" />
    <meta name="viewport" content="width=device-width" />
    <meta name="referrer" content="origin" />
    <meta name="lzd-itrace-close-auto-monitor" content="assetPerf,imagesPerf">
    
      <meta name="aplus-rhost-v" content="sg.mmstat.com">
      <meta name="aplus-rhost-g" content="sg.mmstat.com">
      <meta name="aplus-cpvdata" content="{&quot;daraz_web_version&quot;:&quot;2.0.0&quot;}">
      <meta name="aplus-exdata" content="{&quot;daraz_web_version&quot;:&quot;2.0.0&quot;}">
    
    <link rel="preconnect dns-prefetch" href="//cart.daraz.pk" />
    <link rel="preconnect dns-prefetch" href="//member.daraz.pk" />
    <link rel="preconnect dns-prefetch" href="//acs-m.daraz.pk" />
    <link rel="preconnect dns-prefetch" href="//laz-img-cdn.alicdn.com" />
    <link rel="preconnect dns-prefetch" href="/

# The product data is not in the static HTML: this website loads it dynamically with JavaScript after the browser runs the page.

In [112]:
# DISABLED: Selenium-based alternative. It would open the listing page in
# headless Chrome, wait for the JavaScript to run, and collect the text of
# each product card into a one-column DataFrame ("text_data").
# Every line below is commented out, so running this cell does nothing.
# from selenium import webdriver
# from selenium.webdriver.chrome.options import Options
# from bs4 import BeautifulSoup
# import time
# import pandas as pd


# opts = Options()
# opts.add_argument("--headless=new")
# opts.add_argument("--disable-gpu")
# opts.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
#                   "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122 Safari/537.36")

# driver = webdriver.Chrome(options=opts)

# url = "https://www.daraz.pk/smartphones/"
# driver.get(url)
# time.sleep(5)  # wait for JS to load

# html = driver.page_source
# soup = BeautifulSoup(html, "html.parser")

# titles = []
# # Example: product titles
# for card in soup.select("div[data-qa-locator='product-item']"):
#     title = card.get_text()
#     titles.append(title)

# df = pd.DataFrame(titles, columns=["text_data"])
# print(df)

# driver.quit()

                                            text_data
0   Samsung Galaxy A06 4GB RAM + 64 GB ROMRs. 21,4...
1   Combo Deal - Original ViVo Y85 with Box Charge...
2   Global Variant Vivo Y17 with Box Fast Turbo Ch...
3   Samsung Galaxy A06 4GB RAM + 128 GB ROMRs. 25,...
4   LIMITED STOCK: OPPO Reno12 F 5G | 12GB RAM + 2...
5   Samsung Galaxy A06 6GB RAM + 128 GB ROMRs. 30,...
6   Vivo Y85 4gb RAM 64gb Memory Charger back cove...
7   Imported Vivo Y66 Gifts( Charger Sim Eject Too...
8   Imported ViVo S1 with Box Dual Engine Fast Cha...
9   Oppo A57 - 5.2" HD Display - 4GB RAM - 64GB RO...
10  Itel A80 - 6.7" Inch Display - 3GB RAM + 5GB E...
11  [Official Approved] Oppo A57 - 5.2" HD Display...
12  [Official PTA Approved] Vivo Y17 - 6.35 Inches...
13  Samsung Galaxy A26 8/256GBRs. 73,499319 sold(4...
14  REDMI 13  8GBRAM-128GBROM 6.79 inchesDISPLAY50...
15  Redmi Note 14 8GB RAM + 256 GB ROM Non Scanned...
16           Redmi 13 8GB+128GBRs. 32,99967 soldSindh
17  Samsung Galaxy A06 - 6.7

In [116]:
import requests

# Request page 1 of the smartphone listing with ajax=true and parse the
# response body as JSON.
url = "https://www.daraz.pk/smartphones/?ajax=true&isFirstRequest=true&page=1"
headers = {"User-Agent": "Mozilla/5.0"}

# A timeout keeps the cell from hanging indefinitely on a stalled connection.
r = requests.get(url, headers=headers, timeout=15)
# Surface HTTP errors here rather than as a confusing JSON decode failure.
r.raise_for_status()
data = r.json()  # raises ValueError if the body is not JSON

In [122]:
# Show the top-level keys of the parsed JSON payload.
data.keys()

dict_keys(['templates', 'mods', 'mainInfo', 'seoInfo'])

In [140]:
# pandas is imported here because no earlier live cell imports it (the only
# earlier import sits in a commented-out cell), so on a fresh kernel `pd`
# would otherwise be undefined at this point.
import pandas as pd

# One dict per product, taken from the "mods" section of the payload.
products = data["mods"]["listItems"]

# One row per product, one column per key of the product dicts.
df = pd.DataFrame(products)

In [142]:
# Write the listing table to a CSV file in the working directory.
# NOTE(review): with pandas defaults the row index is written as a leading
# unnamed column; pass index=False if that column is unwanted.
df.to_csv("daraz_smartphone.csv")

In [144]:
# Print the column names, non-null counts, and dtypes of the listing table.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40 entries, 0 to 39
Data columns (total 41 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   name                  40 non-null     object
 1   nid                   40 non-null     object
 2   itemId                40 non-null     object
 3   icons                 40 non-null     object
 4   image                 40 non-null     object
 5   isSmartImage          40 non-null     bool  
 6   originalPriceShow     40 non-null     object
 7   priceShow             40 non-null     object
 8   ratingScore           40 non-null     object
 9   review                40 non-null     object
 10  location              40 non-null     object
 11  thumbs                40 non-null     object
 12  sellerName            40 non-null     object
 13  sellerId              40 non-null     object
 14  brandName             40 non-null     object
 15  brandId               40 non-null     obje

In [178]:
# Work on a copy so that later changes to daraz_df leave df untouched.
daraz_df = df.copy()

In [180]:
# Build daraz_df as df minus the columns listed below.
# A new frame is derived from df instead of dropping in place, so the cell
# can be re-run safely: an in-place drop raises KeyError on the second run
# because the columns are already gone.
daraz_df = df.drop(columns=["nid", "itemId", "image", "isSmartImage", "longImageDisplayable", "skus", "promotionId", "isSponsored", "tItemType", "skuType",
                            "adFlag", "directSimilarUrl", "gridTitleLine", "isFission", "isBadgeAutoScroll", "showCart", "showBackIcon", "showUnitPrice",
                            "itemUrl", "querystring", "thumbs", "icons", "originalPriceShow"])

In [182]:
# Display the trimmed table (the cell's last expression is rendered).
daraz_df

Unnamed: 0,name,priceShow,ratingScore,review,location,sellerName,sellerId,brandName,brandId,cheapest_sku,skuId,sku,categories,price,inStock,originalPrice,clickTrace,itemSoldCntShow
0,Samsung Galaxy A06 4GB RAM + 64 GB ROM,"Rs. 21,499",4.713221601489758,537.0,Sindh,Retail - Samsung,6005425472002,Samsung,4198,599915386_PK-2773207780,2773207780,599915386_PK,"[2, 3]",21499,True,26999.0,query:;nid:599915386;src:LazadaMainSrp;rn:9d7d...,6.7K sold
1,Combo Deal - Original ViVo Y85 with Box Charge...,"Rs. 10,392",4.60625,320.0,Sindh,SEL WEL,6005014023653,No Brand,39704,436809943_PK-2331308805,2331308805,436809943_PK,"[2, 3]",10392,True,25000.0,query:;nid:436809943;src:LazadaMainSrp;rn:9d7d...,1.1K sold
2,Global Variant Vivo Y17 with Box Fast Turbo Ch...,"Rs. 15,954",4.328767123287672,219.0,Sindh,SEL WEL,6005014023653,No Brand,39704,537237158_PK-2520394150,2520394150,537237158_PK,"[2, 3]",15954,True,30000.0,query:;nid:537237158;src:LazadaMainSrp;rn:9d7d...,823 sold
3,Samsung Galaxy A06 4GB RAM + 128 GB ROM,"Rs. 25,267",4.714285714285714,126.0,Sindh,Retail - Samsung,6005425472002,Samsung,4198,599970007_PK-2773276137,2773276137,599970007_PK,"[2, 3]",25267,True,31999.0,query:;nid:599970007;src:LazadaMainSrp;rn:9d7d...,2.0K sold
4,LIMITED STOCK: OPPO Reno12 F 5G | 12GB RAM + 2...,"Rs. 99,999",4.678082191780822,146.0,Punjab,OPPO OFFICIAL STORE,6005016057194,OPPO,12933,536508156_PK-3983595186,3983595186,536508156_PK,"[2, 3]",99999,True,,query:;nid:536508156;src:LazadaMainSrp;rn:9d7d...,791 sold
5,Samsung Galaxy A06 6GB RAM + 128 GB ROM,"Rs. 30,299",4.829268292682927,123.0,Sindh,Retail - Samsung,6005425472002,Samsung,4198,599952532_PK-2773264970,2773264970,599952532_PK,"[2, 3]",30299,True,37999.0,query:;nid:599952532;src:LazadaMainSrp;rn:9d7d...,2.4K sold
6,Vivo Y85 4gb RAM 64gb Memory Charger back cove...,"Rs. 9,799",4.343283582089552,134.0,Sindh,H_S STORE,6005025552111,No Brand,39704,493605285_PK-2350918858,2350918858,493605285_PK,"[2, 3]",9799,True,19499.0,query:;nid:493605285;src:LazadaMainSrp;rn:9d7d...,462 sold
7,Imported Vivo Y66 Gifts( Charger Sim Eject Too...,"Rs. 9,996",4.377777777777778,135.0,Sindh,SEL WEL,6005014023653,No Brand,39704,434604001_PK-2280434782,2280434782,434604001_PK,"[2, 3]",9996,True,20000.0,query:;nid:434604001;src:LazadaMainSrp;rn:9d7d...,493 sold
8,Imported ViVo S1 with Box Dual Engine Fast Cha...,"Rs. 18,996",4.333333333333333,54.0,Sindh,SEL WEL,6005014023653,No Brand,39704,504989519_PK-2590154132,2590154132,504989519_PK,"[2, 3]",18996,True,33000.0,query:;nid:504989519;src:LazadaMainSrp;rn:9d7d...,220 sold
9,"Oppo A57 - 5.2"" HD Display - 4GB RAM - 64GB RO...","Rs. 9,999",4.369047619047619,252.0,Sindh,Fashion Market,1115909,No Brand,39704,266428237_PK-1482107513,1482107513,266428237_PK,"[2, 3]",9999,True,19999.0,query:;nid:266428237;src:LazadaMainSrp;rn:9d7d...,718 sold


In [188]:
import requests
import pandas as pd
import time

# Base URL pattern; "{}" is filled with the page number.
base_url = "https://www.daraz.pk/smartphones/?ajax=true&isFirstRequest=true&page={}"

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/122 Safari/537.36"
}

all_products = []

# Loop through first N pages (Daraz usually has ~50+ pages for smartphones)
for page in range(1, 11):   # <-- change 11 to e.g. 51 if you want 50 pages
    url = base_url.format(page)
    print(f"Scraping page {page} ...")

    # A timeout keeps one stalled request from hanging the whole run; a
    # network failure skips the page instead of aborting the loop.
    try:
        r = requests.get(url, headers=headers, timeout=15)
    except requests.RequestException as exc:
        print(f"⚠️  Skipped page {page} ({exc})")
        continue

    if r.status_code != 200:
        print(f"⚠️  Skipped page {page} (status {r.status_code})")
        continue

    # Only the JSON decode is guarded, so an unrelated ValueError raised
    # while building records is not misreported as "invalid JSON".
    try:
        data = r.json()
    except ValueError:
        print(f"❌ Page {page} did not return valid JSON")
        continue

    # `or` fallbacks also cover keys that are present but null.
    products = (data.get("mods") or {}).get("listItems") or []
    if not products:
        print(f"❌ No products found on page {page} — stopping.")
        break

    for item in products:
        all_products.append({
            "name": item.get("name"),
            "brand_name": item.get("brandName"),
            "price": item.get("price"),
            "original_price": item.get("originalPrice"),
            "discount": item.get("discount"),
            "rating_score": item.get("ratingScore"),
            "review_count": item.get("review"),
            "item_sold": item.get("itemSoldCntShow"),
            "seller_name": item.get("sellerName"),
            # Prefix "https:" only when a value exists: a missing or null
            # URL becomes None instead of a bare "https:" or a TypeError.
            "product_url": "https:" + item["productUrl"] if item.get("productUrl") else None,
            "image": "https:" + item["image"] if item.get("image") else None,
        })

    # polite delay
    time.sleep(1)

# Convert to DataFrame
df = pd.DataFrame(all_products)
print(f"\n✅ Scraped {len(df)} products total")
print(df.head())

Scraping page 1 ...
Scraping page 2 ...
Scraping page 3 ...
Scraping page 4 ...
Scraping page 5 ...
Scraping page 6 ...
Scraping page 7 ...
Scraping page 8 ...
Scraping page 9 ...
Scraping page 10 ...

✅ Scraped 400 products total
                                                name brand_name  price  \
0             Samsung Galaxy A06 4GB RAM + 64 GB ROM    Samsung  21499   
1  Combo Deal - Original ViVo Y85 with Box Charge...   No Brand  10392   
2  Global Variant Vivo Y17 with Box Fast Turbo Ch...   No Brand  15954   
3            Samsung Galaxy A06 4GB RAM + 128 GB ROM    Samsung  25267   
4  LIMITED STOCK: OPPO Reno12 F 5G | 12GB RAM + 2...       OPPO  99999   

  original_price discount       rating_score review_count  item_sold  \
0          26999     None  4.713221601489758          537  6.7K sold   
1          25000     None            4.60625          320  1.1K sold   
2          30000     None  4.328767123287672          219   823 sold   
3          31999     None  4.7142857

# Men's Care


In [195]:
import requests
import pandas as pd
import time

# Base URL pattern; "{}" is filled with the page number.
# The page parameter was previously hard-coded as "page=1", which made
# base_url.format(page) a no-op: every iteration requested page 1 again and
# the result held the same products repeated once per iteration.
base_url = "https://www.daraz.pk/catalog/?ajax=true&from=hp_categories&isFirstRequest=true&page={}&q=Men%27s%20Care&service=all_channel&spm=a2a0e.tm80331704.cate_3.1.da595aa7TwWsyJ&src=all_channel"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/122 Safari/537.36"
}

all_products = []

# Loop through the first N result pages of the Men's Care search
for page in range(1, 11):   # <-- change 11 to e.g. 51 if you want 50 pages
    url = base_url.format(page)
    print(f"Scraping page {page} ...")

    # A timeout keeps one stalled request from hanging the whole run; a
    # network failure skips the page instead of aborting the loop.
    try:
        r = requests.get(url, headers=headers, timeout=15)
    except requests.RequestException as exc:
        print(f"⚠️  Skipped page {page} ({exc})")
        continue

    if r.status_code != 200:
        print(f"⚠️  Skipped page {page} (status {r.status_code})")
        continue

    # Only the JSON decode is guarded, so an unrelated ValueError raised
    # while building records is not misreported as "invalid JSON".
    try:
        data = r.json()
    except ValueError:
        print(f"❌ Page {page} did not return valid JSON")
        continue

    # `or` fallbacks also cover keys that are present but null.
    products = (data.get("mods") or {}).get("listItems") or []
    if not products:
        print(f"❌ No products found on page {page} — stopping.")
        break

    for item in products:
        all_products.append({
            "name": item.get("name"),
            "brand_name": item.get("brandName"),
            "price": item.get("price"),
            "original_price": item.get("originalPrice"),
            "discount": item.get("discount"),
            "rating_score": item.get("ratingScore"),
            "review_count": item.get("review"),
            "item_sold": item.get("itemSoldCntShow"),
            "seller_name": item.get("sellerName"),
            # Prefix "https:" only when a value exists: a missing or null
            # URL becomes None instead of a bare "https:" or a TypeError.
            "product_url": "https:" + item["productUrl"] if item.get("productUrl") else None,
            "image": "https:" + item["image"] if item.get("image") else None,
        })

    # polite delay
    time.sleep(1)

# Convert to DataFrame
df = pd.DataFrame(all_products)
print(f"\n✅ Scraped {len(df)} products total")
print(df.head())

Scraping page 1 ...
Scraping page 2 ...
Scraping page 3 ...
Scraping page 4 ...
Scraping page 5 ...
Scraping page 6 ...
Scraping page 7 ...
Scraping page 8 ...
Scraping page 9 ...
Scraping page 10 ...

✅ Scraped 400 products total
                                                name brand_name price  \
0  LAST PIRATE Aftershave Splash | Non-alcoholic ...   No Brand  1199   
1  COLD FEAR Aftershave Splash | Non-alcoholic | ...   No Brand  1199   
2  The Best Quality Shaving Foams For Me Shaving ...   No Brand   559   
3  Heavendove Beard Growth Serum - 50ML | Thicker...   No Brand   606   
4  Yorker After shave Lotion 120ml - Soothe and M...   No Brand   100   

  original_price discount rating_score review_count item_sold     seller_name  \
0           1599     None                                None  AMFA Grooming    
1           1599     None                                None  AMFA Grooming    
2            811     None                                None     JJ Brothers   
3     

In [198]:
# Write the scraped Men's Care table to a CSV file in the working directory.
# NOTE(review): with pandas defaults the row index is written as a leading
# unnamed column; pass index=False if that column is unwanted.
df.to_csv("daraz_menscare.csv")