In [None]:
!pip install playwright pandas
!playwright install


Collecting playwright
  Downloading playwright-1.54.0-py3-none-manylinux1_x86_64.whl.metadata (3.5 kB)
Collecting pyee<14,>=13 (from playwright)
  Downloading pyee-13.0.0-py3-none-any.whl.metadata (2.9 kB)
Downloading playwright-1.54.0-py3-none-manylinux1_x86_64.whl (45.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.9/45.9 MB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyee-13.0.0-py3-none-any.whl (15 kB)
Installing collected packages: pyee, playwright
Successfully installed playwright-1.54.0 pyee-13.0.0
Downloading Chromium 139.0.7258.5 (playwright build v1181)[2m from https://cdn.playwright.dev/dbazure/download/playwright/builds/chromium/1181/chromium-linux.zip[22m
[1G172.5 MiB [] 0% 0.0s[0K[1G172.5 MiB [] 0% 50.3s[0K[1G172.5 MiB [] 0% 25.2s[0K[1G172.5 MiB [] 0% 15.7s[0K[1G172.5 MiB [] 0% 9.7s[0K[1G172.5 MiB [] 1% 6.8s[0K[1G172.5 MiB [] 1% 5.4s[0K[1G172.5 MiB [] 2% 4.1s[0K[1G172.5 MiB [] 3% 3.5s[0K[1G172.5 MiB [] 4% 3.2s

In [None]:
import asyncio
import time
from urllib.parse import urljoin

import nest_asyncio
import pandas as pd
from playwright.async_api import async_playwright

# Number of search-result pages to visit (pages 1..PAGES).
PAGES = 4

# Shared accumulator: the scraper appends one dict per product here.
products_details = list()

# Patch asyncio so coroutines can be awaited from a notebook kernel that is
# already running its own event loop.
nest_asyncio.apply()

_JUMIA_BASE_URL = "https://www.jumia.com.eg"


async def _text_or_default(parent, selector, default):
    """Return the stripped inner text of the first `selector` match under `parent`.

    Returns `default` unchanged when no element matches.
    """
    element = await parent.query_selector(selector)
    if element is None:
        return default
    return (await element.inner_text()).strip()


async def _parse_product(product):
    """Build one result row (a dict) from a single `article.prd` element handle."""
    # Product page link: the href of the card's main anchor.
    link_element = await product.query_selector("a.core")
    href = await link_element.get_attribute("href") if link_element else None

    # Image URL: read from data-src, falling back to src when that attribute
    # is absent so the column never ends up holding None.
    img_element = await product.query_selector("div.img-c img")
    img_url = None
    if img_element:
        img_url = (
            await img_element.get_attribute("data-src")
            or await img_element.get_attribute("src")
        )

    return {
        "Website": "Jumia",
        "Product Name": await _text_or_default(product, "h3.name", "No Name Found"),
        "Price": await _text_or_default(product, "div.prc", "No Price Found"),
        "Discount": await _text_or_default(product, "div.bdg._dsct._sm", "No Discount"),
        "Rating": await _text_or_default(product, "div.stars", "No Rating Found"),
        # urljoin resolves relative hrefs against the site root and leaves
        # already-absolute hrefs intact instead of prefixing them again.
        "Product URL": urljoin(_JUMIA_BASE_URL, href) if href else "No Link",
        "Image URL": img_url or "No Image",
    }


async def scrape_jumia_smartwatches():
    """Scrape Jumia Egypt "smart watch" search results into `products_details`.

    Visits result pages 1..PAGES in a headless Chromium browser and appends one
    dict per product card to the module-level `products_details` list. A page
    that fails to load, or a card that fails to parse, is reported with
    `print` and skipped rather than aborting the run.

    Returns:
        None. Results are accumulated in `products_details`.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)  # Launch a Chromium browser.
        try:
            # Open a fresh browser profile with a desktop Chrome user agent.
            context = await browser.new_context(user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36")
            page = await context.new_page()  # Open a new browser tab.

            for page_number in range(1, PAGES + 1):
                url = f"{_JUMIA_BASE_URL}/catalog/?q=smart+watch&page={page_number}"
                print(f"\n Scraping page {page_number}")

                try:
                    await page.goto(url)
                    # Wait until product cards appear on the page.
                    await page.wait_for_selector("article.prd", timeout=10000)
                except Exception as e:
                    print(f" Failed to load page {page_number}: {e}")
                    continue

                # Every product card on the results page matches article.prd.
                products = await page.query_selector_all("article.prd")
                page_product_count = 0

                for product in products:
                    try:
                        products_details.append(await _parse_product(product))
                        page_product_count += 1
                    except Exception as e:
                        print(f"Error parsing a product on page {page_number}: {e}")

                print(f"Page {page_number}: Scraped {page_product_count} products.")
                # Pause one second between pages to throttle the request rate.
                await asyncio.sleep(1)
        finally:
            # Close the browser even if an unexpected error interrupts the run.
            await browser.close()

# Run scraper
start_time = time.time()
await scrape_jumia_smartwatches()
end_time = time.time()

# Save to CSV
df = pd.DataFrame(products_details)
df.to_csv("jumia_smartwatches_playwright.csv", index=False)

print(f"\n Finished scraping {len(products_details)} products in {round(end_time - start_time, 2)} seconds.")
print(" Saved to 'jumia_smartwatches_playwright.csv'")



 Scraping page 1
Page 1: Scraped 43 products.

 Scraping page 2
Page 2: Scraped 40 products.

 Scraping page 3
Page 3: Scraped 40 products.

 Scraping page 4
Page 4: Scraped 40 products.

 Finished scraping 163 products in 79.72 seconds.
 Saved to 'jumia_smartwatches_playwright.csv'
