In [1]:
import os
import time
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

In [3]:

# === Setup Selenium WebDriver ===
# Location of the chromedriver binary. Set the CHROMEDRIVER_PATH environment
# variable to point somewhere else without editing the notebook; the path
# below is only the fallback.
driver_path = os.environ.get(
    "CHROMEDRIVER_PATH",
    r"D:\chromedriver-win64\chromedriver-win64\chromedriver.exe",  # <-- Update this path
)
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")
# Chrome parses this switch as "width,height"; the "1920x1080" spelling is
# not recognised, so the requested viewport size would not be applied.
options.add_argument("--window-size=1920,1080")
options.add_argument("user-agent=Mozilla/5.0")
service = Service(driver_path)
# Shared browser session used by every scraper function in this notebook.
driver = webdriver.Chrome(service=service, options=options)


In [4]:
# === Scraper for Amazon.eg - Men Jeans ===
def scrape_amazon_men_jeans(url):
    """Scrape one Amazon.eg search-results page and return its product records.

    Loads ``url`` in the module-level Selenium ``driver``, sleeps a fixed three
    seconds, then parses ``driver.page_source`` with BeautifulSoup.

    Args:
        url: Address of the search-results page to load.

    Returns:
        A list of dicts, one per ``div`` that carries a non-empty ``data-asin``
        attribute, with the keys "Product Name", "Price", "Rating", "Reviews",
        "Product URL", "Image URL" and "Brand". A value is ``None`` when the
        corresponding tag is not found inside that ``div``.
    """
    base_url = "https://www.amazon.eg"

    driver.get(url)
    time.sleep(3)  # Wait for JS content to load
    soup = BeautifulSoup(driver.page_source, "html.parser")

    data = []
    for product in soup.find_all("div", {"data-asin": True}):
        # Skip containers whose data-asin is empty: they carry no product ID
        # (presumably layout/ad slots -- verify against the live page) and
        # would otherwise be recorded as products.
        if not (product.get("data-asin") or "").strip():
            continue

        name_tag = product.find("span", class_="a-text-normal")
        name = name_tag.text.strip() if name_tag else None

        price_whole = product.find("span", class_="a-price-whole")
        if price_whole:
            # The whole-part text may already end with the decimal point (it
            # can come from a nested separator span); drop it so joining the
            # fraction cannot produce a doubled separator such as "12..50".
            price = price_whole.text.strip().rstrip(".")
            price_fraction = product.find("span", class_="a-price-fraction")
            if price_fraction:
                price += "." + price_fraction.text.strip()
        else:
            price = None

        rating_tag = product.find("span", class_="a-icon-alt")
        rating = rating_tag.text.strip() if rating_tag else None

        reviews_tag = product.find("span", class_="a-size-base")
        reviews = reviews_tag.text.strip() if reviews_tag else None

        link_tag = product.find("a", class_="a-link-normal")
        href = link_tag.get("href") if link_tag else None
        # urljoin resolves relative hrefs and leaves absolute ones untouched;
        # an anchor without href gives None instead of raising TypeError.
        product_url = urljoin(base_url, href) if href else None

        img_tag = product.find("img", class_="s-image")
        img_url = img_tag.get("src") if img_tag else None

        # First whitespace-separated word of the title is used as the brand.
        brand = name.split()[0] if name else None

        data.append({
            "Product Name": name,
            "Price": price,
            "Rating": rating,
            "Reviews": reviews,
            "Product URL": product_url,
            "Image URL": img_url,
            "Brand": brand
        })
    return data

In [5]:

# === Scraper for Jumia.com.eg - Cellphones ===
def scrape_jumia_cellphones(url):
    """Scrape one Jumia.com.eg listing page and return its product records.

    Loads ``url`` in the module-level Selenium ``driver``, sleeps a fixed three
    seconds, then parses ``driver.page_source`` with BeautifulSoup.

    Args:
        url: Address of the listing page to load.

    Returns:
        A list of dicts, one per ``article`` element with class ``prd``, with
        the keys "Product Name", "Price", "Rating", "Reviews", "Product URL",
        "Image URL" and "Brand". A value is ``None`` when the corresponding
        tag is not found. "Rating" is the ``aria-label`` string of the
        ``stars`` div when that attribute exists, otherwise the number of
        ``svg`` elements inside it (an int), otherwise ``None``.
    """
    base_url = "https://www.jumia.com.eg"

    driver.get(url)
    time.sleep(3)  # Wait for JS to load
    soup = BeautifulSoup(driver.page_source, "html.parser")

    data = []
    for product in soup.find_all("article", class_="prd"):
        name_tag = product.find("h3", class_="name")
        name = name_tag.text.strip() if name_tag else None

        price_tag = product.find("div", class_="prc")
        price = price_tag.text.strip() if price_tag else None

        rating_tag = product.find("div", class_="stars")
        if rating_tag and rating_tag.has_attr("aria-label"):
            rating = rating_tag["aria-label"]
        elif rating_tag:
            # No label available: fall back to counting the star icons.
            stars = rating_tag.find_all("svg")
            rating = len(stars) if stars else None
        else:
            rating = None

        review_tag = product.find("div", class_="rev")
        reviews = review_tag.text.strip() if review_tag else None

        link_tag = product.find("a", class_="core")
        href = link_tag.get("href") if link_tag else None
        # urljoin resolves relative hrefs and leaves absolute ones untouched;
        # an anchor without href gives None instead of raising TypeError.
        product_url = urljoin(base_url, href) if href else None

        img_tag = product.find("img")
        # Prefer data-src and fall back to src when it is missing or empty.
        img_url = (img_tag.get("data-src") or img_tag.get("src")) if img_tag else None

        # First whitespace-separated word of the title is used as the brand.
        brand = name.split()[0] if name else None

        data.append({
            "Product Name": name,
            "Price": price,
            "Rating": rating,
            "Reviews": reviews,
            "Product URL": product_url,
            "Image URL": img_url,
            "Brand": brand
        })
    return data


In [6]:


# === Main execution ===

# Accumulators for the records returned by each scraper, one list per site.
all_amazon_data, all_jumia_data = [], []

# How many result pages to request from each site.
amazon_pages = jumia_pages = 5


In [7]:

# Scrape Amazon Men Jeans
# Start from an empty list so that re-running this cell replaces the previous
# results instead of appending the same pages a second time.
all_amazon_data = []
print("Scraping Amazon.eg Men Jeans...")
for page in range(1, amazon_pages + 1):
    url = f"https://www.amazon.eg/s?k=men+jeans&page={page}"
    print(f"  Amazon page {page}")
    page_data = scrape_amazon_men_jeans(url)
    all_amazon_data.extend(page_data)
    time.sleep(2)  # Pause between page requests


Scraping Amazon.eg Men Jeans...
  Amazon page 1
  Amazon page 2
  Amazon page 3
  Amazon page 4
  Amazon page 5


In [8]:
# Save Amazon data
# The CSV is written into the current working directory, without the index column.
amazon_csv = os.path.join(os.getcwd(), "amazon_men_jeans.csv")
amazon_df = pd.DataFrame(data=all_amazon_data)
amazon_df.to_csv(path_or_buf=amazon_csv, index=False)
print(f"Amazon data saved to {amazon_csv}")

Amazon data saved to C:\Users\agama\Desktop\Konecta\Second Task\Second Trial\amazon_men_jeans.csv


In [9]:

# Scrape Jumia Cellphones
# Start from an empty list so that re-running this cell replaces the previous
# results instead of appending the same pages a second time.
all_jumia_data = []
print("Scraping Jumia.com.eg Cellphones...")
for page in range(1, jumia_pages + 1):
    url = f"https://www.jumia.com.eg/smartphones/?page={page}"
    print(f"  Jumia page {page}")
    page_data = scrape_jumia_cellphones(url)
    all_jumia_data.extend(page_data)
    time.sleep(2)  # Pause between page requests


Scraping Jumia.com.eg Cellphones...
  Jumia page 1
  Jumia page 2
  Jumia page 3
  Jumia page 4
  Jumia page 5


In [10]:

# Save Jumia data
# The CSV is written into the current working directory, without the index column.
jumia_csv = os.path.join(os.getcwd(), "jumia_cellphones.csv")
jumia_df = pd.DataFrame(data=all_jumia_data)
jumia_df.to_csv(path_or_buf=jumia_csv, index=False)
print(f"Jumia data saved to {jumia_csv}")


Jumia data saved to C:\Users\agama\Desktop\Konecta\Second Task\Second Trial\jumia_cellphones.csv


In [11]:

# Close driver
# Run this cell last: both scraper functions call driver.get() on this same
# module-level driver object, so they cannot be used after it has been quit.
driver.quit()
print("Scraping complete!")

Scraping complete!
