# Jumia Web Scraper using Requests, BeautifulSoup, and Pandas

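This notebook contains a complete Python script that scrapes product data (name, current price, old price, discount, and URL) from Jumia Kenya (jumia.co.ke) category pages and saves the results to a separate CSV file per category. Only the first listing page of each category is fetched; pagination is not followed. The example uses the 'Smartphones' and 'Computing Devices' categories.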

In [None]:
# Install necessary libraries
%pip install pandas beautifulsoup4 requests --quiet

In [None]:
from typing import List, Dict
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

def extract_product_info(
    card,
    base_url: str = "https://www.jumia.co.ke",
) -> Dict[str, str]:
    """Extract product details from a single BeautifulSoup product card element.

    Args:
        card: Element exposing ``select_one(css_selector)``, e.g. the
            BeautifulSoup tag for one ``article.prd`` product card.
        base_url: Site root against which relative product links are
            resolved. Defaults to the Kenyan storefront.

    Returns:
        A dict with the keys 'Product Name', 'Current Price', 'Old Price',
        'Discount' and 'Product URL'. A field whose element (or ``href``)
        is missing from the card is reported as 'N/A'.
    """
    def text_of(selector: str) -> str:
        # Whitespace-trimmed text of the first match, or the 'N/A' placeholder.
        elem = card.select_one(selector)
        return elem.text.strip() if elem is not None else 'N/A'

    link_elem = card.select_one('a.core')
    href = link_elem.attrs.get('href') if link_elem is not None else None

    # urljoin resolves relative paths against the site root and leaves
    # already-absolute links untouched; plain string concatenation would
    # produce a doubled host for absolute hrefs and a fused host/path for
    # hrefs without a leading slash.
    product_url = urljoin(base_url.rstrip('/') + '/', href) if href else 'N/A'

    return {
        'Product Name': text_of('h3.name'),
        'Current Price': text_of('div.prc'),
        'Old Price': text_of('div.old'),
        'Discount': text_of('div.bdg._dsct'),
        'Product URL': product_url,
    }

def scrape_jumia_category(url: str, category_name: str) -> pd.DataFrame:
    """Scrape one Jumia category page, save the products to CSV, and return them.

    The page at ``url`` is fetched once (pagination is not followed), every
    ``article.prd`` card is passed to ``extract_product_info``, and the
    resulting rows are written to ``jumia_<name>.csv`` in the current
    working directory, where ``<name>`` is ``category_name`` lowercased with
    spaces replaced by underscores.

    Args:
        url: Category page to fetch.
        category_name: Human-readable label used in the progress messages
            and to build the CSV filename.

    Returns:
        A DataFrame with one row per product card found on the page, or an
        empty DataFrame if the HTTP request fails (the error is printed,
        not raised, and no CSV is written).
    """
    # A browser-like User-Agent is sent with the request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                      "(KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
    }

    # 1. Fetch the page content. Only the network calls sit inside the try
    #    block, since they are the only source of requests.RequestException.
    print(f"... Fetching data for {category_name} from: {url}")
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)
    except requests.RequestException as e:
        print(f"✗ Error fetching data for {category_name}: {e}")
        return pd.DataFrame()

    # 2. Parse the HTML
    soup = BeautifulSoup(response.text, 'html.parser')

    # 3. Find all product cards (Jumia uses 'article.prd')
    product_cards = soup.select('article.prd')
    print(f"✓ Found {len(product_cards)} {category_name} on the page")

    # 4. Extract info from each card
    products = [extract_product_info(card) for card in product_cards]

    # 5. Create DataFrame and save to CSV
    df = pd.DataFrame(products)
    filename = f"jumia_{category_name.lower().replace(' ', '_')}.csv"
    df.to_csv(filename, index=False, encoding='utf-8')
    print(f"✓ Data saved to {filename}")

    return df

# --- Main Execution Block ---
# Scrapes each example category in turn, previews the first rows of each
# result with the notebook's `display`, and prints the row counts.

# Scrape Smartphones
smartphones_df = scrape_jumia_category(
    url="https://www.jumia.co.ke/smartphones/",
    category_name="Smartphones"
)

print("\n📱 Smartphone Sample Data:")
if not smartphones_df.empty:
    display(smartphones_df.head(5))
else:
    # Make it explicit why no table follows the heading.
    print("(no products scraped)")

# Scrape Computing Devices
computing_df = scrape_jumia_category(
    url="https://www.jumia.co.ke/computing/",
    category_name="Computing Devices"
)

print("\n💻 Computing Devices Sample Data:")
if not computing_df.empty:
    display(computing_df.head(5))
else:
    # Make it explicit why no table follows the heading.
    print("(no products scraped)")

# Summary
print("\n📊 Scraping Summary:")
print(f"Total Smartphones: {len(smartphones_df)}")
print(f"Total Computing Devices: {len(computing_df)}")
print(f"\nTotal Products Scraped: {len(smartphones_df) + len(computing_df)}")