# Amazon Egypt Mobile Phones Web Scraper

This notebook scrapes mobile phone data from Amazon Egypt using Selenium. The data includes:
- Title
- Rating
- Price

In [None]:
# Import necessary libraries
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup

In [25]:
# Set up Chrome options
chrome_options = Options()
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36")

# Initialize the Chrome WebDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

# URL to scrape
url = "https://www.amazon.eg/s?i=electronics&rh=n%3A21832883031&s=popularity-rank&fs=true&language=en&ref=lp_21832883031_sar"

# Open the URL
driver.get(url)

# Wait for the page to load
time.sleep(5)

print("Webpage loaded successfully")

Webpage loaded successfully


In [26]:
# Execute the scraping using Selenium and BeautifulSoup
try:
    print("Starting to scrape Amazon Egypt mobile phones...")
    all_products = []
    
    # Get the page source and parse with BeautifulSoup
    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'html.parser')
    
    # Try different selectors to find product containers
    selectors = [
        'div.s-result-item[data-component-type="s-search-result"]',
        'div.sg-col-4-of-24',
        'div.s-card-container',
        'div.sg-col-inner'
    ]
    
    product_containers = []
    for selector in selectors:
        product_containers = soup.select(selector)
        if product_containers:
            print(f"Found {len(product_containers)} products using selector: {selector}")
            break
    
    # Limit to first 20 products
    product_containers = product_containers[:20]
    
    # Process each product
    for container in product_containers:
        product_data = {}
        
        # Extract title - try different selectors
        title_element = (
            container.select_one('h2 a span') or 
            container.select_one('h2') or 
            container.select_one('a.a-link-normal span.a-text-normal')
        )
        
        if title_element:
            product_data['title'] = title_element.text.strip()
        else:
            continue  # Skip products without title
        
        # Extract rating - try different selectors
        rating_element = (
            container.select_one('i.a-icon-star-small') or 
            container.select_one('span.a-icon-alt') or
            container.select_one('div.a-row span.a-icon-alt')
        )
        
        if rating_element:
            product_data['rating'] = rating_element.text.strip()
        else:
            product_data['rating'] = None
        
        # Extract price - try different selectors
        price_element = (
            container.select_one('span.a-price-whole') or 
            container.select_one('span.a-offscreen') or
            container.select_one('span.a-color-base')
        )
        
        if price_element:
            product_data['price'] = price_element.text.strip()
        else:
            product_data['price'] = None
        
        # Add to list of products
        all_products.append(product_data)
        print(f"Added product: {product_data['title'][:30]}...")
    
    # Create a DataFrame from the scraped data
    df = pd.DataFrame(all_products)
    
    # Display the first few rows
    print(f"\nScraped {len(df)} products")
    display(df.head())
    
    # Save to CSV
    df.to_csv('amazon_egypt_mobile_phones.csv', index=False)
    print("Data saved to amazon_egypt_mobile_phones.csv")

except Exception as e:
    print(f"Error during scraping: {e}")

finally:
    # Close the WebDriver
    driver.quit()
    print("WebDriver closed")

Starting to scrape Amazon Egypt mobile phones...
Found 27 products using selector: div.s-result-item[data-component-type="s-search-result"]
Added product: Samsung Galaxy A06 Dual Sim 6G...
Added product: Redmi14C (8+256G) Midnight Bla...
Added product: Xiaomi Redmi A5 Smartphone 4GB...
Added product: Xiaomi Redmi Note 14 Smartphon...
Added product: Infinix SMART 10–Iris Blue"120...
Added product: Samsung Galaxy A55 5G, Android...
Added product: Xiaomi POCO C75 C3NP Immersive...
Added product: Samsung Galaxy A26 5G, Android...
Added product: Samsung Galaxy A16 LTE, Androi...
Added product: Samsung Galaxy M55 5G, Android...
Added product: Samsung Galaxy A36 5G, Android...
Added product: Samsung Galaxy A05s, Android S...
Added product: Infinix Hot 50i (6+6) 12GB RAM...
Found 27 products using selector: div.s-result-item[data-component-type="s-search-result"]
Added product: Samsung Galaxy A06 Dual Sim 6G...
Added product: Redmi14C (8+256G) Midnight Bla...
Added product: Xiaomi Redmi A5 Sma

Unnamed: 0,title,rating,price
0,Samsung Galaxy A06 Dual Sim 6GB RAM 128GB Stor...,4.2 out of 5 stars,5832.
1,Redmi14C (8+256G) Midnight Black MediaTek Heli...,4.0 out of 5 stars,3 sizes
2,"Xiaomi Redmi A5 Smartphone 4GB + 128GB, Batter...",3.7 out of 5 stars,4555.
3,"Xiaomi Redmi Note 14 Smartphone, 8 + 256 GB, B...",4.2 out of 5 stars,9444.
4,"Infinix SMART 10–Iris Blue""120Hz LCD Display 6...",4.2 out of 5 stars,3999.


Data saved to amazon_egypt_mobile_phones.csv
WebDriver closed
WebDriver closed


In [27]:
# Analyze the scraped data
# Load the data from CSV
df = pd.read_csv('amazon_egypt_mobile_phones.csv')

# Show basic statistics
print("Data shape:", df.shape)
print("\nMissing values:")
print(df.isnull().sum())

# Show price statistics if available
if 'price' in df.columns and df['price'].notna().any():
    # Clean price values
    df['price_clean'] = df['price'].str.extract(r'([\d,]+\.?\d*)').squeeze()
    df['price_clean'] = df['price_clean'].str.replace(',', '').astype(float)
    
    print("\nPrice statistics:")
    print(f"Average: {df['price_clean'].mean():.2f}")
    print(f"Minimum: {df['price_clean'].min():.2f}")
    print(f"Maximum: {df['price_clean'].max():.2f}")

# Display the data
display(df)

Data shape: (20, 3)

Missing values:
title     0
rating    0
price     0
dtype: int64

Price statistics:
Average: 9966.25
Minimum: 3.00
Maximum: 31950.00


Unnamed: 0,title,rating,price,price_clean
0,Samsung Galaxy A06 Dual Sim 6GB RAM 128GB Stor...,4.2 out of 5 stars,5832.,5832.0
1,Redmi14C (8+256G) Midnight Black MediaTek Heli...,4.0 out of 5 stars,3 sizes,3.0
2,"Xiaomi Redmi A5 Smartphone 4GB + 128GB, Batter...",3.7 out of 5 stars,4555.,4555.0
3,"Xiaomi Redmi Note 14 Smartphone, 8 + 256 GB, B...",4.2 out of 5 stars,9444.,9444.0
4,"Infinix SMART 10–Iris Blue""120Hz LCD Display 6...",4.2 out of 5 stars,3999.,3999.0
5,"Samsung Galaxy A55 5G, Android Smartphone, Dua...",3.8 out of 5 stars,16199.,16199.0
6,"Xiaomi POCO C75 C3NP Immersive 6.88"" display 5...",4.1 out of 5 stars,6290.,6290.0
7,"Samsung Galaxy A26 5G, Android Smartphone, 256...",4.4 out of 5 stars,13599.,13599.0
8,"Samsung Galaxy A16 LTE, Android Smartphone, Du...",4.3 out of 5 stars,8943.,8943.0
9,"Samsung Galaxy M55 5G, Android Smartphone, Dua...",4.2 out of 5 stars,12999.,12999.0
