# 🛒 Web Scraping Samsung Products from Noon Egypt  

## Introduction  
- **Project**: Scraping Samsung products from Noon Egypt  
- **Goal**: Collect product details (name, price, rating, brand, etc.)  
- **Tools**: Requests, BeautifulSoup, Selenium (with webdriver-manager), Pandas  

## Scraping Section

### 2. Import Libraries

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options

### 3. Collect Product Links (Requests + BeautifulSoup)

In [None]:
# Collect product-page URLs from the Samsung listing pages on Noon Egypt.
# Result: `links`, an ordered list of unique absolute product URLs.
from urllib.parse import urljoin

# Browser-like User-Agent sent with every listing request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/115.0.0.0 Safari/537.36"
}

links = []
seen_links = set()  # O(1) duplicate check; `links` keeps first-seen order

# One session for all pages so the underlying connection can be reused.
with requests.Session() as session:
    session.headers.update(headers)
    for i in range(1, 26):  # scrape first 25 pages
        url = f"https://www.noon.com/egypt-en/samsung/?page={i}"
        try:
            # A timeout keeps a stalled connection from hanging the notebook.
            response = session.get(url, timeout=30)
            # Do not parse an error page as if it were a product listing.
            response.raise_for_status()
        except requests.RequestException as exc:
            # Best effort: keep the links gathered so far and move on.
            print(f"Skipping page {i}: {exc}")
            continue

        soup = BeautifulSoup(response.text, "html.parser")
        # NOTE(review): this class name looks build-generated and may change
        # when the site is redeployed -- confirm if no links are found.
        for a in soup.find_all("a", class_="PBoxLinkHandler_productBoxLink__fAJHN"):
            href = a.get("href")
            if not href:
                continue
            # urljoin handles both relative and already-absolute hrefs.
            product_url = urljoin("https://www.noon.com", href)
            if product_url not in seen_links:
                seen_links.add(product_url)
                links.append(product_url)

### 4. Scrape Product Data (Selenium)

In [None]:
# Visit each collected product link with Selenium and extract its details.
# Result: `products_list`, one 12-field tuple per successfully scraped page.
batch_size = 100  # maximum number of links processed in this run
start = 0  # offset into `links` where this batch begins


def _text_of(soup, class_name, tag=None):
    """Return the stripped text of the first matching element, or None.

    `tag` optionally restricts the match to one tag name; with the default
    of None, any tag carrying `class_name` matches.
    """
    node = soup.find(tag, class_=class_name)
    return node.get_text(strip=True) if node else None


def _parse_product(soup):
    """Extract one product row from a parsed product page.

    Returns a tuple ordered as: name, rating, number_of_ratings, category,
    subcategory, brand, price, discount, sold_by, best_seller_rank,
    best_seller_category, image. A field whose element is absent is None.
    """
    image_tag = soup.find("img", class_="imageMagnify")
    # .get() avoids a KeyError (and a lost row) when the <img> has no src.
    image = image_tag.get("src") if image_tag else None

    # Breadcrumb text lives in the last <span> of each anchor; anchors
    # without any <span> are skipped instead of raising IndexError.
    categories = []
    for anchor in soup.find_all("a", class_="Breadcrumb_breadcrumb__74hod"):
        spans = anchor.find_all("span")
        if spans:
            categories.append(spans[-1].get_text(strip=True))
    category = categories[1] if len(categories) > 1 else None
    subcategory = categories[-1] if categories else None

    return (
        _text_of(soup, "ProductTitle_title__vjUBn", "span"),
        _text_of(soup, "RatingPreviewStarV2_text__XseM1", "span"),
        _text_of(soup, "RatingPreviewStarV2_countText__OVzD2", "span"),
        category,
        subcategory,
        _text_of(soup, "BrandStoreCtaV2_textContent__6tPjk", "span"),
        _text_of(soup, "PriceOfferV2_priceNowText__fk5kK", "span"),
        _text_of(soup, "PriceOfferV2_profit__6gHFc", "span"),
        _text_of(soup, "PartnerRatingsV2_soldBy__IOCr1"),
        _text_of(soup, "CategoryBestSellerRankV2_rank__jyrnN", "span"),
        _text_of(soup, "CategoryBestSellerRankV2_category__ROWYW", "span"),
        image,
    )


batch = links[start:start + batch_size]
products_list = []

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
try:
    for idx, link in enumerate(batch):
        try:
            # Report against the real batch length, which may be shorter
            # than batch_size near the end of `links`.
            print(f"Scraping product {idx+1}/{len(batch)}")
            driver.get(link)
            time.sleep(2)  # fixed pause after navigation before reading the page source
            soup = BeautifulSoup(driver.page_source, "html.parser")
            products_list.append(_parse_product(soup))
        except Exception as e:
            # Best effort per product: log the failure and continue.
            print(f"Error at index {idx + start}: {e}")
finally:
    # Always close the browser, even on interrupt or unexpected error.
    driver.quit()

### 5. Save Data to DataFrame and Export to CSV

In [None]:
# Build a DataFrame from the scraped tuples and write it to products.csv.
# The header order below must match the field order of each tuple in
# `products_list`.
columns = [
    "name",
    "rating",
    "number_of_ratings",
    "category",
    "subcategory",
    "brand",
    "price",
    "discount",
    "sold_by",
    "best_seller_rank",
    "best_seller_category",
    "image",
]

products_df = pd.DataFrame(data=products_list, columns=columns)

# index=False leaves the DataFrame's row index out of the CSV.
products_df.to_csv("products.csv", index=False)

## Conclusion  

✅ Successfully scraped **Samsung products** from Noon.  
✅ Extracted details: *name, price, ratings, brand, category, etc.*  
✅ Data exported as **products.csv**.  

### 🔜 Next Steps  
- 🧹 Add cleaning for missing values  
- 📊 Perform deeper analysis (pricing trends, top-rated products)  
- 📈 Create visualizations for better insights  