In [12]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import time

In [13]:
def _find_chain(node, *steps):
    """Apply successive ``find`` lookups, returning None at the first miss.

    Each step is a ``(tag_name, find_kwargs)`` pair. Chaining ``.find().find()``
    directly raises AttributeError when an intermediate tag is absent; this
    helper turns that case into a plain None.
    """
    for tag_name, find_kwargs in steps:
        if node is None:
            return None
        node = node.find(tag_name, **find_kwargs)
    return node


def _stripped_text(tag):
    """Return the tag's text with surrounding whitespace removed, or None if there is no tag."""
    return tag.text.strip() if tag is not None else None


def _fetch_specs(product_link, timeout):
    """Return the data-sheet name -> value pairs of a product page.

    An empty dict is returned when the page cannot be fetched, does not answer
    with HTTP 200, or has no ``<dl class="data-sheet">`` element, so a failing
    detail page never discards the data already read from the listing.
    """
    try:
        detail_response = requests.get(product_link, timeout=timeout)
    except requests.RequestException as e:
        print(f"Error fetching product page {product_link}: {e}")
        return {}

    if detail_response.status_code != 200:
        print(f"Skipping specs for {product_link}: HTTP {detail_response.status_code}")
        return {}

    detail_page = BeautifulSoup(detail_response.text, 'html.parser')
    data_sheet = detail_page.find("dl", class_="data-sheet")
    if data_sheet is None:
        return {}

    dt_tags = data_sheet.find_all("dt", class_="name")
    dd_tags = data_sheet.find_all("dd", class_="value")
    # Labels and values are paired by position; zip stops at the shorter list.
    return {
        dt.get_text(strip=True): dd.get_text(strip=True)
        for dt, dd in zip(dt_tags, dd_tags)
    }


def scrape_products_from_page(url, timeout=10):
    """Scrape the product cards of one listing page and each product's data sheet.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds passed to every ``requests.get`` call so a stalled
            connection cannot block the notebook indefinitely.

    Returns:
        A list with one dict per product. Every dict has the keys
        "reference", "lien", "nom", "image_url", "prix" and "boutique"
        (a value is None when the matching element is missing), followed by
        one key per data-sheet entry found on the product's own page.
        An empty list is returned when the listing page does not answer
        with HTTP 200.

    Raises:
        requests.RequestException: if the listing page itself cannot be
            fetched (connection error, timeout, ...).
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Skipping {url}: HTTP {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    # Every product card on the listing page.
    product_elements = soup.find_all('div', class_='thumbnail-container')

    products = []

    # NOTE(review): the last 'thumbnail-container' is deliberately skipped, as in
    # the original code -- presumably it is not a real product card; confirm.
    for product in product_elements[:-1]:
        try:
            # Product reference (SKU)
            ref_tag = _find_chain(
                product,
                ("div", {"class_": "product-reference"}),
                ("span", {"itemprop": "sku"}),
            )
            product_ref = _stripped_text(ref_tag)

            # Product link
            link_tag = product.find("a", class_="thumbnail product-thumbnail")
            product_link = link_tag.get('href') if link_tag is not None else None

            # Product full name
            name_tag = _find_chain(
                product,
                ("h3", {"class_": "product-title"}),
                ("a", {}),
            )
            full_name = _stripped_text(name_tag)

            # Image URL (read from the <img> tag's 'content' attribute)
            img_tag = _find_chain(
                product,
                ("div", {"class_": "thumbnail-container-image"}),
                ("a", {}),
                ("img", {}),
            )
            image_content = img_tag.get('content') if img_tag is not None else None
            image_url = image_content.strip() if image_content else None

            # Price
            price = _stripped_text(product.find("span", class_="money"))

            # Additional data from the product's own page
            specs = _fetch_specs(product_link, timeout) if product_link else {}

            product_dict = {
                "reference": product_ref,
                "lien": product_link,
                "nom": full_name,
                "image_url": image_url,
                "prix": price,
                "boutique": "agora"
            }

            # Expand the spec keys dynamically, but never let a data-sheet label
            # overwrite one of the core fields above.
            for key, value in specs.items():
                product_dict.setdefault(key, value)

            products.append(product_dict)

        except Exception as e:
            # Last-resort guard: one malformed card must not abort the whole page.
            print(f"Error processing product: {e}")

    return products

In [14]:
# Listing URL template; the page number is substituted into the query string.
base_url = "https://agora.tn/fr/11-ordinateur-portable?page={}"
all_products = []

# Walk listing pages 1 through 7 and accumulate every scraped product.
for page_num in range(1, 8):
    url = base_url.format(page_num)
    print(f"Scraping page {page_num}...")
    page_products = scrape_products_from_page(url)
    all_products += page_products
    # Short pause between pages so the server is not hammered.
    time.sleep(1)

print(f"Total products scraped: {len(all_products)}")

Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Total products scraped: 146


In [15]:
# Field names of the first scraped product (iterating a dict yields its keys).
list(all_products[0])

['reference',
 'lien',
 'nom',
 'image_url',
 'prix',
 'boutique',
 "Taille De L'ecran Pc",
 'Processeur',
 'Génération de  Processeur',
 'Carte Graphique',
 'CAPACITÉ DE DISQUE',
 'Memoire RAM',
 'Type Memoire',
 'Ecran Incurvé',
 'Ecran Tactile',
 'Gamer',
 "Systeme D'exploitation",
 "Type de l'écran",
 'Couleur',
 'Garantie']

In [16]:
# Columns of the exported table, in output order. A column that a product
# does not provide is filled with None by dict.get below.
columns_to_keep = [
    "reference",
    "lien",
    "nom",
    "Marque",
    "image_url",
    "boutique",
    "Taille De L'ecran Pc",
    "Résolution écran",
    "Processeur",
    "reference Processeur",
    "Memoire RAM",
    "CAPACITÉ DE DISQUE",
    "Type de disque dur",
    "Carte Graphique",
    "Systeme D'exploitation",
    "Gamer",
    "prix",
]

# Project every scraped product onto exactly the columns listed above.
filtered_product_features = [
    dict((column, record.get(column)) for column in columns_to_keep)
    for record in all_products
]

In [17]:
# Build the table from the filtered records and preview its first five rows.
df = pd.DataFrame(data=filtered_product_features)
df.head(n=5)

Unnamed: 0,reference,lien,nom,Marque,image_url,boutique,Taille De L'ecran Pc,Résolution écran,Processeur,reference Processeur,Memoire RAM,CAPACITÉ DE DISQUE,Type de disque dur,Carte Graphique,Systeme D'exploitation,Gamer,prix
0,X515KA-EJ008,https://agora.tn/fr/pc-portable/16616-pc-porta...,PC PORTABLE ASUS X515KA CELERON N4500U 4GO 256...,,https://agora.tn/fr/46133-home_default/pc-port...,agora,"15.6""",,Intel Celeron-Dual Core,,4 Go,256 Go SSD,,Graphique Intégrée,FreeDos,Non,"719,000 TND"
1,X515KA-EJ008-8,https://agora.tn/fr/pc-portable/16664-pc-porta...,PC PORTABLE ASUS X515KA CELERON N4500U 8GO 256...,,https://agora.tn/fr/46726-home_default/pc-port...,agora,"15.6""",,Intel Celeron-Dual Core,,8 Go,256 Go SSD,,Graphique Intégrée,FreeDos,Non,"739,000 TND"
2,X1504VA-NJ520W,https://agora.tn/fr/pc-portable/15389-pc-porta...,PC PORTABLE ASUS VIVOBOOK 15 X1504VA I3-1315U ...,,https://agora.tn/fr/46316-home_default/pc-port...,agora,"15.6""",,Intel Core i3,,4 Go,256 Go SSD,,Graphique Intégrée,Windows,Non,"1 049,000 TND"
3,INS-3535-R5,https://agora.tn/fr/pc-portable/16867-pc-porta...,PC PORTABLE DELL INSPIRON 3535 AMD RAYZEN 5 8G...,,https://agora.tn/fr/46775-home_default/pc-port...,agora,"15.6""",,AMD RYZEN 5,,8 Go,512 Go SSD,,AMD Radeon,FreeDos,Non,"1 169,000 TND"
4,X1504VA-NJ814W,https://agora.tn/fr/pc-portable/17170-pc-porta...,PC PORTABLE ASUS VIVOBOOK 15 I3-1315U 8GO 512G...,,https://agora.tn/fr/48086-home_default/pc-port...,agora,"15.6""",,Intel Core i3,,8 Go,512 Go SSD,,Graphique Intégrée,Windows 11,Non,"1 189,000 TND"


In [18]:
# Summary statistics, transposed so that each column of df becomes a row.
df.describe().transpose()

Unnamed: 0,count,unique,top,freq
reference,146,146,X515KA-EJ008,1.0
lien,146,146,https://agora.tn/fr/pc-portable/16616-pc-porta...,1.0
nom,146,143,PC PORTABLE DELL VOSTRO 3530 I3-1305U 8GO 256G...,2.0
Marque,0,0,,
image_url,146,146,https://agora.tn/fr/46133-home_default/pc-port...,1.0
boutique,146,1,agora,146.0
Taille De L'ecran Pc,146,5,"15.6""",119.0
Résolution écran,0,0,,
Processeur,146,13,Intel Core i5,43.0
reference Processeur,0,0,,


In [19]:
# Export the table to CSV without the index column; 'utf-8-sig' writes a
# byte-order mark at the start of the file.
df.to_csv(
    "agora_products.csv",
    encoding='utf-8-sig',
    index=False,
)

print("Data saved to agora_products.csv")

Data saved to agora_products.csv
