In [26]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import time

In [27]:
def _extract_listing_fields(product):
    """Read link, name, image URL and price from one listing-page product form.

    Any element or attribute that is missing yields None for that field
    instead of raising, so one incomplete card does not lose the product.
    """
    link_tag = product.find("a", class_="product-item-link")
    img_tag = product.find("img", class_="product-image-photo")
    price_tag = product.find("span", class_="price")

    return {
        # .get() rather than [] so a tag without the attribute gives None.
        "lien": link_tag.get("href") if link_tag else None,
        "nom": link_tag.text.strip() if link_tag else None,
        "image_url": img_tag.get("src") if img_tag else None,
        "prix": price_tag.text.strip() if price_tag else None,
        "boutique": "Batam",
    }


def _fetch_product_specs(product_link, timeout):
    """Return the key/value rows of the first <table> on a product page.

    Returns an empty dict when the page cannot be fetched, does not answer
    with HTTP 200, or contains no table.
    """
    try:
        response = requests.get(product_link, timeout=timeout)
    except requests.RequestException as e:
        print(f"Error fetching product page {product_link}: {e}")
        return {}

    if response.status_code != 200:
        return {}

    product_page = BeautifulSoup(response.text, 'html.parser')
    table = product_page.find('table')
    if table is None:
        # find() returns None when nothing matches; no specs to collect.
        return {}

    specs = {}
    for row in table.find_all('tr'):
        cells = row.find_all(['td', 'th'])
        # Rows with fewer than two cells carry no key/value pair.
        if len(cells) >= 2:
            key = cells[0].get_text(strip=True)
            value = cells[1].get_text(strip=True)
            specs[key] = value
    return specs


def scrape_products_from_page(url, timeout=30):
    """Scrape every product listed on one catalogue page.

    For each ``form.product-item`` element on the listing page the link,
    name, image URL and price are extracted; the product's own page is then
    fetched and the rows of its first table are merged in as extra keys.

    Args:
        url: URL of the listing page.
        timeout: Seconds passed to every ``requests.get`` call as its
            ``timeout`` argument, so a stalled connection cannot hang the run.

    Returns:
        A list of dicts, one per product. Each dict always has the keys
        ``lien``, ``nom``, ``image_url``, ``prix`` and ``boutique``, followed
        by whatever spec keys the product page provided. An empty list is
        returned when the listing page does not answer with HTTP 200.

    Raises:
        requests.RequestException: if the listing page itself cannot be
            fetched. Failures on individual product pages are reported with
            ``print`` and the product is kept with its listing fields only.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Skipping {url}: HTTP {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    product_elements = soup.find_all('form', class_='product-item')

    products = []
    for product in product_elements:
        try:
            product_dict = _extract_listing_fields(product)

            # Additional data from the product page.
            product_link = product_dict["lien"]
            if product_link:
                specs = _fetch_product_specs(product_link, timeout)
                # setdefault keeps the listing fields authoritative: a spec
                # row that happens to be called e.g. "nom" cannot replace them.
                for key, value in specs.items():
                    product_dict.setdefault(key, value)

            products.append(product_dict)

        except Exception as e:
            # Best-effort scrape: report the problem and move on.
            print(f"Error processing product: {e}")

    return products

In [28]:
base_url = "https://batam.com.tn/informatique/ordinateur-portable.html?p={}"
all_products = []

# Crawl listing pages 1 through 8 in order and pool their products into one list.
for page_num in range(1, 9):
    print(f"Scraping page {page_num}...")
    url = base_url.format(page_num)
    page_products = scrape_products_from_page(url)
    all_products += page_products
    # Pause one second after each page so requests are not sent back-to-back.
    time.sleep(1)

print(f"Total products scraped: {len(all_products)}")

Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Total products scraped: 94


In [29]:
# Show which fields the first scraped record carries (iterating a dict yields its keys).
list(all_products[0])

['lien',
 'nom',
 'image_url',
 'prix',
 'boutique',
 'Référence',
 'Marque',
 'Gamme pc',
 'Gamer',
 "Système d'exploitation",
 "Taille de l'écran",
 'Écran',
 'Résolution écran',
 'Écran tactile',
 'Processeur',
 'Type de processeur',
 'Référence processeur',
 'Fréquence processeur',
 'Mémoire',
 'Disque dur',
 'Type de disque dur',
 'Carte graphique',
 'Chipset graphique',
 'Connecteurs',
 'Couleur',
 'Garantie']

In [30]:
# Fields retained for the final table, listed in output column order.
columns_to_keep = [
    'Référence',
    'lien',
    'nom',
    'Marque',
    'image_url',
    'boutique',
    "Taille de l'écran",
    'Résolution écran',
    'Processeur',
    'Référence processeur',
    'Mémoire',
    'Disque dur',
    'Type de disque dur',
    'Carte graphique',
    "Système d'exploitation",
    'Gamer',
    'prix',
]

# Project every scraped record onto the retained fields; a field the record
# lacks is filled with None by dict.get.
filtered_product_features = [
    dict((column, record.get(column)) for column in columns_to_keep)
    for record in all_products
]

In [31]:
# One row per product, one column per retained field.
df = pd.DataFrame(data=filtered_product_features)
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,Référence,lien,nom,Marque,image_url,boutique,Taille de l'écran,Résolution écran,Processeur,Référence processeur,Mémoire,Disque dur,Type de disque dur,Carte graphique,Système d'exploitation,Gamer,prix
0,82QY00PEFE,https://batam.com.tn/pc-lenovo-n4500-8gb-256-s...,PC Portable LENOVO V15 G2 IJL Intel Celeron N4...,LENOVO,https://batam.com.tn/media/catalog/product/cac...,Batam,"15,6''",1920 x 1080,Intel Celeron,Intel® Celeron® N4500,8 Go,256GO,SSD,Graphique Intégrée,FreeDos,Non,"729,000 DT"
1,82LX00CKFG,https://batam.com.tn/pc-lenovon-celeron-8-256b...,PC Portable LENOVO IdeaPad 1 15IJL7 Intel Cele...,LENOVO,https://batam.com.tn/media/catalog/product/cac...,Batam,"15,6''",1366 x 768,Intel Celeron,Intel® Celeron® N4500,8 Go,256GO,SSD,Graphique Intégrée,FreeDos,Non,"729,000 DT"
2,82LX00CFFG,https://batam.com.tn/pc-lenovon-celeron-8-256b...,PC Portable LENOVO IdeaPad 1 15IJL7 Intel Cele...,LENOVO,https://batam.com.tn/media/catalog/product/cac...,Batam,"15,6''",1366 x 768,Intel Celeron,Intel® Celeron® N4500,8 Go,256GO,SSD,Graphique Intégrée,FreeDos,Non,"729,000 DT"
3,X515KA-EJ008,https://batam.com.tn/asus-celeron-4-256-silver...,PC Portable ASUS Intel Celeron N4500 4Go 256G...,ASUS,https://batam.com.tn/media/catalog/product/cac...,Batam,"15,6''",1920 x 1080,Intel Celeron,Intel® Celeron® N4500,4 Go,256GO,SSD,Graphique Intégrée,FreeDos,Non,"769,000 DT"
4,82VG00NYFG,https://batam.com.tn/ip1-amd-athlon8g-2566-w11...,PC Portable LENOVO IdeaPad 1 15AMN7 AMD Athlon...,LENOVO,https://batam.com.tn/media/catalog/product/cac...,Batam,"15,6''",1366 x 768,AMD Athlon,AMD Athlon Silver 7120U,8 Go,256GO,SSD,AMD Radeon,Windows 11 Famille,Non,"799,000 DT"


In [32]:
# Summary statistics, transposed so each column of df becomes a row.
df.describe().transpose()

Unnamed: 0,count,unique,top,freq
Référence,94,91,83EM008XFG,2
lien,94,94,https://batam.com.tn/pc-lenovo-n4500-8gb-256-s...,1
nom,94,92,APPLE MacBook Pro M3 Pro 18Go 512Go SSD - Noir...,2
Marque,94,6,LENOVO,27
image_url,94,94,https://batam.com.tn/media/catalog/product/cac...,1
boutique,94,1,Batam,94
Taille de l'écran,94,6,"15,6''",74
Résolution écran,84,8,1920 x 1080,59
Processeur,94,17,Intel Core i5,27
Référence processeur,85,35,Apple M3,8


In [33]:
# Write the table to disk as CSV without the row index, encoded as UTF-8 with
# a leading BOM ('utf-8-sig').
df.to_csv(
    "batam_products.csv",
    encoding='utf-8-sig',
    index=False,
)

print("Data saved to batam_products.csv")

Data saved to batam_products.csv
