In [1]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
from urllib.parse import urljoin 
import time
import os


# Scraping cars data from Avito.ma

In [2]:
BASE_URL = "https://www.avito.ma"
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}

def scrape_cars(start_page=1, end_page=1):
    all_cars = []
    
    for page in range(start_page, end_page + 1):
        url = f"{BASE_URL}/fr/maroc/voiture?o={page}" if page > 1 else f"{BASE_URL}/fr/maroc/voiture"
        
        try:
            response = requests.get(url, headers=HEADERS)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            script = soup.find('script', id='__NEXT_DATA__')
            
            if not script:
                print(" No script tag found on page " + page)
                continue
                
            data = json.loads(script.string)
            cars = data["props"]["pageProps"]["initialReduxState"]["ad"]["search"]["ads"]
            print(cars[1])
            
            for car in cars:
                params = {}
                for param in car.get('params', {}).get('secondary', []):
                    params[param.get('key')] = param.get('value', 'null')
                
                car_info = {
                    "id": car.get("id", "null"),
                    "list_id": car.get("listId", "null"),
                    "title": car.get("subject", "null"),
                    "description": car.get("description", "null"),
                    "ad_type": car.get("adType", {}).get("label", "null"),
                    "category": car.get("category", {}).get("formatted", "null"),
                    "price": car.get("price", {}).get("value", "null"),
                    "currency": car.get("price", {}).get("currency", "null"),
                    "location": car.get("location", "null"),
                    "city": car.get("location", "null").split(",")[0].strip() if car.get("location") else "null",
                    "date_posted": car.get("date", "null"),
                    "seller_name": car.get("seller", {}).get("name", "null"),
                    "seller_type": car.get("seller", {}).get("type", "null"),
                    "seller_verified": car.get("seller", {}).get("isVerifiedSeller", False),
                    "brand": params.get("brand", "null"),
                    "model": params.get("model", "null"),
                    "year": params.get("regdate", "null"),
                    "mileage": params.get("mileage", "null"),
                    "fuel_type": params.get("fuel", "null"),
                    "transmission": params.get("transmission", "null"),
                    # "images": ", ".join(car.get("images", [])),
                    "url": urljoin(BASE_URL, car.get("href", "")) if car.get("href") else "null",
                    "is_premium": car.get("isPremium", False),
                    "is_urgent": car.get("isUrgent", False),
                }
                
                all_cars.append(car_info)
            
            print("Page " + page + " done: " + len(cars) + " cars found")
            time.sleep(2)  
            
        except Exception as e:
            print("Error on page " + page + ": " + str(e))
    
    return all_cars

cars_data = scrape_cars(start_page=1, end_page=200)
# pd.DataFrame(cars_data).to_excel("avito_cars_direct.xlsx", index=False)
# print(f"Saved {len(cars_data)} cars to Excel!")

output_file = "avito_cars.xlsx"

if os.path.exists(output_file):
    existing_data = pd.read_excel(output_file)
    all_data = pd.concat([existing_data, pd.DataFrame(cars_data)], ignore_index=True)
else:
    all_data = pd.DataFrame(cars_data)

# Save to Excel
all_data.to_excel(output_file, index=False)
print("Saved " + len(all_data) + " cars to Excel (added " + len(cars_data) +" new records)!")

{'id': '75388733', 'listId': '56146272', 'hasShipping': False, 'isEcommerce': False, 'category': {'formatted': "Voitures - Voitures d'occasion", 'name': "Voitures d'occasion", 'id': '2010', 'parent': {'id': '2100'}}, 'adType': {'key': 'SELL', 'label': 'à vendre'}, 'subject': 'Dacia logan essence automatique', 'description': 'Voiture bonne etat ', 'seller': {'type': 'STORE', 'name': 'LAVAGE AUTO SAAD', 'img': 'https://content.avito.ma/stores/1d/1dfba6cd-6de6-402f-a375-e590d1992f92.jpg', 'isVerifiedSeller': False}, 'price': {'value': 129000, 'currency': 'DH'}, 'oldPrice': {'value': 99000, 'currency': 'DH'}, 'defaultImage': 'https://content.avito.ma/classifieds/images/10133379983?t=images', 'images': ['https://content.avito.ma/classifieds/images/10133379983?t=images', 'https://content.avito.ma/classifieds/images/10133379946?t=images', 'https://content.avito.ma/classifieds/images/10133379947?t=images', 'https://content.avito.ma/classifieds/images/10133379948?t=images'], 'videos': [], 'para