In [5]:
import requests
from bs4 import BeautifulSoup as bs
import time
import csv

def get_car_links(main_url, page_number, timeout=10):
    """Return the ad links found on one page of a listing.

    Args:
        main_url: Base URL of the listing. The page index is appended to it
            as the ``o`` query parameter.
        page_number: Index of the listing page to fetch.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the whole crawl.

    Returns:
        A list with the ``href`` value of every
        ``a.olx-ad-card__link-wrapper`` anchor on the page. The list is
        empty when the request fails or when no anchor matches.
    """
    url = f"{main_url}?o={page_number}"

    # Only the network call is guarded: it is the only part that raises
    # RequestException (timeouts included).
    try:
        response = requests.get(
            url,
            headers={'User-agent': 'Mozilla/5.0'},
            timeout=timeout,
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Erreur lors de la récupération des liens de la page {page_number} : {e}")
        return []

    soup = bs(response.text, 'html.parser')
    # Skip anchors without an href instead of letting a KeyError abort the
    # crawl.
    return [
        a['href']
        for a in soup.select('a.olx-ad-card__link-wrapper')
        if a.has_attr('href')
    ]

def scrape_car_details(car_url, timeout=10, separator=''):
    """Fetch one ad page and return the text of its details sections.

    Args:
        car_url: URL of the ad page to fetch.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the whole crawl.
        separator: String inserted between the text fragments of each
            matched element. The default ``''`` concatenates them directly
            (labels and values run together, e.g. "Ano2021"); pass e.g.
            ``' | '`` to keep the fields distinguishable.

    Returns:
        The text of every matched element joined with newlines (an empty
        string when nothing matches), or ``None`` when the request fails.
        The text is also printed to stdout.
    """
    # Only the network call is guarded: it is the only part that raises
    # RequestException (timeouts included).
    try:
        response = requests.get(
            car_url,
            headers={'User-agent': 'Mozilla/5.0'},
            timeout=timeout,
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Erreur lors de la récupération des détails de la voiture : {e}")
        return None

    car_soup = bs(response.text, 'html.parser')
    car_details = car_soup.find_all(class_='ad__sc-1g2w54p-1 WNDyV olx-d-flex olx-fd-column')

    # Flatten each matched element to text, one element per line.
    details_text = '\n'.join(
        detail.get_text(separator=separator) for detail in car_details
    )
    print(details_text)

    return details_text

main_page_url = 'https://www.olx.com.br/autos-e-pecas/carros-vans-e-utilitarios'
total_pages = 101  # Adjust the total number of pages as needed
car_details_list = []

# The CSV is opened before crawling and each row is written (and flushed) as
# soon as it is scraped. The crawl takes a long time, so an interruption or an
# unexpected error must not discard everything collected so far.
with open('car_details.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(['Car Details'])

    for page_number in range(1, total_pages + 1):
        car_links = get_car_links(main_page_url, page_number)

        for car_link in car_links:
            print(f"Scrapping details for car: {car_link}")
            car_detail = scrape_car_details(car_link)

            if car_detail:
                # Keep the in-memory list as well, so the results remain
                # available after the crawl.
                car_details_list.append(car_detail)
                csv_writer.writerow([car_detail])
                csvfile.flush()
            else:
                print(f"Aucun détail trouvé pour la voiture : {car_link}")

            time.sleep(2)  # Throttle requests to respect the site and avoid being blocked

# Debug output
print("Nombre total de détails récupérés :", len(car_details_list))


Scrapping details for car: https://ce.olx.com.br/fortaleza-e-regiao/autos-e-pecas/carros-vans-e-utilitarios/t-cross-hl-tsi-aut-cinza-2021-2021-1083293915
CaracterísticasVOLKSWAGEN T-CROSS HIGHLINE 1.4 TSI FLEX 16V 5P AUTCategoriaCarros, vans e utilitáriosModeloVOLKSWAGEN T-CROSS HIGHLINE 1.4 TSI FLEX 16V 5P AUTMarcaVOLKSWAGENTipo de veículoSUVAno2021Quilometragem16000CombustívelFlexPossui Kit GNVNãoCâmbioAutomáticoPortas4 portasFinal de placa3Ver OpcionaisOpcionaisAlarmeAr condicionado
Scrapping details for car: https://pb.olx.com.br/paraiba/autos-e-pecas/carros-vans-e-utilitarios/renegade-longitude-dark-1244615962
CaracterísticasJEEP RENEGADE LONG. T270 1.3 TB 4X2 FLEX AUT.CategoriaCarros, vans e utilitáriosModeloJEEP RENEGADE LONG. T270 1.3 TB 4X2 FLEX AUT.MarcaJEEPTipo de veículoSUVAno2024Quilometragem0Potência do motor1.3CombustívelFlexPossui Kit GNVNãoCâmbioAutomáticoCorAzulPortas4 portasTipo de direçãoElétrica
Scrapping details for car: https://pe.olx.com.br/grande-recife/autos-e