In [64]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from bs4 import BeautifulSoup
import os
from datetime import datetime

def file_config(start_url, i=2):
    """Start a headless Chrome session and open ``start_url``.

    Args:
        start_url: URL to load once the browser is running.
        i: Maximum number of load attempts. Only page-load timeouts are
            retried; any other error is re-raised.

    Returns:
        The ``webdriver.Chrome`` instance. If every attempt timed out the
        driver is still returned (best effort) and a message is printed, so
        the caller can decide whether to retry.

    Raises:
        Whatever non-timeout exception occurs while configuring or loading;
        the browser is shut down before the exception propagates.
    """
    # Driver configuration
    driver_path = r"/usr/local/bin/chromedriver"
    options = webdriver.ChromeOptions()
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--headless')
    try:
        driver = webdriver.Chrome(executable_path=driver_path, options=options)
    except TypeError:
        # Newer Selenium releases no longer accept `executable_path`; the
        # driver location has to be passed through a Service object instead.
        from selenium.webdriver.chrome.service import Service
        driver = webdriver.Chrome(service=Service(driver_path), options=options)

    try:
        driver.set_page_load_timeout(6000)
        loaded = False
        attempts = i
        while attempts > 0:
            try:
                driver.get(start_url)
                driver.maximize_window()
                loaded = True
                attempts = 0
            except TimeoutException:
                attempts = attempts - 1
        if not loaded:
            print(f"file_config: {start_url} could not be loaded after {i} attempt(s)")
    except BaseException:
        # Do not leave an orphaned Chrome process behind when start-up fails.
        driver.quit()
        raise
    return driver

# Open the "for rent" listings page, then create the explicit-wait helper
# (30 second timeout) that the following cells use to wait for page elements.
listing_url = r"https://annonces-immobilieres-cote-ivoire.com/annonces-immobilieres/?status=a-louer"
driver = file_config(start_url=listing_url)
wait = WebDriverWait(driver, 30)

Error sending stats to Plausible: error sending request for url (https://plausible.io/api/event): operation timed out


In [65]:
# Accumulator for the scraped listings: scrape_data() appends one dict per
# listing to this list. It is rebound to a pandas DataFrame at the end of the
# pagination cell, so re-run this cell before starting a new scrape.
data = []

In [66]:
def _text_or_none(node, selector, drop_tabs=False, flatten=False):
    """Return the stripped text of the first match of ``selector`` under ``node``.

    Returns None when ``node`` is None or nothing matches. ``drop_tabs``
    removes tab characters and ``flatten`` turns newlines into spaces.
    """
    if node is None:
        return None
    match = node.select_one(selector)
    if match is None:
        return None
    text = match.text.strip()
    if drop_tabs:
        text = text.replace('\t', '')
    if flatten:
        text = text.replace('\n', ' ')
    return text


def _guess_property_type(title):
    """Infer the property type from keywords in the title (None if no keyword matches)."""
    if not title:
        return None
    lowered = title.lower()
    for keyword in ("villa", "appartement", "studio"):
        if keyword in lowered:
            return keyword
    return None


def _load_listing(link, attempts=3):
    """Load ``link`` in the shared driver; return False if every attempt timed out."""
    for _ in range(attempts):
        try:
            driver.get(link)
            driver.refresh()
            return True
        except TimeoutException:
            continue
    return False


def scrape_data(cards):
    """Visit the listing behind each card and append its details to ``data``.

    Args:
        cards: Selenium elements from the results page, each containing an
            ``<a>`` whose ``href`` points at the listing page.

    Returns:
        The module-level ``data`` list, extended with one dict per listing
        that could be loaded and parsed.

    Side effects:
        Navigates the module-level ``driver`` away from the results page and
        prints the last record and the running total after each listing.

    Listings are skipped (nothing is appended) when the page keeps timing
    out, when it has no ``#wrapper-content`` element, or when parsing raises.
    Fields that cannot be found are stored as None.
    """
    # Read every href up front: the card elements belong to the results page
    # and the driver leaves that page as soon as the first listing is opened.
    links = [card.find_element(By.CSS_SELECTOR, 'a').get_attribute('href') for card in cards]

    # Shared selector for the area / bedrooms / bathrooms values in the info header.
    info_selector = (
        '#primary-content > div > div.single-property-element.property-info-header.property-info-action'
        ' > div.property-info > div.property-{} > div > p.property-info-value'
    )

    for link in links:
        if not _load_listing(link):
            # Without a fresh page the driver still shows the previous listing;
            # parsing it would store that listing again under this link.
            print(f"scrape_data: page load timed out, skipping {link}")
            continue

        try:
            # BeautifulSoup parses the rendered page to extract the listing details
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            details = soup.select_one('#wrapper-content')
            if details is None:
                # select_one returns None (it does not raise) when nothing matches.
                continue

            # Header: title and price
            title = _text_or_none(details, "section > div.container > div > div.page-title-main-info > h1")
            price = _text_or_none(
                details,
                "section > div.container > div > div.property-info > div.property-price > span",
                drop_tabs=True,
                flatten=True,
            )

            # No location is extracted from the page; the column is kept in the record as None.
            localisation = None
            immo_type = _guess_property_type(title)

            # Info header: surface, bedrooms, bathrooms
            superficie = _text_or_none(details, info_selector.format('area'), drop_tabs=True, flatten=True)
            nb_pieces = _text_or_none(details, info_selector.format('bedrooms'))
            nb_salle_de_bain = _text_or_none(details, info_selector.format('bathrooms'))

            # Description: free text followed by the overview list; keep
            # whichever part exists instead of dropping both when one is missing.
            description_parts = [
                part
                for part in (
                    _text_or_none(details, 'div.single-property-element.property-description', flatten=True),
                    _text_or_none(details, '#ere-overview > ul', flatten=True),
                )
                if part is not None
            ]
            description = "".join(description_parts) if description_parts else None

            annonceur = _text_or_none(
                soup,
                'div.single-property-element.property-contact-agent > div.ere-property-element > div.agent-info.row > div.agent-content.col-md-6.col-sm-12.col-xs-12 > div.agent-heading > h4 > a',
                flatten=True,
            )
            current_datetime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

            data.append({
                'title': title,
                'price': price,
                'localisation': localisation,
                'superficie': superficie,
                "type d'immobilier": immo_type,
                'nb_pieces': nb_pieces,
                "nb_salle_de_bain": nb_salle_de_bain,
                'scraping_date': current_datetime,
                "annonceur": annonceur,
                "link": link,
                'description': description
            })
        except Exception as exc:
            # Best effort: one broken listing must not stop the crawl, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            print(f"scrape_data: skipping {link}: {exc!r}")
            continue
        print(data[-1])
        print(len(data))
    return data


# Page counter: starts at 1 and is incremented by the pagination loop each
# time a results page has been scraped.
i = 1


In [67]:
import time

# CSS selectors for the results container, the cards inside it and its pagination bar
LISTING_SELECTOR = '#content > div > div > div.ere-property.clearfix.property-list.col-gap-30.list-1-column.columns-2.columns-md-2.columns-sm-2.columns-xs-1.columns-mb-1'
CARD_SELECTOR = 'div > div > div.property-item-content > div.property-heading > h2'
PAGINATION_SELECTOR = LISTING_SELECTOR + ' > div.paging-navigation.clearfix'
# Stop paginating once more than this many listings have been collected
MAX_RECORDS = 3000

# Pagination targets already followed; guards against looping back to an earlier page
visited_pages = set()

try:
    while len(data) <= MAX_RECORDS:
        # Wait until the results container is visible, then collect its cards
        cards_presence = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, LISTING_SELECTOR)))
        cards = cards_presence.find_elements(By.CSS_SELECTOR, CARD_SELECTOR)

        # The next-page URL must be read BEFORE scraping: scrape_data() drives
        # the browser away from the results page.
        time.sleep(10)
        try:
            next_link_presence = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, PAGINATION_SELECTOR)))
            next_link = next_link_presence.find_elements(By.CSS_SELECTOR, 'a')
            start_url = next_link[-1].get_attribute('href')
        except (TimeoutException, IndexError):
            # No pagination bar, or no link inside it: this is the last page.
            start_url = None

        # Scrape exactly once per results page. Errors raised here propagate
        # instead of being mistaken for "no next page" and triggering a second
        # scrape of the same cards.
        scrape_data(cards)
        print(len(data))
        i += 1

        if not start_url or start_url in visited_pages:
            break
        visited_pages.add(start_url)
        driver.get(start_url)
finally:
    # Runs on success and on failure: keep whatever was collected as a
    # DataFrame (the export cell relies on it) and release the browser.
    data = pd.DataFrame(data)
    driver.quit()

print("fin du scraping")
data


{'title': 'Magnifique Villa en location meublée – Deux plateaux Vallons – 600 000FCFA par jour', 'price': '600.000 Fcfa / jour', 'localisation': None, 'superficie': '1.000m2', "type d'immobilier": 'villa', 'nb_pieces': '6', 'nb_salle_de_bain': '6', 'scraping_date': '2024-06-05 16:30:52', 'annonceur': None, 'link': 'https://annonces-immobilieres-cote-ivoire.com/annonces-immobilieres/magnifique-villa-en-location-meublee-deux-plateaux-vallons-600-000fcfa-par-jour/', 'description': "Description   Magnifique Villa en location meublée – Deux plateaux Vallons – 600 000 FCFA par jour. La magnifique VILLA LeMan, située dans le prestigieux quartier des Deux-Plateaux-Vallons à Cocody, vous reçoit pour vos séjours de rêve sur ABIDJAN, en Côte d’Ivoire. Dans ce cadre exceptionnel, paisible et sécurisé, la Villa LeMan vous offre toutes les commodités. La villa se trouve à 5 mn de la Rue des Jardins, à proximité de nombreux commerces, banques et restaurants. Un jardin arboré et fleuri, une piscine av

WebDriverException: Message: disconnected: not connected to DevTools
  (failed to check if window was closed: disconnected: not connected to DevTools)
  (Session info: chrome=125.0.6422.141)
Stacktrace:
	GetHandleVerifier [0x00007FF7DC991F52+60322]
	(No symbol) [0x00007FF7DC90CEC9]
	(No symbol) [0x00007FF7DC7C7EBA]
	(No symbol) [0x00007FF7DC7AF1CC]
	(No symbol) [0x00007FF7DC7AF090]
	(No symbol) [0x00007FF7DC7CA4E1]
	(No symbol) [0x00007FF7DC85B359]
	(No symbol) [0x00007FF7DC83BFC3]
	(No symbol) [0x00007FF7DC809617]
	(No symbol) [0x00007FF7DC80A211]
	GetHandleVerifier [0x00007FF7DCCA94AD+3301629]
	GetHandleVerifier [0x00007FF7DCCF36D3+3605283]
	GetHandleVerifier [0x00007FF7DCCE9450+3563680]
	GetHandleVerifier [0x00007FF7DCA44326+790390]
	(No symbol) [0x00007FF7DC91750F]
	(No symbol) [0x00007FF7DC913404]
	(No symbol) [0x00007FF7DC913592]
	(No symbol) [0x00007FF7DC902F9F]
	BaseThreadInitThunk [0x00007FF93BDB257D+29]
	RtlUserThreadStart [0x00007FF93BFEAA58+40]


In [None]:

import locale
import platform

# French month names, used when the fr_FR.UTF-8 locale is not installed on
# the machine so the output folder is still named in French.
FRENCH_MONTHS = (
    "janvier", "février", "mars", "avril", "mai", "juin",
    "juillet", "août", "septembre", "octobre", "novembre", "décembre",
)

# Current date and time
current_datetime = datetime.now()
formatted_date = current_datetime.strftime("%Y-%m-%d %H:%M:%S")
formatted_day = current_datetime.strftime("%d")

try:
    # Switch date formatting to French so %B yields the French month name
    locale.setlocale(locale.LC_TIME, 'fr_FR.UTF-8')
    formatted_month = current_datetime.strftime("%B")
except locale.Error:
    # Locale unavailable: do not abort the export after a long scrape.
    formatted_month = FRENCH_MONTHS[current_datetime.month - 1]

# Same destination on both platforms; only the root and the separator differ.
if platform.system() == 'Windows':
    datasets_root = 'D:\\Bureau\\MemoiresStages\\Travaux_techniques\\Scrapping\\Datasets'
    path_separator = '\\'
else:
    datasets_root = '/mnt/d/Bureau/MemoiresStages/Travaux_techniques/Scrapping/Datasets'
    path_separator = '/'

dynamic_path = path_separator.join([
    datasets_root,
    formatted_month,
    f'{formatted_day}_{formatted_month}',
    f'AnnoncesImmobilieresCI_{formatted_day}_{formatted_month}.csv',
])

os.makedirs(os.path.dirname(dynamic_path), exist_ok=True)

# `data` is a DataFrame after a complete scrape but may still be the raw list
# of dicts if the scraping cell was interrupted; pd.DataFrame accepts both.
pd.DataFrame(data).to_csv(dynamic_path, index=False)