In [51]:
import bs4
import pandas as pd
import csv
import re

from urllib import request

import time
import requests
from lxml import html

### Scraping du site Century21

In [52]:
# First results page of the Century21 listing search used as the scrape entry point.
base_url = "https://www.century21.fr/annonces/location-appartement/v-paris/"
# Initial value only: `url` is reassigned while following the pagination links.
url = "https://www.century21.fr/annonces/location-appartement/v-paris/page-2/"
# Use the response as a context manager so the HTTP connection is released
# as soon as the body has been read.
with request.urlopen(base_url) as response:
    request_text = response.read()
page = bs4.BeautifulSoup(request_text, "lxml")

In [53]:
def replace_comma_with_dot_before_m2(text):
    """Turn the decimal comma of the first "<digits>,<digits> m2" into a dot.

    Only the first occurrence is rewritten; a comma that is not directly
    surrounded by digits and followed by "m2" is left untouched.

    Args:
        text: String to rewrite.

    Returns:
        The rewritten string, or `text` unchanged when nothing matches.
    """
    # `count` is passed by keyword: the positional form is deprecated in Python 3.13.
    return re.sub(r'(\d+),(\d+\s*m2)', r'\1.\2', text, count=1)

def delete_m2(text):
    """Strip the first " m2" unit that follows a number.

    Args:
        text: String such as "45.5 m2".

    Returns:
        The string without the unit (e.g. "45.5"), or `text` unchanged when
        no "<digits> m2" sequence is present.
    """
    # `count` is passed by keyword: the positional form is deprecated in Python 3.13.
    return re.sub(r'(\d+\s*) m2', r'\1', text, count=1)

def delete_pieces(text):
    """Keep only the number from the first " <digits> pièce..." fragment.

    The leading space, the word "pièce" and everything after it on the same
    line are removed.

    Args:
        text: String such as " 2 pièces".

    Returns:
        The bare number (e.g. "2"), or `text` unchanged when nothing matches.
    """
    # `count` is passed by keyword: the positional form is deprecated in Python 3.13.
    return re.sub(r' (\d+\s*) pièce.*', r'\1', text, count=1)

def delete_euro(text):
    """Remove the first euro sign from `text`.

    Args:
        text: String such as "1250€".

    Returns:
        `text` without its first "€"; unchanged when there is none.
    """
    # A literal single-character removal needs no regular expression.
    return text.replace('€', '', 1)


def get_info(annonces):
    """Extract the CSV fields of each listing card.

    Args:
        annonces: Iterable of BeautifulSoup tags, one per listing card.

    Returns:
        A list with one 5-item list of strings per usable card, in the order
        of the CSV header used in this notebook:
        Ville, Arrondissement, Surface (m2), Pièce(s), Prix (€).
        Cards whose heading or price node is missing, or whose heading text
        does not have the expected layout, are skipped instead of aborting
        the whole scrape.
    """
    infos = []
    for annonce in annonces:
        info = annonce.find(class_="c-text-theme-heading-4 tw-text-c21-grey-darker tw-font-semibold")
        prix = annonce.find(class_="c-text-theme-heading-1 is-constant-size-on-mobile tw-mt-2 tablet-landscape:tw-mt-0 tw-whitespace-nowrap")
        if info is None or prix is None:
            # find() returns None when the class is absent from this card.
            continue

        line = info.get_text().strip().split("\n")
        if len(line) < 6:
            # Fields are read from lines 0, 2 and 5 of the heading text.
            continue

        # Fix the decimal comma first so that the remaining comma is the one
        # separating the surface from the number of rooms.
        surface_pieces = replace_comma_with_dot_before_m2(line[5].strip()).split(",")
        if len(surface_pieces) < 2:
            continue

        # Only the first line of the price block is kept; spaces used as
        # thousands separators are dropped before removing the currency sign.
        prix_text = prix.get_text().strip().split("\n")[0].replace(" ", "")

        infos.append([
            line[0].strip(),
            line[2].strip(),
            delete_m2(surface_pieces[0]),
            delete_pieces(surface_pieces[1]),
            delete_euro(prix_text),
        ])
    return infos


# Scrape the page already loaded, then follow the "next" pagination link
# until a page no longer offers one.
annonces = page.find_all(class_="js-the-list-of-properties-list-property")
data = get_info(annonces)
while True:
    a_tags = page.find_all(class_="c-the-pagination-bar__item tw-flex tw-items-center tw-justify-center tw-w-10 tw-h-10 tw-bg-c21-gold tw-text-white tw-transition-colors tw-duration-200")

    # Stop as soon as there is no usable "next" link. This covers the last
    # page (only "prev" is present) and also a result set with no pagination
    # arrows at all, which would otherwise re-append the same page forever.
    next_href = next(
        (a_tag.get('href') for a_tag in a_tags if a_tag.get('aria-label') == "next"),
        None,
    )
    if not next_href:
        break

    url = "https://www.century21.fr" + next_href
    # Close each HTTP response once its body has been read.
    with request.urlopen(url) as response:
        request_text = response.read()
    page = bs4.BeautifulSoup(request_text, "lxml")

    annonces = page.find_all(class_="js-the-list-of-properties-list-property")
    data += get_info(annonces)

In [54]:
filename = 'century21Paris.csv'
labels = ['Ville', 'Arrondissement', 'Surface (m2)', 'Pièce(s)', 'Prix (€)']
# Create the CSV file and write into it. The header is written on its own
# rather than inserted into `data`, so re-running this cell does not add a
# second header row.
with open(filename, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(labels)

    # Write the first five fields of every scraped row.
    for line in data:
        writer.writerow(line[:5])

print(f'Fichier "{filename}" créé avec succès.')

Fichier "century21Paris.csv" créé avec succès.


### Scraping d'immo-data (statistiques de ventes)

In [67]:
def get_ventes_liste(request_text, arrondissement, verbose=False, empty_elements=False):
    """Parse an immo-data results page into a list of sale records.

    Args:
        request_text: HTML of the page, parsed with BeautifulSoup and lxml.
        arrondissement: String appended to "750" to build the third field
            (the caller in this notebook passes a zero-padded number, "01").
        verbose: When true, print the raw features while parsing.
        empty_elements: When true, append an empty list for every sale whose
            markup is incomplete instead of dropping it.

    Returns:
        A list of 9-item lists of strings:
        [address, city, "750" + arrondissement, type, price, monthly price,
        rooms, surface, sale date].
        Price, monthly price and surface are reduced to their digits.
    """
    ventes_liste = []
    page = bs4.BeautifulSoup(request_text, "lxml")

    for v in page.find_all(class_="border-b-gray-100"):
        if verbose:
            print("----------------------------------------------")

        # Every lookup lives inside the try so that a missing node or a short
        # list marks the sale as incomplete instead of aborting the page.
        try:
            # Main features: the address line and a run of <span> values.
            features_list1 = v.find(class_="gap-8")
            adresse_complete = features_list1.find("p").text
            spans = features_list1.find_all("span")
            if verbose:
                print(adresse_complete)
                print([feature.text for feature in spans])

            # Features shown in the "boxes".
            features_list3 = []
            for child in v.find(class_="gap-5").children:
                if verbose:
                    print("--------")
                # Bare text nodes between tags expose no .children; skip them.
                for grandchild in getattr(child, "children", ()):
                    if verbose:
                        print(grandchild.text)
                    features_list3.append(grandchild.text)

            # Collect everything as a flat list.
            if verbose:
                print("--------")
            full_adresse = adresse_complete.split(" - ")
            vente_element = [
                full_adresse[0],                        # Address
                full_adresse[1],                        # City
                "750" + arrondissement,                 # Arrondissement
                spans[0].text,                          # Type (flat, house)
                re.sub(r'\D', '', spans[1].text),       # Price
                # Exported under the "Prix mensuel (€)" header.
                # NOTE(review): confirm what this span actually holds.
                re.sub(r'\D', '', spans[2].text),
                features_list3[1],                      # Number of rooms
                re.sub(r'\D', '', features_list3[3]),   # Surface in m²
                features_list3[5],                      # Sale date
            ]
        except (AttributeError, IndexError):
            # Not all the information is available for this sale.
            if empty_elements:
                ventes_liste.append([])
            continue

        if verbose:
            print("--> ", vente_element)
        ventes_liste.append(vente_element)

    return ventes_liste

In [73]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Search URLs, one per arrondissement, in order. Every URL shares the same
# filters and differs only in the `center` and `zoom` parameters.
_IMMO_DATA_SEARCH_URL = (
    "https://www.immo-data.fr/explorateur/transaction/recherche"
    "?minprice=0&maxprice=25000000"
    "&minpricesquaremeter=0&maxpricesquaremeter=40000"
    "&propertytypes=0%2C1%2C2%2C4%2C5"
    "&minmonthyear=Janvier%202014&maxmonthyear=Juin%202023"
    "&nbrooms=1%2C2%2C3%2C4%2C5"
    "&minsurface=0&maxsurface=400"
    "&minsurfaceland=0&maxsurfaceland=50000"
    "&center={x}%3B{y}&zoom={zoom}"
)

# (first `center` value, second `center` value, zoom), kept as strings so the
# generated URLs match the hand-written ones character for character.
# Presumably longitude and latitude of the map centre; verify if reused.
_MAP_VIEWS = [
    ("2.334625", "48.864198", "17.499300294315052"),
    ("2.342451", "48.86857", "17.119966283918643"),
    ("2.360895", "48.86467", "17.84486019752125"),
    ("2.356049", "48.855975", "17.386300255349592"),
    ("2.345911", "48.845679", "16.887166338911186"),
    ("2.333295", "48.850064", "17.062166414429374"),
    ("2.319132", "48.85654", "17.203135974751085"),
    ("2.31728", "48.877229", "17.007511233556734"),
    ("2.339573", "48.876424", "17.661253077568162"),
    ("2.359902", "48.876323", "16.57562406411128"),
    ("2.379884", "48.858611", "17.50790010548914"),
    ("2.397937", "48.839398", "17.795507159821344"),
    ("2.356674", "48.832128", "16.744685743179307"),
    ("2.326572", "48.832711", "17.203192923611237"),
    ("2.299972", "48.840821", "17.776689723942038"),
    ("2.27507", "48.863143", "17.1876410650208"),
    ("2.322049", "48.88398", "17.80945163828482"),
    ("2.346961", "48.889737", "17.59322739015418"),
    ("2.384186", "48.888629", "17.14746056293807"),
    ("2.398373", "48.864875", "17.329322339430156"),
]

arrondissements = [
    _IMMO_DATA_SEARCH_URL.format(x=x, y=y, zoom=zoom)
    for x, y, zoom in _MAP_VIEWS
]

# Fixed pause after each navigation before the page source is read
# (presumably to let the page finish rendering its results).
PAGE_LOAD_DELAY_S = 10

# Chrome options
chrome_options = Options()
chrome_options.add_argument("--start-fullscreen")
driver = webdriver.Chrome(options=chrome_options)
data = []
try:
    # The 1-based position of each URL is its arrondissement number.
    for i, arrondissement in enumerate(arrondissements, start=1):
        driver.get(arrondissement)

        time.sleep(PAGE_LOAD_DELAY_S)

        page_html = driver.page_source
        arr = f"{i:02d}"  # zero-padded arrondissement number stored in the CSV
        data += get_ventes_liste(page_html, arr, verbose=False, empty_elements=False)
finally:
    # Always shut the browser down, even when a page fails to load or parse.
    driver.quit()

In [74]:
filename = 'immodataParis.csv'
labels = ['Adresse', 'Ville', 'Arrondissement', 'Type', 'Prix (€)', 'Prix mensuel (€)', 'Pièce(s)', 'Surface (m2)', 'Date de vente']
# Create the CSV file and write into it. The header is written on its own
# rather than inserted into `data`, so re-running this cell does not add a
# second header row.
with open(filename, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(labels)

    # Write every scraped sale record.
    writer.writerows(data)

print(f'Fichier "{filename}" créé avec succès.')


Fichier "immodataParis.csv" créé avec succès.
