In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from unidecode import unidecode

In [2]:
url = 'https://www.capital.fr/auto/les-20-voitures-electriques-les-plus-vendues-en-france-en-2021-1424558' 
response = requests.get(url)

data = []

if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')

    h2_element = soup.find('h2', string='Les voitures électriques les plus vendues en 2021')
    ol_element = None
    if h2_element:
        ol_element = h2_element.find_next_sibling('ol')

    if ol_element:
        for li in ol_element.find_all('li'):
            car_data = li.text.split(':')
            car_name = car_data[0].strip()
            units_sold = car_data[1].replace('.', '').replace('unités', '').strip()
            data.append([car_name, units_sold])
    else:
        print("No ordered list found on the webpage.")
else:
    print(f"Failed to retrieve the web page, status code: {response.status_code}")

carsales_2021 = pd.DataFrame(data, columns=['modele', 'ventes'])
carsales_2021['annee'] = 2021
carsales_2021_filtered = carsales_2021.head(10)
carsales_2021_filtered

Unnamed: 0,modele,ventes,annee
0,Tesla Model 3,24911,2021
1,Renault Zoé,23573,2021
2,Peugeot e-208,17858,2021
3,Dacia Spring,11386,2021
4,Fiat 500,9556,2021
5,Renault Twingo ZE,8837,2021
6,Kia e-Niro,6304,2021
7,Volkswagen ID3,5860,2021
8,Peugeot e-2008,5461,2021
9,Mini Cooper SE,5141,2021


In [3]:
url = 'https://photo.capital.fr/les-10-voitures-electriques-les-plus-vendues-en-france-en-2019-39395#-60j7f' 
response = requests.get(url)

if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')

card_contents = soup.find_all('div', class_='card-content')
card_contents = card_contents[:-2]

titles = [
    "Renault Zoe (18.817 VENTES)",
    "Tesla Model 3 (6.455 VENTES)",
    "Nissan Leaf (3.738 VENTES)",
    "BMW i3 (2.793 VENTES)",
    "Kia e-Niro (1.850 VENTES)",
    "Smart EQ Fortwo (1.790 VENTES)",
    "Hyundai Kona Electric	 (1.513 VENTES)",
    "Volkswagen eGolf (661 VENTES)",
    "Hyundai Ioniq Electric (553 VENTES)",
    "Citroen Czero (543 VENTES)"
]

car_models = []
units_sold = []

for title in titles:
    if '(' in title:
        model, sold = title.split('(')
        car_models.append(model.strip())
        units_sold.append(sold.replace(' VENTES)', '').replace('.', '').strip())  
    else:
        car_models.append(title.strip())
        units_sold.append("N/A")

carsales_2019 = pd.DataFrame({
    'modele': car_models,
    'ventes': units_sold,
    'annee':2019
})

carsales_2019

Unnamed: 0,modele,ventes,annee
0,Renault Zoe,18817,2019
1,Tesla Model 3,6455,2019
2,Nissan Leaf,3738,2019
3,BMW i3,2793,2019
4,Kia e-Niro,1850,2019
5,Smart EQ Fortwo,1790,2019
6,Hyundai Kona Electric,1513,2019
7,Volkswagen eGolf,661,2019
8,Hyundai Ioniq Electric,553,2019
9,Citroen Czero,543,2019


In [4]:
url = 'https://www.automobile-propre.com/voiture-electrique-le-top-10-des-ventes-en-france-en-2020/'
response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")

li_elements = soup.find_all("ul")[13].find_all("li")

titles = [
    "Renault Zoe : 37 409",
    "Peugeot e208 : 16 557",
    "Tesla Model 3 : 6 477",
    "Hyundai Kona : 5 156",
    "Kia e-Niro : 5 089",
    "Volkswagen ID3 : 4 187",
    "Nissan Leaf : 3 395",
    "Peugeot e-2008 : 2 933",
    "DS DS3 Crossback : 2 710",
    "Mini Electric : 2 481"
]
car_models = []
units_sold = []

for title in titles:
    if ':' in title:
        model, units = title.split(':')
        car_models.append(model.strip())
        units_sold.append(int(units.replace(' ', '')))

carsales_2020 = pd.DataFrame({'modele': car_models,'ventes': units_sold,'annee':2020})
carsales_2020

Unnamed: 0,modele,ventes,annee
0,Renault Zoe,37409,2020
1,Peugeot e208,16557,2020
2,Tesla Model 3,6477,2020
3,Hyundai Kona,5156,2020
4,Kia e-Niro,5089,2020
5,Volkswagen ID3,4187,2020
6,Nissan Leaf,3395,2020
7,Peugeot e-2008,2933,2020
8,DS DS3 Crossback,2710,2020
9,Mini Electric,2481,2020


In [6]:
url = 'https://www.numerama.com/vroom/1224116-top-2022-des-voitures-electriques-la-plus-vendue-en-france-cette-annee-est-francaise.html'
response = requests.get(url)

if response.ok:
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'class': 'has-fixed-layout'})
    headers = [header.text.strip() for header in table.find_all('th')]
    
    data = []
    for row in table.find_all('tr'):
        columns = row.find_all('td')
        if columns:
            data_row = {}
            for header, column in zip(headers, columns):
                column_text = column.text.strip().replace(' ', '')  # Removing spaces from the 'ventes' column values
                data_row[header] = column_text
            data.append(data_row)
            
    carsales2022 = pd.DataFrame(data)
    carsales2022.columns = ['modele', 'ventes']
    carsales2022['annee'] = 2022
    top_ten_carsales2022 = carsales2022.head(10)
    print(top_ten_carsales2022)

                modele ventes  annee
0         Peugeote-208  19219   2022
1          DaciaSpring  18326   2022
2          TeslaModel3  17005   2022
3  RenaultMeganeE-Tech  15580   2022
4             Fiat500e  15163   2022
5        RenaultTwingo  12655   2022
6           RenaultZoe  12180   2022
7          TeslaModelY  11892   2022
8        Peugeote-2008   7174   2022
9          HyundaiKona   6522   2022


In [7]:
#Concatenating
dframes = [carsales_2019,carsales_2020,carsales_2021_filtered,top_ten_carsales2022]
ev_constructors = pd.concat(dframes)
ev_constructors = ev_constructors.reset_index(drop=True)
ev_constructors.insert(0, 'model_id', range(1, 41))
ev_constructors


Unnamed: 0,model_id,modele,ventes,annee
0,1,Renault Zoe,18817,2019
1,2,Tesla Model 3,6455,2019
2,3,Nissan Leaf,3738,2019
3,4,BMW i3,2793,2019
4,5,Kia e-Niro,1850,2019
5,6,Smart EQ Fortwo,1790,2019
6,7,Hyundai Kona Electric,1513,2019
7,8,Volkswagen eGolf,661,2019
8,9,Hyundai Ioniq Electric,553,2019
9,10,Citroen Czero,543,2019


In [8]:
ev_constructors['modele'] = ev_constructors['modele'].apply(unidecode)

In [9]:
ev_constructors

Unnamed: 0,model_id,modele,ventes,annee
0,1,Renault Zoe,18817,2019
1,2,Tesla Model 3,6455,2019
2,3,Nissan Leaf,3738,2019
3,4,BMW i3,2793,2019
4,5,Kia e-Niro,1850,2019
5,6,Smart EQ Fortwo,1790,2019
6,7,Hyundai Kona Electric,1513,2019
7,8,Volkswagen eGolf,661,2019
8,9,Hyundai Ioniq Electric,553,2019
9,10,Citroen Czero,543,2019


In [10]:
ev_constructors.dtypes

model_id     int64
modele      object
ventes      object
annee        int64
dtype: object

In [11]:
ev_constructors['ventes'] = pd.to_numeric(ev_constructors['ventes'], errors='coerce')
ev_constructors['ventes'] = ev_constructors['ventes'].fillna(0)
ev_constructors['ventes'] = ev_constructors['ventes'].astype(int)

In [12]:
ev_constructors

Unnamed: 0,model_id,modele,ventes,annee
0,1,Renault Zoe,18817,2019
1,2,Tesla Model 3,6455,2019
2,3,Nissan Leaf,3738,2019
3,4,BMW i3,2793,2019
4,5,Kia e-Niro,1850,2019
5,6,Smart EQ Fortwo,1790,2019
6,7,Hyundai Kona Electric,1513,2019
7,8,Volkswagen eGolf,661,2019
8,9,Hyundai Ioniq Electric,553,2019
9,10,Citroen Czero,543,2019


In [13]:
ev_constructors.to_csv('top_selling_models.csv', index=False)