## Packages and Libraries

In [15]:
import os
import ast
import json
import re
from typing import List

import pandas as pd
import numpy as np
from src.module.processing import preprocessing as process

## Data Import

In [9]:
# Raw technical-details CSV (read below with pandas' default separator).
path_data_raw = "../data/raw/liste_fiches_technical_details.csv"
# Cleaned counterpart of the same dataset (read later with sep=";").
path_data_cleaning = "../data/cleaning/liste_fiches_technical_details_cleaning.csv"

In [None]:
# Load the raw CSV using pandas' default parsing options.
df_raw = pd.read_csv(path_data_raw)

In [None]:
# Work on a copy so that df_raw keeps the data exactly as loaded.
df = df_raw.copy()

In [None]:
# Preview the first 10 rows of the working frame.
df.head(10)

In [None]:
# Restrict the frame to these 16 columns, in this order (a missing label raises KeyError).
df = df[['Marque', 'Modele', 'Annee', 'Vehicule', 'Prix', 'Date Publication',
         'Resumer', 'Dimensions', 'Weight', 'Habitability', 'Tires', 'Engine',
         'Transmission', 'Performance', 'Consumption', 'Immatriculation']]

In [None]:
# The 9 specification columns, i.e. every selected column except the identification,
# price, publication-date and 'Immatriculation' ones.
# NOTE(review): this list is not referenced again in the visible notebook cells.
cols_to_explode = ['Resumer', 'Dimensions', 'Weight', 'Habitability', 'Tires', 'Engine', 'Transmission', 'Performance', 'Consumption']

In [None]:
# Display the column labels of the filtered frame.
df.columns

## Data Preprocessing

In [None]:
# Apply the cleaning steps while keeping the same set of columns.
# All helpers come from the project module imported as `process`; their internals
# are not visible in this notebook.

# Frame-level steps: each helper receives the whole DataFrame through .pipe().
cleaned = df.pipe(process.clean_prix_column)
cleaned = cleaned.pipe(process.convert_to_date)
cleaned = cleaned.pipe(process.convert_column_to_int, 'Annee')

# Column-level steps: each cleaner is applied element-wise to one column,
# one column at a time, in this exact order.
# NOTE(review): 'Transmission' has no cleaner in this list - confirm that is intentional.
column_cleaners = [
    ('Consumption', process.clean_consumption),
    ('Performance', process.clean_performance),
    ('Habitability', process.clean_habitability),
    ('Dimensions', process.clean_dimensions),
    ('Weight', process.clean_weight),
    ('Engine', process.clean_engine),
    ('Resumer', process.clean_resumer),
    ('Tires', process.clean_tires),
]
for column_name, cleaner in column_cleaners:
    cleaned = cleaned.assign(**{column_name: cleaned[column_name].apply(cleaner)})

# Rebind `df` only once every step has succeeded.
df = cleaned

## Combining the Cleaned Data

In [12]:
# Load the cleaned dataset; unlike the raw file, this one is parsed with ';' as separator.
# NOTE(review): no visible cell writes `df` to path_data_cleaning - presumably the file
# was produced elsewhere; confirm.
df_clean = pd.read_csv(path_data_cleaning, sep=";")
# Preview the first 10 rows.
df_clean.head(10)

Unnamed: 0,Marque,Modele,Annee,Vehicule,Prix,Date Publication,Resumer,Dimensions,Weight,Habitability,Tires,Engine,Transmission,Performance,Consumption,Immatriculation
0,Ineos,Grenadier,2024,Ineos Grenadier 3.0 T 286ch Fieldmaster Edition,82490.0,2022-04-01,"{'energie': 'Essence', 'puissance_commerciale'...","{'longueur': 4.9, 'largeur': 1.93, 'hauteur': ...","{'poids_a_vide': 2669, 'ptac': 3500, 'ptra': 7...","{'nombre_de_places': 5, 'volume_de_coffre': 11...","{'types_de_pneumatiques': '4x4', 'materiau_des...","{'Nom_du_moteur': 3.0, 'Energie': 'Essence', '...",{'Boite_de_vitesses': 'Automatique 8 rapports'...,"{'Vitesse_maximale': 160, '0_a_100_km/h': 8.6,...","{'Mixte': 14.4, 'Emission_de_CO2': 325, 'Immat...",7e546927-d3e3-477f-8971-b0cd70187264
1,Mercedes-Benz,Classe C,2004,Mercedes-Benz Classe C II (W203) 240 V6 Elegance,36865.0,2004-04-01,"{'energie': 'Essence', 'puissance_commerciale'...","{'longueur': 4.52, 'largeur': 1.72, 'hauteur':...","{'poids_a_vide': 1535, 'ptac': 2015, 'ptra': 3...","{'nombre_de_places': 5, 'volume_de_coffre': 46...","{'types_de_pneumatiques': 'Classique', 'taille...","{'Nom_du_moteur': '240V6', 'Energie': 'Essence...","{'Boite_de_vitesses': 'Mécanique 6 rapports', ...","{'Vitesse_maximale': 235, '0_a_100_km/h': 9.2,...","{'Cycle_urbain': 16.0, 'Extra_urbain': 7.8, 'M...",fb2f74bc-f6e0-4624-967c-78d9466accfa
2,Jaguar,S-Type,2005,Jaguar S-Type 2.7D Bi-turbo,41700.0,2004-06-01,"{'energie': 'Diesel', 'puissance_commerciale':...","{'longueur': 4.91, 'largeur': 1.82, 'hauteur':...","{'poids_a_vide': 1722, 'ptac': 2255, 'ptra': 4...","{'nombre_de_places': 5, 'volume_de_coffre': 40...","{'types_de_pneumatiques': 'Classique', 'materi...","{'Nom_du_moteur': '2.7DV6207', 'Energie': 'Die...","{'Boite_de_vitesses': 'Mécanique 6 rapports', ...","{'Vitesse_maximale': 230, '0_a_100_km/h': 8.5,...","{'Cycle_urbain': 9.7, 'Extra_urbain': 5.7, 'Mi...",cab0aab1-ba72-4778-b7f4-d1efaa37d5a9
3,Bmw,Série 5,2003,BMW Série 5 IV (E60) 530iA 231ch Premiere,44000.0,2003-07-01,"{'energie': 'Essence', 'puissance_commerciale'...","{'longueur': 4.84, 'largeur': 1.85, 'hauteur':...","{'poids_a_vide': 1580, 'ptac': 2065, 'ptra': 4...","{'nombre_de_places': 5, 'volume_de_coffre': 52...","{'types_de_pneumatiques': 'Classique', 'taille...","{'Nom_du_moteur': '3.0i', 'Energie': 'Essence'...",{'Boite_de_vitesses': 'Automatique 6 rapports'...,"{'Vitesse_maximale': 245, '0_a_100_km/h': 7.1,...","{'Cycle_urbain': 14.2, 'Extra_urbain': 7.5, 'M...",0285f39e-16ab-410d-939c-37de56206408
4,Opel,Combo,2006,Opel Combo Tour 1.7 CDTI100 Arizona,18600.0,2005-07-01,"{'energie': 'Diesel', 'puissance_commerciale':...","{'longueur': 4.32, 'largeur': 1.68, 'hauteur':...","{'poids_a_vide': 1290, 'ptac': 1855, 'ptra': 2...","{'nombre_de_places': 5, 'volume_de_coffre': 45...","{'types_de_pneumatiques': 'Classique', 'materi...","{'Nom_du_moteur': '1.7CDI100', 'Energie': 'Die...","{'Boite_de_vitesses': 'Mécanique 5 rapports', ...","{'Vitesse_maximale': 170, '0_a_100_km/h': 12.5...","{'Cycle_urbain': 6.6, 'Extra_urbain': 4.4, 'Mi...",3ea13155-3031-4ade-836f-c72c593b671c
5,Lexus,IS,2010,Lexus IS II 200d F-Sport,36600.0,2010-08-01,"{'energie': 'Diesel', 'puissance_commerciale':...","{'longueur': 4.59, 'largeur': 1.8, 'hauteur': ...","{'poids_a_vide': 1540, 'ptac': 2075, 'ptra': 3...","{'nombre_de_places': 5, 'volume_de_coffre': 39...","{'types_de_pneumatiques': 'Classique', 'materi...","{'Nom_du_moteur': '2.2D', 'Energie': 'Diesel',...","{'Boite_de_vitesses': 'Mécanique 6 rapports', ...","{'Vitesse_maximale': 205, '0_a_100_km/h': 10.2...","{'Cycle_urbain': 6.3, 'Extra_urbain': 4.4, 'Mi...",0937655d-3cb1-4311-a657-c8fd974daa63
6,Renault,Scenic,2012,Renault Scenic III (J95) 1.5 dCi 110ch Energy ...,28300.0,2012-09-01,"{'energie': 'Diesel', 'puissance_commerciale':...","{'longueur': 4.37, 'largeur': 1.85, 'hauteur':...","{'poids_a_vide': 1385, 'ptac': 1944, 'ptra': 3...","{'nombre_de_places': 5, 'volume_de_coffre': 47...","{'types_de_pneumatiques': 'Classique', 'materi...","{'Nom_du_moteur': '1.5dCi110', 'Energie': 'Die...","{'Boite_de_vitesses': 'Mécanique 6 rapports', ...","{'Vitesse_maximale': 180, '0_a_100_km/h': 12.5...","{'Cycle_urbain': 4.5, 'Extra_urbain': 3.9, 'Mi...",31e0569c-600e-46cb-8f91-45c525e89821
7,Peugeot,307,2003,Peugeot 307 2.0 HDi110 XS Pack 3p,23200.0,2001-04-01,"{'energie': 'Diesel', 'puissance_commerciale':...","{'longueur': 4.2, 'largeur': 1.75, 'hauteur': ...","{'poids_a_vide': 1279, 'ptac': 1779, 'ptra': 3...","{'nombre_de_places': 5, 'volume_de_coffre': 34...","{'types_de_pneumatiques': 'Classique', 'taille...","{'Nom_du_moteur': '2.0HDi110', 'Energie': 'Die...","{'Boite_de_vitesses': 'Mécanique 5 rapports', ...","{'Vitesse_maximale': 188, '0_a_100_km/h': 11.8...","{'Cycle_urbain': 7.0, 'Extra_urbain': 4.2, 'Mi...",0e6a4256-8917-42b3-bfdf-84c419f3f429
8,motos-cyclos,Supermoto,2007,KTM Supermoto II Supermoto 690 Prestige,8808.0,2006-10-01,"{'energie': 'Essence', 'puissance_commerciale'...",{'Immatriculation': '105a5377-e006-4fc2-85f3-2...,{'Immatriculation': '105a5377-e006-4fc2-85f3-2...,{'Immatriculation': '105a5377-e006-4fc2-85f3-2...,{'Immatriculation': '105a5377-e006-4fc2-85f3-2...,"{'Nom_du_moteur': 690.0, 'Energie': 'Essence',...",{'Boite_de_vitesses': 'Manuelle'},{'Immatriculation': '105a5377-e006-4fc2-85f3-2...,{'Immatriculation': '105a5377-e006-4fc2-85f3-2...,105a5377-e006-4fc2-85f3-21b4c566ca78
9,Mini,Clubman,2022,Mini Clubman II (F54) One D 116ch Knightsbridg...,38800.0,2021-09-01,"{'energie': 'Diesel', 'puissance_commerciale':...","{'longueur': 4.27, 'largeur': 1.8, 'hauteur': ...","{'poids_a_vide': 1405, 'ptac': 1930, 'charge_u...","{'nombre_de_places': 5, 'volume_de_coffre': 36...","{'types_de_pneumatiques': 'Eté', 'materiau_des...","{'Nom_du_moteur': '1.5D', 'Energie': 'Diesel',...",{'Boite_de_vitesses': 'Mécanique robotisée 7 r...,"{'Vitesse_maximale': 192, '0_a_100_km/h': 10.8...","{'Mixte': 4.8, 'Emission_de_CO2': 125, 'Immatr...",cc2a91d6-c28b-4997-b885-a71f2c6c019c


In [14]:
# Columns kept from the cleaned frame; 'Immatriculation' is the key used by the merges further down.
cols = ['Marque', 'Modele', 'Annee', 'Vehicule', 'Prix', 'Date Publication', 'Immatriculation']
# Drop every other column (the dict-like specification columns).
df_clean = df_clean[cols]
# Preview the first 5 rows of the reduced frame.
df_clean.head()

Unnamed: 0,Marque,Modele,Annee,Vehicule,Prix,Date Publication,Immatriculation
0,Ineos,Grenadier,2024,Ineos Grenadier 3.0 T 286ch Fieldmaster Edition,82490.0,2022-04-01,7e546927-d3e3-477f-8971-b0cd70187264
1,Mercedes-Benz,Classe C,2004,Mercedes-Benz Classe C II (W203) 240 V6 Elegance,36865.0,2004-04-01,fb2f74bc-f6e0-4624-967c-78d9466accfa
2,Jaguar,S-Type,2005,Jaguar S-Type 2.7D Bi-turbo,41700.0,2004-06-01,cab0aab1-ba72-4778-b7f4-d1efaa37d5a9
3,Bmw,Série 5,2003,BMW Série 5 IV (E60) 530iA 231ch Premiere,44000.0,2003-07-01,0285f39e-16ab-410d-939c-37de56206408
4,Opel,Combo,2006,Opel Combo Tour 1.7 CDTI100 Arizona,18600.0,2005-07-01,3ea13155-3031-4ade-836f-c72c593b671c


In [13]:
# Read the consumption relation table on its own, using ';' as separator.
# NOTE(review): the same file is loaded again further down through create_dfs_from_csvs,
# which uses pandas' default separator, and df_consumption is not referenced again in the
# visible cells - confirm which separator is correct and whether this cell is still needed.
df_consumption = pd.read_csv("../data/cleaning/relation/consumption_data.csv", sep=";")

Index(['Marque', 'Modele', 'Annee', 'Vehicule', 'Prix', 'Date Publication',
       'Resumer', 'Dimensions', 'Weight', 'Habitability', 'Tires', 'Engine',
       'Transmission', 'Performance', 'Consumption', 'Immatriculation'],
      dtype='object')

In [16]:
def create_dfs_from_csvs(directory_path, **read_csv_kwargs):
    """Load every ``.csv`` file found directly in a directory into DataFrames.

    Parameters
    ----------
    directory_path : str or os.PathLike
        Directory to scan. Sub-directories are not traversed.
    **read_csv_kwargs
        Optional keyword arguments forwarded unchanged to ``pandas.read_csv``
        (for example ``sep=";"``). With none given, pandas' defaults apply.

    Returns
    -------
    dict
        Maps each file name without its extension (``"engine_data.csv"`` ->
        ``"engine_data"``) to the DataFrame read from that file. Keys are
        inserted in sorted file-name order.

    Raises
    ------
    OSError
        If ``directory_path`` cannot be listed (e.g. it does not exist).
    """
    dfs = {}

    # os.listdir returns entries in arbitrary order; sorting makes the key
    # order of the result reproducible across runs and machines.
    for filename in sorted(os.listdir(directory_path)):
        # Only names ending in lowercase '.csv' are loaded.
        if not filename.endswith('.csv'):
            continue
        file_path = os.path.join(directory_path, filename)
        # Use the file name without its extension as the dictionary key.
        table_name = os.path.splitext(filename)[0]
        dfs[table_name] = pd.read_csv(file_path, **read_csv_kwargs)

    return dfs

In [17]:
# Load every CSV of the relation directory into a dict keyed by file name without extension.
dfs_dict = create_dfs_from_csvs('../data/cleaning/relation')

In [19]:
# List the names of the loaded relation tables.
dfs_dict.keys()

dict_keys(['dimension_data', 'consumption_data', 'engine_data', 'tires_data', 'resumer_data', 'performance_data', 'transmission_data', 'habitability_data', 'weigth_data'])

In [22]:
# Bind each loaded relation table to its own variable name.
# NOTE(review): the merge cell indexes dfs_dict directly, so these names are not
# used by the visible cells - confirm whether they are still needed.
dimension_data = dfs_dict['dimension_data']
engine_data = dfs_dict['engine_data']
consumption_data = dfs_dict['consumption_data']
tires_data = dfs_dict['tires_data']
resumer_data = dfs_dict['resumer_data']
performance_data = dfs_dict['performance_data']
transmission_data = dfs_dict['transmission_data']
habitability_data = dfs_dict['habitability_data']
# 'weigth_data' (sic) matches the key listed by dfs_dict.keys(), i.e. the file name on disk.
weigth_data = dfs_dict['weigth_data']

In [42]:
# Left-join every relation table onto the base frame, one after the other,
# always matching rows on the shared 'Immatriculation' column.
# The tuple order is the merge order, which also fixes the resulting column order.
relation_names = (
    'dimension_data',
    'engine_data',
    'consumption_data',
    'tires_data',
    'resumer_data',
    'performance_data',
    'transmission_data',
    'habitability_data',
    'weigth_data',  # spelling matches the key present in dfs_dict
)
for relation_name in relation_names:
    df_clean = df_clean.merge(dfs_dict[relation_name], on='Immatriculation', how='left')

In [43]:
# Preview the first 5 rows of the merged frame.
df_clean.head()

Unnamed: 0,Marque,Modele,Annee,Vehicule,Prix,Date Publication,Immatriculation,longueur,largeur,hauteur,...,Mode_de_transmission,nombre_de_places,volume_de_coffre,volume_de_coffre_utile,poids_a_vide,ptac,ptra,charge_utile,poids_tracte_freine,poids_tracte_non_freine
0,Ineos,Grenadier,2024,Ineos Grenadier 3.0 T 286ch Fieldmaster Edition,82490.0,2022-04-01,7e546927-d3e3-477f-8971-b0cd70187264,4.9,1.93,2.05,...,Transmission intégrale permanente,5.0,1152.0,2035.0,2669.0,3500.0,7000.0,831.0,3500.0,750.0
1,Mercedes-Benz,Classe C,2004,Mercedes-Benz Classe C II (W203) 240 V6 Elegance,36865.0,2004-04-01,fb2f74bc-f6e0-4624-967c-78d9466accfa,4.52,1.72,1.43,...,Propulsion,5.0,465.0,1510.0,1535.0,2015.0,3515.0,480.0,1500.0,750.0
2,Jaguar,S-Type,2005,Jaguar S-Type 2.7D Bi-turbo,41700.0,2004-06-01,cab0aab1-ba72-4778-b7f4-d1efaa37d5a9,4.91,1.82,1.45,...,Propulsion,5.0,400.0,810.0,1722.0,2255.0,4105.0,533.0,1850.0,750.0
3,Bmw,Série 5,2003,BMW Série 5 IV (E60) 530iA 231ch Premiere,44000.0,2003-07-01,0285f39e-16ab-410d-939c-37de56206408,4.84,1.85,1.47,...,Propulsion,5.0,520.0,1559.08,1580.0,2065.0,4065.0,485.0,2000.0,750.0
4,Opel,Combo,2006,Opel Combo Tour 1.7 CDTI100 Arizona,18600.0,2005-07-01,3ea13155-3031-4ade-836f-c72c593b671c,4.32,1.68,1.8,...,Traction,5.0,455.0,2700.0,1290.0,1855.0,2855.0,565.0,1000.0,702.73


In [46]:
# Persist the merged frame as CSV without the index column (pandas' default ',' separator).
# NOTE(review): to_csv does not create missing directories - the target folder must already exist.
df_clean.to_csv("../data/cleaning/models/vehicules_raws.csv", index=False)