# Prévision des prix des voitures

### 1.4 Importation de bibliothèques

In [1]:
import os
import re

import pandas as pd
import seaborn as sns

import mlflow
import mlflow.tensorflow
from mlflow.models import infer_signature

from sklearn.preprocessing import LabelEncoder
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras.layers import Normalization, Dense, Input, ReLU, Dropout, InputLayer
from tensorflow.keras.layers import Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanAbsoluteError

import keras_tuner as kt

import matplotlib.pyplot as plt

2024-08-23 01:49:54.219005: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Suivi MLflow

In [2]:
# PostgreSQL connection settings. Each value can be overridden through an
# environment variable of the same name; the literals are only fallbacks.
# NOTE(review): credentials should not live in a committed notebook — prefer
# exporting POSTGRES_USER / POSTGRES_PASSWORD in the environment.
POSTGRES_HOST = os.environ.get("POSTGRES_HOST", "modele-pfe")
POSTGRES_DB = os.environ.get("POSTGRES_DB", "pfe-database-monitoring")
POSTGRES_PASSWORD = os.environ.get("POSTGRES_PASSWORD", "admin")
POSTGRES_USER = os.environ.get("POSTGRES_USER", "Admin123")
POSTGRES_PORT = int(os.environ.get("POSTGRES_PORT", 5432))

# f-string: without the prefix the {placeholders} were kept literally in the URI
uri = f'postgresql://{POSTGRES_USER}:{POSTGRES_PASSWORD}@{POSTGRES_HOST}:{POSTGRES_PORT}/{POSTGRES_DB}'

####  Configure MLflow

In [3]:
# Point the MLflow client at the tracking server and select the experiment
# under which the runs started in this notebook are recorded.
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("vehicle_prediction")

<Experiment: artifact_location='/mlflow/mlruns/2', creation_time=1724345172352, experiment_id='2', last_update_time=1724345172352, lifecycle_stage='active', name='vehicle_prediction', tags={}>

In [4]:
print(f"tracking URI: '{mlflow.get_tracking_uri()}'")

tracking URI: 'http://localhost:5000'


### Paramètres

In [5]:
# Dataset split proportions (train / validation / test)
TRAIN_RATIO = 0.7
VAL_RATIO = 0.1
TEST_RATIO = 0.2
# Euro -> CFA conversion rate. A decimal POINT is required: `667,96` built the tuple (667, 96).
# NOTE(review): convert_prices_to_cfa defaults to 667.66 — confirm which rate is intended.
EURO_PARITY = 667.96
# Training hyperparameters
learning_rate = 0.1
epochs = 100
batch_size = 52
# Reduction factor, presumably meant for the Hyperband tuner
factor = 3

### Définition des fonctions

In [6]:
def clean_numeric_string(value):
    """
    Convert a raw cell value to a float, discarding non-numeric characters.

    Values that are already real numbers (``int``/``float``, ``bool`` excluded) are
    converted directly, so scientific notation (``1e-05``) and negative signs
    survive. Any other value is stringified, every character other than digits
    and dots is removed, and the remainder is parsed.

    :param value: The value to clean (number, string, ``None``, ...).
    :return: The parsed float, or NaN when nothing parseable remains
             (no digits left, or malformed text such as ``"1.2.3"``).
    """
    # Numeric fast path: going through str() would mangle exponents ("1e-05" -> "105")
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        number = float(value)
        # Infinite input keeps mapping to NaN, as it did when parsed from text
        return float('nan') if abs(number) == float('inf') else number

    # Strip everything that is not a digit or a dot (this also drops a leading "-")
    cleaned_value = re.sub(r'[^\d.]+', '', str(value))

    try:
        # An empty string after cleaning means there was nothing numeric at all
        return float(cleaned_value) if cleaned_value else float('nan')
    except ValueError:
        # Several dots or a lone dot: not a parseable number
        return float('nan')

def convert_columns_to_int(df, cols):
    """
    Cast the listed columns of ``df`` to integers, in place.

    Each listed column that exists in ``df`` is cleaned cell by cell with
    ``clean_numeric_string``; missing values are then replaced by 0 and the
    column is cast to float and finally to int (fractions are truncated).
    Names absent from ``df`` are ignored.

    :param df: The DataFrame to modify (mutated in place).
    :param cols: Iterable of column names to convert.
    :return: The same DataFrame object.
    """
    for name in (candidate for candidate in cols if candidate in df.columns):
        numeric = df[name].apply(clean_numeric_string)
        df[name] = numeric
        df[name] = df[name].fillna(0).astype(float).astype(int)
    return df

def convert_prices_to_cfa(dataframe: pd.DataFrame, col_name: str = 'Prix', parity: float = 667.66):
    """
    Convert a price column to CFA by multiplying it with a conversion rate.

    The column is overwritten in place with the converted values, rounded to
    the nearest integer. If the column is absent, nothing is changed.

    :param dataframe: The DataFrame to modify (mutated in place).
    :param col_name: Name of the column to convert. Defaults to 'Prix'.
    :param parity: Conversion rate applied to every price. Defaults to 667.66.
    :return: The same DataFrame object.
    """
    if col_name not in dataframe.columns:
        return dataframe

    converted = dataframe[col_name] * parity
    dataframe[col_name] = converted.round().astype(int)
    return dataframe


def one_hot_encode(dataframe: pd.DataFrame, cols: list):
    """
    One-hot encode the given categorical columns.

    Delegates to ``pd.get_dummies`` with ``drop_first=True``, so the first
    category of every encoded column has no indicator column of its own.
    The input frame is left untouched.

    Args:
        dataframe (pd.DataFrame): Source DataFrame.
        cols (list): Names of the columns to expand into indicator columns.

    Returns:
        pd.DataFrame: A new DataFrame in which ``cols`` are replaced by their
        indicator columns.
    """
    return pd.get_dummies(dataframe, columns=cols, drop_first=True)

def label_encode(dataframe: pd.DataFrame, cols: list):
    """
    Replace categorical columns by integer codes using ``LabelEncoder``.

    Works on a copy of ``dataframe``. Every listed column that exists is cast
    to ``str`` and encoded with its own freshly fitted ``LabelEncoder``; names
    absent from the frame are skipped. The fitted encoders are not returned.

    Args:
        dataframe (pd.DataFrame): Source DataFrame (not modified).
        cols (list): Names of the columns to encode.

    Returns:
        pd.DataFrame: A copy of the input with the selected columns encoded.
    """
    result = dataframe.copy()
    for name in (candidate for candidate in cols if candidate in result.columns):
        as_text = result[name].astype(str)
        result[name] = LabelEncoder().fit_transform(as_text)
    return result

### 1.5 Chargement des données

In [7]:
# Load the raw vehicle CSV; the path is relative to the notebook's working directory
df_raw = pd.read_csv('../data/cleaning/models/vehicules_raws.csv')

In [8]:
# Work on a copy so that df_raw stays untouched by the in-place steps that follow
df = df_raw.copy()
df.head(10)

Unnamed: 0,Marque,Modele,Annee,Vehicule,Prix,Date Publication,Immatriculation,longueur,largeur,hauteur,...,Mode_de_transmission,nombre_de_places,volume_de_coffre,volume_de_coffre_utile,poids_a_vide,ptac,ptra,charge_utile,poids_tracte_freine,poids_tracte_non_freine
0,Ineos,Grenadier,2024,Ineos Grenadier 3.0 T 286ch Fieldmaster Edition,82490.0,2022-04-01,7e546927-d3e3-477f-8971-b0cd70187264,4.9,1.93,2.05,...,Transmission intégrale permanente,5.0,1152.0,2035.0,2669.0,3500.0,7000.0,831.0,3500.0,750.0
1,Mercedes-Benz,Classe C,2004,Mercedes-Benz Classe C II (W203) 240 V6 Elegance,36865.0,2004-04-01,fb2f74bc-f6e0-4624-967c-78d9466accfa,4.52,1.72,1.43,...,Propulsion,5.0,465.0,1510.0,1535.0,2015.0,3515.0,480.0,1500.0,750.0
2,Jaguar,S-Type,2005,Jaguar S-Type 2.7D Bi-turbo,41700.0,2004-06-01,cab0aab1-ba72-4778-b7f4-d1efaa37d5a9,4.91,1.82,1.45,...,Propulsion,5.0,400.0,810.0,1722.0,2255.0,4105.0,533.0,1850.0,750.0
3,Bmw,Série 5,2003,BMW Série 5 IV (E60) 530iA 231ch Premiere,44000.0,2003-07-01,0285f39e-16ab-410d-939c-37de56206408,4.84,1.85,1.47,...,Propulsion,5.0,520.0,1559.08,1580.0,2065.0,4065.0,485.0,2000.0,750.0
4,Opel,Combo,2006,Opel Combo Tour 1.7 CDTI100 Arizona,18600.0,2005-07-01,3ea13155-3031-4ade-836f-c72c593b671c,4.32,1.68,1.8,...,Traction,5.0,455.0,2700.0,1290.0,1855.0,2855.0,565.0,1000.0,702.73
5,Lexus,IS,2010,Lexus IS II 200d F-Sport,36600.0,2010-08-01,0937655d-3cb1-4311-a657-c8fd974daa63,4.59,1.8,1.44,...,Propulsion,5.0,398.0,1559.08,1540.0,2075.0,3575.0,490.0,1500.0,560.0
6,Renault,Scenic,2012,Renault Scenic III (J95) 1.5 dCi 110ch Energy ...,28300.0,2012-09-01,31e0569c-600e-46cb-8f91-45c525e89821,4.37,1.85,1.64,...,Traction,5.0,470.0,1870.0,1385.0,1944.0,3264.0,404.0,1300.0,730.0
7,Peugeot,307,2003,Peugeot 307 2.0 HDi110 XS Pack 3p,23200.0,2001-04-01,0e6a4256-8917-42b3-bfdf-84c419f3f429,4.2,1.75,1.51,...,Traction,5.0,341.0,1328.0,1279.0,1779.0,3199.0,500.0,1340.0,702.73
8,motos-cyclos,Supermoto,2007,KTM Supermoto II Supermoto 690 Prestige,8808.0,2006-10-01,105a5377-e006-4fc2-85f3-21b4c566ca78,4.54,1.81,1.62,...,Traction,4.83,492.81,1559.08,1485.33,2222.7,3906.19,618.57,1701.09,702.73
9,Mini,Clubman,2022,Mini Clubman II (F54) One D 116ch Knightsbridg...,38800.0,2021-09-01,cc2a91d6-c28b-4997-b885-a71f2c6c019c,4.27,1.8,1.44,...,Traction,5.0,360.0,1250.0,1405.0,1930.0,3906.19,525.0,1200.0,740.0


In [9]:
df.shape

(6310, 66)

### 1.6 Data Preprocessing

In [10]:
df.isnull().sum()

Marque                     0
Modele                     0
Annee                      0
Vehicule                   0
Prix                       0
                          ..
ptac                       0
ptra                       0
charge_utile               0
poids_tracte_freine        0
poids_tracte_non_freine    0
Length: 66, dtype: int64

In [11]:
# Columns to clean and cast to integers (each name listed once;
# 'poids_a_vide' used to appear twice and was converted redundantly)
cols_convert_int = [
    'Prix',
    'volume_de_coffre_utile',
    'poids_a_vide',
    'volume_de_coffre',
    'ptac',
    'ptra',
    'charge_utile',
    'poids_tracte_freine',
    'poids_tracte_non_freine',
    'nombre_de_places',
    'Vitesse_maximale',
    'Emission_de_CO2',
    'Course',
    'Alesage',
    'Puissance_reelle_maxi_kW',
    'Puissance_reelle_maxi_ch',
    'Nombre_de_soupapes',
    'Couple_maxi',
    'Au_regime_de',
    'Cylindree',
    'angle_ventral',
    'angle_dattaque',
    'angle_de_fuite',
    'garde_au_sol',
    'reservoir',
]

# convert_columns_to_int mutates and returns the same frame
df = convert_columns_to_int(df, cols_convert_int)
df.head(10)

Unnamed: 0,Marque,Modele,Annee,Vehicule,Prix,Date Publication,Immatriculation,longueur,largeur,hauteur,...,Mode_de_transmission,nombre_de_places,volume_de_coffre,volume_de_coffre_utile,poids_a_vide,ptac,ptra,charge_utile,poids_tracte_freine,poids_tracte_non_freine
0,Ineos,Grenadier,2024,Ineos Grenadier 3.0 T 286ch Fieldmaster Edition,82490,2022-04-01,7e546927-d3e3-477f-8971-b0cd70187264,4.9,1.93,2.05,...,Transmission intégrale permanente,5,1152,2035,2669,3500,7000,831,3500,750
1,Mercedes-Benz,Classe C,2004,Mercedes-Benz Classe C II (W203) 240 V6 Elegance,36865,2004-04-01,fb2f74bc-f6e0-4624-967c-78d9466accfa,4.52,1.72,1.43,...,Propulsion,5,465,1510,1535,2015,3515,480,1500,750
2,Jaguar,S-Type,2005,Jaguar S-Type 2.7D Bi-turbo,41700,2004-06-01,cab0aab1-ba72-4778-b7f4-d1efaa37d5a9,4.91,1.82,1.45,...,Propulsion,5,400,810,1722,2255,4105,533,1850,750
3,Bmw,Série 5,2003,BMW Série 5 IV (E60) 530iA 231ch Premiere,44000,2003-07-01,0285f39e-16ab-410d-939c-37de56206408,4.84,1.85,1.47,...,Propulsion,5,520,1559,1580,2065,4065,485,2000,750
4,Opel,Combo,2006,Opel Combo Tour 1.7 CDTI100 Arizona,18600,2005-07-01,3ea13155-3031-4ade-836f-c72c593b671c,4.32,1.68,1.8,...,Traction,5,455,2700,1290,1855,2855,565,1000,702
5,Lexus,IS,2010,Lexus IS II 200d F-Sport,36600,2010-08-01,0937655d-3cb1-4311-a657-c8fd974daa63,4.59,1.8,1.44,...,Propulsion,5,398,1559,1540,2075,3575,490,1500,560
6,Renault,Scenic,2012,Renault Scenic III (J95) 1.5 dCi 110ch Energy ...,28300,2012-09-01,31e0569c-600e-46cb-8f91-45c525e89821,4.37,1.85,1.64,...,Traction,5,470,1870,1385,1944,3264,404,1300,730
7,Peugeot,307,2003,Peugeot 307 2.0 HDi110 XS Pack 3p,23200,2001-04-01,0e6a4256-8917-42b3-bfdf-84c419f3f429,4.2,1.75,1.51,...,Traction,5,341,1328,1279,1779,3199,500,1340,702
8,motos-cyclos,Supermoto,2007,KTM Supermoto II Supermoto 690 Prestige,8808,2006-10-01,105a5377-e006-4fc2-85f3-21b4c566ca78,4.54,1.81,1.62,...,Traction,4,492,1559,1485,2222,3906,618,1701,702
9,Mini,Clubman,2022,Mini Clubman II (F54) One D 116ch Knightsbridg...,38800,2021-09-01,cc2a91d6-c28b-4997-b885-a71f2c6c019c,4.27,1.8,1.44,...,Traction,5,360,1250,1405,1930,3906,525,1200,740


In [12]:
# Columns removed before modelling
cols_drop = [
    'date_de_fin_de_commercialisation',
    'Date Publication',
    'Immatriculation',
    'Nom_du_moteur',
    'emission_de_co2',
    'puissance_commerciale',
    'angle_de_fuite',
    'angle_ventral',
    'angle_dattaque',
    'garde_au_sol',
    'Vehicule',
    'energie',
    'boite_de_vitesses',
]
# Raises KeyError if any listed column is missing (e.g. when the cell is run twice)
df = df.drop(columns=cols_drop)

#### Convertir le prix véhicule en CFA

In [13]:
df = convert_prices_to_cfa(df)

#### Validation et détermination de l'ancienneté du véhicule

In [14]:
# Fixed reference year used to derive the vehicle age
REFERENCE_YEAR = 2024

# Non-numeric years become NaN instead of raising
df['Annee'] = pd.to_numeric(df['Annee'], errors='coerce')
df['Age_Vehicule'] = REFERENCE_YEAR - df['Annee']
# The raw year is replaced by the derived age
df = df.drop(columns=['Annee'])

In [15]:
df.head(25)

Unnamed: 0,Marque,Modele,Prix,longueur,largeur,hauteur,empattement,reservoir,porte_a_faux_avant,porte_a_faux_arriere,...,nombre_de_places,volume_de_coffre,volume_de_coffre_utile,poids_a_vide,ptac,ptra,charge_utile,poids_tracte_freine,poids_tracte_non_freine,Age_Vehicule
0,Ineos,Grenadier,55075273,4.9,1.93,2.05,2.92,90,0.887,0.874,...,5,1152,2035,2669,3500,7000,831,3500,750,0
1,Mercedes-Benz,Classe C,24613286,4.52,1.72,1.43,2.72,62,0.88,0.96,...,5,465,1510,1535,2015,3515,480,1500,750,20
2,Jaguar,S-Type,27841422,4.91,1.82,1.45,2.91,70,0.88,0.96,...,5,400,810,1722,2255,4105,533,1850,750,19
3,Bmw,Série 5,29377040,4.84,1.85,1.47,2.89,70,0.88,0.96,...,5,520,1559,1580,2065,4065,485,2000,750,21
4,Opel,Combo,12418476,4.32,1.68,1.8,2.72,52,0.88,0.96,...,5,455,2700,1290,1855,2855,565,1000,702,18
5,Lexus,IS,24436356,4.59,1.8,1.44,2.73,65,0.82,1.035,...,5,398,1559,1540,2075,3575,490,1500,560,14
6,Renault,Scenic,18894778,4.37,1.85,1.64,2.7,60,0.885,0.757,...,5,470,1870,1385,1944,3264,404,1300,730,12
7,Peugeot,307,15489712,4.2,1.75,1.51,2.61,60,0.878,0.716,...,5,341,1328,1279,1779,3199,500,1340,702,21
8,motos-cyclos,Supermoto,5880749,4.54,1.81,1.62,2.76,59,0.88,0.96,...,4,492,1559,1485,2222,3906,618,1701,702,17
9,Mini,Clubman,25905208,4.27,1.8,1.44,2.67,48,0.801,0.795,...,5,360,1250,1405,1930,3906,525,1200,740,2


In [16]:
df.shape

(6310, 53)

#### Data Encoding |  Encodage Label

In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6310 entries, 0 to 6309
Data columns (total 53 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Marque                    6310 non-null   object 
 1   Modele                    6310 non-null   object 
 2   Prix                      6310 non-null   int64  
 3   longueur                  6310 non-null   float64
 4   largeur                   6310 non-null   float64
 5   hauteur                   6310 non-null   float64
 6   empattement               6310 non-null   float64
 7   reservoir                 6310 non-null   int64  
 8   porte_a_faux_avant        6310 non-null   float64
 9   porte_a_faux_arriere      6310 non-null   float64
 10  voies_avant               6310 non-null   float64
 11  voies_arriere             6310 non-null   float64
 12  Energie                   6310 non-null   object 
 13  Architecture              6310 non-null   object 
 14  Alimenta

In [18]:
# Categorical columns to turn into integer codes with label_encode.
# NOTE(review): integer codes give the categories an arbitrary numeric order that
# the network will treat as meaningful — confirm this is acceptable for
# high-cardinality columns such as 'Marque' and 'Modele'.
cols_to_encode = [
    'Marque', 
    'Modele',
    'Energie',
    'Architecture',
    'Alimentation',
    'Injection',
    'Norme_anti-pollution',
    'Disposition_du_moteur',
    'types_de_pneumatiques',
    'materiau_des_jantes',
    'taille_des_roues_avant',
    'taille_des_roues_arriere',
    'type_de_roues_de_secours',
    'carrosserie',
    'Boite_de_vitesses',
    'Mode_de_transmission',
]
# label_encode works on a copy, so df keeps the original string values
df_encoded = label_encode(df, cols_to_encode)
df_encoded.head()

Unnamed: 0,Marque,Modele,Prix,longueur,largeur,hauteur,empattement,reservoir,porte_a_faux_avant,porte_a_faux_arriere,...,nombre_de_places,volume_de_coffre,volume_de_coffre_utile,poids_a_vide,ptac,ptra,charge_utile,poids_tracte_freine,poids_tracte_non_freine,Age_Vehicule
0,21,398,55075273,4.9,1.93,2.05,2.92,90,0.887,0.874,...,5,1152,2035,2669,3500,7000,831,3500,750,0
1,32,204,24613286,4.52,1.72,1.43,2.72,62,0.88,0.96,...,5,465,1510,1535,2015,3515,480,1500,750,20
2,23,722,27841422,4.91,1.82,1.45,2.91,70,0.88,0.96,...,5,400,810,1722,2255,4105,533,1850,750,19
3,4,828,29377040,4.84,1.85,1.47,2.89,70,0.88,0.96,...,5,520,1559,1580,2065,4065,485,2000,750,21
4,38,225,12418476,4.32,1.68,1.8,2.72,52,0.88,0.96,...,5,455,2700,1290,1855,2855,565,1000,702,18


#### Conversion en un objet tf.Tensor de TensorFlow

In [19]:
# Convert the whole encoded frame (features and target together) to a float32 tensor.
# float32 cannot represent every integer above 2**24, so large prices are slightly
# rounded (55075273 is displayed as 5.5075272e+07 in the output).
tensor_data = tf.constant(df_encoded, dtype= tf.float32)
tensor_data[:5]

<tf.Tensor: shape=(5, 53), dtype=float32, numpy=
array([[2.1000000e+01, 3.9800000e+02, 5.5075272e+07, 4.9000001e+00,
        1.9299999e+00, 2.0500000e+00, 2.9200001e+00, 9.0000000e+01,
        8.8700002e-01, 8.7400001e-01, 1.6450000e+00, 1.6450000e+00,
        2.0000000e+00, 2.9000000e+01, 6.0000000e+00, 1.0000000e+01,
        2.9980000e+03, 4.7500000e+03, 4.5000000e+02, 2.4000000e+01,
        1.1000000e+01, 4.0000000e+00, 3.0000000e+00, 2.8600000e+02,
        2.1000000e+02, 8.2000000e+01, 9.4000000e+01, 1.4400000e+01,
        3.2500000e+02, 8.1300001e+00, 5.3000002e+00, 0.0000000e+00,
        2.0000000e+00, 2.7000000e+02, 2.7400000e+02, 6.0000000e+00,
        0.0000000e+00, 1.4400000e+01, 0.0000000e+00, 1.6000000e+02,
        8.6000004e+00, 1.2000000e+01, 1.0000000e+01, 5.0000000e+00,
        1.1520000e+03, 2.0350000e+03, 2.6690000e+03, 3.5000000e+03,
        7.0000000e+03, 8.3100000e+02, 3.5000000e+03, 7.5000000e+02,
        0.0000000e+00],
       [3.2000000e+01, 2.0400000e+02, 2.461

#### Mélanger aléatoirement les éléments d'un tensor

In [20]:
# Seed Python, NumPy and TensorFlow so the row order, and therefore the
# train/validation/test split derived from it, is identical on every run
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Shuffle the rows (first dimension) of the tensor
tensor_data = tf.random.shuffle(tensor_data, seed=SEED)
tensor_data[:5]

<tf.Tensor: shape=(5, 53), dtype=float32, numpy=
array([[4.7000000e+01, 7.0800000e+02, 1.7192244e+07, 3.4300001e+00,
        1.6200000e+00, 1.1900001e+00, 2.3599999e+00, 3.5000000e+01,
        8.8000000e-01, 9.5999998e-01, 1.5500000e+00, 1.5500000e+00,
        2.0000000e+00, 3.4000000e+01, 6.0000000e+00, 1.7000000e+01,
        6.9800000e+02, 2.5000000e+03, 1.3000000e+02, 6.0000000e+00,
        9.0000000e+00, 2.0000000e+00, 6.0000000e+00, 1.0100000e+02,
        7.4000000e+01, 6.6000000e+01, 6.7000000e+01, 5.1999998e+00,
        1.2200000e+02, 6.4000001e+00, 4.5000000e+00, 7.0000000e+00,
        2.0000000e+00, 1.1300000e+02, 1.1900000e+02, 3.0000000e+00,
        5.0000000e+00, 5.1999998e+00, 1.1000000e+01, 1.9000000e+02,
        9.8000002e+00, 9.0000000e+00, 2.0000000e+00, 2.0000000e+00,
        4.9200000e+02, 1.5590000e+03, 8.9500000e+02, 1.0300000e+03,
        3.9060000e+03, 1.3500000e+02, 1.7010000e+03, 7.0200000e+02,
        2.0000000e+01],
       [3.7000000e+01, 6.5800000e+02, 1.838

## 1.7 Data Preparation

#### Features Extraction

In [21]:
# Position of the target column, looked up by name so that it stays correct
# if columns are added, dropped or reordered upstream (currently 2)
target_index = df_encoded.columns.get_loc('Prix')

# Target: the price column; features: every other column, original order preserved
y = tensor_data[:, target_index]
X = tf.concat([tensor_data[:, :target_index], tensor_data[:, target_index + 1:]], axis=1)
print("Shape of X:", X.shape)
print("Shape of Y:", y.shape)

Shape of X: (6310, 52)
Shape of Y: (6310,)


#### Ajout de dimension supplémentaire

In [22]:
# Add a trailing axis so the target goes from shape (n,) to (n, 1),
# matching the single-unit output layer of the models built below
y = tf.expand_dims(y, axis = 1)
y.shape

TensorShape([6310, 1])

In [23]:
y[:5,:]

<tf.Tensor: shape=(5, 1), dtype=float32, numpy=
array([[17192244.],
       [18387356.],
       [26693048.],
       [32047680.],
       [13346523.]], dtype=float32)>

In [24]:
DATASET_SIZE = len(X)

## 1.8 Division des données

In [25]:
# Row indices at which the train and validation partitions end
train_end = int(DATASET_SIZE*TRAIN_RATIO)
val_end = int((VAL_RATIO + TRAIN_RATIO)*DATASET_SIZE)

# Contiguous slices of the tensors: train, then validation, then test
X_train, y_train = X[:train_end], y[:train_end]
X_val, y_val = X[train_end:val_end], y[train_end:val_end]
X_test, y_test = X[val_end:], y[val_end:]

In [26]:
X_train.shape[1]

52

## 1.9 Data Pipeline Components
- Création du Dataset 
- Mélange des Données
- Division en Mini-lots
- Préchargement

In [27]:
def create_data_pipeline(X, y, normalizer=None, batch_size=32, buffer_size=52, shuffle=True):
    """
    Build a ``tf.data`` input pipeline: (normalize) -> (shuffle) -> batch -> prefetch.

    Args:
        X (tf.Tensor or np.array): Input features, sliced along the first axis.
        y (tf.Tensor or np.array): Targets, sliced along the first axis.
        normalizer (tf.keras.layers.Normalization, optional): If given, applied to
            the features of every element before batching.
        batch_size (int): Number of elements per batch.
        buffer_size (int): Size of the shuffle buffer; only elements inside the
            buffer are mixed, so a small value gives a local shuffle.
        shuffle (bool): Whether to shuffle (reshuffled at every iteration).
            Defaults to True, the historical behaviour; pass False for
            validation/test data where ordering does not matter.

    Returns:
        tf.data.Dataset: The batched, prefetched dataset of ``(features, target)`` pairs.
    """
    dataset = tf.data.Dataset.from_tensor_slices((X, y))

    if normalizer is not None:
        # Distinct lambda argument names avoid shadowing the outer X / y
        dataset = dataset.map(lambda features, target: (normalizer(features), target))

    if shuffle:
        dataset = dataset.shuffle(buffer_size=buffer_size, reshuffle_each_iteration=True)

    dataset = dataset.batch(batch_size)
    # Overlap data preparation with model execution
    dataset = dataset.prefetch(tf.data.AUTOTUNE)

    return dataset

In [28]:
# Build the data pipelines with the configured batch size, so that the
# "batch_size" value logged to MLflow matches what the model is trained on
# (the function default of 32 was silently used before)
train_dataset = create_data_pipeline(X_train, y_train, batch_size=batch_size)
val_dataset = create_data_pipeline(X_val, y_val, batch_size=batch_size)
test_dataset = create_data_pipeline(X_test, y_test, batch_size=batch_size)

In [29]:
train_dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 52), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.float32, name=None))>

In [None]:
# Print one batch to inspect the element structure; take(1) yields at most one
for first_batch in train_dataset.take(1):
    print(first_batch)

## 1.10 Normalizing

In [30]:
# Create the normalization layer and adapt it on the training features only,
# so its statistics are not computed from validation or test rows
normalizer = tf.keras.layers.Normalization()
normalizer.adapt(X_train)
normalizer(X_train)[:5]

<tf.Tensor: shape=(5, 52), dtype=float32, numpy=
array([[ 6.65436327e-01,  8.01083267e-01, -1.49771941e+00,
        -8.88107598e-01, -1.42575443e+00, -9.42961872e-01,
        -1.31746316e+00,  1.90192033e-02, -7.23054633e-03,
        -3.57454866e-02, -1.34390742e-02,  8.61060679e-01,
         1.92867339e+00, -5.94275415e-01,  1.10196328e+00,
        -1.51587617e+00, -7.48512268e-01, -1.24960923e+00,
        -2.03395200e+00, -1.89349782e+00, -1.84816802e+00,
        -7.93628842e-02, -6.87495589e-01, -6.96752787e-01,
        -2.75476742e+00, -2.63443637e+00, -6.03471577e-01,
        -7.52917469e-01, -7.00687408e-01, -6.99461639e-01,
        -1.13979399e-01,  4.78934109e-01, -4.32801008e-01,
        -4.10026550e-01, -3.18119854e-01, -6.57927155e-01,
        -1.23702608e-01, -2.57460535e-01, -3.70950669e-01,
        -3.57275344e-02, -1.77078688e+00, -1.24281824e+00,
        -2.46429491e+00, -4.42980370e-03, -3.62277357e-03,
        -1.53818917e+00, -2.20527792e+00, -3.74771492e-03,
       

## 2. Build Model

### 2.1 Model Simple

In [None]:
with mlflow.start_run(run_name="vehicle_predict_single"):
    # Let MLflow record Keras parameters and per-epoch metrics automatically
    mlflow.tensorflow.autolog()

    # Baseline network: the adapted normalization layer, three dense ReLU
    # layers (dropout before the last one) and a single linear output unit
    model = tf.keras.Sequential([
        InputLayer(shape=(52,)),
        normalizer,
        Dense(128, activation="relu"),
        Dense(128, activation="relu"),
        Dropout(0.2),
        Dense(128, activation="relu"),
        Dense(1)
    ])

    # MAE is the optimised loss; RMSE is tracked as an additional metric
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss=MeanAbsoluteError(),
                  metrics=['root_mean_squared_error'])

    # stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

    history = model.fit(train_dataset, validation_data=val_dataset, epochs=epochs)

    mlflow.log_param("batch_size", batch_size)
    # evaluate() yields one value per compiled metric (loss and RMSE), while
    # MLflow metrics must be scalars: log each one under a "final_" prefix
    test_metrics = model.evaluate(test_dataset, return_dict=True)
    mlflow.log_metrics({f"final_{name}": float(value) for name, value in test_metrics.items()})

In [None]:
model.summary()

In [32]:
# tf.keras.utils.plot_model(model, to_file = "model.png", show_shapes=True)
mlflow.end_run()

### 2.2 Build Model with Search Hyperparameters

In [None]:
def build_model(hp):
    """
    Build and compile the regression network for one KerasTuner trial.

    Hyperparameters registered on ``hp``:
      - ``dense_1_units``: 32 to 128 in steps of 32
      - ``dense_2_units``: 16 to 64 in steps of 16
      - ``dense_3_units``: 8 to 32 in steps of 8
      - ``dropout_rate``: 0.0 to 0.5 in steps of 0.1
      - ``learning_rate``: 1e-4 to 1e-2, sampled on a log scale

    :param hp: The hyperparameter container supplied by the tuner.
    :return: A compiled ``tf.keras.Sequential`` model (MAE loss, RMSE metric).
    :raises Exception: Any error raised during construction is printed, then re-raised.
    """
    try:
        # Layer widths. The step must be smaller than the range: with step=32,
        # dense_2_units could only be 16 or 48 and dense_3_units only 8.
        dense_1_units = hp.Int('dense_1_units', min_value=32, max_value=128, step=32)
        dense_2_units = hp.Int('dense_2_units', min_value=16, max_value=64, step=16)
        dense_3_units = hp.Int('dense_3_units', min_value=8, max_value=32, step=8)

        # Dropout rate
        dropout_rate = hp.Float('dropout_rate', min_value=0.0, max_value=0.5, step=0.1)

        # Optimizer learning rate: continuous value in [1e-4, 1e-2], log-sampled
        hp_learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')

        # Sequential model; `normalizer` is the layer adapted earlier in the notebook
        model_ = tf.keras.Sequential([
            Input(shape=(52,)),
            normalizer,
            Dense(dense_1_units, activation="relu"),
            Dense(dense_2_units, activation="relu"),
            Dropout(dropout_rate),
            Dense(dense_3_units, activation="relu"),
            Dense(1)
        ])

        # Compile the model
        model_.compile(optimizer=Adam(learning_rate=hp_learning_rate),
                       loss=MeanAbsoluteError(),
                       metrics=['root_mean_squared_error'])

        return model_
    except Exception as e:
        print(f"Error in build_model: {e}")
        # Bare raise keeps the original traceback intact
        raise

In [None]:
# Open the run explicitly (no `with` block) so it stays active across the
# following cells until mlflow.end_run() is called
mlflow.start_run(run_name="vehicle_prediction_model_tensorflow")
# Enable automatic TensorFlow tracking
mlflow.tensorflow.autolog()

#### 2.1 Définir les paramètres de la grille

#### Définir un Hyperband tuner et rechercher les hyperparamètres

In [None]:
tuner = kt.Hyperband(
    build_model,
    # RMSE has to be minimised; stating the direction avoids relying on name inference
    objective=kt.Objective('val_root_mean_squared_error', direction='min'),
    max_epochs=epochs,
    # Use the notebook-level constant instead of repeating the literal
    factor=factor,
    directory='../vehicle_prediction',  # Folder where the search results are stored
    project_name='vehicle_prediction'
)

# Callback forwarding the per-epoch logs to MLflow
mlflow_callback = tf.keras.callbacks.LambdaCallback(
    on_epoch_end=lambda epoch, logs: mlflow.log_metrics(logs, step=epoch)
)

# Stop a trial when the validation loss has not improved for 5 epochs
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

# Run the hyperparameter search
tuner.search(train_dataset, epochs=epochs, validation_data=val_dataset, callbacks=[stop_early, mlflow_callback])

best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Record every tuned hyperparameter in MLflow (dense_3_units was missing)
mlflow.log_params({
    'dense_1_units': best_hps.get('dense_1_units'),
    'dense_2_units': best_hps.get('dense_2_units'),
    'dense_3_units': best_hps.get('dense_3_units'),
    'dropout_rate': best_hps.get('dropout_rate'),
    'learning_rate': best_hps.get('learning_rate')
})

# 'units' is not a registered hyperparameter (KeyError); the first dense
# layer's width is stored under 'dense_1_units'
print(f"""
La recherche d'hyperparamètres est terminée. Le nombre optimal d'unités dans la première
couche densément connectée est {best_hps.get('dense_1_units')} et le taux d'apprentissage optimal pour l'optimiseur
est {best_hps.get('learning_rate')}.
""")

#### 2.2 Entraînement du modèle

In [None]:
# Build the model with the best hyperparameters and train it for `epochs` epochs
# (the epoch count comes from the notebook-level `epochs` setting)
model = tuner.hypermodel.build(best_hps)
history = model.fit(train_dataset, epochs=epochs, validation_data=val_dataset, verbose=1)

In [None]:
# Validation loss recorded after each epoch of the training run above
val_loss_per_epoch = history.history['val_loss']
# The best epoch has the LOWEST validation loss (max() picked the worst one);
# +1 turns the 0-based index into a number of epochs
best_epoch = val_loss_per_epoch.index(min(val_loss_per_epoch)) + 1
print('Best epoch: %d' % (best_epoch,))

#### Réinstanciez l'hypermodèle et entraînez-le avec le nombre optimal d'époques d'en haut.

In [None]:
# Rebuild a fresh, untrained model from the best hyperparameters and train THAT
# model for best_epoch epochs. The previous code re-fitted the already trained
# `model` and overwrote `hyper_model` with the History object returned by fit().
hyper_model = tuner.hypermodel.build(best_hps)
hyper_history = hyper_model.fit(train_dataset, epochs=best_epoch, validation_data=val_dataset)

#### Evaluation du Modele

In [None]:
# evaluate() returns the loss followed by the compiled metric (RMSE)
eval_result = hyper_model.evaluate(test_dataset)
print("[test loss, test rmse]:", eval_result)

##### Suivi des métriques personnalisées

In [None]:
# Custom tracking
mlflow.log_param("batch_size", batch_size)
# evaluate() yields one value per compiled metric (loss and RMSE), while
# MLflow metrics must be scalars: log each one under a "final_" prefix
test_metrics = model.evaluate(test_dataset, return_dict=True)
mlflow.log_metrics({f"final_{name}": float(value) for name, value in test_metrics.items()})

#### Enregistrer le modèle dans MLflow

In [None]:
# Enregistrer le modèle dans MLflow
# mlflow.tensorflow.log_model(model, "model")
# mlflow.tensorflow.log_model(hyper_model, "hyper_model")

In [None]:
mlflow.end_run()

#### 2.3  Visualisation du Loss

In [None]:
# Training vs validation loss per epoch, drawn on an explicit Axes
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(history.history['loss'])
ax.plot(history.history['val_loss'])
ax.set_title("model Loss")
ax.set_ylabel('loss')
ax.set_xlabel('epochs')
ax.legend(['train', 'val_loss'])
plt.show()

In [None]:
# The model is a regressor compiled with the 'root_mean_squared_error' metric:
# the history has no 'accuracy' entry (KeyError), so plot the RMSE curves instead
fig = plt.figure(figsize=(12,6))
plt.plot(history.history['root_mean_squared_error'])
plt.plot(history.history['val_root_mean_squared_error'])
plt.title("model RMSE")
plt.ylabel('root mean squared error')
plt.xlabel('epochs')
plt.legend(['train','val'])
plt.show()

In [None]:
history.history

#### 2.4 Évaluation du modèle

In [None]:
test = model.evaluate(X_test, y_test)

In [None]:
test

#### Prédiction des Valeurs

In [None]:
y_pred = model.predict(X_test)

In [None]:
y_pred.shape

### Comparaison des résultats

In [None]:
y_pred[:5]

In [None]:
y_test[:5]

In [None]:
# Overlay predicted and actual prices for the test rows, in row order
ax = plt.gca()
ax.plot(y_pred)
ax.plot(y_test)
ax.set_ylabel("price")
ax.legend(["predicted", "actual"])
plt.show()