In [45]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,r2_score
import xgboost as xgb
from sklearn import metrics
import mlflow
import mlflow.sklearn


# Définition de nos variables cibles

In [46]:
# Load the flight dataset; the relative path is resolved from the notebook's working directory.
flight_csv = "../data/flight_data.csv"
data = pd.read_csv(flight_csv)

In [34]:
# Separate the features from the regression target ('Price').
X_data = data.drop(columns=['Price'])
Y = data['Price']

# Divisez les données en ensembles d’entraînement et de test

In [35]:
# Split the data into train / validation / test sets.
# The feature frame defined in this notebook is `X_data`; the name `X` is never
# assigned, so using it raises NameError on a fresh kernel.
# Fixed seeds keep the three partitions identical from one run to the next, so
# the metrics logged to MLflow are comparable across runs.
# First hold out 20% of the rows as the final test set ...
X_train_or, X_test, y_train_or, y_test = train_test_split(
    X_data, Y, test_size=0.2, random_state=42
)
# ... then take 25% of the remaining 80% (i.e. 20% of the total) for validation,
# which leaves roughly a 60/20/20 train/validation/test split.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_or, y_train_or, test_size=0.25, random_state=42
)

# Set up MLflow

In [39]:
# Start the MLflow tracking UI backed by the local SQLite database `mlflow.db`.
# NOTE(review): this appears to run in the foreground and hold the cell until it is
# interrupted — consider launching it from a separate terminal instead.
!mlflow ui --backend-store-uri sqlite:///mlflow.db

^C


In [42]:
# Point MLflow at the local SQLite store and select the experiment that every
# run below is logged under.
tracking_uri = "sqlite:///mlflow.db"
experiment_name = "flight price predicion tracking"
mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment(experiment_name)

2023/12/15 12:35:54 INFO mlflow.tracking.fluent: Experiment with name 'flight price predicion tracking' does not exist. Creating a new experiment.


<Experiment: artifact_location=('file:///C:/Users/Ce '
 'PC/Documents/MLProject/flight-price-prediction/notebooks/mlruns/1'), creation_time=1702640154703, experiment_id='1', last_update_time=1702640154703, lifecycle_stage='active', name='flight price predicion tracking', tags={}>

## Régression linéaire

In [47]:
# Train a baseline linear regression on the training split, evaluate it on the
# validation split, and log the metrics and the fitted model to MLflow.
with mlflow.start_run(run_name='Linear regression model'):
    reg_model = LinearRegression().fit(X_train, y_train)
    y_pred_val = reg_model.predict(X_val)

    mse_val = mean_squared_error(y_val, y_pred_val)
    mae_val = metrics.mean_absolute_error(y_val, y_pred_val)
    # RMSE is the square root of the mean *squared* error, not of the MAE.
    rmse_val = np.sqrt(mse_val)
    # The predictions come from X_val, so they must be scored against y_val
    # (y_test belongs to a different set of rows).
    r2_val = r2_score(y_val, y_pred_val)

    val_metrics = {'r2_score': r2_val, 'mse': mse_val, 'rmse': rmse_val, 'mae': mae_val}
    mlflow.log_metrics(val_metrics)
    mlflow.sklearn.log_model(reg_model, "Linear regression model")
# The `with` block has already closed the run; this call is a harmless safeguard.
mlflow.end_run()



## Random forest

In [48]:
# Fit a default random forest on the training split, score it on the validation
# split, and record the metrics and the model in MLflow.
with mlflow.start_run(run_name='Random Forest Regressor Model'):
    rf = RandomForestRegressor().fit(X_train, y_train)
    y_pred_val_rf = rf.predict(X_val)

    # Validation metrics; the RMSE reuses the MSE computed on the line above.
    mse_val_rf = metrics.mean_squared_error(y_val, y_pred_val_rf)
    rmse_val_rf = np.sqrt(mse_val_rf)
    mae_val_rf = metrics.mean_absolute_error(y_val, y_pred_val_rf)
    r2_val_rf = metrics.r2_score(y_val, y_pred_val_rf)

    val_metrics_rf = dict(
        r2_score=r2_val_rf,
        mse=mse_val_rf,
        rmse=rmse_val_rf,
        mae=mae_val_rf,
    )
    mlflow.log_metrics(val_metrics_rf)
    mlflow.sklearn.log_model(rf, "Random Forest Regressor Model")
mlflow.end_run()

## XGBoost

In [50]:

# Fit an XGBoost regressor with a fixed set of hyperparameters, score it on the
# validation split, and log the metrics, the hyperparameters and the model to MLflow.
with mlflow.start_run(run_name='XGBoost Regressor Model'):
    # Hyperparameters passed to the regressor and logged alongside the run.
    xg_params = dict(
        objective='reg:squarederror',
        colsample_bytree=0.3,
        learning_rate=0.1,
        max_depth=5,
        alpha=10,
        n_estimators=10,
    )
    xg_reg = xgb.XGBRegressor(**xg_params)
    xg_reg.fit(X_train, y_train)

    y_pred_val_xgb = xg_reg.predict(X_val)

    # Validation metrics; the RMSE reuses the MSE computed on the line above.
    mse_val_xgb = metrics.mean_squared_error(y_val, y_pred_val_xgb)
    rmse_val_xgb = np.sqrt(mse_val_xgb)
    mae_val_xgb = metrics.mean_absolute_error(y_val, y_pred_val_xgb)
    r2_val_xgb = metrics.r2_score(y_val, y_pred_val_xgb)

    val_metrics_xgb = dict(
        r2_score=r2_val_xgb,
        mse=mse_val_xgb,
        rmse=rmse_val_xgb,
        mae=mae_val_xgb,
    )
    mlflow.log_metrics(val_metrics_xgb)
    mlflow.log_params(xg_params)

    mlflow.xgboost.log_model(xg_reg, "XGBoost Regressor Model")

mlflow.end_run()




In [52]:
# Train a decision tree regressor on the training split, evaluate it on the
# validation split, and log the metrics and the fitted model to MLflow.
with mlflow.start_run(run_name='Decision Tree Model'):
    dt_model = DecisionTreeRegressor()

    # Fit on the training split only: fitting on X_val and then scoring on X_val
    # would measure memorisation of the validation data, not generalisation.
    dt_model.fit(X_train, y_train)
    y_pred_val = dt_model.predict(X_val)

    mse_val = mean_squared_error(y_val, y_pred_val)
    mae_val = metrics.mean_absolute_error(y_val, y_pred_val)
    # RMSE is the square root of the mean *squared* error, not of the MAE.
    rmse_val = np.sqrt(mse_val)
    # The predictions come from X_val, so they must be scored against y_val
    # (y_test belongs to a different set of rows).
    r2_val = r2_score(y_val, y_pred_val)

    val_metrics = {'r2_score': r2_val, 'mse': mse_val, 'rmse': rmse_val, 'mae': mae_val}
    mlflow.log_metrics(val_metrics)
    mlflow.sklearn.log_model(dt_model, "Decision TreeRegressor model")
# The `with` block has already closed the run; this call is a harmless safeguard.
mlflow.end_run()