# Model Training: Boosting Models (XGBoost & LightGBM)

Ce notebook charge les données prétraitées et entraîne des modèles de gradient boosting pour la prévision de la demande.

In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
import lightgbm as lgb
from sklearn.metrics import mean_squared_error, mean_absolute_error
import joblib
import os
import matplotlib.pyplot as plt

# Ensure the directory that receives the serialized models exists;
# exist_ok=True keeps the call from failing when it is already there.
models_dir = 'trained_models'
os.makedirs(models_dir, exist_ok=True)

print("‚úÖ Biblioth√®ques import√©es et dossier v√©rifi√©.")

## Chargement des Données
Nous utilisons les datasets générés par le notebook de prétraitement.

In [None]:
# Locations of the train/test CSV files consumed by this notebook.
train_path = 'train_boosting.csv'
test_path = 'test_boosting.csv'

# Fail early with an actionable message (it points at the preprocessing
# notebook) when either CSV is missing.
if not all(os.path.exists(csv_path) for csv_path in (train_path, test_path)):
    raise FileNotFoundError("Les fichiers train_boosting.csv ou test_boosting.csv sont introuvables. Veuillez ex√©cuter le notebook pretraitement.ipynb d'abord.")

# Read the train CSV first, then the test CSV.
train_df, test_df = (pd.read_csv(csv_path) for csv_path in (train_path, test_path))

# Report the (rows, columns) shape of each frame.
print(f"üì¶ Train shape: {train_df.shape}")
print(f"üì¶ Test shape: {test_df.shape}")

## Préparation des Features et Target
Nous allons utiliser `target_7` pour prédire la demande à 7 jours. Nous excluons `target_30` des features. Nous filtrons aussi les colonnes non-numériques.

In [None]:
# Target to predict and columns that must never be used as features.
# Both target columns are dropped from X so neither can leak into the model.
target_col = 'target_7'
drop_cols = ['target_7', 'target_30']

# Split features (X) and target (y); errors='ignore' tolerates a frame that
# lacks one of the columns listed in drop_cols.
X_train = train_df.drop(columns=drop_cols, errors='ignore')
y_train = train_df[target_col]

X_test = test_df.drop(columns=drop_cols, errors='ignore')
y_test = test_df[target_col]

# Keep numeric columns only (removes e.g. Date, ID columns).
X_train = X_train.select_dtypes(include=[np.number])
X_test = X_test.select_dtypes(include=[np.number])

# Align both frames on the features they share, in X_test column order.
# The dtype filter above runs on each frame independently, so a column can
# end up numeric in one CSV and not in the other; indexing X_train with every
# X_test column would then raise KeyError. When the column sets already
# match, this yields exactly the same frames as a plain X_train[X_test.columns].
train_columns = set(X_train.columns)
feature_cols = [col for col in X_test.columns if col in train_columns]
if not feature_cols:
    raise ValueError("Aucune feature commune entre train et test.")

# Surface the columns that had to be dropped instead of hiding the mismatch.
test_only_cols = [col for col in X_test.columns if col not in train_columns]
if test_only_cols:
    print(f"Colonnes du test absentes du train, exclues : {test_only_cols}")

X_train = X_train[feature_cols]
X_test = X_test[feature_cols]

print(f"Features utilis√©es: {len(X_train.columns)}")
print(f"Colonnes conserv√©es: {list(X_train.columns)}")

## Entraînement XGBoost

In [None]:
# Fit an XGBoost regressor on the training features, then persist it.
print("‚è≥ Entra√Ænement XGBoost en cours...")

# Hyper-parameters are gathered in one place and unpacked into the estimator.
xgb_params = dict(
    n_estimators=1000,
    learning_rate=0.05,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    early_stopping_rounds=50,
    random_state=42,
    n_jobs=-1,
)
xgb_model = xgb.XGBRegressor(**xgb_params)

# XGBoost monitors the last entry of eval_set for early stopping, i.e. the
# test split here.
# NOTE(review): the same test split is scored again in the evaluation cell,
# so the reported metrics are likely optimistic — consider a separate
# validation split for early stopping.
xgb_eval_sets = [(X_train, y_train), (X_test, y_test)]
xgb_model.fit(X_train, y_train, eval_set=xgb_eval_sets, verbose=100)

print("‚úÖ XGBoost entra√Æn√©.")

# Serialize the fitted model with joblib.
xgb_path = 'trained_models/xgboost_model.pkl'
joblib.dump(xgb_model, xgb_path)
print(f"üíæ Mod√®le sauvegard√© sous : {xgb_path}")

## Entraînement LightGBM

In [None]:
# Fit a LightGBM regressor on the training features, then persist it.
print("‚è≥ Entra√Ænement LightGBM en cours...")
lgb_model = lgb.LGBMRegressor(
    n_estimators=1000,
    learning_rate=0.05,
    num_leaves=31,
    subsample=0.8,
    # LightGBM only applies row subsampling (`subsample`, an alias of
    # bagging_fraction) when `subsample_freq` (bagging_freq) is non-zero.
    # Its default of 0 left subsample=0.8 without any effect; 1 re-samples
    # the rows at every boosting iteration.
    subsample_freq=1,
    colsample_bytree=0.8,
    random_state=42,
    n_jobs=-1
)

# Early stopping halts training once the RMSE on eval_set has not improved
# for 50 rounds; progress is logged every 100 iterations.
# NOTE(review): the eval_set is the test split that the evaluation cell scores
# again, so the reported metrics are likely optimistic — consider a separate
# validation split for early stopping.
lgb_model.fit(
    X_train, y_train,
    eval_set=[(X_test, y_test)],
    eval_metric='rmse',
    callbacks=[lgb.early_stopping(stopping_rounds=50), lgb.log_evaluation(100)]
)

print("‚úÖ LightGBM entra√Æn√©.")

# Serialize the fitted model with joblib.
lgb_path = 'trained_models/lightgbm_model.pkl'
joblib.dump(lgb_model, lgb_path)
print(f"üíæ Mod√®le sauvegard√© sous : {lgb_path}")

## Évaluation des Modèles

In [None]:
def evaluate_model(model, name, X, y):
    """Print the RMSE and MAE of ``model`` on ``(X, y)`` and return its predictions.

    Args:
        model: Fitted estimator; only its ``predict`` method is used.
        name: Label shown in the header line of the printed report.
        X: Features passed to ``model.predict``.
        y: True target values the predictions are compared against.

    Returns:
        The value returned by ``model.predict(X)``.
    """
    predictions = model.predict(X)
    mse = mean_squared_error(y, predictions)
    # Build the whole report before printing so that a failing metric
    # leaves no partial output behind.
    report = (
        f"--- {name} ---",
        f"RMSE: {np.sqrt(mse):.4f}",
        f"MAE:  {mean_absolute_error(y, predictions):.4f}",
    )
    print(*report, sep="\n")
    return predictions

# Score both fitted models on the test split and keep their predictions.
print("\nüìä √âvaluation sur le Test Set :")
xgb_preds = evaluate_model(xgb_model, "XGBoost", X_test, y_test)
lgb_preds = evaluate_model(lgb_model, "LightGBM", X_test, y_test)

# Quick visual check: overlay the first 100 true values and the matching
# predictions of each model on a single figure.
plt.figure(figsize=(15, 6))
curves = (
    (y_test.values, 'R√©el'),
    (xgb_preds, 'XGBoost Pred'),
    (lgb_preds, 'LightGBM Pred'),
)
for values, label in curves:
    plt.plot(values[:100], label=label, alpha=0.7)
plt.title("Comparaison Pr√©dictions vs R√©el (100 premiers points)")
plt.legend()
plt.show()