# Prévision de la demande — Projet vitrine

This notebook prepares data, trains models and evaluates them. It is tailored for a logistics/industry use-case.

In [1]:
import pandas as pd
import numpy as np
csv_path = r"c:\Users\adela\OneDrive\Bureau\Projet_Industrie\demande_logistique_dataset.csv"
df = pd.read_csv(csv_path)
df['date'] = pd.to_datetime(df['date'])
df.head()

Unnamed: 0,date,site_id,produit_id,quantite_demandee,stock_disponible,temps_livraison_moyen_j,jour_semaine,meteo,temp_celsius,promo_active
0,2025-07-01,SITE_B,PROD_001,239,717,2,Mardi,Ensoleillé,11,0
1,2025-07-02,SITE_B,PROD_003,188,322,2,Mercredi,Pluvieux,25,0
2,2025-07-03,SITE_D,PROD_003,160,633,4,Jeudi,Pluvieux,20,1
3,2025-07-04,SITE_B,PROD_001,34,252,3,Vendredi,Pluvieux,29,0
4,2025-07-05,SITE_A,PROD_002,42,262,1,Samedi,Pluvieux,19,0


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from sklearn.metrics import mean_absolute_error, mean_squared_error
import lightgbm as lgb
import optuna

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# ==============================
# 1️⃣ Préparation des features enrichies
# ==============================
df = df.sort_values(['site_id','produit_id','date']).reset_index(drop=True)

# Variables calendrier
df['dayofweek'] = df['date'].dt.weekday
df['month'] = df['date'].dt.month
df['is_weekend'] = df['dayofweek'].isin([5,6]).astype(int)

# Encodage cyclique
df['day_sin'] = np.sin(2 * np.pi * df['dayofweek'] / 7)
df['day_cos'] = np.cos(2 * np.pi * df['dayofweek'] / 7)
df['month_sin'] = np.sin(2 * np.pi * (df['month']-1) / 12)
df['month_cos'] = np.cos(2 * np.pi * (df['month']-1) / 12)

# Lags multiples
for lag in [1, 7, 14]:
    df[f'demand_shift_{lag}'] = df.groupby(['site_id','produit_id'])['quantite_demandee'].shift(lag)

# Moyennes mobiles
for w in [3, 7, 14, 30]:
    df[f'demand_roll_{w}'] = df.groupby(['site_id','produit_id'])['quantite_demandee'] \
                               .transform(lambda x: x.shift(1).rolling(window=w, min_periods=1).mean())

# Différences pour tendance
df['demand_diff_1'] = df.groupby(['site_id','produit_id'])['quantite_demandee'].diff(1)
df['demand_diff_7'] = df.groupby(['site_id','produit_id'])['quantite_demandee'].diff(7)

# Remplir les valeurs manquantes par médiane par groupe
for col in df.columns:
    if df[col].isna().any():
        df[col] = df.groupby(['site_id','produit_id'])[col].transform(lambda x: x.fillna(x.median()))

# Encodage catégoriel
df = pd.get_dummies(df, columns=['site_id','produit_id','meteo','jour_semaine'], drop_first=True)

In [4]:
# ==============================
# 2️⃣ Split train/test
# ==============================
X = df.drop(columns=['date','quantite_demandee'])
y = df['quantite_demandee']
split_index = int(len(df)*0.8)
X_train, X_test = X.iloc[:split_index], X.iloc[split_index:]
y_train, y_test = y.iloc[:split_index], y.iloc[split_index:]

In [5]:
pip install xgboost

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
# --- 3. Modélisation avec XGBoost ---
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

model = XGBRegressor(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)

model.fit(X_train, y_train)
y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")


MAE: 10.64
RMSE: 17.25


In [7]:
# ==============================
# 3️⃣ Optimisation LightGBM avec Optuna
# ==============================
def objective(trial):
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'verbosity': -1,
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'num_leaves': trial.suggest_int('num_leaves', 20, 300),
        'max_depth': trial.suggest_int('max_depth', 3, 20),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 200),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.5, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.5, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7)
    }
    
    model = lgb.LGBMRegressor(**params)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    return rmse

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=30)

[I 2025-08-13 16:54:40,801] A new study created in memory with name: no-name-21fd33ec-47b3-4819-857a-b2a1f59e9f62
[I 2025-08-13 16:54:43,032] Trial 0 finished with value: 17.24865275111409 and parameters: {'learning_rate': 0.09222773295678245, 'num_leaves': 46, 'max_depth': 15, 'min_data_in_leaf': 21, 'feature_fraction': 0.9391439020293932, 'bagging_fraction': 0.91853647928786, 'bagging_freq': 7}. Best is trial 0 with value: 17.24865275111409.
[I 2025-08-13 16:54:43,039] Trial 1 finished with value: 17.24865275111409 and parameters: {'learning_rate': 0.09592910367437231, 'num_leaves': 140, 'max_depth': 14, 'min_data_in_leaf': 126, 'feature_fraction': 0.8841200806049748, 'bagging_fraction': 0.5208068312214376, 'bagging_freq': 4}. Best is trial 0 with value: 17.24865275111409.
[I 2025-08-13 16:54:43,052] Trial 2 finished with value: 17.24865275111409 and parameters: {'learning_rate': 0.03278055531117134, 'num_leaves': 127, 'max_depth': 5, 'min_data_in_leaf': 65, 'feature_fraction': 0.848

In [8]:
# ==============================
# 4️⃣ Entraînement final avec meilleurs paramètres
# ==============================
best_params = study.best_params
best_model = lgb.LGBMRegressor(**best_params)
best_model.fit(X_train, y_train)

0,1,2
,boosting_type,'gbdt'
,num_leaves,46
,max_depth,15
,learning_rate,0.09222773295678245
,n_estimators,100
,subsample_for_bin,200000
,objective,
,class_weight,
,min_split_gain,0.0
,min_child_weight,0.001


In [10]:
# ==============================
# 5️⃣ Évaluation
# ==============================
preds = best_model.predict(X_test)
mae = mean_absolute_error(y_test, preds)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print("✅ Meilleurs paramètres :", best_params)
print(f"📉 MAE : {mae:.2f}")
print(f"📉 RMSE : {rmse:.2f}")


✅ Meilleurs paramètres : {'learning_rate': 0.09222773295678245, 'num_leaves': 46, 'max_depth': 15, 'min_data_in_leaf': 21, 'feature_fraction': 0.9391439020293932, 'bagging_fraction': 0.91853647928786, 'bagging_freq': 7}
📉 MAE : 9.65
📉 RMSE : 17.25
