In [55]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder
import numpy as np

In [56]:
# Charger les données
data = pd.read_csv("data/Prediction.csv")
data.head()

Unnamed: 0,cart_type,entreprise_email,entreprise_name,entreprise_city,day_publication,month_publication,hour_publication,quantity_published,quantity_reserved,quantity_take
0,frais,entreprise14@example.com,Carrefour,Nice,4,6,0,59,59,59
1,sec,entreprise13@example.com,Monoprix,Lyon,3,9,0,111,110,105
2,frais,entreprise7@example.com,Carrefour,Toulouse,4,3,0,194,155,155
3,frais,entreprise3@example.com,Auchan,Marseille,6,12,0,350,344,336
4,mixte,entreprise14@example.com,Carrefour,Nice,1,2,0,96,26,26


# Préparation des données

In [57]:
# Labeliser les colonnes categorielles
categorical_cols = ['cart_type', 'entreprise_email', 'entreprise_name', 'entreprise_city']
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

In [58]:
# Variables explicatives et cible
X = data[['cart_type', 'entreprise_email', 'entreprise_name', 'entreprise_city', 'day_publication', 'month_publication', 'hour_publication', 'quantity_published', 'quantity_reserved']]
y = data['quantity_take']

# Diviser les données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model de prédiction

In [59]:
# Choisir et entraîner un modèle de régression
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

In [60]:
# Prédictions sur les données de test
y_pred = model.predict(X_test)

# Évaluation du modèle
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"R² Score: {r2:.2f}")

Mean Absolute Error (MAE): 17.99
Mean Squared Error (MSE): 2226.40
Root Mean Squared Error (RMSE): 47.18
R² Score: 0.89


# Test

In [61]:
# Exemple de cart à tester
new_cart = {
    "cart_type": "frais",
    "entreprise_name": "Carrefour",
    "entreprise_city": "Nice",
    "entreprise_email": "entreprise14@example.com",
    "day_publication": 2,
    "month_publication": 7,
    "hour_publication": 10,
    "quantity_published": 70,
    "quantity_reserved": 23
}

def predict_how_many_will_be_took(cart):
    # Encodage des colonnes catégorielles
    for col in ['cart_type', 'entreprise_email', 'entreprise_name', 'entreprise_city']:
        if cart[col] not in label_encoders[col].classes_:
            raise ValueError(f"La valeur '{cart[col]}' pour la colonne '{col}' n'existe pas dans l'encodage.")
        cart[col] = label_encoders[col].transform([cart[col]])[0]
    
    # Préparer les données pour le modèle
    test_input = np.array([[cart['cart_type'], 
                            cart['entreprise_name'], 
                            cart['entreprise_city'], 
                            cart['entreprise_email'], 
                            cart['day_publication'], 
                            cart['month_publication'], 
                            cart['hour_publication'], 
                            cart['quantity_published'],
                            cart['quantity_reserved']]])
    
    # Prédire la quantité prise
    predicted_quantity = model.predict(test_input)[0]
    print(f"{int(predicted_quantity)} sur {new_cart['quantity_published']} seront pris")

predict_how_many_will_be_took(new_cart)

22 sur 70 seront pris


