In [68]:
import keras
import pandas as pd
from sklearn.model_selection import train_test_split

import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, f1_score
from keras.models import Sequential
from keras.layers import Dense, Activation
from scikeras.wrappers import KerasClassifier
from tqdm import tqdm

In [69]:
# Lecture des données
orders_distance_stores_softmax = pd.read_csv("données/orders_distance_stores_softmax.csv", index_col=0)
orders_products_prior_specials = pd.read_csv("données/order_products__prior_specials.csv", index_col=0)

In [70]:
orders = pd.merge(orders_distance_stores_softmax, orders_products_prior_specials, on='order_id', how='inner')

In [71]:
#show columns with their types
orders.columns

Index(['user_id', 'store_id', 'distance', 'order_id', 'eval_set',
       'order_number', 'order_dow', 'order_hour_of_day',
       'days_since_prior_order', 'product_id', 'add_to_cart_order',
       'reordered', 'special'],
      dtype='object')

In [72]:
orders = orders.sample(frac=0.05, random_state=42)

In [73]:
#show the first 5 rows
orders.head()

Unnamed: 0,user_id,store_id,distance,order_id,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order,product_id,add_to_cart_order,reordered,special
86058,12166,3,2.772836,243435,prior,88,0,13,21.0,48290,10,1,0
1170067,205543,9,0.386416,1425899,prior,22,6,9,0.0,3481,6,1,0
852677,148902,0,0.349984,1455360,prior,36,4,18,4.0,41720,8,1,30
346398,59106,9,1.040265,2683498,prior,83,2,17,3.0,5876,5,1,0
1045727,182401,1,0.804848,2474304,prior,86,5,14,5.0,4562,15,1,15


In [74]:
#TO REDO
orders.dropna(inplace=True)
# drop the eval_set column
orders.drop(columns=['eval_set'], inplace=True)

In [75]:
# Charger vos données depuis votre DataFrame
# Assumons que votre DataFrame est nommé "data"

# Séparer les fonctionnalités (X) de la cible (y)
X = orders.drop(columns=['reordered'])
y = orders['reordered']

# Diviser l'ensemble de données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Diviser l'ensemble d'entraînement en ensembles d'entraînement et de validation
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Vérifier les formes des ensembles créés
print("Shape of X_train:", X_train.shape)
print("Shape of X_val:", X_val.shape)
print("Shape of X_test:", X_test.shape)

Shape of X_train: (37072, 11)
Shape of X_val: (9269, 11)
Shape of X_test: (11586, 11)


In [76]:
from keras.utils import to_categorical

# Assuming y_train and y_test are your target labels
# Convert them to one-hot encoded vectors
y_train_one_hot = to_categorical(y_train)
y_test_one_hot = to_categorical(y_test)
y_val_one_hot = to_categorical(y_val)

y_train_one_hot

array([[1., 0.],
       [0., 1.],
       [0., 1.],
       ...,
       [1., 0.],
       [0., 1.],
       [0., 1.]])

In [77]:
loss_fn = keras.losses.CategoricalCrossentropy()
metrics_fn = keras.metrics.BinaryAccuracy()

In [78]:
# Define a function to create the model based on hyperparameters
def create_model(optimizer='adam', neurons_layer1=64, neurons_layer2=64):
    model = Sequential()    
    model.add(keras.Input(shape=(X_train.shape[1],)))
    model.add(Dense(neurons_layer1, activation='relu'))
    model.add(Dense(neurons_layer2, activation='relu'))
    model.add(Dense(2, activation='softmax'))
    model.compile(optimizer=optimizer, loss=loss_fn, metrics=[metrics_fn])
    return model

# Wrap the create_model function with KerasClassifier
keras_model = KerasClassifier(model=create_model, neurons_layer1=64, neurons_layer2=64, metrics=[metrics_fn])

# Define the grid of hyperparameters to search
param_grid = {
    'neurons_layer1': [32, 64, 128],
    'neurons_layer2': [32, 64, 128],
    'optimizer': ['adam', 'rmsprop']
}

# Define F1 score as the metric
f1_scorer = make_scorer(f1_score, average='binary')

# Initialize GridSearchCV with F1 score as the metric
grid = GridSearchCV(estimator=keras_model, param_grid=param_grid, cv=3, scoring=f1_scorer)
# Perform grid search
grid_result = grid.fit(X_train, y_train_one_hot, epochs=1, batch_size=64, validation_data=(X_val, y_val_one_hot),
                       verbose=1)

# Display results
print("Best parameters found: ", grid_result.best_params_)
print("Best F1 score found: ", grid_result.best_score_)

[1m387/387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 956us/step - binary_accuracy: 0.6506 - loss: 4052.6028 - val_binary_accuracy: 0.7782 - val_loss: 862.7466
[1m387/387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 870us/step - binary_accuracy: 0.7111 - loss: 10705.0811 - val_binary_accuracy: 0.7812 - val_loss: 1654.4484
[1m387/387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 892us/step - binary_accuracy: 0.7240 - loss: 3442.7368 - val_binary_accuracy: 0.7790 - val_loss: 1846.7892
[1m387/387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 935us/step - binary_accuracy: 0.7268 - loss: 2397.4236 - val_binary_accuracy: 0.7809 - val_loss: 213.5937
[1m387/387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 876us/step - binary_accuracy: 0.7015 - loss: 35494.7812 - val_binary_accuracy: 0.2578 - val_loss: 1503.4398
[1m387/387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - binary_accuracy: 0.4505 - loss: 23005.2656 - val_binary_ac

ValueError: 
All the 54 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
54 fits failed with the following error:
Traceback (most recent call last):
  File "/home/local/USHERBROOKE/khao1201/PycharmProjects/TP2-IFT870/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/local/USHERBROOKE/khao1201/PycharmProjects/TP2-IFT870/venv/lib/python3.10/site-packages/scikeras/wrappers.py", line 1491, in fit
    super().fit(X=X, y=y, sample_weight=sample_weight, **kwargs)
  File "/home/local/USHERBROOKE/khao1201/PycharmProjects/TP2-IFT870/venv/lib/python3.10/site-packages/scikeras/wrappers.py", line 760, in fit
    self._fit(
  File "/home/local/USHERBROOKE/khao1201/PycharmProjects/TP2-IFT870/venv/lib/python3.10/site-packages/scikeras/wrappers.py", line 928, in _fit
    self._fit_keras_model(
  File "/home/local/USHERBROOKE/khao1201/PycharmProjects/TP2-IFT870/venv/lib/python3.10/site-packages/scikeras/wrappers.py", line 536, in _fit_keras_model
    raise e
  File "/home/local/USHERBROOKE/khao1201/PycharmProjects/TP2-IFT870/venv/lib/python3.10/site-packages/scikeras/wrappers.py", line 531, in _fit_keras_model
    key = metric_name(key)
  File "/home/local/USHERBROOKE/khao1201/PycharmProjects/TP2-IFT870/venv/lib/python3.10/site-packages/scikeras/utils/__init__.py", line 111, in metric_name
    fn_or_cls = keras_metric_get(metric)
  File "/home/local/USHERBROOKE/khao1201/PycharmProjects/TP2-IFT870/venv/lib/python3.10/site-packages/keras/src/metrics/__init__.py", line 204, in get
ValueError: Could not interpret metric identifier: loss
