# Implémentation des modèles avec TensorFlow

In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import time
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
from scikeras.wrappers import KerasRegressor, KerasClassifier
from sklearn.model_selection import GridSearchCV

import os
import sys
sys.path.append(os.path.abspath(os.path.join('..')))
from src.tensorflow.tf_wrapper import *

In [2]:
df = pd.read_csv("../data/health_lifestyle_dataset_cleaned.csv")

In [3]:
regression_target = ['cholesterol', 'calories_consumed']
features_reg = df.drop(columns=regression_target).values
regression_labels = df[regression_target].values

# Régression Linéaire

On cherche à prédire le taux de cholesterol et les calories consomées (les colonnes ```cholesterol``` et ```calories_consumed```).

In [4]:
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(features_reg, regression_labels, test_size=0.2, random_state=42)

In [5]:
model_reg = KerasRegressor(
    model=build_tf_regressor,
    nb_features=X_train_reg.shape[1],
    layers_count=2,
    width=64,
    activation='relu',
    dropout_rate=0.0,
    learning_rate=1e-3,
    epochs=10,
    batch_size=32,
    verbose=1
)

In [6]:
# Notez l'utilisation du préfixe "model__" pour les arguments de build_tf_regressor
param_grid_reg = {
    "model__layers_count": [2, 3],
    "model__width": [64, 128],
    "model__activation": ["relu", "tanh"],
    "model__dropout_rate": [0.0, 0.2],
    "model__learning_rate": [1e-3],
    "epochs": [20, 30],
    "batch_size": [32]
}

In [7]:
grid = GridSearchCV(estimator=model_reg, param_grid=param_grid_reg, cv=3, scoring="r2", n_jobs=-1)
grid_result_reg = grid.fit(X_train_reg, y_train_reg)

print(f"Meilleur score : {grid_result_reg.best_score_}")
print(f"Meilleurs paramètres : {grid_result_reg.best_params_}")

Epoch 1/20
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 41.6402  
Epoch 2/20
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 18.7549
Epoch 3/20
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 18.7534
Epoch 4/20
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 980us/step - loss: 18.7553
Epoch 5/20
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 18.7548
Epoch 6/20
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 18.7553
Epoch 7/20
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 18.7539
Epoch 8/20
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 979us/step - loss: 18.7539
Epoch 9/20
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 18.7543
Epoch 10/20
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0

Meilleur score : -0.0007200493621216664

Meilleurs paramètres : {'batch_size': 32, 'epochs': 20, 'model__activation': 'tanh', 'model__dropout_rate': 0.0, 'model__layers_count': 3, 'model__learning_rate': 0.001, 'model__width': 128}

In [8]:
best_model_reg = grid_result_reg.best_estimator_

In [9]:
y_pred = best_model_reg.predict(X_train_reg)

mse = mean_squared_error(y_train_reg, y_pred)
mae = mean_absolute_error(y_train_reg, y_pred)
r2 = r2_score(y_train_reg, y_pred)

print(f"MSE: {mse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"R2 Score: {r2:.4f}")

[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 460us/step
MSE: 939.2649
MAE: 19.2016
R2 Score: -0.0001


MSE: 939.2649

MAE: 19.2016

R2 Score: -0.0001

In [10]:
y_pred = best_model_reg.predict(X_test_reg)

mse = mean_squared_error(y_test_reg, y_pred)
mae = mean_absolute_error(y_test_reg, y_pred)
r2 = r2_score(y_test_reg, y_pred)

print(f"MSE: {mse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"R2 Score: {r2:.4f}")

[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 482us/step
MSE: 939.5472
MAE: 19.2342
R2 Score: -0.0002


MSE: 939.5472

MAE: 19.2342

R2 Score: -0.0002

# Classification

On cherche à prédire s'il y a un risque de maladie (colonne ```disease_risk```).

In [11]:
classification_target = 'disease_risk'
features_clas = df.drop(columns=classification_target).values
classification_labels = df[classification_target].values

In [12]:
X_train_clas, X_test_clas, y_train_clas, y_test_clas = train_test_split(features_clas, classification_labels, test_size=0.2, random_state=42)

skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

In [13]:
model_clas = KerasClassifier(
    model=build_tf_classifier,
    nb_features=X_train_clas.shape[1],
    layers_count=2,
    width=64,
    activation='relu',
    dropout_rate=0.0,
    learning_rate=1e-3,
    epochs=20,
    batch_size=32,
    verbose=1
)

In [14]:
param_grid_clas = {
    "model__layers_count": [2, 3],
    "model__width": [64, 128],
    "model__activation": ["relu", "tanh"],
    "model__dropout_rate": [0.0, 0.5],
    "model__learning_rate": [1e-2, 1e-3],
    "epochs": [30],
    "batch_size": [32, 64]
}

In [15]:
grid_clas = GridSearchCV(estimator=model_clas, param_grid=param_grid_clas, cv=skf, scoring="accuracy", n_jobs=-1)
grid_clas_result = grid_clas.fit(X_train_clas, y_train_clas)

print(f"Meilleure Accuracy : {grid_clas_result.best_score_:.4f}")
print(f"Meilleurs paramètres : {grid_clas_result.best_params_}")

Epoch 1/30
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 770us/step - loss: 0.6183
Epoch 2/30
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 752us/step - loss: 0.5606
Epoch 3/30
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 762us/step - loss: 0.5606
Epoch 4/30
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 766us/step - loss: 0.5606
Epoch 5/30
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 765us/step - loss: 0.5607
Epoch 6/30
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 756us/step - loss: 0.5606
Epoch 7/30
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 761us/step - loss: 0.5607
Epoch 8/30
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 758us/step - loss: 0.5607
Epoch 9/30
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 790us/step - loss: 0.5607
Epoch 10/30
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━

Meilleure Accuracy : 0.7517

Meilleurs paramètres : {'batch_size': 32, 'epochs': 30, 'model__activation': 'relu', 'model__dropout_rate': 0.0, 'model__layers_count': 2, 'model__learning_rate': 0.01, 'model__width': 64}

In [16]:
best_model_clas = grid_clas_result.best_estimator_

In [17]:
y_pred = best_model_clas.predict(X_train_clas)
f1 = f1_score(y_train_clas, y_pred, average='weighted')
accuracy = accuracy_score(y_train_clas, y_pred)
print(f"Train F1: {f1:.4f}, Accuracy: {accuracy:.4f}")

[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 429us/step
Train F1: 0.6452, Accuracy: 0.7517


Train F1: 0.6452, Accuracy: 0.7517

In [18]:
y_pred = best_model_clas.predict(X_test_clas)
f1 = f1_score(y_test_clas, y_pred, average='weighted')
accuracy = accuracy_score(y_test_clas, y_pred)
print(f"Test F1: {f1:.4f}, Accuracy: {accuracy:.4f}")

[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 556us/step
Test F1: 0.6457, Accuracy: 0.7521


Test F1: 0.6457, Accuracy: 0.7521