# Implémentation des modèles avec TensorFlow

In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import time
from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import GridSearchCV

import os
import sys
sys.path.append(os.path.abspath(os.path.join('..')))
from src.tensorflow.tf_wrapper import *

In [None]:
# Load the dataset used by every section of this notebook
# (path is relative to the notebook's working directory).
df = pd.read_csv(filepath_or_buffer="../data/health_lifestyle_dataset_cleaned.csv")

In [None]:
# Columns the regression model has to predict (two outputs per row).
regression_target = ['cholesterol', 'calories_consumed']

# Targets as an array with one column per entry of regression_target.
regression_labels = df[regression_target].values

# Every remaining column of df is used as an input feature.
features_reg = df.drop(regression_target, axis="columns").values

# Régression

On cherche à prédire le taux de cholestérol et les calories consommées (les colonnes ```cholesterol``` et ```calories_consumed```).

In [None]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split
# reproducible from one run to the next.
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    features_reg,
    regression_labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Baseline settings describing the network itself.
# NOTE(review): these are presumably consumed by build_tf_regressor through
# SciKeras' keyword routing -- confirm against its signature.
reg_build_args = dict(
    nb_features=X_train_reg.shape[1],  # one input per feature column
    layers_count=2,
    width=64,
    activation='relu',
    dropout_rate=0.0,
    learning_rate=1e-3,
)

# Baseline settings for the training loop of the wrapper.
reg_fit_args = dict(
    epochs=10,
    batch_size=32,
    verbose=1,
)

# scikit-learn compatible wrapper, so the model can be tuned with GridSearchCV.
model_reg = KerasRegressor(
    model=build_tf_regressor,
    **reg_build_args,
    **reg_fit_args,
)

In [None]:
# Hyper-parameter search space for the regressor.
# Note the "model__" prefix on the arguments of build_tf_regressor; the
# un-prefixed entries (epochs, batch_size) are the wrapper's own settings.
param_grid_reg = dict(
    model__layers_count=[2, 3],
    model__width=[64, 128],
    model__activation=["relu", "tanh"],
    model__dropout_rate=[0.0, 0.2],
    model__learning_rate=[1e-3],
    epochs=[20, 30],
    batch_size=[32],
)

In [None]:
# Exhaustive search over param_grid_reg, scored with R^2 under 3-fold
# cross-validation; n_jobs=-1 asks scikit-learn to use every available core.
grid = GridSearchCV(
    estimator=model_reg,
    param_grid=param_grid_reg,
    scoring="r2",
    cv=3,
    n_jobs=-1,
)

# Only the training split is used for the search; the test split stays unseen.
grid_result_reg = grid.fit(X_train_reg, y_train_reg)

# Report the best mean cross-validated score and the parameters that produced it.
print(f"Meilleur score : {grid_result_reg.best_score_}")
print(f"Meilleurs paramètres : {grid_result_reg.best_params_}")

In [None]:
# Keep a handle on the estimator selected by the grid search (with
# GridSearchCV's default refit=True it is refit on the data passed to fit()).
best_model_reg = grid_result_reg.best_estimator_

In [None]:
# Metrics on the TRAINING split: a reference point to compare with the
# test-split metrics and spot over-fitting.
y_pred = best_model_reg.predict(X_train_reg)

r2 = r2_score(y_true=y_train_reg, y_pred=y_pred)
mae = mean_absolute_error(y_true=y_train_reg, y_pred=y_pred)
mse = mean_squared_error(y_true=y_train_reg, y_pred=y_pred)

print(f"MSE: {mse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"R2 Score: {r2:.4f}")

In [None]:
# Metrics on the held-out TEST split: rows never seen during the grid search.
y_pred = best_model_reg.predict(X_test_reg)

r2 = r2_score(y_true=y_test_reg, y_pred=y_pred)
mae = mean_absolute_error(y_true=y_test_reg, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test_reg, y_pred=y_pred)

print(f"MSE: {mse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"R2 Score: {r2:.4f}")

# Classification

On cherche à prédire s'il y a un risque de maladie (colonne ```disease_risk```).

In [None]:
# Single column the classifier has to predict.
classification_target = 'disease_risk'

# Labels as a 1-D array taken from that column.
classification_labels = df[classification_target].values

# Every other column of df is used as an input feature.
features_clas = df.drop(classification_target, axis="columns").values

In [None]:
# Train/test split for the CLASSIFICATION task.
# The previous version of this cell re-split the regression arrays
# (features_reg / regression_labels) into the *_reg variables, so the
# classification data was never split. The *_reg variables from the
# regression section are left untouched: re-splitting the same arrays with
# the same seed produced identical values anyway.
# Same 80/20 ratio and seed as the regression split, for reproducibility.
X_train_clas, X_test_clas, y_train_clas, y_test_clas = train_test_split(
    features_clas,
    classification_labels,
    test_size=0.2,
    random_state=42,
)