# TRAITEMENTS DES DONNEES

In [2]:
import os, sys, datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import OneHotEncoder, StandardScaler, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, StandardScaler
from sklearn.metrics import classification_report


df = pd.read_csv('churn.csv')
df['Churn']= df['Churn'].apply(lambda x : 1 if x=='Yes' else 0)

df['TotalCharges']=pd.to_numeric(df['TotalCharges'])
df = df.dropna()

X = df.drop(columns=["customerID","Churn"])

y = df["Churn"]

# Séparation train/val/test (80/20 puis 20% de train pour val)
X_train_0, X_test, y_train_0, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# On prend 20% de X_train pour validation
X_train, X_val, y_train, y_val = train_test_split(
    X_train_0, y_train_0, test_size=0.2, random_state=42, stratify=y_train_0)

features_of_interest = [
    'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
    'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
    'PaymentMethod', 'MonthlyCharges', 'TotalCharges'
]

numerical_column = ['tenure','MonthlyCharges','TotalCharges']


target_name = 'Churn'

categorical_column = [
    'gender','SeniorCitizen','Partner','Dependents','PhoneService','MultipleLines','InternetService','OnlineSecurity','OnlineBackup','DeviceProtection','TechSupport',
    'StreamingTV','StreamingMovies','Contract','PaperlessBilling'
]

one_hot_column = ['PaymentMethod']

preprocessor = ColumnTransformer(
    [('categorical', OrdinalEncoder(), categorical_column),
     ('one_hot', OneHotEncoder(handle_unknown='ignore'), one_hot_column),
     ('numeric', StandardScaler(), numerical_column)
    ],
    remainder='passthrough'
)

X_train = preprocessor.fit_transform(X_train)
X_val  = preprocessor.transform(X_val)
X_test  = preprocessor.transform(X_test)

# OPTUNA

In [3]:
from sklearn.metrics import roc_auc_score
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

def objective(trial):
    # Hyperparamètres à tester
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])
    epochs = trial.suggest_int('epochs', 10, 50)
    n_layers = trial.suggest_int('n_layers', 1, 3)
    n_units = trial.suggest_int('n_units', 16, 128, step=16)

    # Définir le modèle
    model = Sequential()
    model.add(Input(shape=(X_train.shape[1],)))
    for _ in range(n_layers):
        model.add(Dense(n_units, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['AUC']
    )

    # Early stopping pour éviter l’overfitting
    early_stop = EarlyStopping(monitor='val_auc', mode='max', patience=5, restore_best_weights=True)

    # Entraînement
    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        verbose=0,
        callbacks=[early_stop]
    )

    # Prédictions sur les probas
    y_pred_proba = model.predict(X_val)
    auc = roc_auc_score(y_val, y_pred_proba)

    return auc

In [4]:
import optuna

study = optuna.create_study(direction='maximize', storage="sqlite:///db.sqlite3", study_name='first_test')
study.optimize(objective, n_trials=20)

[I 2025-05-22 13:37:05,565] A new study created in RDB with name: first_test
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
E0000 00:00:1747913825.715713 1463396 cuda_executor.cc:1228] INTERNAL: CUDA Runtime error: Failed call to cudaGetRuntimeVersion: Error loading CUDA libraries. GPU will not be used.: Error loading CUDA libraries. GPU will not be used.
W0000 00:00:1747913825.716206 1463396 gpu_device.cc:2341] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
  current = self.get_monitor_value(logs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


[I 2025-05-22 13:37:12,227] Trial 0 finished with value: 0.833672775271891 and parameters: {'learning_rate': 0.0003774151611533555, 'batch_size': 16, 'epochs': 13, 'n_layers': 2, 'n_units': 16}. Best is trial 0 with value: 0.833672775271891.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
  current = self.get_monitor_value(logs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


[I 2025-05-22 13:37:17,226] Trial 1 finished with value: 0.7892875363398577 and parameters: {'learning_rate': 0.0018234904653982825, 'batch_size': 64, 'epochs': 24, 'n_layers': 3, 'n_units': 64}. Best is trial 0 with value: 0.833672775271891.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
  current = self.get_monitor_value(logs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


[I 2025-05-22 13:37:22,261] Trial 2 finished with value: 0.7566322770817981 and parameters: {'learning_rate': 0.002876380798768084, 'batch_size': 64, 'epochs': 23, 'n_layers': 3, 'n_units': 96}. Best is trial 0 with value: 0.833672775271891.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
  current = self.get_monitor_value(logs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


[I 2025-05-22 13:37:29,474] Trial 3 finished with value: 0.8227121073473321 and parameters: {'learning_rate': 0.007025480657861147, 'batch_size': 16, 'epochs': 14, 'n_layers': 3, 'n_units': 64}. Best is trial 0 with value: 0.833672775271891.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
  current = self.get_monitor_value(logs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


[I 2025-05-22 13:37:42,222] Trial 4 finished with value: 0.7801246285034051 and parameters: {'learning_rate': 0.0076237702952138674, 'batch_size': 32, 'epochs': 46, 'n_layers': 1, 'n_units': 112}. Best is trial 0 with value: 0.833672775271891.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
  current = self.get_monitor_value(logs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


[I 2025-05-22 13:37:50,556] Trial 5 finished with value: 0.8127576182108238 and parameters: {'learning_rate': 0.09596883184776428, 'batch_size': 32, 'epochs': 28, 'n_layers': 3, 'n_units': 128}. Best is trial 0 with value: 0.833672775271891.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
  current = self.get_monitor_value(logs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


[I 2025-05-22 13:38:01,746] Trial 6 finished with value: 0.8300489120312259 and parameters: {'learning_rate': 0.00012368091306482132, 'batch_size': 32, 'epochs': 42, 'n_layers': 2, 'n_units': 64}. Best is trial 0 with value: 0.833672775271891.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
  current = self.get_monitor_value(logs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


[I 2025-05-22 13:38:11,209] Trial 7 finished with value: 0.8262347453578108 and parameters: {'learning_rate': 0.017062972321096756, 'batch_size': 16, 'epochs': 20, 'n_layers': 3, 'n_units': 16}. Best is trial 0 with value: 0.833672775271891.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
  current = self.get_monitor_value(logs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


[I 2025-05-22 13:38:17,242] Trial 8 finished with value: 0.8194303853846963 and parameters: {'learning_rate': 0.058325078344585606, 'batch_size': 32, 'epochs': 20, 'n_layers': 2, 'n_units': 64}. Best is trial 0 with value: 0.833672775271891.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
  current = self.get_monitor_value(logs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


[I 2025-05-22 13:38:34,562] Trial 9 finished with value: 0.7734761553847773 and parameters: {'learning_rate': 0.000818215342001073, 'batch_size': 16, 'epochs': 39, 'n_layers': 2, 'n_units': 96}. Best is trial 0 with value: 0.833672775271891.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
  current = self.get_monitor_value(logs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


[I 2025-05-22 13:38:39,616] Trial 10 finished with value: 0.8270728902637524 and parameters: {'learning_rate': 0.0002445067901620812, 'batch_size': 16, 'epochs': 11, 'n_layers': 1, 'n_units': 16}. Best is trial 0 with value: 0.833672775271891.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
  current = self.get_monitor_value(logs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


[I 2025-05-22 13:38:49,653] Trial 11 finished with value: 0.8325795427858802 and parameters: {'learning_rate': 0.00010677076235419241, 'batch_size': 32, 'epochs': 37, 'n_layers': 2, 'n_units': 32}. Best is trial 0 with value: 0.833672775271891.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
  current = self.get_monitor_value(logs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


[I 2025-05-22 13:38:59,273] Trial 12 finished with value: 0.8343570578279494 and parameters: {'learning_rate': 0.00031702096211072583, 'batch_size': 32, 'epochs': 34, 'n_layers': 2, 'n_units': 32}. Best is trial 12 with value: 0.8343570578279494.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
  current = self.get_monitor_value(logs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


[I 2025-05-22 13:39:14,731] Trial 13 finished with value: 0.8234976151335769 and parameters: {'learning_rate': 0.0004908936999362606, 'batch_size': 16, 'epochs': 35, 'n_layers': 2, 'n_units': 32}. Best is trial 12 with value: 0.8343570578279494.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
  current = self.get_monitor_value(logs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


[I 2025-05-22 13:39:21,557] Trial 14 finished with value: 0.8305874302558164 and parameters: {'learning_rate': 0.0007502962711690895, 'batch_size': 64, 'epochs': 32, 'n_layers': 1, 'n_units': 32}. Best is trial 12 with value: 0.8343570578279494.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
  current = self.get_monitor_value(logs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


[I 2025-05-22 13:39:34,492] Trial 15 finished with value: 0.8235462032440661 and parameters: {'learning_rate': 0.0003618043300987293, 'batch_size': 32, 'epochs': 50, 'n_layers': 2, 'n_units': 48}. Best is trial 12 with value: 0.8343570578279494.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
  current = self.get_monitor_value(logs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


[I 2025-05-22 13:39:47,622] Trial 16 finished with value: 0.8241980937264651 and parameters: {'learning_rate': 0.001410335487400603, 'batch_size': 16, 'epochs': 31, 'n_layers': 2, 'n_units': 16}. Best is trial 12 with value: 0.8343570578279494.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
  current = self.get_monitor_value(logs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


[I 2025-05-22 13:39:52,597] Trial 17 finished with value: 0.8343530088187421 and parameters: {'learning_rate': 0.0002391726419587422, 'batch_size': 32, 'epochs': 16, 'n_layers': 1, 'n_units': 48}. Best is trial 12 with value: 0.8343570578279494.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
  current = self.get_monitor_value(logs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


[I 2025-05-22 13:40:00,789] Trial 18 finished with value: 0.8353166730101144 and parameters: {'learning_rate': 0.00016768921893995333, 'batch_size': 32, 'epochs': 27, 'n_layers': 1, 'n_units': 48}. Best is trial 18 with value: 0.8353166730101144.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-1)
  current = self.get_monitor_value(logs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


[I 2025-05-22 13:40:08,960] Trial 19 finished with value: 0.8334136386826143 and parameters: {'learning_rate': 0.00018431534119954884, 'batch_size': 32, 'epochs': 29, 'n_layers': 1, 'n_units': 48}. Best is trial 18 with value: 0.8353166730101144.
