In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler

from sklearn.metrics import (
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential


In [None]:
# Load the cleaned dataset and separate the target column ('prediction')
# from the feature columns.
data = pd.read_csv('Database_propre.csv')

prediction = data['prediction']
data = data.drop(columns='prediction')

# 70/30 train/test split. The split is stratified on the target so that the
# class proportions are the same in both parts; without it the share of the
# rare class in the test set is left to chance.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    prediction,
    test_size=0.3,
    stratify=prediction,
    random_state=42,
)

# Standardise the features. The scaler is fitted on the training part only and
# then applied to the test part, so no test-set statistics leak into training.
scaler = StandardScaler()

x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)






### SMOTE

# Hold out a stratified validation set BEFORE oversampling.
# Keras' `validation_split` takes the *last* rows of the arrays (before any
# shuffling), and SMOTE appends its synthetic minority samples after the
# original rows. Using `validation_split` on the resampled arrays therefore
# validates mostly (or entirely) on synthetic points interpolated from the
# training data, which makes the validation curves meaningless.
x_fit, x_val, y_fit, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    stratify=y_train,
    random_state=42,
)

# Oversample the minority class until it reaches half the size of the majority
# class, using the fitting subset only.
smote = SMOTE(sampling_strategy=0.5, random_state=42)
X_resampled, y_resampled = smote.fit_resample(x_fit, y_fit)

# Size the input layer from the data instead of hard-coding 27 features.
n_features = X_resampled.shape[1]

model = Sequential([
    Input(shape=(n_features,)),
    Dense(26, activation='relu'),       # first hidden layer
    Dense(13, activation='relu'),       # second hidden layer
    Dense(6, activation='relu'),        # third hidden layer
    Dense(1, activation="sigmoid"),     # output layer
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model; validation uses real (non-synthetic) held-out samples.
# NOTE(review): weight initialisation is not seeded, so the curves and metrics
# will vary slightly from one run to the next.
history = model.fit(
    X_resampled,
    y_resampled,
    epochs=10,
    validation_data=(x_val, np.asarray(y_val)),
    verbose=1,
)




# Learning curves: training vs. validation loss (left) and accuracy (right)
# as a function of the epoch number.
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: loss per epoch
ax_loss.plot(history.history['loss'], label='Train loss')
ax_loss.plot(history.history['val_loss'], label='Validation Loss')
ax_loss.set_title('Training and Validation Loss')
ax_loss.set_ylabel('Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.legend()

# Right panel: accuracy per epoch
ax_acc.plot(history.history['accuracy'], label='Train Accuracy')
ax_acc.plot(history.history['val_accuracy'], label='Validation Accuracy')
ax_acc.set_title('Training and Validation Accuracy')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.legend()

fig.tight_layout()
plt.show()


## Metric visualisation

# Keep the raw sigmoid outputs: ROC AUC is a ranking metric and has to be
# computed from continuous scores. Feeding it labels that were already
# thresholded at 0.5 collapses the ROC curve to a single operating point.
y_proba = model.predict(x_test)
# Convert probabilities to binary predictions
y_pred = (y_proba > 0.5).astype(int)

recall = recall_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_proba.ravel())
print('f1_score=', f1_score(y_test, y_pred))

print('recall=', recall)
print('precision=', precision)
print('roc_auc=', roc_auc)




### Undersampling

# Hold out a stratified validation set BEFORE under-sampling.
# Keras' `validation_split` takes the *last* rows of the arrays (before any
# shuffling), and the resampled arrays returned by the sampler are not
# shuffled, so that tail is not a representative validation set. Validating on
# untouched data also keeps the validation metrics comparable with the test set.
x_fit, x_val, y_fit, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    stratify=y_train,
    random_state=42,
)

# Randomly drop majority-class samples until the minority class is half the
# size of the majority class (minority / majority = 0.5).
undersampler = RandomUnderSampler(sampling_strategy=0.5, random_state=42)
X_resampled, y_resampled = undersampler.fit_resample(x_fit, y_fit)

# Size the input layer from the data instead of hard-coding 27 features.
n_features = X_resampled.shape[1]

model = Sequential([
    Input(shape=(n_features,)),
    Dense(26, activation='relu'),       # first hidden layer
    Dense(13, activation='relu'),       # second hidden layer
    Dense(6, activation='relu'),        # third hidden layer
    Dense(1, activation="sigmoid"),     # output layer
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model; validation uses the held-out, non-resampled samples.
# NOTE(review): weight initialisation is not seeded, so the metrics will vary
# slightly from one run to the next.
history = model.fit(
    X_resampled,
    y_resampled,
    epochs=10,
    validation_data=(x_val, np.asarray(y_val)),
    verbose=1,
)


# Metrics

# Keep the raw sigmoid outputs: ROC AUC is a ranking metric and has to be
# computed from continuous scores. Feeding it labels that were already
# thresholded at 0.5 collapses the ROC curve to a single operating point.
y_proba = model.predict(x_test)
# Convert probabilities to binary predictions
y_pred = (y_proba > 0.5).astype(int)

recall = recall_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_proba.ravel())
print('f1_score=', f1_score(y_test, y_pred))

print('recall=', recall)
print('precision=', precision)
print('roc_auc=', roc_auc)
