In [1]:
import numpy as np
import pandas as pd

from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, cohen_kappa_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from keras.models import Sequential
from keras.layers import Dense, BatchNormalization, Dropout
from keras.optimizers import SGD, Adam, RMSprop
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import make_scorer
import tensorflow as tf

from imblearn.over_sampling import RandomOverSampler

import warnings
warnings.filterwarnings("ignore")




## OPTIMIZATION OF THE NEURAL NETWORK MODEL

After the first evaluation on In_hospital_death the m1_df with RandomOverSampler was he one with less deviation and slightly better recall. Thus the optimization will be done using m1_df and the same balancing technique. 


### 1-Load data

In [22]:
m1_df = pd.read_csv('clean_data/withsetc/minus1_imputed_df.csv')

### 2- Optimization process

**2.1 -Create grids to evaluate different parameters**

in order to increase speed in a firs step we will evaluate:
 -  'optimizer': ['adam', 'rmsprop', tf.keras.optimizers.legacy.SGD]
 -  'hidden_layers': [1, 2, 3, 5]
 -  'units': [32, 64, 128]
 -  'batch_size': [16, 32, 64]

In [23]:
# Define the function to create the model
def create_model(optimizer='sgd', hidden_layers=1, units=64, batch_size=32):
    model = Sequential()
    model.add(Dense(units, activation='relu', input_shape=(X_train.shape[1],)))
    model.add(BatchNormalization())
    
    for _ in range(hidden_layers):
        model.add(Dense(units, activation='relu'))
        model.add(BatchNormalization())
    
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    
    return model


In [24]:
# Extract features and target variable
X = m1_df.drop(columns=['In-hospital_death', 'Length_of_stay', 'RecordID'])
y = m1_df['In-hospital_death']

# Data splitting
test_size = 0.2
random_state = 42
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

# Data balancing
ros = RandomOverSampler(random_state=42)
X_train, y_train = ros.fit_resample(X_train, y_train)

# Data normalization
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
# Define your parameter grid
param_grid = {
    'optimizer': ['adam', 'rmsprop', tf.keras.optimizers.legacy.SGD],
    'hidden_layers': [1, 2, 3, 5],
    'units': [32, 64, 128],
    'batch_size': [16, 32, 64]
}

# Wrap the Keras model using KerasClassifier
model = KerasClassifier(build_fn=create_model, hidden_layers=1,units=32,verbose=0)

# Define the F1 scorer
f1_scorer = make_scorer(f1_score)

# Perform grid search
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, scoring=f1_scorer, verbose=2)
grid_result = grid.fit(X_train, y_train)

# Extract results and convert to DataFrame
results_df = pd.DataFrame(grid_result.cv_results_)

# Select relevant columns from the results DataFrame
relevant_columns = ['param_optimizer', 'param_hidden_layers', 'param_units', 'param_batch_size', 'mean_test_score']
results_df = results_df[relevant_columns]

# Replace tf.keras.optimizers.legacy.SGD with 'SGD'
results_df['param_optimizer'] = results_df['param_optimizer'].replace({tf.keras.optimizers.legacy.SGD: 'SGD'})

# Sort the DataFrame by mean_test_score in descending order
results_df.sort_values(by='mean_test_score', ascending=False, inplace=True)

# Reset index
results_df.reset_index(drop=True, inplace=True)

# Print the DataFrame
print(results_df)

# Get the best parameters
best_params = grid_result.best_params_
print("Best Parameters:", best_params)

# Build the optimizer
if best_params['optimizer'] == tf.keras.optimizers.legacy.SGD:
    optimizer = tf.keras.optimizers.legacy.SGD()
else:
    optimizer = best_params['optimizer']

# Train the model with the best parameters
best_model = create_model(optimizer=optimizer, hidden_layers=best_params['hidden_layers'], 
                          units=best_params['units'], batch_size=best_params['batch_size'])
best_model.fit(X_train, y_train, epochs=20, batch_size= best_params['batch_size'], verbose=1)

# Getting predictions
y_pred = best_model.predict(X_test)
y_pred_binary = np.where(y_pred > 0.5, 1, 0)

# Calculate precision and recall
precision = precision_score(y_test, y_pred_binary)
recall = recall_score(y_test, y_pred_binary)

print(f"Precision on test set: {precision:.2f}")
print(f"Recall on test set: {recall:.2f}")


Fitting 3 folds for each of 108 candidates, totalling 324 fits




[CV] END batch_size=16, hidden_layers=1, optimizer=adam, units=32; total time=   1.9s
[CV] END batch_size=16, hidden_layers=1, optimizer=adam, units=32; total time=   1.3s
[CV] END batch_size=16, hidden_layers=1, optimizer=adam, units=32; total time=   1.4s
[CV] END batch_size=16, hidden_layers=1, optimizer=adam, units=64; total time=   1.3s
[CV] END batch_size=16, hidden_layers=1, optimizer=adam, units=64; total time=   1.3s
[CV] END batch_size=16, hidden_layers=1, optimizer=adam, units=64; total time=   1.3s
[CV] END batch_size=16, hidden_layers=1, optimizer=adam, units=128; total time=   1.3s
[CV] END batch_size=16, hidden_layers=1, optimizer=adam, units=128; total time=   1.3s
[CV] END batch_size=16, hidden_layers=1, optimizer=adam, units=128; total time=   1.3s
[CV] END batch_size=16, hidden_layers=1, optimizer=rmsprop, units=32; total time=   1.3s
[CV] END batch_size=16, hidden_layers=1, optimizer=rmsprop, units=3

In [17]:
grid_summary_1 = results_df
grid_summary_1

Unnamed: 0,param_optimizer,param_hidden_layers,param_units,param_batch_size,mean_test_score
0,SGD,2,128,16,0.785045
1,rmsprop,3,128,16,0.783582
2,SGD,5,128,32,0.780723
3,adam,3,128,32,0.779829
4,rmsprop,5,128,32,0.779598
...,...,...,...,...,...
103,rmsprop,3,64,64,0.637896
104,adam,3,32,64,0.612140
105,adam,5,64,64,0.588239
106,SGD,5,32,64,0.581989


In [15]:
best_params_1= best_params
best_params_1

{'batch_size': 16,
 'hidden_layers': 2,
 'optimizer': keras.src.optimizers.legacy.gradient_descent.SGD,
 'units': 128}

as it seems that both units and batch size the higher the better we will fix the optimizer = SGD and hidden layers = 2 and will test again higher units and smaller batch sizes

In [20]:
# Define your new parameter grid
new_param_grid = {
    'units': [128, 180, 220, 250],
    'batch_size': [4,8,16]
}

# Wrap the Keras model using KerasClassifier
new_model = KerasClassifier(build_fn=create_model, optimizer='SGD',units=128, hidden_layers=2, verbose=0)

# Perform grid search
new_grid = GridSearchCV(estimator=new_model, param_grid=new_param_grid, cv=3, scoring=f1_scorer, verbose=2)
new_grid_result = new_grid.fit(X_train, y_train)

# Extract results and convert to DataFrame
new_results_df = pd.DataFrame(new_grid_result.cv_results_)

# Select relevant columns from the results DataFrame
new_relevant_columns = ['param_units', 'param_batch_size', 'mean_test_score']
new_results_df = new_results_df[new_relevant_columns]

# Sort the DataFrame by mean_test_score in descending order
new_results_df.sort_values(by='mean_test_score', ascending=False, inplace=True)

# Reset index
new_results_df.reset_index(drop=True, inplace=True)

# Print the DataFrame
print(new_results_df)

# Get the best parameters
new_best_params = new_grid_result.best_params_
print("Best Parameters:", new_best_params)

# Train the model with the best parameters
new_best_model = create_model(optimizer='SGD', hidden_layers=2, units=new_best_params['units'], batch_size=new_best_params['batch_size'])
new_best_model.fit(X_train, y_train, epochs=20, batch_size= new_best_params['batch_size'], verbose=1)

# Getting predictions
new_y_pred = new_best_model.predict(X_test)
new_y_pred_binary = np.where(new_y_pred > 0.5, 1, 0)

# Calculate precision and recall
new_precision = precision_score(y_test, new_y_pred_binary)
new_recall = recall_score(y_test, new_y_pred_binary)

print(f"Precision on test set: {new_precision:.2f}")
print(f"Recall on test set: {new_recall:.2f}")


Fitting 3 folds for each of 12 candidates, totalling 36 fits
[CV] END ............................batch_size=4, units=128; total time=   5.1s
[CV] END ............................batch_size=4, units=128; total time=   5.4s
[CV] END ............................batch_size=4, units=128; total time=   5.3s
[CV] END ............................batch_size=4, units=180; total time=   5.1s
[CV] END ............................batch_size=4, units=180; total time=   4.9s
[CV] END ............................batch_size=4, units=180; total time=   5.0s
[CV] END ............................batch_size=4, units=220; total time=   5.2s
[CV] END ............................batch_size=4, units=220; total time=   5.1s
[CV] END ............................batch_size=4, units=220; total time=   5.2s
[CV] END ............................batch_size=4, units=250; total time=   5.4s
[CV] END ............................batch_size=4, units=250; total time=   5.6s
[CV] END ............................batch_size=

In [21]:
best_params_2 = new_best_params
best_params_2

{'batch_size': 16, 'units': 250}

In [22]:
grid_summary_2= new_results_df
grid_summary_2

Unnamed: 0,param_units,param_batch_size,mean_test_score
0,250,16,0.820181
1,180,16,0.814218
2,220,16,0.8102
3,128,16,0.808238
4,220,8,0.784369
5,180,8,0.783319
6,128,8,0.777452
7,250,8,0.768291
8,128,4,0.745246
9,220,4,0.739598


we can already fix batch_size = 16 , but units has arrive to the maximum value so we will make a new grid with higher values

In [26]:
# Define your new parameter grid
new_param_grid = {
    'units': [250,280,310],
}

# Wrap the Keras model using KerasClassifier
new_model = KerasClassifier(build_fn=create_model, optimizer='SGD',units=128, hidden_layers=2, verbose=0)

# Perform grid search
new_grid = GridSearchCV(estimator=new_model, param_grid=new_param_grid, cv=3, scoring=f1_scorer, verbose=2)
new_grid_result = new_grid.fit(X_train, y_train)

# Extract results and convert to DataFrame
new_results_df = pd.DataFrame(new_grid_result.cv_results_)

# Select relevant columns from the results DataFrame
new_relevant_columns = [ 'param_units', 'mean_test_score']
new_results_df = new_results_df[new_relevant_columns]

# Sort the DataFrame by mean_test_score in descending order
new_results_df.sort_values(by='mean_test_score', ascending=False, inplace=True)

# Reset index
new_results_df.reset_index(drop=True, inplace=True)

# Print the DataFrame
print(new_results_df)

# Get the best parameters
new_best_params = new_grid_result.best_params_
print("Best Parameters:", new_best_params)

# Train the model with the best parameters
new_best_model = create_model(optimizer='SGD', hidden_layers=2, units=new_best_params['units'], batch_size= 16)
new_best_model.fit(X_train, y_train, epochs=20, batch_size= 16, verbose=1)

# Getting predictions
new_y_pred = new_best_model.predict(X_test)
new_y_pred_binary = np.where(new_y_pred > 0.5, 1, 0)

# Calculate precision and recall
new_precision = precision_score(y_test, new_y_pred_binary)
new_recall = recall_score(y_test, new_y_pred_binary)

print(f"Precision on test set: {new_precision:.2f}")
print(f"Recall on test set: {new_recall:.2f}")


Fitting 3 folds for each of 3 candidates, totalling 9 fits
[CV] END ..........................................units=250; total time=   1.8s
[CV] END ..........................................units=250; total time=   1.8s
[CV] END ..........................................units=250; total time=   1.8s
[CV] END ..........................................units=280; total time=   1.8s
[CV] END ..........................................units=280; total time=   2.0s
[CV] END ..........................................units=280; total time=   1.8s
[CV] END ..........................................units=310; total time=   2.3s
[CV] END ..........................................units=310; total time=   2.4s
[CV] END ..........................................units=310; total time=   2.3s
  param_units  mean_test_score
0         280         0.850210
1         310         0.846807
2         250         0.844207
Best Parameters: {'units': 280}
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
E

In [28]:
best_params_3 =new_best_params
best_params_3

{'units': 280}

In [29]:
grid_summary_3 = new_results_df
grid_summary_3

Unnamed: 0,param_units,mean_test_score
0,280,0.85021
1,310,0.846807
2,250,0.844207


Now we can fix the following parameters:
 - optimizer = SGD
 - hidden layers = 2
 - batch size = 16
 - units = 28
 

Nex grid for testing: 
 - learning_rate': [0.01, 0.1, 0.2],
- 'dropout_rate': [0.1, 0.2, 0.3],
 - 'epochs': [20, 30, 40]

we must create a new create_model function in order to set the learning rate

In [25]:
def create_model_lr(optimizer='SGD', learning_rate=0.1, hidden_layers=2, units=280, dropout_rate=None, epochs=20):
    model = Sequential()
    model.add(Dense(units, activation='relu', input_shape=(X_train.shape[1],)))
    model.add(BatchNormalization())
    
    for _ in range(hidden_layers):
        model.add(Dense(units, activation='relu'))
        model.add(BatchNormalization())
        if dropout_rate is not None:
            model.add(Dropout(dropout_rate))  # Add dropout layer with specified rate
    
    model.add(Dense(1, activation='sigmoid'))

    if optimizer == 'SGD':
        opt = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    elif optimizer == 'Adam':
        opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    elif optimizer == 'RMSprop':
        opt = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)
    else:
        raise ValueError("Unsupported optimizer")
    
    model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])
    
    return model


In [26]:
# Define your new parameter grid
new_param_grid = {
    'learning_rate': [0.01, 0.1, 0.2],
    'dropout_rate': [0.1, 0.2, 0.3],
    'epochs': [20, 30, 40]
}
# Wrap the Keras model using KerasClassifier
new_model = KerasClassifier(build_fn=create_model_lr, optimizer='SGD', hidden_layers=2, units=280,dropout_rate=0.2, learning_rate=0.1, verbose=0)

# Define the F1 scorer
f1_scorer = make_scorer(f1_score)

# Perform grid search
new_grid = GridSearchCV(estimator=new_model, param_grid=new_param_grid, cv=3, scoring=f1_scorer, verbose=2)
new_grid_result = new_grid.fit(X_train, y_train)

# Extract results and convert to DataFrame
new_results_df = pd.DataFrame(new_grid_result.cv_results_)

# Select relevant columns from the results DataFrame
new_relevant_columns = ['param_learning_rate', 'param_dropout_rate', 'param_epochs', 'mean_test_score']
new_results_df = new_results_df[new_relevant_columns]

# Sort the DataFrame by mean_test_score in descending order
new_results_df.sort_values(by='mean_test_score', ascending=False, inplace=True)

# Reset index
new_results_df.reset_index(drop=True, inplace=True)

# Print the DataFrame
print(new_results_df)

# Get the best parameters
new_best_params = new_grid_result.best_params_
print("Best Parameters:", new_best_params)

# Train the model with the best parameters
new_best_model = create_model_lr(optimizer='SGD', hidden_layers=2, units=280, 
                               learning_rate=new_best_params['learning_rate'], 
                               dropout_rate=new_best_params['dropout_rate'],
                               epochs=new_best_params['epochs'])
new_best_model.fit(X_train, y_train, epochs=new_best_params['epochs'], batch_size=16, verbose=1)

# Getting predictions
new_y_pred = new_best_model.predict(X_test)
new_y_pred_binary = (new_y_pred > 0.5).astype(int)

# Calculate precision and recall
new_precision = precision_score(y_test, new_y_pred_binary)
new_recall = recall_score(y_test, new_y_pred_binary)

print(f"Precision on test set: {new_precision:.2f}")
print(f"Recall on test set: {new_recall:.2f}")


Fitting 3 folds for each of 27 candidates, totalling 81 fits
[CV] END ....dropout_rate=0.1, epochs=20, learning_rate=0.01; total time=  14.8s
[CV] END ....dropout_rate=0.1, epochs=20, learning_rate=0.01; total time=  18.3s
[CV] END ....dropout_rate=0.1, epochs=20, learning_rate=0.01; total time=  18.7s
[CV] END .....dropout_rate=0.1, epochs=20, learning_rate=0.1; total time=  18.6s
[CV] END .....dropout_rate=0.1, epochs=20, learning_rate=0.1; total time=  19.1s
[CV] END .....dropout_rate=0.1, epochs=20, learning_rate=0.1; total time=  19.1s
[CV] END .....dropout_rate=0.1, epochs=20, learning_rate=0.2; total time=  20.0s
[CV] END .....dropout_rate=0.1, epochs=20, learning_rate=0.2; total time=  19.8s
[CV] END .....dropout_rate=0.1, epochs=20, learning_rate=0.2; total time=  20.4s
[CV] END ....dropout_rate=0.1, epochs=30, learning_rate=0.01; total time=  29.7s
[CV] END ....dropout_rate=0.1, epochs=30, learning_rate=0.01; total time=  29.8s
[CV] END ....dropout_rate=0.1, epochs=30, learni

In [27]:
best_params_4 =new_best_params
best_params_4

{'dropout_rate': 0.3, 'epochs': 40, 'learning_rate': 0.1}

In [28]:
grid_summary_4 = new_results_df
grid_summary_4

Unnamed: 0,param_learning_rate,param_dropout_rate,param_epochs,mean_test_score
0,0.1,0.3,40,0.947323
1,0.1,0.1,40,0.945208
2,0.2,0.1,40,0.944951
3,0.01,0.1,40,0.944536
4,0.2,0.2,40,0.944026
5,0.1,0.1,30,0.943448
6,0.2,0.1,30,0.943306
7,0.1,0.1,20,0.942507
8,0.2,0.1,20,0.941493
9,0.1,0.2,40,0.940585


we have the learning rate of 0.1 as the best value. But we must retest epochs to see higher values and drop_out rate to test lower values

In [29]:
# Define your new parameter grid
new_param_grid = {
    'dropout_rate': [0.3, 0.4, 0.5],
    'epochs': [40, 50, 60]
}

# Wrap the Keras model using KerasClassifier
new_model = KerasClassifier(build_fn=create_model_lr, optimizer='SGD', hidden_layers=2, units=280, dropout_rate=0.2, learning_rate=0.1, verbose=0)

# Define the F1 scorer
f1_scorer = make_scorer(f1_score)

# Perform grid search
new_grid = GridSearchCV(estimator=new_model, param_grid=new_param_grid, cv=3, scoring=f1_scorer, verbose=2)
new_grid_result = new_grid.fit(X_train, y_train)

# Extract results and convert to DataFrame
new_results_df = pd.DataFrame(new_grid_result.cv_results_)

# Select relevant columns from the results DataFrame
new_relevant_columns = ['param_dropout_rate', 'param_epochs', 'mean_test_score']
new_results_df = new_results_df[new_relevant_columns]

# Sort the DataFrame by mean_test_score in descending order
new_results_df.sort_values(by='mean_test_score', ascending=False, inplace=True)

# Reset index
new_results_df.reset_index(drop=True, inplace=True)

# Print the DataFrame
print(new_results_df)

# Get the best parameters
new_best_params = new_grid_result.best_params_
print("Best Parameters:", new_best_params)

# Train the model with the best parameters
new_best_model = create_model_lr(optimizer='SGD', hidden_layers=2, units=280, 
                               learning_rate=0.1, 
                               dropout_rate=new_best_params['dropout_rate'],
                               epochs=new_best_params['epochs'])
new_best_model.fit(X_train, y_train, epochs=new_best_params['epochs'], batch_size=16, verbose=1)

# Getting predictions
new_y_pred = new_best_model.predict(X_test)
new_y_pred_binary = (new_y_pred > 0.5).astype(int)

# Calculate precision and recall
new_precision = precision_score(y_test, new_y_pred_binary)
new_recall = recall_score(y_test, new_y_pred_binary)

print(f"Precision on test set: {new_precision:.2f}")
print(f"Recall on test set: {new_recall:.2f}")

Fitting 3 folds for each of 9 candidates, totalling 27 fits
[CV] END ........................dropout_rate=0.3, epochs=40; total time=  32.5s
[CV] END ........................dropout_rate=0.3, epochs=40; total time=  31.3s
[CV] END ........................dropout_rate=0.3, epochs=40; total time=  30.6s
[CV] END ........................dropout_rate=0.3, epochs=50; total time=  40.2s
[CV] END ........................dropout_rate=0.3, epochs=50; total time=  39.4s
[CV] END ........................dropout_rate=0.3, epochs=50; total time=  41.5s
[CV] END ........................dropout_rate=0.3, epochs=60; total time=  45.5s
[CV] END ........................dropout_rate=0.3, epochs=60; total time=  47.0s
[CV] END ........................dropout_rate=0.3, epochs=60; total time=  47.5s
[CV] END ........................dropout_rate=0.4, epochs=40; total time=  32.5s
[CV] END ........................dropout_rate=0.4, epochs=40; total time=  33.1s
[CV] END ........................dropout_rate=0.4

In [30]:
best_params_5 =new_best_params
best_params_5

{'dropout_rate': 0.3, 'epochs': 60}

In [31]:
grid_summary_5 = new_results_df
grid_summary_5

Unnamed: 0,param_dropout_rate,param_epochs,mean_test_score
0,0.3,60,0.948891
1,0.3,40,0.943602
2,0.5,60,0.941037
3,0.4,40,0.940062
4,0.3,50,0.939821
5,0.4,60,0.938729
6,0.4,50,0.938447
7,0.5,50,0.932058
8,0.5,40,0.928764


although epochs the higher the better it is not worth it in terms of computing time. If we look at the precision and recall in our testing, training data we have not objectively improved thus, we keep as best parameters combination:
- dropout_rate =0.3
- epochs = 40
- learning_rate = 0.1

### <U>FINAL PARAMETERS:
- dropout_rate =0.3
- epochs = 40
- learning_rate = 0.1
- optimizer = SGD
- hidden layers = 2
- batch size = 16
- units = 28