In [8]:
import os
import joblib
import warnings
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, recall_score, precision_score, f1_score
from sklearn.preprocessing import MinMaxScaler
warnings.filterwarnings("ignore")

# Define function to compute the performance metrics for neural network
def compute_nn_metrics(X, y, smote=False, model_name='model',
                       test_size=0.2, random_state=42, stratify=False):
    """Train an MLP classifier on a hold-out split and report test-set metrics.

    The data is split into train and test parts, the features are min-max
    scaled using statistics from the training part only, and (optionally) the
    training part is oversampled with SMOTE before the network is fitted.

    The metrics use scikit-learn's default binary settings and the probability
    in column 1 of ``predict_proba``, so a binary target is assumed.

    Parameters
    ----------
    X : feature table accepted by ``train_test_split`` / ``MinMaxScaler``.
    y : target labels aligned with ``X``.
    smote : bool, default False
        When True, oversample the training split with ``SMOTE`` (seed 27).
    model_name : str, default 'model'
        Stem of the files written to the current working directory:
        ``{model_name}.pkl`` (fitted classifier) and
        ``{model_name}_scaler.pkl`` (fitted scaler).
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``.
    random_state : int, default 42
        Seed of the train/test split only; the classifier and SMOTE seeds
        stay fixed.
    stratify : bool, default False
        When True, split with ``stratify=y`` so both parts keep the class
        proportions of ``y``. Off by default to preserve the original split.

    Returns
    -------
    tuple
        ``(auc, recall, precision, f1)`` computed on the test split.
    """
    # Hold out the evaluation set; stratification is opt-in so defaults match the original split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size=test_size,
        random_state=random_state,
        stratify=y if stratify else None,
    )

    # Fit the scaler on the training split only, then apply the same mapping to the test split
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    if smote:
        # Oversample after splitting: only training rows are resampled
        sm = SMOTE(random_state=27)
        X_train, y_train = sm.fit_resample(X_train, y_train)

    # Train neural network model
    model = MLPClassifier(activation='tanh', solver='sgd', hidden_layer_sizes=(1000, 10), max_iter=10000, random_state=42)
    model.fit(X_train, y_train)

    # Persist the fitted model (no model selection happens here; this is the single fitted network)
    joblib.dump(model, f'{model_name}.pkl')
    # The network was trained on scaled features, so the fitted scaler is saved
    # next to it; without it the model cannot be applied to raw features later.
    joblib.dump(scaler, f'{model_name}_scaler.pkl')

    # Predict on test set: hard labels for recall/precision/F1, column-1 probability for AUC
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:, 1]

    # Compute metrics
    auc = roc_auc_score(y_test, y_prob)
    recall = recall_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    return auc, recall, precision, f1

def load_data(file_name):
    """Read a comma-separated file from the ``data`` directory that sits beside ``models``.

    The data directory is derived from the current working directory by
    swapping the innermost path component that is exactly ``models`` for
    ``data``. Only that one whole component is changed: other directories whose
    names merely contain the text ``models`` (for example ``models_archive``)
    are left alone. If no component is named ``models``, the file is looked up
    in the current working directory itself.

    Parameters
    ----------
    file_name : str
        Name of the CSV file inside the data directory.

    Returns
    -------
    pandas.DataFrame
        The parsed file contents.
    """
    parts = os.getcwd().split(os.sep)
    # Walk from the innermost directory outwards and swap only the first exact match
    for idx in range(len(parts) - 1, -1, -1):
        if parts[idx] == 'models':
            parts[idx] = 'data'
            break
    curr_file = os.path.join(os.sep.join(parts), file_name)
    return pd.read_csv(curr_file, delimiter=",")

# Read the dataset from the data directory
data = load_data('data.csv')

# Separate the 'Outcome' target column from the feature columns
y_data = data['Outcome']
X_data = data.drop(columns='Outcome')

# Train and evaluate the neural network, first on the plain training split,
# then with SMOTE oversampling of the training split
metrics_data_nn = compute_nn_metrics(X_data, y_data, model_name='neural_network')
metrics_data_smote_nn = compute_nn_metrics(X_data, y_data, smote=True, model_name='neural_network_smote')

# Regroup the two (auc, recall, precision, f1) results by metric and
# assemble them into one comparison table for the neural network
auc_vals, recall_vals, precision_vals, f1_vals = zip(metrics_data_nn, metrics_data_smote_nn)
metrics_table_nn = pd.DataFrame({
    'Dataset': ['data', 'data_smote'],
    'AUC': list(auc_vals),
    'Recall': list(recall_vals),
    'Precision': list(precision_vals),
    'F1': list(f1_vals),
})


In [9]:
# Visualize the metrics table (one row per training variant: plain and SMOTE)
display(metrics_table_nn)

Unnamed: 0,Dataset,AUC,Recall,Precision,F1
0,data,0.8508,0.581395,0.78125,0.666667
1,data_smote,0.848384,0.837209,0.666667,0.742268
