In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams.update(plt.rcParamsDefault)
import os, sys

from scipy.stats import norm, skewnorm
from scipy.stats import gaussian_kde
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.utils import resample
from sklearn.metrics import accuracy_score, classification_report, make_scorer, log_loss, roc_auc_score, brier_score_loss
from sklearn.metrics import mean_squared_error, mean_absolute_error

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [2]:
# Project root. The default is the original author's checkout; set the
# PROB_LEARNING_DIR environment variable to run the notebook from another
# location without editing this cell. An empty value falls back to the default.
proj_dir = (
    os.environ.get('PROB_LEARNING_DIR')
    or 'C:/Users/ady05/Desktop/NU/DANA/NVQI/prob_learning_new/'
)
# The sub-directories below are built by plain string concatenation, so make
# sure the root ends in exactly one forward slash whatever the override is.
proj_dir = proj_dir.rstrip('/\\') + '/'

workspace = proj_dir + 'OtherModels/MC-mrs/'   # this notebook's working folder
util_dir = proj_dir + 'OtherModels/utils/'     # added to sys.path further down
data_dir = proj_dir + 'datasets/'              # variable-group workbooks
proc_dir = proj_dir + 'data processing/'       # comb.xlsx is read from here

In [7]:
# Put util_dir at the front of the module search path before importing the
# helpers below (presumably data_proc.py and plot_measures.py live in that
# folder — confirm against the repository layout).
sys.path.insert(0, util_dir)
from data_proc import data_proc_mrs6
from plot_measures import (
    plot_confusion_matrix,
    plot_roc,
    plot_outcome_prob_relation,
    plot_feature_importance
)

# Data processing

In [4]:
# Variable group passed to data_proc_mrs6 when the model inputs are built.
groupname = 'group preop'

# Read the three Excel workbooks: the combined table from proc_dir and the
# numeric / categorical variable-group sheets from data_dir.
df_comb = pd.read_excel(f'{proc_dir}comb.xlsx')
df_num = pd.read_excel(f'{data_dir}vargroups_numeric_new.xlsx')
df_cat = pd.read_excel(f'{data_dir}vargroups_categorical_new.xlsx')

In [10]:
# Build the feature matrix and target for the selected variable group; the
# helper also returns the numeric and categorical column names.
X_data, y_data, num_names, cat_names = data_proc_mrs6(
    df_comb,
    df_num,
    df_cat,
    groupname,
)

# Displayed as the cell output: shapes of the features and the target.
X_data.shape, y_data.shape

((3588, 49), (3588,))

In [11]:
# If the target exposes .toarray() (i.e. it is a sparse matrix), densify it
# and flatten to 1-D; otherwise leave it untouched.
y_data = y_data.toarray().ravel() if hasattr(y_data, "toarray") else y_data

# Stratified 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    y_data,
    test_size=0.2,
    stratify=y_data,
    random_state=1121218,
)

# Displayed as the cell output: shapes of the four splits.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((2870, 49), (2870,), (718, 49), (718,))

# MC dropout model

In [16]:
# Seed TensorFlow's global random generator before the model is built.
tf.random.set_seed(42)

def build_mc_dropout_model(input_shape, hidden_units=(128, 64), dropout_rate=0.5):
    """Build and compile a dense regression network with dropout after each hidden layer.

    Parameters
    ----------
    input_shape : int
        Number of input features (the model expects inputs of shape
        ``(batch, input_shape)``).
    hidden_units : sequence of int, optional
        Width of each hidden ``Dense`` layer, in order. The default
        ``(128, 64)`` reproduces the original architecture.
    dropout_rate : float, optional
        Rate used by the ``Dropout`` layer that follows every hidden layer.
        Defaults to 0.5, as in the original model.

    Returns
    -------
    Sequential
        Model compiled with the Adam optimizer, MSE loss and an MAE metric,
        ending in a single linear output unit.
    """
    # Declare the input with an explicit Input object instead of passing
    # `input_shape=` to the first Dense layer; the latter is what triggers the
    # Keras warning emitted from the layer constructor.
    layers = [tf.keras.Input(shape=(input_shape,))]
    for units in hidden_units:
        layers.append(Dense(units, activation='relu'))
        layers.append(Dropout(dropout_rate))
    layers.append(Dense(1))  # single linear output for regression

    model = Sequential(layers)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

# Train the network on the training split; Keras holds out 20% of it for the
# validation metrics printed per epoch.
input_shape = X_train.shape[1]
mc_dropout_model = build_mc_dropout_model(input_shape)
mc_dropout_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

n_samples = 100  # number of stochastic forward passes over the test set
y_preds_mc = []

for _ in range(n_samples):
    # Calling the model with training=True keeps the Dropout layers active,
    # so every pass yields a different prediction for the same inputs.
    y_pred = mc_dropout_model(X_test, training=True)
    # Convert each draw to a NumPy array right away so the stacking below is a
    # plain array operation rather than a conversion of a list of tensors.
    y_preds_mc.append(np.asarray(y_pred))

# Shape: (n_samples, n_test_rows, 1) — one slice per forward pass.
y_preds_mc = np.stack(y_preds_mc)
y_mean_mc = y_preds_mc.mean(axis=0).ravel()  # per-row predictive mean
y_std_mc = y_preds_mc.std(axis=0).ravel()    # per-row spread across passes

# Point-prediction errors of the MC mean against the held-out targets.
mse_mc = mean_squared_error(y_test, y_mean_mc)
mae_mc = mean_absolute_error(y_test, y_mean_mc)

print(f"Monte Carlo Dropout Mean Squared Error (MSE): {mse_mc:.4f}")
print(f"Monte Carlo Dropout Mean Absolute Error (MAE): {mae_mc:.4f}")

Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 7.9332 - mae: 2.2526 - val_loss: 3.7886 - val_mae: 1.5911
Epoch 2/20
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 4.6393 - mae: 1.7415 - val_loss: 3.4465 - val_mae: 1.5089
Epoch 3/20
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 4.2942 - mae: 1.6699 - val_loss: 3.5079 - val_mae: 1.5165
Epoch 4/20
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 4.0052 - mae: 1.6272 - val_loss: 3.3747 - val_mae: 1.4849
Epoch 5/20
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 3.7005 - mae: 1.5586 - val_loss: 3.2235 - val_mae: 1.4562
Epoch 6/20
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 3.8456 - mae: 1.6007 - val_loss: 3.3763 - val_mae: 1.4898
Epoch 7/20
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 3.5222 - mae: 1.517

# Measures

In [17]:
def root_mean_squared_error(y_pred, y_test):
    """Return the square root of the mean squared error between two arrays.

    The arguments are forwarded to ``mean_squared_error`` in the order
    ``(y_pred, y_test)``; because the squared error is symmetric, the order
    does not affect the result.
    """
    mse = mean_squared_error(y_pred, y_test)
    return np.sqrt(mse)
def normal_nll(loc, scale, y_test):
    """Mean negative log-likelihood of ``y_test`` under Normal(loc, scale).

    Parameters
    ----------
    loc : float or array-like
        Predicted mean(s). A scalar applies to every target; an array must
        hold one value per target.
    scale : float or array-like
        Predicted standard deviation(s), same conventions as ``loc``.
    y_test : array-like
        Observed targets. Lists, pandas Series and NumPy arrays of any shape
        are accepted.

    Returns
    -------
    float
        ``-mean(log N(y_i | loc_i, scale_i))`` over all targets.
    """
    # np.asarray(...).ravel() works for lists and Series as well as ndarrays,
    # unlike calling .flatten() on the input directly.
    targets = np.asarray(y_test).ravel()
    # Flatten the parameters too: a column-shaped (n, 1) loc/scale would
    # otherwise broadcast against the (n,) targets into an (n, n) grid and
    # average over every mismatched pair.
    loc = np.asarray(loc).ravel()
    scale = np.asarray(scale).ravel()
    return -norm.logpdf(targets, loc=loc, scale=scale).mean()