In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve, accuracy_score, auc, average_precision_score, precision_recall_curve
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import recall_score
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from math import sqrt

In [2]:

pd.options.display.max_rows = 20
pd.options.display.max_columns = 50

In [3]:
data = pd.read_csv(r'procol_train.csv', index_col='CASEID')

In [4]:
y = data['URETER']
X = data.drop(['URETER'], axis=1)
X_train, X_valid, y_train, y_valid = train_test_split(X, y, train_size=0.8, test_size=0.2, random_state=2)

test = pd.read_csv(r'/home/kchen/Documents/ureterinjury/procol_test.csv', index_col='CASEID')
y_test = test['URETER']
X_test = test.drop(['URETER','OPTIME'], axis=1)

In [5]:
input_shape = [X_train.shape[1]]

In [6]:
model4 = keras.models.Sequential()
model4.add(keras.layers.Flatten(input_shape=input_shape))
model4.add(keras.layers.BatchNormalization())
for _ in range(1):
    model4.add(keras.layers.Dense(200))
    model4.add(keras.layers.BatchNormalization())
    model4.add(keras.layers.Dropout(0.8))
    model4.add(keras.layers.Activation("relu"))
model4.add(keras.layers.Dense(1, activation="sigmoid"))

opt = keras.optimizers.Adam(learning_rate=3e-3)

metrics = [keras.metrics.Recall(name='Sensitivity'), keras.metrics.TrueNegatives(name='tn'), keras.metrics.AUC(name='auc'), keras.metrics.AUC(name='prc', curve='PR')]

model4.compile(
    optimizer=opt,
    loss=keras.losses.BinaryCrossentropy(from_logits=False),
    metrics=metrics,)

early_stopping = keras.callbacks.EarlyStopping(
    patience=25,
    min_delta=1e-6,
    restore_best_weights=True,)

history = model4.fit(
    X_train, y_train,
    validation_data=(X_valid, y_valid),
    batch_size=512,
    epochs=500,
    callbacks=[early_stopping],)

2022-01-18 09:57:52.906715: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-18 09:57:52.912271: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-18 09:57:52.912705: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-18 09:57:52.913500: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500


In [7]:
ann_preds = model4.predict(X_valid)
ann_preds = np.squeeze(ann_preds)

In [8]:
%store ann_preds

Stored 'ann_preds' (ndarray)


In [9]:
test = pd.read_csv(r'procol_test.csv', index_col='CASEID')
y_test = test['URETER']
X_test = test.drop(['URETER'], axis=1)
model4.evaluate(X_test, y_test, workers=-1)



[0.034024279564619064, 0.0, 50949.0, 0.7039923071861267, 0.017678499221801758]

In [10]:
ann_fpr, ann_tpr, _ = roc_curve(y_test, ann_preds)
%store ann_fpr
%store ann_tpr


ValueError: Found input variables with inconsistent numbers of samples: [51250, 44781]

In [None]:
ann_prec, ann_rec, _ = precision_recall_curve(y_test, ann_preds)
%store ann_prec
%store ann_rec

Stored 'ann_prec' (ndarray)
Stored 'ann_rec' (ndarray)


In [None]:
roc_auc_score(y_test, ann_preds)

0.7434453214206977

In [None]:
def roc_auc_ci(y_true, y_score, positive=1):
    AUC = roc_auc_score(y_true, y_score)
    N1 = sum(y_true == positive)
    N2 = sum(y_true != positive)
    Q1 = AUC / (2 - AUC)
    Q2 = 2*AUC**2 / (1 + AUC)
    SE_AUC = sqrt((AUC*(1 - AUC) + (N1 - 1)*(Q1 - AUC**2) + (N2 - 1)*(Q2 - AUC**2)) / (N1*N2))
    lower = AUC - 1.96*SE_AUC
    upper = AUC + 1.96*SE_AUC
    if lower < 0:
        lower = 0
    if upper > 1:
        upper = 1
    return (lower, AUC, upper)
roc_auc_ci(y_test, ann_preds)

(0.7111808375949549, 0.7434453214206977, 0.7757098052464406)

In [None]:
def roc_prc_ci(y_true, y_score, positive=1):
    AUC = average_precision_score(y_true, y_score)
    N1 = sum(y_true == positive)
    N2 = sum(y_true != positive)
    Q1 = AUC / (2 - AUC)
    Q2 = 2*AUC**2 / (1 + AUC)
    SE_AUC = sqrt((AUC*(1 - AUC) + (N1 - 1)*(Q1 - AUC**2) + (N2 - 1)*(Q2 - AUC**2)) / (N1*N2))
    lower = AUC - 1.96*SE_AUC
    upper = AUC + 1.96*SE_AUC
    if lower < 0:
        lower = 0
    if upper > 1:
        upper = 1
    return (lower, AUC, upper)


In [None]:
roc_prc_ci(y_test, ann_preds)

(0.022501026957016557, 0.025466243922041464, 0.02843146088706637)