In [1]:
import pandas as pd
import seaborn as sns
import sklearn
from tensorflow import keras

In [2]:
%pylab inline 
import seaborn as sns
sns.set()  # Apply seaborn's default theme to subsequent matplotlib plots
from IPython.display import SVG

Populating the interactive namespace from numpy and matplotlib


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, recall_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, GlobalAveragePooling2D, InputLayer, Input, Dropout, BatchNormalization

In [6]:
from tensorflow.keras.utils import to_categorical

In [7]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [8]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, LambdaCallback, LearningRateScheduler

In [9]:
from tensorflow.keras.models import load_model, Model

In [10]:
from tensorflow.keras import regularizers

In [11]:
import numpy as np
np.set_printoptions(suppress=True)  # Print small floats in fixed-point form instead of scientific notation where possible

In [12]:
# Widen pandas' display limits: show up to 200 rows and every column.
# (Unbounded columns can be slow to render for very wide DataFrames.)
pd.options.display.max_rows = 200
pd.options.display.max_columns = None

(Class below comes from: https://stackoverflow.com/questions/67271590/issue-with-custom-metric-auc-callback-for-keras)

In [13]:
# Custom sklearn-based AUROC calculation class for use with the FULL validation
#   set (not just mini-batches) at the end of each training epoch

class ROAUCMetrics(keras.callbacks.Callback):
    """Keras callback that reports ROC AUC on a held-out set after every epoch.

    The score is computed with scikit-learn's ``roc_auc_score`` on predictions
    for the complete ``val_data`` arrays, not on per-batch estimates.

    Args:
        val_data: Indexable pair ``(inputs, targets)``. Element 0 is passed to
            ``self.model.predict`` and element 1 is used as ``y_true``.

    Attributes:
        valid_x: The inputs taken from ``val_data[0]``.
        valid_y: The targets taken from ``val_data[1]``.
        val_aucs: One AUC value per finished epoch; emptied again whenever
            ``on_train_begin`` runs.
    """

    def __init__(self, val_data):
        super().__init__()
        self.valid_x = val_data[0]
        self.valid_y = val_data[1]
        # Defined up front so the attribute exists even before training starts.
        self.val_aucs = []

    def on_train_begin(self, logs=None):
        """Start the training run with an empty score history."""
        self.val_aucs = []

    def on_epoch_end(self, epoch, logs=None):
        """Predict on the stored inputs, print the micro-averaged AUC and record it."""
        pred = self.model.predict(self.valid_x)
        val_auc = roc_auc_score(self.valid_y, pred, average='micro')
        # The padded `end` string is kept exactly as in the original output format.
        print('\nval-roc-auc: %s' % (str(round(val_auc,4))),end=100*' '+'\n\n')
        self.val_aucs.append(val_auc)

# 1) Import the full validation dataset

In [15]:
# Read the saved validation arrays from disk and show their dimensions.
X_val_full = np.load(file='data2/X_val.dat')
y_val_full = np.load(file='data2/y_val.dat')

print(X_val_full.shape, y_val_full.shape, sep='\n')

(8029, 224, 273, 1)
(8029, 4)


In [16]:
# Cast the target array to float64 (astype returns a new array).
y_val_full = y_val_full.astype(np.float64)

# 2) Try the full dataset on the baby models

## 2a) VGG16 + 3 Dense Layers (128, 64, 1, dropout) w/ trainable layer_5 (Target 0: 'Atelectasis')
#### Dropout = 0.30, l1_reg = 0.00001, l2_reg = 0.0001

In [17]:
# Restore the saved checkpoint and predict on the whole validation array.
# NOTE: y_pred_val_full is reassigned by every model section in this notebook,
# so this cell must be the most recent predict call before the AUC cells below.
model_0 = load_model(filepath='models_baby2/model_10.13-0.6911.hdf5')
y_pred_val_full = model_0.predict(x=X_val_full)

2022-01-21 09:48:01.153652: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-01-21 09:48:02.404639: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)


In [33]:
# Display the configuration of the optimizer attached to model_0.
model_0.optimizer.get_config()

{'name': 'Adam',
 'learning_rate': 0.000125,
 'decay': 0.0,
 'beta_1': 0.9,
 'beta_2': 0.999,
 'epsilon': 1e-07,
 'amsgrad': False}

### Baby Dataset Results:

#### PRIMARY TARGET (0):

#### Validation AUC: 0.6911

#### Training AUC: 0.7066

### Full Validation Set Results (on Baby-Trained Model):

In [18]:
# AUC of the current y_pred_val_full against label column 0 of y_val_full.
print(f'PRIMARY TARGET ({0}):')
print(f'Full Validation Set AUC: {round(roc_auc_score(y_val_full[:, 0], y_pred_val_full), 4)}')

PRIMARY TARGET (0):
Full Validation Set AUC: 0.641


In [19]:
# AUC of the current y_pred_val_full against label column 1 of y_val_full.
print(f'SECONDARY TARGET ({1}):')
print(f'Full Validation Set AUC: {round(roc_auc_score(y_val_full[:, 1], y_pred_val_full), 4)}')

SECONDARY TARGET (1):
Full Validation Set AUC: 0.547


In [20]:
# AUC of the current y_pred_val_full against label column 2 of y_val_full.
print(f'SECONDARY TARGET ({2}):')
print(f'Full Validation Set AUC: {round(roc_auc_score(y_val_full[:, 2], y_pred_val_full), 4)}')

SECONDARY TARGET (2):
Full Validation Set AUC: 0.63


In [21]:
# AUC of the current y_pred_val_full against label column 3 of y_val_full.
print(f'SECONDARY TARGET ({3}):')
print(f'Full Validation Set AUC: {round(roc_auc_score(y_val_full[:, 3], y_pred_val_full), 4)}')

SECONDARY TARGET (3):
Full Validation Set AUC: 0.6959


## 2b) VGG16 + 3 Dense Layers (128, 64, 1, dropout) w/ trainable layer_5 (Target 1: 'Cardiomegaly')
#### Dropout = 0.35, l1_reg = 0.00001, l2_reg = 0.0001

In [40]:
# Restore the saved checkpoint and predict on the whole validation array.
# NOTE: y_pred_val_full is reassigned by every model section in this notebook,
# so this cell must be the most recent predict call before the AUC cells below.
model_1 = load_model(filepath='models_baby/model_10.07-0.6224.hdf5')
y_pred_val_full = model_1.predict(x=X_val_full)

In [41]:
# Display the configuration of the optimizer attached to model_1.
model_1.optimizer.get_config()

{'name': 'Adam',
 'learning_rate': 0.00025,
 'decay': 0.0,
 'beta_1': 0.9,
 'beta_2': 0.999,
 'epsilon': 1e-07,
 'amsgrad': False}

### Baby Dataset Results:

#### PRIMARY TARGET (1):
#### Validation AUC: 0.6224
#### Training AUC: 0.7687

### Full Validation Set Results (on Baby-Trained Model):

In [42]:
# AUC of the current y_pred_val_full against label column 1 of y_val_full.
print(f'PRIMARY TARGET ({1}):')
print(f'Full Validation Set AUC: {round(roc_auc_score(y_val_full[:, 1], y_pred_val_full), 4)}')

PRIMARY TARGET (1):
Full Validation Set AUC: 0.6702


In [43]:
# AUC of the current y_pred_val_full against label column 0 of y_val_full.
print(f'SECONDARY TARGET ({0}):')
print(f'Full Validation Set AUC: {round(roc_auc_score(y_val_full[:, 0], y_pred_val_full), 4)}')

SECONDARY TARGET (0):
Full Validation Set AUC: 0.581


In [44]:
# AUC of the current y_pred_val_full against label column 2 of y_val_full.
print(f'SECONDARY TARGET ({2}):')
print(f'Full Validation Set AUC: {round(roc_auc_score(y_val_full[:, 2], y_pred_val_full), 4)}')

SECONDARY TARGET (2):
Full Validation Set AUC: 0.6266


In [45]:
# AUC of the current y_pred_val_full against label column 3 of y_val_full.
print(f'SECONDARY TARGET ({3}):')
print(f'Full Validation Set AUC: {round(roc_auc_score(y_val_full[:, 3], y_pred_val_full), 4)}')

SECONDARY TARGET (3):
Full Validation Set AUC: 0.6115


## 2c) VGG16 + 3 Dense Layers (128, 64, 1, dropout) w/ trainable layer_5 (Target 2: 'Edema')
#### Dropout = 0.35, l1_reg = 0.00025, l2_reg = 0.0025

In [26]:
# Restore the saved checkpoint and predict on the whole validation array.
# NOTE: y_pred_val_full is reassigned by every model section in this notebook,
# so this cell must be the most recent predict call before the AUC cells below.
model_2 = load_model(filepath='models_baby2/model_11.04-0.7043.hdf5')
y_pred_val_full = model_2.predict(x=X_val_full)

In [31]:
# Display the configuration of the optimizer attached to model_2.
model_2.optimizer.get_config()

{'name': 'Adam',
 'learning_rate': 0.00025,
 'decay': 0.0,
 'beta_1': 0.9,
 'beta_2': 0.999,
 'epsilon': 1e-07,
 'amsgrad': False}

### Baby Dataset Results:
#### PRIMARY TARGET (2):
#### Validation AUC: 0.7043
#### Training AUC: 0.7376

### Full Validation Set Results (on Baby-Trained Model):

In [27]:
# AUC of the current y_pred_val_full against label column 2 of y_val_full.
print(f'PRIMARY TARGET ({2}):')
print(f'Full Validation Set AUC: {round(roc_auc_score(y_val_full[:, 2], y_pred_val_full), 4)}')

PRIMARY TARGET (2):
Full Validation Set AUC: 0.6749


In [28]:
# AUC of the current y_pred_val_full against label column 0 of y_val_full.
print(f'SECONDARY TARGET ({0}):')
print(f'Full Validation Set AUC: {round(roc_auc_score(y_val_full[:, 0], y_pred_val_full), 4)}')

SECONDARY TARGET (0):
Full Validation Set AUC: 0.6022


In [29]:
# AUC of the current y_pred_val_full against label column 1 of y_val_full.
print(f'SECONDARY TARGET ({1}):')
print(f'Full Validation Set AUC: {round(roc_auc_score(y_val_full[:, 1], y_pred_val_full), 4)}')

SECONDARY TARGET (1):
Full Validation Set AUC: 0.5761


In [30]:
# AUC of the current y_pred_val_full against label column 3 of y_val_full.
print(f'SECONDARY TARGET ({3}):')
print(f'Full Validation Set AUC: {round(roc_auc_score(y_val_full[:, 3], y_pred_val_full), 4)}')

SECONDARY TARGET (3):
Full Validation Set AUC: 0.6554


## 2d) VGG16 + 3 Dense Layers (128, 64, 1, dropout) w/ trainable layer_5 (Target 3: 'Pleural Effusion')
#### Dropout = 0.40, l1_reg = 0.00001, l2_reg = 0.0001

In [34]:
# Restore the saved checkpoint and predict on the whole validation array.
# NOTE: y_pred_val_full is reassigned by every model section in this notebook,
# so this cell must be the most recent predict call before the AUC cells below.
model_3 = load_model(filepath='models_baby3/model_6.08-0.7715.hdf5')
y_pred_val_full = model_3.predict(x=X_val_full)

In [35]:
# Display the configuration of the optimizer attached to model_3.
model_3.optimizer.get_config()

{'name': 'Adam',
 'learning_rate': 0.00025,
 'decay': 0.0,
 'beta_1': 0.9,
 'beta_2': 0.999,
 'epsilon': 1e-07,
 'amsgrad': False}

### Baby Dataset Results:
#### PRIMARY TARGET (3):
#### Validation AUC: 0.7715
#### Training AUC: 0.8066

### Full Validation Set Results (on Baby-Trained Model):

In [36]:
# AUC of the current y_pred_val_full against label column 3 of y_val_full.
print(f'PRIMARY TARGET ({3}):')
print(f'Full Validation Set AUC: {round(roc_auc_score(y_val_full[:, 3], y_pred_val_full), 4)}')

PRIMARY TARGET (3):
Full Validation Set AUC: 0.7611


In [37]:
# AUC of the current y_pred_val_full against label column 0 of y_val_full.
print(f'SECONDARY TARGET ({0}):')
print(f'Full Validation Set AUC: {round(roc_auc_score(y_val_full[:, 0], y_pred_val_full), 4)}')

SECONDARY TARGET (0):
Full Validation Set AUC: 0.6192


In [38]:
# AUC of the current y_pred_val_full against label column 1 of y_val_full.
print(f'SECONDARY TARGET ({1}):')
print(f'Full Validation Set AUC: {round(roc_auc_score(y_val_full[:, 1], y_pred_val_full), 4)}')

SECONDARY TARGET (1):
Full Validation Set AUC: 0.5661


In [39]:
# AUC of the current y_pred_val_full against label column 2 of y_val_full.
print(f'SECONDARY TARGET ({2}):')
print(f'Full Validation Set AUC: {round(roc_auc_score(y_val_full[:, 2], y_pred_val_full), 4)}')

SECONDARY TARGET (2):
Full Validation Set AUC: 0.6409
