# Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "4,5,6,7"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
import datetime
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import img_to_array, load_img
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, GlobalAveragePooling2D, Dropout, Flatten, Dense
from tensorflow.keras.layers import BatchNormalization
from sklearn.metrics import confusion_matrix, roc_curve
from tensorflow.keras.metrics import AUC
from sklearn.metrics import auc
from sklearn.metrics import precision_recall_curve
%load_ext tensorboard
import keras_tuner as kt
from keras_tuner import HyperParameters, Tuner
from keras_tuner import Objective
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import precision_recall_curve, average_precision_score


Using TensorFlow backend


In [None]:
# !rm -rf ./logs/

## Mirrored Strategy

In [2]:
# Define the MirroredStrategy
mirrored_strategy = tf.distribute.MirroredStrategy()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')


# Loading Dataset

In [3]:
source_path = '.'
train_directory = os.path.join(source_path, 'CheXpert-v1.0/train')
validation_directory = os.path.join(source_path, 'CheXpert-v1.0/valid')
test_directory = os.path.join(source_path, 'CheXpert-v1.0/test')

print(f"There are {len(os.listdir(train_directory))}")
print(f"There are {len(os.listdir(validation_directory))}")
print(f"There are {len(os.listdir(test_directory))}")

There are 64540
There are 200
There are 500


In [4]:
# Load train and valid labels
train_df = pd.read_csv(os.path.join(source_path, 'CheXpert-v1.0/train.csv'))
valid_df = pd.read_csv(os.path.join(source_path, 'CheXpert-v1.0/valid.csv'))
test_df = pd.read_csv(os.path.join(source_path, 'CheXpert-v1.0/test.csv'))

# Load training and validation image paths
train_image_paths = [source_path + '/' + path for path in train_df['Path']]
valid_image_paths = [source_path + '/' + path for path in valid_df['Path']]
test_image_paths = [source_path + '/' + path for path in test_df['Path']]

# Create TensorFlow tensors from image paths
train_image_paths = tf.constant(train_image_paths)
valid_image_paths = tf.constant(valid_image_paths)
test_image_paths = tf.constant(test_image_paths)

# Creating DataFrames

In [5]:
train_df = train_df[['Atelectasis','Cardiomegaly','Consolidation','Edema','Pleural Effusion',
                     'No Finding','Enlarged Cardiomediastinum', 'Lung Opacity','Lung Lesion','Pneumonia',
                     'Pneumothorax', 'Pleural Other', 'Fracture','Support Devices']]
# print(train_df.head())     # printing first five rows of the file
# print(train_df.columns)

In [6]:
valid_df = valid_df[['Atelectasis','Cardiomegaly','Consolidation','Edema','Pleural Effusion',
                     'No Finding','Enlarged Cardiomediastinum', 'Lung Opacity','Lung Lesion','Pneumonia',
                     'Pneumothorax', 'Pleural Other', 'Fracture','Support Devices']]
# print(valid_df.head())     # printing first five rows of the file
# print(valid_df.columns)

In [7]:
test_df = test_df[['Atelectasis','Cardiomegaly','Consolidation','Edema','Pleural Effusion',
                     'No Finding','Enlarged Cardiomediastinum', 'Lung Opacity','Lung Lesion','Pneumonia',
                     'Pneumothorax', 'Pleural Other', 'Fracture','Support Devices']]

# Train and Valid Labels

In [8]:
train_df_UOnes = train_df.replace(-1,1).fillna(0)
print(len(train_df_UOnes))
train_labels = np.array(train_df_UOnes)

223414


In [9]:
valid_df_UOnes = valid_df.fillna(0)
print(len(valid_df_UOnes))
valid_labels = np.array(valid_df_UOnes)

234


In [10]:
# %%
test_df_UOnes = test_df.fillna(0)
print(len(test_df_UOnes))
test_labels = np.array(test_df_UOnes)

668


# Image Preprocessing

In [11]:
from tqdm import tqdm
from keras.preprocessing import image

#training images preprocessing
SIZE = 320

# Define a custom preprocessing function
def preprocess_image(image_path, label):
    # Read the image file
    image = tf.io.read_file(image_path)
    # Decode the image from bytes to a tensor
    image = tf.image.decode_jpeg(image, channels=3)
#     # Resize the image to a fixed size
#     image = tf.image.resize(image, [SIZE, SIZE])
    # Normalize pixel values to be in the range [0, 1]
    image = tf.cast(image, tf.float32) / 255.0
    return image, label


# Prepare the data pipeline by setting batch size & buffer size using tf.data 

In [12]:
# Create TensorFlow datasets
train_ds = tf.data.Dataset.from_tensor_slices((train_image_paths, train_labels))
valid_ds = tf.data.Dataset.from_tensor_slices((valid_image_paths, valid_labels))
test_ds = tf.data.Dataset.from_tensor_slices((test_image_paths, test_labels))

In [13]:
batch_size = 16
AUTOTUNE = tf.data.AUTOTUNE

# Apply preprocessing function to the datasets
train_ds = train_ds.map(preprocess_image, num_parallel_calls=AUTOTUNE)
valid_ds = valid_ds.map(preprocess_image, num_parallel_calls=AUTOTUNE)
test_ds = test_ds.map(preprocess_image, num_parallel_calls=AUTOTUNE)

# Visualize Sample Image

In [14]:
# # Plot a sample of 10 original images
# fig, axes = plt.subplots(1, 10, figsize=(16, 15))  # Adjust the figsize as needed
# axes = axes.flatten()

# for i, (image, label) in enumerate(train_ds.take(10)):
#     ax = axes[i]
#     ax.imshow(image.numpy())  # Select the first image from the batch
#     ax.set_axis_off()

# plt.tight_layout()
# plt.show()

# Augementation

In [15]:
import tensorflow_datasets as tfds
from tensorflow.keras import layers

data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.2),
    layers.RandomZoom(
        height_factor=(-0.05, -0.15),
        width_factor=(-0.05, -0.15)),
    layers.RandomTranslation(height_factor=0.2, width_factor=0.2),
])

  from .autonotebook import tqdm as notebook_tqdm


In [16]:
def prepare(ds, shuffle=False, augment=False):
    if shuffle:
        ds = ds.shuffle(5000)

    ds = ds.batch(batch_size)
    ds = ds.cache()

    if augment:
        ds = ds.map(lambda x, y: (data_augmentation(x, training=True), y),
                    num_parallel_calls=AUTOTUNE)

    return ds.prefetch(buffer_size=AUTOTUNE)

In [17]:
train_ds = prepare(train_ds, shuffle=True, augment=True)
valid_ds = prepare(valid_ds)
test_ds = prepare(test_ds)

# Visualize Augmented Images

In [18]:
# import matplotlib.pyplot as plt

# # Define a function to plot sample images
# def plot_sample_images(dataset, num_samples=10):
#     # Create an iterator for the dataset
#     iterator = iter(dataset)

#     # Get the next batch of images and labels
#     sample_images, sample_labels = next(iterator)

#     # Plot the sample images
#     fig, axes = plt.subplots(1, num_samples, figsize=(16, 15))
#     axes = axes.flatten()

#     for i in range(num_samples):
#         img = sample_images[i]
#         ax = axes[i]
#         ax.imshow(img.numpy())  # Convert TensorFlow tensor to NumPy array for plotting
#         ax.set_axis_off()

#     plt.tight_layout()
#     plt.show()

# # Visualize sample images from the training dataset
# plot_sample_images(train_ds, num_samples=10)


# Hyperparameter Tuning

In [19]:
def create_model(hp):
    with mirrored_strategy.scope():
        model = tf.keras.models.Sequential()
        pre_trained_model = tf.keras.applications.densenet.DenseNet121(
            include_top=False,
            weights='imagenet',
            input_shape=(320, 320, 3)
        )

        model.add(pre_trained_model)
        model.add(GlobalAveragePooling2D(input_shape=(1024, 1, 1)))

        # Use hyperparameters to define the units for the first Dense layer
        dense1_units = hp.Int('dense_1_units', min_value=512, max_value=3072, step=512)
        model.add(Dense(dense1_units, activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(rate=hp.Float('dropout_1', min_value=0.0, max_value=0.5, step=0.1)))

        # Use hyperparameters to define the units for the second Dense layer
        dense2_units = hp.Int('dense_2_units', min_value=256, max_value=1536, step=256)
        model.add(Dense(dense2_units, activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(rate=hp.Float('dropout_2', min_value=0.0, max_value=0.5, step=0.1)))

        # The third Dense layer is the output layer with fixed units
        model.add(tf.keras.layers.Dense(units=14, activation='sigmoid'))

        # Define hyperparameters for the learning rate
        learning_rate = hp.Float('learning_rate', min_value=1e-5, max_value=1e-3, sampling='log')
        
        model.compile(optimizer=Adam(learning_rate=learning_rate),
              loss='binary_crossentropy',
              metrics=['binary_accuracy', tf.keras.metrics.AUC(multi_label=True, num_labels=14)])
        
        return model


In [20]:
tuner = kt.Hyperband(
    create_model,
    objective=Objective('val_auc', direction ="max"),
    max_epochs=4,
    factor=5,
    hyperband_iterations=1,  # number of times to iterate over the hyperband algorithm
    directory='kt_hyperband',
    project_name='uone_tuning'
)

INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Redu

In [21]:
# Create a callback to stop training early after reaching a certain value for the validation loss.
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2)

In [22]:
# Start the hyperparameter tuning process
tuner.search(train_ds, epochs=4, validation_data=(test_ds), callbacks=[stop_early], verbose = 2)

Trial 2 Complete [01h 43m 14s]
val_auc: 0.7597945332527161

Best val_auc So Far: 0.7597945332527161
Total elapsed time: 03h 23m 24s


In [24]:
best_hps = tuner.get_best_hyperparameters()[0]

# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The hyperparameter search is complete. Here are the optimal values:
Dense1_units: {best_hps.get('dense_1_units')}
Dense2_units: {best_hps.get('dense_2_units')}
Dropout1: {best_hps.get('dropout_1')}
Dropout2: {best_hps.get('dropout_2')}
Learning rate: {best_hps.get('learning_rate')}
""")


The hyperparameter search is complete. Here are the optimal values:
Dense1_units: 3072
Dense2_units: 256
Dropout1: 0.30000000000000004
Dropout2: 0.4
Learning rate: 2.1149313641035656e-05



In [25]:
# Build the model with the optimal hyperparameters and train it on the data for 50 epochs
# Find the optimal number of epochs to train the model with the hyperparameters obtained from the search.
cnn = tuner.hypermodel.build(best_hps)
history = cnn.fit(train_ds, epochs=10, validation_data=(valid_ds), verbose = 2)

Epoch 1/10
INFO:tensorflow:Collective all_reduce tensors: 372 all_reduces, num_devices = 4, group_size = 4, implementation = CommunicationImplementation.NCCL, num_packs = 1
INFO:tensorflow:Collective all_reduce tensors: 372 all_reduces, num_devices = 4, group_size = 4, implementation = CommunicationImplementation.NCCL, num_packs = 1
13964/13964 - 1622s - loss: 0.6175 - binary_accuracy: 0.6916 - auc_1: 0.5544 - val_loss: 0.4114 - val_binary_accuracy: 0.8184 - val_auc_1: 0.6158 - 1622s/epoch - 116ms/step
Epoch 2/10
13964/13964 - 1439s - loss: 0.4382 - binary_accuracy: 0.8075 - auc_1: 0.6092 - val_loss: 0.3887 - val_binary_accuracy: 0.8358 - val_auc_1: 0.6516 - 1439s/epoch - 103ms/step
Epoch 3/10
13964/13964 - 1443s - loss: 0.4164 - binary_accuracy: 0.8175 - auc_1: 0.6418 - val_loss: 0.3898 - val_binary_accuracy: 0.8413 - val_auc_1: 0.7214 - 1443s/epoch - 103ms/step
Epoch 4/10
13964/13964 - 1442s - loss: 0.4027 - binary_accuracy: 0.8241 - auc_1: 0.6647 - val_loss: 0.3965 - val_binary_accu

In [27]:
val_auc_per_epoch = history.history['val_auc_1']
best_epoch = val_auc_per_epoch.index(max(val_auc_per_epoch)) + 1
print('Best epoch: %d' % (best_epoch,))

Best epoch: 3


# Build the Model

In [None]:
# from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, BatchNormalization, Dropout
# from tensorflow.keras.applications.densenet import DenseNet121
# import tensorflow as tf

# def create_model():
#     model = tf.keras.models.Sequential()
#     pre_trained_model = tf.keras.applications.densenet.DenseNet121(
#         include_top=False,
#         weights='imagenet',
#         input_shape=(320, 320, 3)
#     )

# #     for layer in pre_trained_model.layers:
# #         layer.trainable = False

#     model.add(pre_trained_model)
#     model.add(GlobalAveragePooling2D(input_shape=(1024, 1, 1)))
#     model.add(Dense(2048, activation='relu'))
#     model.add(BatchNormalization())
#     model.add(Dropout(0.2))
#     model.add(Dense(512, activation='relu'))
#     model.add(BatchNormalization())
#     model.add(Dropout(0.2))
#     model.add(tf.keras.layers.Dense(units=14, activation='sigmoid'))
    
# #     model.add(tf.keras.layers.Flatten())
# #     model.add(tf.keras.layers.Dense(units = 512, activation = 'relu'))
# #     model.add(tf.keras.layers.Dense(units = 5, activation = 'sigmoid'))

#     model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999),
#                   loss='binary_crossentropy',
#                   metrics=['binary_accuracy', tf.keras.metrics.AUC(multi_label=True, num_labels=14)])

#     return model

# Train the Model

In [None]:
# import time

# def create_callbacks():
#     log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
#     tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
#     checkpoint_dir = "logs/fit/uonescheckpoint"
#     tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
    
#     class SaveCheckpointCallback(tf.keras.callbacks.Callback):
#         def __init__(self, checkpoint_dir, save_interval):
#             super(SaveCheckpointCallback, self).__init__()
#             self.checkpoint_dir = checkpoint_dir
#             self.save_interval = save_interval
#             self.iteration = 0

#         def on_batch_end(self, batch, logs=None):
#             self.iteration += 1
#             if self.iteration % self.save_interval == 0:
#                 model_checkpoint = os.path.join(self.checkpoint_dir, f"model_checkpoint_{self.iteration}.h5")
#                 self.model.save(model_checkpoint)
#                 print(f"Saved checkpoint at iteration {self.iteration} to {model_checkpoint}")

#     save_interval = 4800  # Adjust this as needed
#     checkpoint_callback = SaveCheckpointCallback(checkpoint_dir, save_interval)
    
#     return [checkpoint_callback, tensorboard_callback]

In [28]:
import time

def create_callbacks(run_num):
    log_dir = "logs/fit/uones_ht" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
    
    # Modify the checkpoint_dir to include the run number.
    checkpoint_dir = f"logs/fit/uones_ht/run_{run_num}"

    # Ensure the directory exists
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    # SaveCheckpointCallback class definition
    class SaveCheckpointCallback(tf.keras.callbacks.Callback):
        def __init__(self, checkpoint_dir, save_interval):
            super(SaveCheckpointCallback, self).__init__()
            self.checkpoint_dir = checkpoint_dir
            self.save_interval = save_interval
            self.iteration = 0

        def on_batch_end(self, batch, logs=None):
            self.iteration += 1
            if self.iteration % self.save_interval == 0:
                model_checkpoint = os.path.join(self.checkpoint_dir, f"model_checkpoint_{self.iteration}.h5")
                self.model.save(model_checkpoint)
                print(f"Saved checkpoint at iteration {self.iteration} to {model_checkpoint}")

    save_interval = 4800 
    checkpoint_callback = SaveCheckpointCallback(checkpoint_dir, save_interval)
    
    return [checkpoint_callback, tensorboard_callback]

In [None]:
# trained_models = []

# %tensorboard --logdir logs --port 8885

# def train(num_runs, train_ds, valid_ds):

#     for run in range(num_runs):
#         print(f"Run {run + 1} of {num_runs}")

#         # Clear previous session to ensure a fresh start for each run
#         tf.keras.backend.clear_session()

#         model = create_model()
#         callbacks = create_callbacks(run+1)

#         start = time.time()
#         history = model.fit(train_ds, epochs=4, validation_data=valid_ds, batch_size=batch_size, callbacks=callbacks, verbose=2)
#         print("Total time for run", run + 1, ": ", time.time() - start, "seconds")
        
#         trained_models.append(model)

#     return trained_models

# # Define the number of runs
# num_runs = 3
# training = train(num_runs, train_ds, valid_ds)

In [29]:
trained_models = []

def train(num_runs, train_ds, valid_ds):

    for run in range(num_runs):
        print(f"Run {run + 1} of {num_runs}")

        # Clear previous session to ensure a fresh start for each run
        tf.keras.backend.clear_session()
        
        # Build the model with the optimal hyperparameters and train it on the data for 50 epochs
        # Find the optimal number of epochs to train the model with the hyperparameters obtained from the search.
        model = tuner.hypermodel.build(best_hps)
        callbacks = create_callbacks(run+1)

        start = time.time()
        history = model.fit(train_ds, epochs=best_epoch, validation_data=valid_ds, batch_size=batch_size, callbacks=callbacks, verbose=2)
        print("Total time for run", run + 1, ": ", time.time() - start, "seconds")
        
        trained_models.append(model)

    return trained_models

# Define the number of runs
num_runs = 3
training = train(num_runs, train_ds, valid_ds)

Run 1 of 3
Epoch 1/3
INFO:tensorflow:Collective all_reduce tensors: 372 all_reduces, num_devices = 4, group_size = 4, implementation = CommunicationImplementation.NCCL, num_packs = 1
INFO:tensorflow:Collective all_reduce tensors: 372 all_reduces, num_devices = 4, group_size = 4, implementation = CommunicationImplementation.NCCL, num_packs = 1


  saving_api.save_model(


Saved checkpoint at iteration 4800 to logs/fit/uones_ht/run_1/model_checkpoint_4800.h5
Saved checkpoint at iteration 9600 to logs/fit/uones_ht/run_1/model_checkpoint_9600.h5
13964/13964 - 1674s - loss: 0.6220 - binary_accuracy: 0.6871 - auc: 0.5536 - val_loss: 0.4264 - val_binary_accuracy: 0.8257 - val_auc: 0.6434 - 1674s/epoch - 120ms/step
Epoch 2/3
Saved checkpoint at iteration 14400 to logs/fit/uones_ht/run_1/model_checkpoint_14400.h5
Saved checkpoint at iteration 19200 to logs/fit/uones_ht/run_1/model_checkpoint_19200.h5
Saved checkpoint at iteration 24000 to logs/fit/uones_ht/run_1/model_checkpoint_24000.h5
13964/13964 - 1525s - loss: 0.4385 - binary_accuracy: 0.8073 - auc: 0.6091 - val_loss: 0.4109 - val_binary_accuracy: 0.8388 - val_auc: 0.6359 - 1525s/epoch - 109ms/step
Epoch 3/3
Saved checkpoint at iteration 28800 to logs/fit/uones_ht/run_1/model_checkpoint_28800.h5
Saved checkpoint at iteration 33600 to logs/fit/uones_ht/run_1/model_checkpoint_33600.h5
Saved checkpoint at ite

In [30]:
print(len(trained_models))

3


In [None]:
%tensorboard --logdir logs --port 8887

In [None]:
%tensorboard --logdir logs --port 8890

# Model Evaluate at Every Save Iterations

In [31]:
# Initialize a list to store checkpoint paths for each run
checkpoint_paths_list = []

for run in range(num_runs):
    checkpoint_paths = []  # Store checkpoint paths for the current model

    # Collect checkpoint paths
    for iteration in range(4800, 38401, 4800):  # Modify this range according to your save_interval and number of checkpoints
        checkpoint_path = f"logs/fit/uones_ht/run_{run + 1}/model_checkpoint_{iteration}.h5"
        checkpoint_paths.append(checkpoint_path)

    checkpoint_paths_list.append(checkpoint_paths)


In [None]:
# # Initialize a list to store checkpoint paths for each run
# checkpoint_paths_list = []

# for model in trained_models:
#     checkpoint_paths = []  # Store checkpoint paths for the current model

#     # Collect checkpoint paths
#     for iteration in range(4800, 52801, 4800): 
#         checkpoint_path = f"logs/fit/uonescheckpoint/model_checkpoint_{iteration}.h5"
#         checkpoint_paths.append(checkpoint_path)

#     checkpoint_paths_list.append(checkpoint_paths)


In [None]:
import logging

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Set the threshold for what messages will be logged
logging.getLogger('tensorflow').setLevel(logging.ERROR)

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# # Initialize a list to store predictions for each checkpoint
# all_predictions = []

# # Iterate through the collected checkpoint paths
# for checkpoint_paths in checkpoint_paths_list:
#     predictions = []  # Store predictions for the current run

#     # Load each checkpoint and predict on the validation set
#     for checkpoint_path in checkpoint_paths:
#         model.load_weights(checkpoint_path)

#         # Predict on the validation set
#         checkpoint_predictions = model.predict(valid_ds)
#         predictions.append(checkpoint_predictions)

#     all_predictions.append(predictions)

In [32]:
from sklearn.metrics import roc_auc_score

all_valid_images = []
all_valid_labels = []

for images, labels in valid_ds:
    all_valid_images.append(images.numpy())
    all_valid_labels.append(labels.numpy())

# Concatenate all images and labels into two large numpy arrays
all_valid_images = np.concatenate(all_valid_images, axis=0)
all_valid_labels = np.concatenate(all_valid_labels, axis=0)
print(f"all_valid_images shape: {all_valid_images.shape}")
print(f"all_valid_labels shape: {all_valid_labels.shape}")

# Initialize a list to store predictions for each checkpoint
all_predictions = []

# Iterate through the collected checkpoint paths and corresponding trained model
for model, checkpoint_paths in zip(trained_models, checkpoint_paths_list):
    predictions = []  # Store predictions for the current run

    # Load each checkpoint and predict on the validation set
    for checkpoint_path in checkpoint_paths:
        model.load_weights(checkpoint_path)

        # Predict on the validation set
        checkpoint_predictions = model.predict(all_valid_images)
        predictions.append(checkpoint_predictions)

    all_predictions.append(predictions)
all_predictions = np.array(all_predictions)

print(f"all pred shape: {all_predictions.shape}")

average_auroc_list = []
num_pathologies = 5
iteration_auroc = [] # List of all AUROCs per pathology

for checkpoint_predictions in all_predictions:

    for checkpoint_index, checkpoint_prediction in enumerate(checkpoint_predictions):
        checkpoint_auroc_scores = []  # Store AUROC scores for the current model

        for pathology_index in range(num_pathologies):
            true_labels = all_valid_labels[:, pathology_index]
            auroc = roc_auc_score(true_labels, checkpoint_prediction[:, pathology_index])
            checkpoint_auroc_scores.append(auroc)

        iteration_auroc.append(checkpoint_auroc_scores)

# Convert iteration_auroc to array
iteration_auroc = np.array(iteration_auroc)

print(f"iteration_auroc_shape: {iteration_auroc.shape}")

# Calculate the average AUROC across 5 pathologies
average_auroc = np.mean(iteration_auroc, axis = 1)

# Calculate the indices that would sort the average AUROC list in descending order
sorted_indices = np.argsort(average_auroc)[::-1]

# Get the top 20 indices
top_indices = sorted_indices[:10]

# Initialize a list to store the corresponding checkpoint_auroc_scores
best_checkpoint_auroc_scores = []

# Extract the checkpoint_auroc_scores for the best 20 averages
for index in top_indices:
    best_checkpoint_auroc_scores.append(iteration_auroc[index])

best_checkpoint_auroc_scores = np.array(best_checkpoint_auroc_scores)

pathology_names = ['Atelectasis','Cardiomegaly','Consolidation','Edema','Pleural Effusion']
# Compute AUROC, Standard Deviation, and Confidence Intervals
auroc_pathology = np.mean(best_checkpoint_auroc_scores, axis=0)
std_dev_pathology = np.std(best_checkpoint_auroc_scores, axis=0)
confidence_intervals = [(auroc - 1.96 * std, auroc + 1.96 * std) for auroc, std in zip(auroc_pathology, std_dev_pathology)]

# Header
print(f"{'Pathology':<15} {'Average AUROC':<15} {'Standard Error':<17} {'95% Confidence Interval'}")

# Separator
print('-' * 65)

# Table content
for i, pathology in enumerate(pathology_names):
    standard_error = std_dev_pathology[i] / np.sqrt(len(best_checkpoint_auroc_scores))
    lower_bound, upper_bound = confidence_intervals[i]
    print(f"{pathology:<15} {auroc_pathology[i]:<15.4f} {standard_error:<17.4f} ({lower_bound:.4f}, {upper_bound:.4f})")

# Overall AUROC
overall_ave = np.mean(average_auroc[top_indices])
print(f"\nOverall average AUROC (from top 3 models/checkpoints): {overall_ave:.4f}")


all_valid_images shape: (234, 320, 320, 3)
all_valid_labels shape: (234, 14)
all pred shape: (3, 8, 234, 14)
iteration_auroc_shape: (24, 5)
Pathology       Average AUROC   Standard Error    95% Confidence Interval
-----------------------------------------------------------------
Atelectasis     0.8189          0.0070            (0.7758, 0.8621)
Cardiomegaly    0.7804          0.0080            (0.7310, 0.8298)
Consolidation   0.8639          0.0075            (0.8173, 0.9105)
Edema           0.8866          0.0054            (0.8529, 0.9203)
Pleural Effusion 0.8884          0.0051            (0.8565, 0.9203)

Overall average AUROC (from top 3 models/checkpoints): 0.8477


In [33]:
# Getting test images and labels:
all_test_images = []
all_test_labels = []

for images, labels in test_ds:
    all_test_images.append(images.numpy())
    all_test_labels.append(labels.numpy())

# Concatenate all images and labels into two large numpy arrays
all_test_images = np.concatenate(all_test_images, axis=0)
all_test_labels = np.concatenate(all_test_labels, axis=0)
print(f"all_test_images shape: {all_test_images.shape}")
print(f"all_test_labels shape: {all_test_labels.shape}")

# Calculate the number of checkpoints per model
num_checkpoints_per_model = len(checkpoint_paths_list[0])  # or use all_predictions.shape[1] if available

# Determine the best model and checkpoint indices based on the best_model_index
best_model_index = top_indices[0] // num_checkpoints_per_model
best_checkpoint_index = top_indices[0] % num_checkpoints_per_model

# Reference the predictions of the best model and checkpoint
best_model_predictions = all_predictions[best_model_index, best_checkpoint_index]

# Load the best checkpoint weights into the corresponding model
best_model = trained_models[best_model_index]
best_checkpoint_path = checkpoint_paths_list[best_model_index][best_checkpoint_index]
best_model.load_weights(best_checkpoint_path)

# Predict on the test set using the best model
test_predictions = best_model.predict(all_test_images)
print(f"test_predictions shape: {test_predictions.shape}")

pathology_names = ['Atelectasis','Cardiomegaly','Consolidation','Edema','Pleural Effusion']  # 

# Directory where the figures will be saved
save_dir = "test_performance/uone_ht"
os.makedirs(save_dir, exist_ok=True)

# 2. Calculate and Plot ROC for each pathology
for i in range(num_pathologies):
    fpr, tpr, _ = roc_curve(test_labels[:, i], test_predictions[:, i])
    auc = roc_auc_score(test_labels[:, i], test_predictions[:, i])
    plt.figure()
    plt.plot(fpr, tpr, label=f"{pathology_names[i]} (AUC = {auc:.2f})")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title(f"ROC Curve for {pathology_names[i]}")
    plt.legend(loc="lower right")
    plt.tight_layout()

    # Save ROC curve
    roc_filename = f"{pathology_names[i]}_auroc.png"
    plt.savefig(os.path.join(save_dir, roc_filename))
    plt.close()

# 2. Calculate and Plot PR curve for each pathology
for i in range(num_pathologies):
    precision, recall, _ = precision_recall_curve(test_labels[:, i], test_predictions[:, i])
    average_precision = average_precision_score(test_labels[:, i], test_predictions[:, i])
    plt.figure()
    plt.plot(recall, precision, label=f"{pathology_names[i]} (AP = {average_precision:.2f})")
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title(f"Precision-Recall Curve for {pathology_names[i]}")
    plt.legend(loc="upper right")
    plt.tight_layout()

    # Save PR curve
    pr_filename = f"{pathology_names[i]}_pr.png"
    plt.savefig(os.path.join(save_dir, pr_filename))
    plt.close()


all_test_images shape: (668, 320, 320, 3)
all_test_labels shape: (668, 14)
test_predictions shape: (668, 14)


In [35]:
print(len(trained_models))

3
