In [7]:
# importing packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import datetime
import os
import warnings
warnings.simplefilter("ignore")

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Dropout, Flatten, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard, CSVLogger, ReduceLROnPlateau
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.metrics import AUC, Precision, Recall
from tensorflow.keras.applications import EfficientNetB4

from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve
from sklearn.utils import class_weight

from utils import *

%reload_ext autoreload
%autoreload 2


In [13]:
# generating augmentations through ImageDataGenerator
image_generator = ImageDataGenerator(rescale=1./255)


# assigning paths
train_folder = 'data/Tuberculosis/augmented_sorted/train'
val_folder = 'data/Tuberculosis/augmented_sorted/val'
test_folder = 'data/Tuberculosis/augmented_sorted/test'

train_generator = image_generator.flow_from_directory(train_folder, 
                                                      batch_size=16, 
                                                      shuffle=True, 
                                                      class_mode='binary',
                                                      target_size=(224, 224),
                                                      seed=42)

val_generator = image_generator.flow_from_directory(val_folder,
                                                    target_size=(224, 224),
                                                    batch_size = 16,
                                                    shuffle=True,
                                                    class_mode = 'binary',
                                                    seed = 42)

test_generator = image_generator.flow_from_directory(test_folder,
                                                     target_size = (224, 224),
                                                     batch_size = 1,
                                                     class_mode = 'binary',
                                                     shuffle=False, 
                                                     seed=42)

STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = val_generator.n // val_generator.batch_size
STEP_SIZE_TEST = test_generator.n // test_generator.batch_size

Found 9822 images belonging to 2 classes.
Found 2456 images belonging to 2 classes.
Found 3289 images belonging to 1 classes.


In [14]:
# calculate class weights
total_tb = len(os.listdir(os.path.join(train_folder, 'TB')))
total_healthy = len(os.listdir(os.path.join(train_folder, 'Non-TB')))

weight_for_0 = total_tb / (total_healthy + total_tb)
weight_for_1 = total_healthy / (total_healthy + total_tb)

class_weights = {0: weight_for_0, 1: weight_for_1}
class_weights

{0: 0.38098147016900835, 1: 0.6190185298309917}

In [15]:
# Load the EfficientNetB4 model pre-trained on ImageNet data
def create_model(input_shape):
    base_model = EfficientNetB4(include_top=False, weights='imagenet', input_shape=input_shape)
    for layer in base_model.layers:
        layer.trainable = False  # Freeze the pre-trained layers
    
    model = Sequential([
        base_model,
        GlobalAveragePooling2D(),
        Dense(units=128, activation='relu'),
        Dropout(0.2),
        Dense(units=1, activation='sigmoid')
    ])
    return model

# save model
model = create_model((224, 224, 3))

# compile model
model.compile(loss = 'binary_crossentropy',
              optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=0.01),
              metrics = ['accuracy', AUC(), AUC(curve='PR'), Precision(), Recall()])

In [16]:
# define callbacks
# earlyStopping = EarlyStopping(monitor = 'val_loss', 
#                               verbose = 1, 
#                               mode = 'min', 
#                               patience = 4)
lr_reduction = ReduceLROnPlateau(monitor = 'val_accuracy',
                                 patience = 3,
                                 verbose = 1,
                                 factor = 0.5,
                                 min_lr = 0.00001)
filepath = "modeling/log_effnet/model.{epoch:02d}-{val_loss:.2f}.hdf5"
mcp_save = ModelCheckpoint(filepath, 
                           verbose = 1, 
                           monitor = 'val_loss', 
                           mode = 'min')
csv_logger = CSVLogger('modeling/log_effnet/log.csv')
log_dir = "modeling/log_effnet/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_cb = TensorBoard(log_dir = log_dir,
                             histogram_freq = 1,
                             update_freq = 'batch')


In [17]:
# train model
history = model.fit_generator(generator = train_generator,
                              steps_per_epoch = STEP_SIZE_TRAIN,
                              validation_data = val_generator,
                              validation_steps = STEP_SIZE_VALID,
                              epochs = 50,
                              class_weight = class_weights,
                              callbacks = [mcp_save, lr_reduction, csv_logger])

Epoch 1/50


2023-08-14 02:23:16.082303: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-08-14 02:25:23.511217: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.



Epoch 1: saving model to modeling/log_effnet/model.01-0.53.hdf5
Epoch 2/50
Epoch 2: saving model to modeling/log_effnet/model.02-1.06.hdf5
Epoch 3/50
Epoch 3: saving model to modeling/log_effnet/model.03-1.40.hdf5
Epoch 4/50
Epoch 4: saving model to modeling/log_effnet/model.04-0.57.hdf5

Epoch 4: ReduceLROnPlateau reducing learning rate to 0.004999999888241291.
Epoch 5/50
Epoch 5: saving model to modeling/log_effnet/model.05-0.85.hdf5
Epoch 6/50
Epoch 6: saving model to modeling/log_effnet/model.06-0.53.hdf5
Epoch 7/50
Epoch 7: saving model to modeling/log_effnet/model.07-1.09.hdf5

Epoch 7: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 8/50
Epoch 8: saving model to modeling/log_effnet/model.08-0.85.hdf5
Epoch 9/50
Epoch 9: saving model to modeling/log_effnet/model.09-0.64.hdf5
Epoch 10/50
Epoch 10: saving model to modeling/log_effnet/model.10-0.60.hdf5

Epoch 10: ReduceLROnPlateau reducing learning rate to 0.0012499999720603228.
Epoch 11/50
Epoch 11: savin

KeyboardInterrupt: 