<h1><center><font size="8">CNN Model for Tuberculosis Detection</font></center></h1>


# <a id='1'>Importing Packages</a>

In [1]:
import tensorflow as tf

# Sanity check: list the devices TensorFlow can see and the installed version.
visible_devices = tf.config.list_physical_devices()
print(f"TensorFlow has access to the following devices:\n{visible_devices}")
print(f"TensorFlow version: {tf.__version__}")

TensorFlow has access to the following devices:
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
TensorFlow version: 2.13.0


In [2]:
# importing packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import datetime

import warnings
warnings.simplefilter("ignore")

import tensorflow as tf
from keras import Model
from keras.models import load_model, Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard, CSVLogger, ReduceLROnPlateau
from keras.applications.vgg16 import VGG16
from keras.metrics import AUC, Precision, Recall

from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve
from sklearn.utils import class_weight

import os

# from utils import *

# %reload_ext autoreload
# %autoreload 2


# <a id='2'>Loading the Data through ImageDataGenerator</a>


## <a id='21'>Generating the Datasets</a>
 

In [3]:
# Build the directory iterators used for training, validation and testing.
# The generator only multiplies pixel values by 1/255; no on-the-fly augmentation
# is configured here (the 'augmented_sorted' folder names suggest augmentation was
# done offline -- TODO confirm).
image_generator = ImageDataGenerator(rescale=1./255)


# assigning paths
train_folder = '../../data/tuberculosis-detection-model/augmented_sorted/train'
val_folder = '../../data/tuberculosis-detection-model/augmented_sorted/val'
test_folder = '../../data/tuberculosis-detection-model/original_sorted/test'

# IMAGE_SIZE is the single source of truth; TARGET_SIZE is derived from it so the
# iterator output and the model input shape cannot drift apart.
IMAGE_SIZE = 224
TARGET_SIZE = (IMAGE_SIZE, IMAGE_SIZE)
BATCH_SIZE = 16

train_generator = image_generator.flow_from_directory(train_folder,
                                                      batch_size=BATCH_SIZE,
                                                      shuffle=True,
                                                      class_mode='binary',
                                                      target_size=TARGET_SIZE,
                                                      seed=42)

val_generator = image_generator.flow_from_directory(val_folder,
                                                    target_size=TARGET_SIZE,
                                                    batch_size=BATCH_SIZE,
                                                    shuffle=True,
                                                    class_mode='binary',
                                                    seed=42)

# shuffle=False keeps predictions aligned with test_generator.classes.
test_generator = image_generator.flow_from_directory(test_folder,
                                                     target_size=TARGET_SIZE,
                                                     batch_size=1,
                                                     class_mode='binary',
                                                     shuffle=False,
                                                     seed=42)

# Training drops the final partial batch (floor division), as before.
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
# Validation and test must see every sample: len(iterator) is the number of
# batches including the last partial one. With floor division and a shuffled
# validation iterator, each epoch was scored on a different random subset.
STEP_SIZE_VALID = len(val_generator)
STEP_SIZE_TEST = len(test_generator)

Found 9822 images belonging to 2 classes.
Found 1995 images belonging to 2 classes.
Found 1036 images belonging to 2 classes.


In [4]:
# Class weights for the imbalanced training set: each class is weighted by the
# share of training files that belong to the *other* class, so the rarer class
# receives the larger weight.
# NOTE(review): keys 0/1 assume flow_from_directory indexed 'Non-TB' as 0 and
# 'TB' as 1 -- confirm against train_generator.class_indices.
tb = len(os.listdir(os.path.join(train_folder, 'TB')))
total_healthy = len(os.listdir(os.path.join(train_folder, 'Non-TB')))
total_train = total_healthy + tb

weight_for_0 = tb / total_train
weight_for_1 = total_healthy / total_train

class_weights = dict([(0, weight_for_0), (1, weight_for_1)])
class_weights

{0: 0.38098147016900835, 1: 0.6190185298309917}

## <a id='22'>Constructing and Compiling the Model</a>


In [5]:
# create model
def create_model(input_shape):
    """Build the (uncompiled) binary-classification CNN.

    The network stacks three convolutional stages with 32, 64 and 128 filters.
    Each stage is Conv-BN-Conv-BN-MaxPool-Dropout(0.5). They are followed by a
    128-unit dense layer, Dropout(0.3) and a single sigmoid output unit.

    Args:
        input_shape: shape passed as ``input_shape`` to the first Conv2D layer.

    Returns:
        A ``Sequential`` model.
    """
    def conv_stage(n_filters, **first_conv_kwargs):
        # Layers are instantiated in list order, matching a hand-written stack.
        return [
            Conv2D(filters=n_filters, kernel_size=(3, 3), activation='relu', **first_conv_kwargs),
            BatchNormalization(),
            Conv2D(filters=n_filters, kernel_size=(3, 3), activation='relu'),
            BatchNormalization(),
            MaxPooling2D(pool_size=(2, 2)),
            Dropout(0.5),
        ]

    stack = conv_stage(32, input_shape=input_shape)
    stack += conv_stage(64)
    stack += conv_stage(128)

    # classifier head
    stack += [
        Flatten(),
        Dense(units=128, activation='relu'),
        Dropout(0.3),
        Dense(units=1, activation='sigmoid'),
    ]
    return Sequential(stack)

# build model (RGB input of IMAGE_SIZE x IMAGE_SIZE)
model = create_model((IMAGE_SIZE, IMAGE_SIZE, 3))

# compile model
# Every metric gets an explicit name. Unnamed Keras metrics are auto-named with a
# counter that grows each time this cell is re-run (auc, auc_1, ... auc_4), which
# makes the keys in history.history and the CSV log depend on kernel state.
# NOTE(review): learning_rate=0.01 is 10x Adam's default -- confirm it is intended.
model.compile(loss='binary_crossentropy',
              optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.01),
              metrics=['accuracy',
                       AUC(name='auc'),
                       AUC(curve='PR', name='pr_auc'),
                       Precision(name='precision'),
                       Recall(name='recall')])

2023-08-17 23:31:55.665130: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2023-08-17 23:31:55.665161: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2023-08-17 23:31:55.665172: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2023-08-17 23:31:55.665218: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-08-17 23:31:55.665248: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [6]:
# Print the layer-by-layer architecture (layer type, output shape, parameter count).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 222, 222, 32)      896       
                                                                 
 batch_normalization (Batch  (None, 222, 222, 32)      128       
 Normalization)                                                  
                                                                 
 conv2d_1 (Conv2D)           (None, 220, 220, 32)      9248      
                                                                 
 batch_normalization_1 (Bat  (None, 220, 220, 32)      128       
 chNormalization)                                                
                                                                 
 max_pooling2d (MaxPooling2  (None, 110, 110, 32)      0         
 D)                                                              
                                                        

## <a id='23'>Defining Callbacks</a>


In [7]:
# define callbacks
# All training artefacts (checkpoints and the CSV log) go into one directory.
LOG_DIR = "modeling/log_vgg16_copy_2"
# Create it up front so the CSV log can be opened when training starts.
os.makedirs(LOG_DIR, exist_ok=True)

# Early stopping is currently disabled:
# earlyStopping = EarlyStopping(monitor = 'val_loss',
#                               verbose = 1,
#                               mode = 'min',
#                               patience = 4)

# Halve the learning rate after 3 epochs without val_accuracy improvement,
# but never go below 1e-4.
lr_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                 patience=3,
                                 verbose=1,
                                 factor=0.5,
                                 min_lr=0.0001)

# save_best_only is left at its default, so a checkpoint is written every epoch.
# The epoch number and val_loss embedded in the file name are what allow picking
# a checkpoint afterwards.
filepath = LOG_DIR + "/model.{epoch:02d}-{val_loss:.2f}.hdf5"
mcp_save = ModelCheckpoint(filepath,
                           verbose=1,
                           monitor='val_loss',
                           mode='min')

# One row of metrics per epoch.
csv_logger = CSVLogger(LOG_DIR + '/log.csv')


## <a id='24'>Training the Model</a>


In [8]:
# train model
# Model.fit accepts generators directly; Model.fit_generator is deprecated and
# merely forwards to fit in TF 2.x.
history = model.fit(train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=val_generator,
                    validation_steps=STEP_SIZE_VALID,
                    epochs=30,
                    class_weight=class_weights,
                    callbacks=[mcp_save, lr_reduction, csv_logger])

Epoch 1/30


2023-08-17 23:32:25.323057: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




KeyboardInterrupt: 

In [None]:
# evaluate model by displaying metrics with visualization
# The 'seaborn' style was renamed to 'seaborn-v0_8' in Matplotlib 3.6; use whichever
# name this installation provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

# Look the metric keys up by position instead of hard-coding names such as 'auc_4':
# auto-generated Keras metric names carry a counter that depends on how often the
# compile cell was run. Training keys appear in compile order:
# loss, accuracy, ROC-AUC, PR-AUC, precision, recall.
train_keys = [key for key in history.history
              if not key.startswith('val_') and key != 'lr']
roc_auc_key, pr_auc_key, precision_key, recall_key = train_keys[2:6]

# NOTE(review): sixplot2 is not defined in this notebook -- presumably it comes from
# the commented-out `from utils import *`; confirm before running.
sixplot2(history,
         roc_auc_key, 'val_' + roc_auc_key,
         pr_auc_key, 'val_' + pr_auc_key,
         precision_key, 'val_' + precision_key,
         recall_key, 'val_' + recall_key)

## Loading Best Model

In [None]:
# Load a checkpoint written by ModelCheckpoint; the file name encodes epoch 28 and
# that epoch's val_loss, so this path only exists after a matching training run.
# NOTE(review): the cells below evaluate `model`, not `model_2` -- confirm which
# network is meant to be scored on the holdout set.
model_2 = load_model('modeling/log_vgg16_copy_2/model.28-2399.44.hdf5')

## <a id='47'>Predicting on Holdout Set</a>

In [None]:
# get testing accuracy and loss
# Model.evaluate accepts generators directly; evaluate_generator is deprecated.
# NOTE(review): this scores `model`, not the checkpoint loaded into `model_2` --
# confirm which one is intended.
test_generator.reset()
score = model.evaluate(test_generator, verbose = 1)

# `score` follows the compile order: loss first, then the metrics as listed in
# model.compile (accuracy, ROC-AUC, PR-AUC, precision, recall).
print("Loss: " + str(score[0]))
print("Accuracy: " + str(score[1]))

## Removing Corrupted Images

In [None]:
# `glob` is not imported by the notebook's import cell, so import it here to keep
# this cell runnable on a fresh kernel.
import glob

# Collect every PNG in the folder to be checked for corruption.
# NOTE(review): this path differs from `test_folder` defined earlier -- confirm it
# points at the intended dataset.
image_list = glob.glob('data/Tuberculosis/augmented_sorted/test/test/*.png')

In [None]:
def is_image_corrupted(image_path):
    """Return True if the image at ``image_path`` cannot be opened and verified.

    Args:
        image_path: path of the image file to check.

    Returns:
        False when ``Image.open`` and ``verify()`` both succeed, True when either
        raises an error while reading the file.

    Raises:
        NameError, ImportError: when the imaging library itself is unavailable.
            These are setup problems, not evidence of a bad file. Reporting them
            as "corrupted" would make the clean-up loop delete every image.
    """
    # NOTE(review): `Image` is not imported anywhere in the visible notebook --
    # presumably `from PIL import Image` is required; confirm and add it to the
    # import cell.
    try:
        # Try to open the image
        with Image.open(image_path) as img:
            img.verify()  # This method checks for corruption in the image
        return False  # Image is not corrupted
    except (NameError, ImportError):
        # Environment problem: surface it instead of flagging the file.
        raise
    except Exception as e:
        print(f"Image is corrupted: {e}")
        return True  # Image is corrupted

In [None]:
# Delete every file that fails verification, printing its path first.
# filter() is lazy, so each file is checked, reported and removed in turn.
for image in filter(is_image_corrupted, image_list):
    print(image)
    os.remove(image)

In [None]:
# Re-check after the clean-up. Paths deleted by the removal loop are still in
# image_list; opening them would fail and be reported as "corrupted" again, so only
# files that still exist on disk are verified. Nothing should be printed here.
for image in image_list:
    if os.path.exists(image) and is_image_corrupted(image):
        print(image)

In [None]:
# visualization for confusion matrix
# Model.predict accepts generators directly; predict_generator is deprecated.
# The test iterator was created with shuffle=False, so after reset() the
# predictions line up with test_generator.classes.
test_generator.reset()
pred = model.predict(test_generator, steps = STEP_SIZE_TEST, verbose = 1)
y_true = test_generator.classes
y_pred = pred > 0.5  # threshold the sigmoid output at 0.5
# NOTE(review): make_confusion_matrix is not defined in this notebook -- presumably
# it comes from the commented-out `from utils import *`; confirm before running.
make_confusion_matrix(y_true, y_pred)