In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random

import tensorflow as tf
from tensorflow import keras
from keras import Sequential
from tensorflow.keras.models import load_model
from keras.applications import DenseNet169
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization

import os
import shutil

import cv2

Creating img_list and labels for the pulmonary-chest-xray-abnormalities dataset (China and Montgomery sets)

In [None]:
# Folders holding the PNG radiographs of the two collections shipped in the
# pulmonary-chest-xray-abnormalities Kaggle dataset.
china_file_path = '/kaggle/input/pulmonary-chest-xray-abnormalities/ChinaSet_AllFiles/ChinaSet_AllFiles/CXR_png'
montgomery_file_path = '/kaggle/input/pulmonary-chest-xray-abnormalities/Montgomery/MontgomerySet/CXR_png'

# Shared accumulators: images and their 0/1 labels are appended to these in place.
img_list_test, labels_test = [], []

def create_img_label_list(file_path,img_list,labels):
    """Append every labelled image found in ``file_path`` to ``img_list``/``labels``.

    Files are visited in sorted name order. The class is taken from the first
    character that follows the last underscore of the file name ('0' or '1').
    Each accepted image is resized to 256x256 and divided by 255.0.

    An image is only appended together with its label, so both lists always
    stay index-aligned. Files whose name carries no 0/1 label and files that
    OpenCV cannot decode are reported with ``print`` and skipped.

    Args:
        file_path: Directory to scan.
        img_list: List extended in place with the pre-processed images.
        labels: List extended in place with the matching integer labels.

    Returns:
        None. Results are delivered through the two list arguments.
    """
    for filename in sorted(os.listdir(file_path)):
        # [:1] (not [0]) so a name ending in "_" gives "" instead of raising IndexError.
        label = filename.split('_')[-1][:1]

        # Validate the label BEFORE touching img_list: appending an image
        # without a label would shift every later image/label pair.
        if label not in ('0', '1'):
            print(f"{filename} (skipped: no 0/1 label in file name)")
            continue

        img = cv2.imread(os.path.join(file_path,filename))

        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print(filename)
            continue

        img_list.append(cv2.resize(img, (256,256))/255.0)    #resizing and normalizing
        labels.append(int(label))

# Pool both collections into the same pair of lists (China first, then Montgomery).
create_img_label_list(file_path=china_file_path, img_list=img_list_test, labels=labels_test)
create_img_label_list(file_path=montgomery_file_path, img_list=img_list_test, labels=labels_test)

# Freeze the accumulated Python lists into NumPy arrays.
img_test_2 = np.asarray(img_list_test)
labels_test_2 = np.asarray(labels_test)

Creating img_list and labels for the tuberculosis-tb-chest-xray-dataset (this cell is currently commented out)

In [None]:
# normal_img_path = '/kaggle/input/tuberculosis-tb-chest-xray-dataset/TB_Chest_Radiography_Database/Normal'
# tb_img_path = '/kaggle/input/tuberculosis-tb-chest-xray-dataset/TB_Chest_Radiography_Database/Tuberculosis'

# # img_list=[]
# # labels = []

# all_filenames = os.listdir(normal_img_path)
# selected_filenames = random.sample(all_filenames, 700)

# # Process selected images
# for filename in selected_filenames:
#     img = cv2.imread(os.path.join(normal_img_path, filename))
#     img = cv2.resize(img, (256, 256)) / 255.0  # Normalize
#     img_list.append(img)
#     labels.append(0) 

# for filename in os.listdir(tb_img_path):
#     img = cv2.imread(os.path.join(tb_img_path,filename))
#     img = cv2.resize(img,(256,256))/255.0
#     img_list.append(img)
#     labels.append(1)

Creating img_list and labels for the tbx11k-simplified dataset

In [None]:
data_df = pd.read_csv('/kaggle/input/tbx11k-simplified/tbx11k-simplified/data.csv')

# A file name can occur on more than one row (the original code already
# de-duplicated the TB list), so de-duplicate BOTH classes. Series.drop_duplicates
# keeps first-seen order and avoids pd.unique() on a plain list, which recent
# pandas versions deprecate.
active_tb = data_df.loc[data_df['tb_type']=='active_tb', 'fname'].drop_duplicates().tolist()
normal = data_df.loc[data_df['image_type']=='healthy', 'fname'].drop_duplicates().tolist()

img_path = '/kaggle/input/tbx11k-simplified/tbx11k-simplified/images'


def _load_xrays(filenames, label, images, targets):
    """Read, resize (256x256) and scale (/255) each file; append image and label together."""
    for filename in filenames:
        img = cv2.imread(os.path.join(img_path, filename))
        if img is None:
            # cv2.imread returns None for a missing/unreadable file; skip it
            # instead of crashing inside cv2.resize.
            print(filename)
            continue
        images.append(cv2.resize(img, (256, 256)) / 255.0)  # Normalize
        targets.append(label)


img_list=[]
labels = []

# Draw as many healthy images as there are TB images to balance the classes.
# A private, seeded generator makes the draw reproducible (42 matches the
# random_state used for the splits) without touching the global random state.
# min() guards against random.sample raising when fewer healthy images exist.
n_normal = min(len(normal), len(active_tb))
selected_normal = random.Random(42).sample(normal, n_normal)

print(f"shape of selected_normal: {len(selected_normal)}")
print(f"shape of active_tb: {len(active_tb)}")

# Process selected images: label 0 = healthy, label 1 = active TB
_load_xrays(selected_normal, 0, img_list, labels)
_load_xrays(active_tb, 1, img_list, labels)


img_list = np.array(img_list)
labels = np.array(labels)

# shuffle_indices = np.random.permutation(len(img_list))
# img_list = img_list[shuffle_indices]
# labels = labels[shuffle_indices]

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the images. stratify keeps the class ratio identical in both
# parts, which matters because the held-out part is split again afterwards.
X_train, X_val, labels_train, labels_val = train_test_split(
    img_list,
    labels,
    test_size=0.2,
    random_state=42,
    shuffle=True,
    stratify=labels,
)

In [None]:
# Carve the test set out of the held-out 20%: 20% of it (about 4% of all images)
# becomes the test set, the rest stays as validation data. With a test set this
# small, stratify is needed to guarantee both classes are represented.
# NOTE: this cell rebinds X_val/labels_val, so re-running it without re-running
# the previous split shrinks the validation set further. It also rebinds
# labels_test, which earlier held the external-dataset labels (already copied
# into labels_test_2).
X_val, X_test, labels_val, labels_test = train_test_split(
    X_val,
    labels_val,
    test_size=0.20,
    random_state=42,
    stratify=labels_val,
)

In [None]:
def jaccard_index(y_true, y_pred, smooth=100):
    """Smoothed Jaccard index (intersection over union) of two tensors.

    Both inputs are cast to float32 and flattened to 1-D before comparison.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values.
        smooth: Constant added to numerator and denominator.

    Returns:
        (intersection + smooth) / (union + smooth).
    """
    flat_truth, flat_pred = (
        tf.reshape(tf.cast(tensor, tf.float32), [-1]) for tensor in (y_true, y_pred)
    )
    overlap = tf.reduce_sum(flat_truth * flat_pred)
    # Union = |A| + |B| - |A ∩ B|
    union = tf.reduce_sum(flat_truth) + tf.reduce_sum(flat_pred) - overlap
    return (overlap + smooth) / (union + smooth)


def dice_coefficient(y_true, y_pred, smooth=1):
    """Smoothed Dice coefficient of two tensors.

    Both inputs are cast to float32 and flattened to 1-D before comparison.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values.
        smooth: Constant added to numerator and denominator.

    Returns:
        (2 * intersection + smooth) / (sum(y_true) + sum(y_pred) + smooth).
    """
    flat_truth = tf.reshape(tf.cast(y_true, tf.float32), [-1])
    flat_pred = tf.reshape(tf.cast(y_pred, tf.float32), [-1])

    overlap = tf.reduce_sum(flat_truth * flat_pred)

    numerator = 2. * overlap + smooth
    denominator = tf.reduce_sum(flat_truth) + tf.reduce_sum(flat_pred) + smooth
    return numerator / denominator


# Restore the saved segmentation network. The two custom metrics are passed in
# custom_objects so Keras can resolve them by name while deserializing the model.
segment_model = load_model(
    '/kaggle/input/segment_model/keras/default/1/best_model (1).keras',
    custom_objects={
        'dice_coefficient': dice_coefficient,
        'jaccard_index': jaccard_index,
    },
)

In [None]:
def segment_and_preprocess(img):
    """Mask a single image with the binarized output of ``segment_model``.

    The image is wrapped into a batch of one, passed through ``segment_model``,
    and the prediction is thresholded at 0.5 (strictly greater) into a float32
    0/1 mask. The image is then multiplied element-wise by that mask.

    Args:
        img: One image array, without a batch axis.

    Returns:
        ``img * mask``: pixels where the prediction is <= 0.5 become zero.
    """
    batch_of_one = np.asanyarray(img)[np.newaxis, ...]

    probabilities = segment_model.predict(batch_of_one, verbose=0)[0]
    binary_mask = (probabilities > 0.5).astype(np.float32)

    return img * binary_mask


In [None]:
from tqdm import tqdm

def _segment_all(images, desc):
    """Run segment_and_preprocess on every image, showing a tqdm bar labelled ``desc``."""
    return np.array([segment_and_preprocess(img) for img in tqdm(images, desc=desc)])


# Apply segmentation and preprocessing with a progress bar
X_train_preprocessed = _segment_all(X_train, "Processing Training Data")
X_val_preprocessed = _segment_all(X_val, "Processing Validation Data")
X_test_preprocessed = _segment_all(X_test, "Preprocessing Test Data")


# External dataset (China + Montgomery). The bar used to be labelled
# "Processing Validation Data", a copy-paste slip that mislabelled this run.
img_test_2_preprocessed = _segment_all(img_test_2, "Processing External Test Data")

In [None]:
batch_size = 16

# Light geometric augmentation for the training images only.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    # shear_range=0.1,
    zoom_range=0.05,
    horizontal_flip=True,
    # vertical_flip=True,
)

# Validation images are passed through untouched.
val_datagen = tf.keras.preprocessing.image.ImageDataGenerator()

# seed fixes the shuffling order and the random transforms, so training runs
# are repeatable like the seeded train/val/test splits.
train_data = train_datagen.flow(
    X_train_preprocessed,
    labels_train,
    batch_size=batch_size,
    shuffle=True,
    seed=42,
)

# Shuffling gives no benefit for validation: the metrics are aggregated over
# the whole set, and a fixed order keeps batches comparable between epochs.
val_data = val_datagen.flow(
    X_val_preprocessed,
    labels_val,
    batch_size=batch_size,
    shuffle=False,
)

Testing our ImageDataGenerator

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def show_batch(data_generator, batch_size):
    """Draw up to eight images of the next batch in a 2x4 grid, titled with their labels.

    Args:
        data_generator: Iterator whose ``next()`` yields an ``(images, labels)`` pair.
        batch_size: Upper bound on how many images to draw (further capped at 8
            and at the number of images actually present in the batch).

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    images, batch_labels = next(data_generator)
    # A batch can hold fewer images than batch_size (e.g. the last batch of an
    # epoch); bound by its real length so indexing never runs past the end.
    n_shown = min(len(images), batch_size, 8)

    plt.figure(figsize=(12, 6))

    for i in range(n_shown):
        plt.subplot(2, 4, i + 1)
        plt.imshow(images[i], cmap="gray")
        plt.title(f"Label: {batch_labels[i]}")
        plt.axis("off")

    plt.show()

# Preview one augmented training batch ...
print("Training Data Batch:")
show_batch(data_generator=train_data, batch_size=batch_size)

# ... and one untouched validation batch for comparison.
print("Validation Data Batch:")
show_batch(data_generator=val_data, batch_size=batch_size)


In [None]:
from tensorflow.keras.applications import DenseNet169
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Concatenate, Input, BatchNormalization, GlobalAveragePooling2D, Dense, Flatten, Dropout, Conv2D, MaxPool2D,Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.metrics import AUC,Precision, Recall

# Replicate the model across all visible GPUs.
strategy = tf.distribute.MirroredStrategy()
print("Number of GPUs:", strategy.num_replicas_in_sync)

with strategy.scope():

    # ImageNet-pretrained DenseNet169 without its classification head.
    densenet_base=DenseNet169(
        weights="imagenet",
        include_top=False,
        input_shape=(256,256,3)
    )

    # Fine-tune only the last 10 layers of the backbone.
    # The backbone itself must stay trainable: in Keras 2 / tf.keras a model
    # with trainable=False reports no trainable weights even when individual
    # sub-layers are switched back on, so "freeze all, then unfreeze the tail"
    # silently trains nothing in the backbone. Freezing the prefix instead
    # behaves the same on Keras 2 and Keras 3.
    densenet_base.trainable = True
    for layer in densenet_base.layers[:-10]:
        layer.trainable = False

    inp = Input(shape = (256,256,3))

    # training=False keeps the backbone's BatchNormalization layers in inference
    # mode while fine-tuning, as the Keras transfer-learning guide recommends.
    densenet_feature=densenet_base(inp, training=False)

    # Classification head: pooled features -> 64 -> 32 -> 8 -> sigmoid.
    x = GlobalAveragePooling2D()(densenet_feature)
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.2)(x)
    x = BatchNormalization()(x)
    x = Dense(32, activation='relu')(x)
    x = Dropout(0.1)(x)
    x = BatchNormalization()(x)
    x = Dense(8, activation='relu')(x)
    x = BatchNormalization()(x)
    out = Dense(1,activation='sigmoid')(x)

    classification_model = Model(inputs = inp, outputs = out)

    classification_model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss='binary_crossentropy',
        metrics=['accuracy',Precision(name='precision'), Recall(name='recall')]
    )

classification_model.summary()

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# Keep the weights of the epoch with the lowest validation loss on disk.
checkpoint = ModelCheckpoint("best_model_2.keras", monitor="val_loss", save_best_only=True, mode="min", verbose=1)
# Stop once val_loss has not improved for 10 epochs and roll back to the best weights.
early_stopping = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True, verbose=1)
# Divide the learning rate by 10 after 5 stagnant epochs (never below 1e-6).
lr_scheduler = ReduceLROnPlateau(monitor="val_loss", factor=0.1, patience=5, min_lr=1e-6, verbose=1)

# early_stopping was created but never handed to fit(), so training always ran
# the full 100 epochs; it is now part of the callback list.
history = classification_model.fit(
    train_data,
    validation_data=val_data,
    epochs=100,
    callbacks=[checkpoint, early_stopping, lr_scheduler]
)


# Reload the best checkpoint so later cells evaluate the best epoch.
classification_model.load_weights("best_model_2.keras")


Plotting accuracy and loss graph

In [None]:
# Training curves drawn through the explicit Figure/Axes interface.
fig = plt.figure(figsize=(12, 5))

# Left panel: loss
ax_loss = fig.add_subplot(1, 2, 1)
ax_loss.plot(history.history['loss'], label='Train Loss')
ax_loss.plot(history.history['val_loss'], label='Val Loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.set_title('Loss over Epochs')
ax_loss.legend()

# Right panel: accuracy, only created when the metric was tracked
if 'accuracy' in history.history:
    ax_acc = fig.add_subplot(1, 2, 2)
    ax_acc.plot(history.history['accuracy'], label='Train Accuracy')
    ax_acc.plot(history.history['val_accuracy'], label='Val Accuracy')
    ax_acc.set_xlabel('Epochs')
    ax_acc.set_ylabel('Accuracy')
    ax_acc.set_title('Accuracy over Epochs')
    ax_acc.legend()

fig.tight_layout()
fig.savefig("training_plot.png", dpi=300, bbox_inches='tight')
plt.show()


In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt

y_true = labels_test
# NOTE: rebinds X_test to the segmented images (kept for compatibility with
# any later cell that reads X_test).
X_test = X_test_preprocessed

y_pred_probs = classification_model.predict(X_test)
# The model ends in a single sigmoid unit, so predictions come back as an
# (n, 1) column; flatten to match the 1-D y_true.
y_pred = (y_pred_probs > 0.5).astype(int).ravel()

class_ids = [0, 1]
class_names = ["No TB", "TB"]

# Compute confusion matrix. labels= forces a 2x2 result even if one class is
# absent from this small split, so it always matches the tick labels below.
cm = confusion_matrix(y_true, y_pred, labels=class_ids)

# Display classification report
print("Classification Report:")
print(classification_report(y_true, y_pred, labels=class_ids, target_names=class_names))

plt.figure(figsize=(5, 4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.savefig("confusion_matrix.png", dpi=300, bbox_inches='tight')
plt.show()
