<a href="https://colab.research.google.com/github/atharabbas993/Chest-X-ray-Classification/blob/main/XRay_TB_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Load Kaggle API

In [None]:
# Upload the Kaggle API token through the Colab file picker. The shell commands
# below expect the uploaded file to be named kaggle.json in the working directory.
from google.colab import files
files.upload()  # Upload kaggle.json
# Move the token into ~/.kaggle and restrict it to owner read/write only,
# since it is a credential.
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download Dataset

In [None]:
# Fetch the TB chest X-ray dataset archive with the Kaggle CLI; the unzip cell
# below reads it from /content/tuberculosis-tb-chest-xray-dataset.zip.
!kaggle datasets download -d tawsifurrahman/tuberculosis-tb-chest-xray-dataset

# Unzip Folder


In [None]:
from zipfile import ZipFile

# Unpack the downloaded dataset archive into the current working directory.
with ZipFile('/content/tuberculosis-tb-chest-xray-dataset.zip', mode='r') as dataset_archive:
    dataset_archive.extractall()

In [None]:

# Install split-folders, which provides the `splitfolders` module used to
# create the train/val/test directories.
!pip install split-folders

# Split Folder

In [None]:
import splitfolders

# Copy the extracted dataset into train/val/test sub-folders of "output"
# using an 80% / 10% / 10% split. The seed is fixed at 1337.
splitfolders.ratio(
    "/content/TB_Chest_Radiography_Database",
    output="output",
    seed=1337,
    ratio=(.8, 0.1, 0.1),
    group_prefix=None,
)


# Load Images


In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator


# Folders holding the training, validation, and test splits, in that order.
train_dir, validation_dir, test_dir = (
    '/content/output/train',
    '/content/output/val',
    '/content/output/test',
)




# Random augmentation settings, applied to the training images only.
augmentation_settings = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Every generator multiplies pixel values by 1/255; the validation and test
# generators do nothing else, so those images are never augmented.
pixel_scale = 1.0 / 255
train_datagen = ImageDataGenerator(rescale=pixel_scale, **augmentation_settings)
validation_datagen = ImageDataGenerator(rescale=pixel_scale)
test_datagen = ImageDataGenerator(rescale=pixel_scale)


# Options shared by the training and validation loaders: images are resized to
# 224x224, served in batches of 32, and labelled for binary classification.
loader_settings = dict(target_size=(224, 224), batch_size=32, class_mode='binary')

#____________Train_Data__________________
train_data = train_datagen.flow_from_directory(train_dir, **loader_settings)

#___________Validation_Data__________________
validation_data = validation_datagen.flow_from_directory(validation_dir, **loader_settings)

#_____________Test_Data________________________

# shuffle=False is required here: flow_from_directory shuffles by default, and
# a shuffled generator yields batches in a different order from
# `test_data.classes`. Any comparison of model.predict(test_data) against
# test_data.classes (confusion matrix, classification report) is only valid
# when the generator keeps file order. Evaluation metrics are unaffected.
test_data = test_datagen.flow_from_directory(
    test_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)


# Build CNN

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization



model = Sequential()

#_________Convolutional_Layers_______

# Three identical stages (3x3 conv + ReLU -> 2x2 max-pool -> batch norm) with
# 64, 128, and 256 filters. Only the first conv layer declares the input shape.
for stage_index, filter_count in enumerate((64, 128, 256)):
    first_layer_options = {'input_shape': (224, 224, 3)} if stage_index == 0 else {}
    model.add(Conv2D(filter_count, (3,3), activation='relu', **first_layer_options))
    model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='valid'))
    model.add(BatchNormalization())

#______Flatten_Layer___

model.add(Flatten())

#______Fully_Connected_Layers______

# Hidden dense layer with 50% dropout, then a single sigmoid unit that outputs
# the binary-class probability.
for head_layer in (
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
):
    model.add(head_layer)


# Plot Layers

In [None]:
# Import from tensorflow.keras, like the rest of this notebook, so the plotting
# utility comes from the same Keras package that built `model`.
from tensorflow.keras.utils import plot_model

# Render the layer graph with each layer's name and input/output shapes.
plot_model(model, show_shapes=True, show_layer_names=True)

# Model Summary


In [None]:

# Print a table of the model's layers with their details and parameter counts.

model.summary()

# Compile Model

In [None]:
# Binary cross-entropy pairs with the single sigmoid output unit; accuracy is
# the metric reported alongside the loss.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Early Stopping

In [None]:
from tensorflow import keras

# Stop training once validation loss has gone 3 epochs without improving, and
# restore the weights from the best epoch seen.
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    mode="auto",
    patience=3,
    min_delta=0,
    baseline=None,
    start_from_epoch=0,
    restore_best_weights=True,
    verbose=1,
)

# Fit Data to Model

In [None]:
# Train for at most 30 epochs; the early-stopping callback may end the run
# sooner. `history` keeps the per-epoch metrics used by the plots below.
history = model.fit(
    train_data,
    validation_data=validation_data,
    epochs=30,
    callbacks=[early_stopping],
)

# Test Result Accuracy

In [None]:

# Score the trained model on the held-out test generator. evaluate() returns
# the loss followed by the compiled metric (accuracy), in that order.
test_loss, test_accuracy = model.evaluate(test_data)
# Report accuracy as a percentage with two decimals.
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")



# Accuracy Plot / Validation Plot


In [None]:

#_____________Visualize_Training_Results_________________

import matplotlib.pyplot as plt

# Draw one figure per metric, each comparing the training curve with the
# validation curve. Every figure gets axis labels and a title so that it can
# be read on its own.
for train_key, validation_key, metric_name in (
    ('accuracy', 'val_accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Loss'),
):
    fig, ax = plt.subplots()
    ax.plot(history.history[train_key], label=f'Train {metric_name}')
    ax.plot(history.history[validation_key], label=f'Validation {metric_name}')
    ax.set_xlabel('Epochs')
    ax.set_ylabel(metric_name)
    ax.set_title(f'Training vs. Validation {metric_name}')
    ax.legend()
    plt.show()

# Train Loss / Validation Plot

In [None]:

import matplotlib.pyplot as plt

# Function to plot accuracy and loss
def plot_training_history(history):
    """Show training and validation accuracy and loss curves side by side.

    Args:
        history: Object whose ``history`` attribute maps the keys 'accuracy',
            'val_accuracy', 'loss' and 'val_loss' to per-epoch sequences
            (for example, the value returned by ``model.fit``).
    """
    # One row per panel: (train key, validation key, train label,
    # validation label, y-axis label, panel title).
    panel_specs = (
        ('accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy',
         'Accuracy', 'Model Accuracy'),
        ('loss', 'val_loss', 'Train Loss', 'Validation Loss',
         'Loss', 'Model Loss'),
    )

    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    for ax, spec in zip(axes, panel_specs):
        train_key, validation_key, train_label, validation_label, y_label, panel_title = spec
        ax.plot(history.history[train_key], label=train_label)
        ax.plot(history.history[validation_key], label=validation_label)
        ax.set_xlabel('Epochs')
        ax.set_ylabel(y_label)
        ax.set_title(panel_title)
        ax.legend()

    fig.tight_layout()
    plt.show()

# Draw the accuracy and loss curves for the training run stored in `history`.
plot_training_history(history)


# Confusion Matrix

In [None]:

import numpy as np
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay

# Collect labels and predictions one batch at a time, predicting on exactly
# the images each batch contains. Indexing the generator returns the images
# and labels of a batch together, so y_true and y_pred stay aligned even if
# the generator shuffles its files. Comparing model.predict(test_data) with
# test_data.classes is only valid for an unshuffled generator.
test_data.reset()
true_label_batches = []
probability_batches = []
for batch_index in range(len(test_data)):
    batch_images, batch_labels = test_data[batch_index]
    true_label_batches.append(batch_labels)
    probability_batches.append(model.predict_on_batch(batch_images))

y_true = np.concatenate(true_label_batches).astype("int32")
# Threshold the sigmoid output at 0.5 to get hard class predictions.
y_pred = (np.concatenate(probability_batches) > 0.5).astype("int32").flatten()

# Class names ordered by their integer label, as a real list rather than a
# dict view.
class_names = sorted(test_data.class_indices, key=test_data.class_indices.get)
label_ids = list(range(len(class_names)))

# Compute confusion matrix
cm = confusion_matrix(y_true, y_pred, labels=label_ids)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)

# Plot confusion matrix on an explicit axes. Without `ax`, disp.plot() opens
# its own figure, which would leave a separately created figure blank.
fig, ax = plt.subplots(figsize=(8, 8))
disp.plot(ax=ax, cmap='Blues', values_format='d')
ax.set_title('Confusion Matrix')
plt.show()

# Print classification report
print("Classification Report:\n", classification_report(y_true, y_pred, labels=label_ids, target_names=class_names))


# Predicted Probability

In [None]:


import seaborn as sns

# Sigmoid outputs for every test image, flattened to one dimension.
y_probs = model.predict(test_data).flatten()

# Histogram (20 bins) of the predicted probabilities with a KDE overlay.
fig, ax = plt.subplots(figsize=(8, 6))
sns.histplot(y_probs, kde=True, bins=20, color='blue', ax=ax)
ax.set_xlabel('Predicted Probability')
ax.set_ylabel('Frequency')
ax.set_title('Histogram of Predicted Probabilities')
ax.grid()
plt.show()


# Learning Rate Schedule


In [None]:
# Plot the learning rates that belong to this training run rather than
# hard-coded example numbers. If a callback recorded per-epoch rates in the
# history (under 'learning_rate' or the older 'lr' key), use them. Otherwise
# the optimizer ran at one fixed rate, so repeat it for every completed epoch.
logged_rates = history.history.get('learning_rate', history.history.get('lr'))
if logged_rates is not None:
    learning_rates = [float(rate) for rate in logged_rates]
else:
    # Assumes the optimizer holds a plain scalar rate (the case for
    # optimizer='adam'), not a schedule object -- TODO confirm if the
    # optimizer configuration changes.
    fixed_rate = float(model.optimizer.learning_rate.numpy())
    learning_rates = [fixed_rate] * len(history.history['loss'])

# Plot learning rate over epochs
plt.figure(figsize=(8, 6))
plt.plot(range(1, len(learning_rates) + 1), learning_rates, marker='o')
plt.xlabel('Epoch')
plt.ylabel('Learning Rate')
plt.title('Learning Rate Schedule')
plt.grid()
plt.show()


# Training Time Per Epoch

In [None]:

import time

# NOTE(review): these are example values, not timings measured during the
# model.fit call in this notebook. Replace them with recorded per-epoch
# durations before drawing conclusions from this figure.
epoch_times = [5.2, 4.8, 5.1, 5.0, 5.3]  # Example times in seconds for each epoch

# Line plot of seconds per epoch, with epochs numbered from 1.
epoch_numbers = range(1, len(epoch_times) + 1)
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(epoch_numbers, epoch_times, marker='o')
ax.set_xlabel('Epoch')
ax.set_ylabel('Time (seconds)')
ax.set_title('Training Time Per Epoch')
ax.grid()
plt.show()

# Class Distribution: Normal / Tuberculosis

In [None]:


import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Map each class name to its folder in the extracted dataset; the number of
# directory entries in a folder is used as that class's image count.
class_folders = {
    'Normal': '/content/TB_Chest_Radiography_Database/Normal',
    'Tuberculosis': '/content/TB_Chest_Radiography_Database/Tuberculosis',
}
class_counts = pd.DataFrame({
    'Class': list(class_folders),
    'Count': [len(os.listdir(folder)) for folder in class_folders.values()],
})

# Bar chart of images per class.
fig, ax = plt.subplots(figsize=(8, 6))
sns.barplot(x='Class', y='Count', data=class_counts, palette='coolwarm', ax=ax)
ax.set_xlabel('Class')
ax.set_ylabel('Number of Images')
ax.set_title('Class Distribution in Dataset')
plt.show()


# Loss Over Epochs (Log Scale)

In [None]:

# Training and validation loss on a logarithmic y-axis.
fig, ax = plt.subplots(figsize=(8, 6))
for history_key, curve_label in (('loss', 'Train Loss'), ('val_loss', 'Validation Loss')):
    ax.plot(history.history[history_key], label=curve_label)
ax.set_yscale('log')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss (Log Scale)')
ax.set_title('Loss Over Epochs (Log Scale)')
ax.legend()
plt.show()
