In [2]:
os.chdir("..")  # Moves from Notebooks/ to the main Tuberclosis Project/
print("Now in:", os.getcwd())

Now in: d:\Tuberclosis Project


In [3]:
os.listdir("data/raw")

['Normal', 'Tuberculosis']

In [5]:
import os
import shutil
import random
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

In [6]:
# Fix working directory
if 'Notebooks' in os.getcwd():
    os.chdir("..")

def prepare_directory_structure(base='data'):
    sets = ['train', 'val', 'test']
    categories = ['Tuberculosis', 'Normal']
    for s in sets:
        for cat in categories:
            path = os.path.join(base, s, cat)
            os.makedirs(path, exist_ok=True)

def split_and_copy_images(source_dir, target_base='data', split_ratio=(0.7, 0.2, 0.1)):
    category = os.path.basename(source_dir)
    images = [f for f in os.listdir(source_dir) if f.lower().endswith(('.jpg', '.png'))]
    random.shuffle(images)
    total = len(images)
    train_end = int(total * split_ratio[0])
    val_end = int(total * (split_ratio[0] + split_ratio[1]))

    for i, img_name in enumerate(images):
        src_path = os.path.join(source_dir, img_name)
        if i < train_end:
            dest = os.path.join(target_base, 'train', category, img_name)
        elif i < val_end:
            dest = os.path.join(target_base, 'val', category, img_name)
        else:
            dest = os.path.join(target_base, 'test', category, img_name)
        shutil.copy(src_path, dest)

# Run setup
prepare_directory_structure()
split_and_copy_images("data/raw/Tuberculosis")
split_and_copy_images("data/raw/Normal")


In [7]:
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

train_gen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    zoom_range=0.2,
    shear_range=0.2,
    horizontal_flip=True
)

val_test_gen = ImageDataGenerator(rescale=1./255)

train_data = train_gen.flow_from_directory(
    'data/train',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary'
)

val_data = val_test_gen.flow_from_directory(
    'data/val',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary'
)

test_data = val_test_gen.flow_from_directory(
    'data/test',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)


Found 2104 images belonging to 2 classes.
Found 602 images belonging to 2 classes.
Found 302 images belonging to 2 classes.


In [8]:
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze base layers
for layer in base_model.layers:
    layer.trainable = False

# Add custom classification layers
x = GlobalAveragePooling2D()(base_model.output)
x = Dropout(0.5)(x)
x = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=x)

model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 0us/step


In [None]:
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=10
)
# Evaluate the model on the test set

  self._warn_if_super_not_called()


Epoch 1/10
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m586s[0m 9s/step - accuracy: 0.5733 - loss: 0.7417 - val_accuracy: 0.8289 - val_loss: 0.4703
Epoch 2/10
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59905s[0m 922s/step - accuracy: 0.7362 - loss: 0.5761 - val_accuracy: 0.8289 - val_loss: 0.4571
Epoch 3/10
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m923s[0m 14s/step - accuracy: 0.8104 - loss: 0.5127 - val_accuracy: 0.8289 - val_loss: 0.4596
Epoch 4/10
[1m25/66[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m5:53[0m 9s/step - accuracy: 0.8286 - loss: 0.4725

In [None]:
# Evaluate on test set
test_loss, test_acc = model.evaluate(test_data)
print(f"Test Accuracy: {test_acc:.2f}")

# Classification report
pred_probs = model.predict(test_data)
pred_classes = (pred_probs > 0.5).astype(int).reshape(-1)
true_classes = test_data.classes

print(classification_report(true_classes, pred_classes, target_names=['Normal', 'Tuberculosis']))
