In [1]:
pip install tensorflow opencv-python scikit-learn matplotlib


Note: you may need to restart the kernel to use updated packages.


In [3]:
import os
import zipfile
import numpy as np
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [5]:
zip_path = "chest.zip"
extract_path = "chest"
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)


In [7]:
# Step 5: Load and preprocess data
def load_data(data_dir, img_size=224):
    categories = ['Normal', 'Tuberculosis']
    data = []
    for label, category in enumerate(categories):
        path = os.path.join(data_dir, category)
        for img_file in os.listdir(path):
            try:
                img_path = os.path.join(path, img_file)
                img = cv2.imread(img_path)
                img = cv2.resize(img, (img_size, img_size))
                data.append([img, label])
            except Exception as e:
                continue
    return data

data_dir = "chest/chest"
data = load_data(data_dir)
print(f"Total images loaded: {len(data)}")

Total images loaded: 4200


In [9]:
# Step 6: Shuffle and prepare dataset
import numpy as np
np.random.shuffle(data)
X, y = zip(*data)
X = np.array(X) / 255.0  # Normalize
y = to_categorical(np.array(y), num_classes=2)

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, stratify=y)


In [11]:
# Step 7: Define CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    BatchNormalization(),
    MaxPooling2D(2, 2),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2, 2),

    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2, 2),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(2, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [13]:
# Step 8: Data augmentation and training
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(rotation_range=10, zoom_range=0.1, horizontal_flip=True)
datagen.fit(X_train)

history = model.fit(datagen.flow(X_train, y_train, batch_size=32),
                    epochs=10,
                    validation_data=(X_val, y_val))


  self._warn_if_super_not_called()


Epoch 1/10
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m148s[0m 1s/step - accuracy: 0.8874 - loss: 2.9737 - val_accuracy: 0.1905 - val_loss: 20.5310
Epoch 2/10
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 1s/step - accuracy: 0.9442 - loss: 0.6901 - val_accuracy: 0.5476 - val_loss: 2.5229
Epoch 3/10
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 1s/step - accuracy: 0.9572 - loss: 0.2711 - val_accuracy: 0.9155 - val_loss: 1.5771
Epoch 4/10
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 1s/step - accuracy: 0.9431 - loss: 0.5054 - val_accuracy: 0.8607 - val_loss: 0.6876
Epoch 5/10
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m673s[0m 6s/step - accuracy: 0.9564 - loss: 0.1920 - val_accuracy: 0.9393 - val_loss: 0.7162
Epoch 6/10
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 1s/step - accuracy: 0.9681 - loss: 0.2208 - val_accuracy: 0.9310 - val_loss: 1.3132
Epoch 7/10
[1m105/10

In [15]:
# Step 9: Evaluate model
val_loss, val_acc = model.evaluate(X_val, y_val)
print(f"Validation Accuracy: {val_acc * 100:.2f}%")

y_pred = np.argmax(model.predict(X_val), axis=1)
y_true = np.argmax(y_val, axis=1)
print(classification_report(y_true, y_pred, target_names=['Normal', 'Tuberculosis']))


[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 181ms/step - accuracy: 0.8966 - loss: 1.2525
Validation Accuracy: 91.07%
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 182ms/step
              precision    recall  f1-score   support

      Normal       0.90      1.00      0.95       700
Tuberculosis       1.00      0.46      0.63       140

    accuracy                           0.91       840
   macro avg       0.95      0.73      0.79       840
weighted avg       0.92      0.91      0.90       840



In [17]:
# Step 10: Save the model
model.save("chest_xray_model.h5")
print("Model saved as chest_xray_model.h5")



Model saved as chest_xray_model.h5


In [23]:
# Step 11: Real-time prediction function
def diagnose_image(img_path):
    img = cv2.imread(img_path)
    if img is None:
        print("Error: Image not found or path is incorrect.")
        return None
    img = cv2.resize(img, (224, 224))
    img = np.expand_dims(img / 255.0, axis=0)
    pred = model.predict(img)
    label = np.argmax(pred)
    confidence = np.max(pred)
    return ("Normal" if label == 0 else "Tuberculosis", f"{confidence * 100:.2f}%")

# Call the function here
result, confidence = diagnose_image('i1.jpeg')
print("Prediction:", result)
print("Confidence:", confidence)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
Prediction: Normal
Confidence: 100.00%
