<a href="https://www.kaggle.com/code/korukantiharpithrao/x-ray-classification?scriptVersionId=215301672" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# COVID-19 Radiography Dataset - CNN Model

This notebook processes the COVID-19 radiography dataset, builds a CNN model, and evaluates its performance.

In [2]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("tawsifurrahman/covid19-radiography-database")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/covid19-radiography-database


In [3]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import img_to_array
from matplotlib import pyplot as plt

In [4]:
# Set the base path for the dataset
base_path = "/kaggle/input/covid19-radiography-database/COVID-19_Radiography_Dataset"

# Define subsets
subsets = ["COVID", "Lung_Opacity", "Normal", "Viral Pneumonia"]


In [5]:
# Initialize lists for data and labels
data = []
labels = []

# Define image size for resizing
img_size = (128, 128)

# Load images and labels
for subset in subsets:
    image_dir = os.path.join(base_path, subset, "images")
    if os.path.exists(image_dir):
        image_files = os.listdir(image_dir)
        print(f"{subset}: Found {len(image_files)} images.")

        for image_name in image_files:
            image_path = os.path.join(image_dir, image_name)
            try:
                # Load and preprocess the image
                img = Image.open(image_path).convert("L")  # Convert to grayscale
                img = img.resize(img_size)
                img_array = img_to_array(img) / 255.0  # Normalize
                data.append(img_array)
                labels.append(subset)
            except Exception as e:
                print(f"Error loading image {image_path}: {e}")
    else:
        print(f"{subset}: No directory found.")

# Debug: Check if data and labels are populated
print(f"Number of samples: {len(data)}")
print(f"Number of labels: {len(labels)}")
print(f"Unique labels: {set(labels)}")


COVID: Found 3616 images.
Lung_Opacity: Found 6012 images.
Normal: Found 10192 images.
Viral Pneumonia: Found 1345 images.
Number of samples: 21165
Number of labels: 21165
Unique labels: {'COVID', 'Lung_Opacity', 'Viral Pneumonia', 'Normal'}


In [6]:
# Convert data and labels to numpy arrays
data = np.array(data, dtype="float32")
labels = np.array(labels)

In [7]:
# Encode labels
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)
labels_categorical = to_categorical(labels_encoded)

In [8]:
# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data, labels_categorical, test_size=0.2, random_state=42)


In [9]:
# Build the CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation="relu", input_shape=(img_size[0], img_size[1], 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation="relu"),
    Dropout(0.5),
    Dense(len(subsets), activation="softmax")
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
# Compile the model
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# Train the model
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32)


Epoch 1/10
[1m530/530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m234s[0m 439ms/step - accuracy: 0.6027 - loss: 0.9524 - val_accuracy: 0.7647 - val_loss: 0.5840
Epoch 2/10
[1m530/530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m230s[0m 433ms/step - accuracy: 0.7728 - loss: 0.5910 - val_accuracy: 0.7938 - val_loss: 0.5430
Epoch 3/10
[1m530/530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m230s[0m 434ms/step - accuracy: 0.8082 - loss: 0.4963 - val_accuracy: 0.8209 - val_loss: 0.4726
Epoch 4/10
[1m530/530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m227s[0m 428ms/step - accuracy: 0.8336 - loss: 0.4238 - val_accuracy: 0.8488 - val_loss: 0.4178
Epoch 5/10
[1m530/530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m232s[0m 438ms/step - accuracy: 0.8503 - loss: 0.3953 - val_accuracy: 0.8568 - val_loss: 0.3982
Epoch 6/10
[1m530/530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m234s[0m 441ms/step - accuracy: 0.8772 - loss: 0.3322 - val_accuracy: 0.8460 - val_loss: 0.4388
Epoc

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Predict the class probabilities
y_pred = model.predict(X_test)

# Convert probabilities to class labels
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

# Generate the classification report
print(classification_report(y_test_classes, y_pred_classes, target_names=subsets))

In [12]:
# Save the model
model.save("covid_cnn_model.h5")

In [13]:
#Save the Label encoder
import pickle

# Save the LabelEncoder object
label_encoder_filename = "label_encoder.pkl"
with open(label_encoder_filename, "wb") as file:
    pickle.dump(label_encoder, file)




In [14]:
# Predict on test data
prediions = model.predict(X_test)
predicted_classes = np.argmax(predictions, axis=1)
true_classes = npct.argmax(y_test, axis=1)

# Calculate overall accuracy
overall_accuracy = np.sum(predicted_classes == true_classes) / len(true_classes)
print(f"Overall Test Accuracy: {overall_accuracy * 100:.2f}%")

# Decode class indices to labels
class_labels = label_encoder.inverse_transform(predicted_classes)
true_labels = label_encoder.inverse_transform(true_classes)

# Print accuracy for individual files
print("\nFile-level accuracy:")
for i in range(len(X_test)):
    print(f"File {i + 1}:")
    print(f"  Predicted Label: {class_labels[i]}")
    print(f"  True Label: {true_labels[i]}")
    print(f"  Correct: {class_labels[i] == true_labels[i]}")
    print("-" * 30)


[1m133/133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 111ms/step
Overall Test Accuracy: 86.27%

File-level accuracy:
File 1:
  Predicted Label: Viral Pneumonia
  True Label: Viral Pneumonia
  Correct: True
------------------------------
File 2:
  Predicted Label: Normal
  True Label: Normal
  Correct: True
------------------------------
File 3:
  Predicted Label: COVID
  True Label: COVID
  Correct: True
------------------------------
File 4:
  Predicted Label: Normal
  True Label: Normal
  Correct: True
------------------------------
File 5:
  Predicted Label: Normal
  True Label: Normal
  Correct: True
------------------------------
File 6:
  Predicted Label: Normal
  True Label: Lung_Opacity
  Correct: False
------------------------------
File 7:
  Predicted Label: Lung_Opacity
  True Label: Lung_Opacity
  Correct: True
------------------------------
File 8:
  Predicted Label: Normal
  True Label: Normal
  Correct: True
------------------------------
File 9:
  Predicted

In [15]:
!ls /kaggle/working


covid_cnn_model.h5  label_encoder.pkl


In [16]:
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import img_to_array
from PIL import Image
import numpy as np

# Load the trained model
model = load_model("/kaggle/working/covid_cnn_model.h5")


# Define the image size (same as during training)
img_size = (128, 128)

# Function to predict the disease from an input image
def predict_disease(image_path):
    try:
        # Load and preprocess the input image
        img = Image.open(image_path).convert("L")  # Convert to grayscale
        img = img.resize(img_size)  # Resize to match model input
        img_array = img_to_array(img) / 255.0  # Normalize pixel values
        img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension

        # Make prediction
        predictions = model.predict(img_array)
        predicted_class = np.argmax(predictions, axis=1)[0]

        # Decode the predicted class label
        class_labels = label_encoder.classes_
        predicted_label = class_labels[predicted_class]

        # Display prediction
        print(f"Predicted Disease: {predicted_label}")
        return predicted_label
    except Exception as e:
        print(f"Error processing the image: {e}")

# Example usage: Replace with the path to your input image
input_image_path = "/kaggle/input/x-ray-coviddddd/x-ray img 19.jpeg"
predicted_disease = predict_disease(input_image_path)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
Predicted Disease: COVID


## Enhanced Preprocessing with Masks