# Chest X-ray Classification for Pneumonia Detection

## Objective
Develop a model to detect pneumonia from chest X-ray images.

## Dataset
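[Chest X-ray Images (Pneumonia)](https://www.kaggle.com/datasets/paultimothymooney/chest-xray-pneumonia) - This dataset contains chest X-ray images labeled as pneumonia or normal. The code below expects it to be unpacked into `train`, `val`, and `test` directories, each containing a `NORMAL` and a `PNEUMONIA` subfolder.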

## Problem Statement
Build a classification model to detect pneumonia in chest X-rays, providing support for radiologists in diagnosing lung conditions.

## Evaluation Metrics
- Accuracy
- ROC-AUC
- F1 Score


In [None]:
# Data Collection
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Dataset paths: one directory per split (train / validation / test).
# NOTE(review): these look like placeholders -- point them at the real
# dataset location before running the cells below.
train_dir, val_dir, test_dir = (
    '/path/to/chest_xray/train',
    '/path/to/chest_xray/val',
    '/path/to/chest_xray/test',
)

# Function to load images and labels
def load_data(data_dir, img_size=(150, 150)):
    """Load every readable image under ``data_dir`` together with its label.

    ``data_dir`` must contain a ``PNEUMONIA`` and a ``NORMAL`` sub-directory.
    Each image is read with ``cv2.imread`` (default flags, so the channels are
    in OpenCV's BGR order) and resized to ``img_size``.

    Entries that are not regular files, and files OpenCV cannot decode, are
    skipped instead of crashing the whole load.

    Args:
        data_dir: Directory holding the two class sub-directories.
        img_size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays. Labels are 0 for NORMAL
        and 1 for PNEUMONIA. All PNEUMONIA samples come first, followed by
        all NORMAL samples.

    Raises:
        OSError: If a class sub-directory is missing or cannot be listed.
    """
    images = []
    labels = []
    for label in ['PNEUMONIA', 'NORMAL']:
        class_dir = os.path.join(data_dir, label)
        class_id = 0 if label == 'NORMAL' else 1
        # os.listdir returns names in arbitrary order; sort so that the
        # sample order is reproducible between runs and machines.
        for file_name in sorted(os.listdir(class_dir)):
            img_path = os.path.join(class_dir, file_name)
            if not os.path.isfile(img_path):
                # Nested directories are not images.
                continue
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals "could not decode" by returning None
                # (e.g. stray metadata files); cv2.resize would raise on it.
                continue
            images.append(cv2.resize(img, img_size))
            labels.append(class_id)
    return np.array(images), np.array(labels)

# Load train, validation, and test data
X_train, y_train = load_data(train_dir)
X_val, y_val = load_data(val_dir)
X_test, y_test = load_data(test_dir)

# Normalize the images to the [0, 1] range.
# Cast to float32 first: dividing the loaded arrays directly by a Python
# float promotes them to float64, which doubles the memory footprint of the
# image tensors without any benefit for training.
X_train = X_train.astype('float32') / 255.0
X_val = X_val.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

# Display the shape of the datasets
print(f"Training data shape: {X_train.shape}, Labels shape: {y_train.shape}")
print(f"Validation data shape: {X_val.shape}, Labels shape: {y_val.shape}")
print(f"Test data shape: {X_test.shape}, Labels shape: {y_test.shape}")


In [None]:
# Exploratory Data Analysis (EDA)
# Visualize some example images from the dataset.
# load_data appends every PNEUMONIA image before the NORMAL ones, so the
# first rows of X_train all belong to one class. Pick a few samples per
# class instead so both classes are visible.
samples_per_class = 5
sample_indices = np.concatenate(
    [np.flatnonzero(y_train == class_id)[:samples_per_class] for class_id in (0, 1)]
)

fig, axes = plt.subplots(2, samples_per_class, figsize=(20, 8))
axes = axes.ravel()

# Hide every axis up front so unused panels stay blank if a class has fewer
# than `samples_per_class` images.
for ax in axes:
    ax.axis('off')

# zip stops at the shorter sequence, so a small dataset cannot cause an
# out-of-range index.
for ax, idx in zip(axes, sample_indices):
    ax.imshow(X_train[idx])
    ax.set_title(f'Label: {y_train[idx]}')

plt.subplots_adjust(hspace=0.5)
plt.show()


In [None]:
# Data Augmentation
# Random transforms applied on the fly to each batch drawn from the generator.
augmentation_options = dict(
    rotation_range=20,          # random rotations
    zoom_range=0.15,            # random zoom in/out
    width_shift_range=0.2,      # random horizontal shifts
    height_shift_range=0.2,     # random vertical shifts
    shear_range=0.15,           # random shear
    horizontal_flip=True,       # random left/right mirroring
    fill_mode="nearest",        # how pixels exposed by a transform are filled
)
datagen = ImageDataGenerator(**augmentation_options)

# Display augmented images
# No datagen.fit(X_train) call is needed here: fit() only computes the
# statistics used by featurewise_center, featurewise_std_normalization and
# zca_whitening, none of which are enabled on this generator. Calling it
# anyway just copies the whole training set.
augmented_images, _ = next(datagen.flow(X_train, y_train, batch_size=10))

fig, axes = plt.subplots(2, 5, figsize=(20, 8))
axes = axes.ravel()

# Turn every axis off first so panels stay blank if the batch is short.
for ax in axes:
    ax.axis('off')

# zip guards against a batch smaller than the 10 panels (tiny datasets).
for ax, augmented in zip(axes, augmented_images):
    ax.imshow(augmented)

plt.subplots_adjust(hspace=0.5)
plt.show()


In [None]:
# Model Selection and Training
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Define the CNN model: three conv/max-pool stages followed by a small dense
# classifier head with a single sigmoid output.
model = Sequential()

# Feature extractor -- filter count doubles at every stage (32 -> 64 -> 128).
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

# Classifier head.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and is rejected by current Keras releases.
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Set up callbacks
# Stop once the monitored quantity has not improved for 10 epochs and roll
# back to the best weights seen.
early_stopping = EarlyStopping(patience=10, restore_best_weights=True)
# Keep only the best model seen so far on disk.
checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True)
callbacks = [early_stopping, checkpoint]

# Train the model on augmented batches; validation uses the un-augmented
# validation arrays.
train_batches = datagen.flow(X_train, y_train, batch_size=32)
history = model.fit(
    train_batches,
    epochs=50,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
)


In [None]:
# Model Evaluation
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score, classification_report

# Evaluate on the test set
# The model ends in a single sigmoid unit, so predict() yields an (n, 1)
# column. Flatten it so predictions have the same 1-D shape as y_test and the
# metrics below do not rely on implicit column-vector coercion.
y_pred_prob = model.predict(X_test).ravel()
# Threshold the probabilities at 0.5 to obtain hard class labels.
y_pred = (y_pred_prob > 0.5).astype(int)

accuracy = accuracy_score(y_test, y_pred)
# ROC-AUC is computed from the raw probabilities, not the thresholded labels.
roc_auc = roc_auc_score(y_test, y_pred_prob)
f1 = f1_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print(f"ROC-AUC: {roc_auc:.4f}")
print(f"F1 Score: {f1:.4f}")

# Display classification report
print(classification_report(y_test, y_pred))
