## Chest X-Ray Image Classification (Pneumonia vs. Normal)

Data source: https://www.kaggle.com/datasets/paultimothymooney/chest-xray-pneumonia

### Importing Libraries

In [20]:
import os

from PIL import Image

import torch
from torch.utils.data import Subset
import random
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models

from sklearn.metrics import accuracy_score

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

In [21]:
class PneumoniaDataset(Dataset):
    """Binary image dataset read from ``root_dir/NORMAL`` and ``root_dir/PNEUMONIA``.

    Every regular, non-hidden file found in the two class folders becomes one
    sample. Samples are labelled ``0`` for ``NORMAL`` and ``1`` for
    ``PNEUMONIA``.

    Args:
        root_dir: Directory containing the ``NORMAL`` and ``PNEUMONIA`` folders.
        transform: Optional callable applied to the RGB ``PIL.Image`` before it
            is returned from ``__getitem__``.

    Raises:
        FileNotFoundError: If one of the two class folders does not exist
            (propagated from ``os.listdir``).
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        for label in ['NORMAL', 'PNEUMONIA']:
            class_dir = os.path.join(root_dir, label)
            # os.listdir() returns entries in arbitrary order; sorting keeps the
            # index -> file mapping stable so seeded sampling is reproducible.
            for img_name in sorted(os.listdir(class_dir)):
                img_path = os.path.join(class_dir, img_name)
                # Skip hidden files (e.g. .DS_Store) and sub-directories, which
                # would otherwise fail when opened as images.
                if img_name.startswith('.') or not os.path.isfile(img_path):
                    continue
                self.image_paths.append(img_path)
                self.labels.append(0 if label == 'NORMAL' else 1)

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is loaded as RGB and passed through ``self.transform`` when
        one was supplied; ``label`` is the integer class (0 or 1).
        """
        img_path = self.image_paths[idx]
        label = self.labels[idx]

        # convert() returns a fully loaded copy, so the underlying file can be
        # closed as soon as the context manager exits.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        if self.transform:
            image = self.transform(image)
        return image, label

In [22]:
# Preprocessing applied to every image: resize to 224x224, convert to a tensor,
# then normalise each channel with the mean/std values listed below.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform = transforms.Compose(preprocessing_steps)

In [23]:
# One dataset per split directory, all sharing the same preprocessing.
# Construction order (train, test, val) matches the unpacking order.
train_dataset, test_dataset, val_dataset = (
    PneumoniaDataset(root_dir=split_dir, transform=transform)
    for split_dir in ('data/train', 'data/test', 'data/val')
)

In [24]:
# Subsetting Train and Test datasets for light computation.
# The validation set is left untouched.
SUBSET_FRACTION = 0.1  # fraction of each split to keep

random.seed(42)
torch.manual_seed(42)

# This cell rebinds train_dataset/test_dataset. If it has already run, unwrap
# the previous Subset so a re-run samples from the full split again instead of
# taking 10% of an already-reduced 10%.
train_full = train_dataset.dataset if isinstance(train_dataset, Subset) else train_dataset
test_full = test_dataset.dataset if isinstance(test_dataset, Subset) else test_dataset

# Get the indices for all samples in the dataset
all_train_indices = list(range(len(train_full)))
all_test_indices = list(range(len(test_full)))

# Randomly sample SUBSET_FRACTION of the indices. Keep at least one sample for
# a non-empty split so the loaders built from these subsets are never empty.
train_sample_size = min(len(all_train_indices), max(1, int(SUBSET_FRACTION * len(all_train_indices))))
test_sample_size = min(len(all_test_indices), max(1, int(SUBSET_FRACTION * len(all_test_indices))))
train_sampled_indices = random.sample(all_train_indices, train_sample_size)
test_sampled_indices = random.sample(all_test_indices, test_sample_size)

# Create a Subset dataset using the sampled indices
train_dataset = Subset(train_full, train_sampled_indices)
test_dataset = Subset(test_full, test_sampled_indices)

In [25]:
# Mini-batch loaders for each split. Only the training loader shuffles;
# test and validation batches are read in dataset order.
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [26]:
# Start from a ResNet-18 with IMAGENET1K_V1 weights and replace its final
# fully connected layer with a 2-way head (NORMAL, PNEUMONIA).
backbone = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
head_in_features = backbone.fc.in_features
backbone.fc = nn.Linear(head_in_features, 2)
model = backbone.to(device)

In [27]:
# Cross-entropy over the two output logits; Adam updates every model parameter.
learning_rate = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [28]:
num_epochs = 10

for epoch in range(num_epochs):
    # ---- Training pass over the (sub-sampled) training set ----
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate a plain Python float. Adding the tensor itself would keep
        # autograd history alive across iterations and make the printed value
        # a tensor repr instead of a number.
        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader)}")

    # ---- Validation pass: no gradient tracking, eval-mode layers ----
    model.eval()
    val_labels = []
    val_preds = []

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            # Predicted class is the index of the largest logit.
            _, preds = torch.max(outputs, 1)

            val_labels.extend(labels.cpu().numpy())
            val_preds.extend(preds.cpu().numpy())

    val_accuracy = accuracy_score(val_labels, val_preds)
    print('Validation accuracy:', val_accuracy)

Epoch 1/10, Loss: 0.29726773500442505
Validation accuracy: 0.5
Epoch 2/10, Loss: 0.16177082061767578
Validation accuracy: 0.625
Epoch 3/10, Loss: 0.13793668150901794
Validation accuracy: 0.5625
Epoch 4/10, Loss: 0.0824119821190834
Validation accuracy: 0.875
Epoch 5/10, Loss: 0.03334307670593262
Validation accuracy: 0.75
Epoch 6/10, Loss: 0.021180763840675354
Validation accuracy: 0.75
Epoch 7/10, Loss: 0.020063208416104317
Validation accuracy: 0.75
Epoch 8/10, Loss: 0.07669175416231155
Validation accuracy: 0.75
Epoch 9/10, Loss: 0.06072697043418884
Validation accuracy: 0.875
Epoch 10/10, Loss: 0.02942483127117157
Validation accuracy: 0.6875


In [29]:
# Final evaluation on the (sub-sampled) test set.
# Only test_* variables are touched here, so the validation results from the
# training cell stay available for inspection.
model.eval()
test_labels = []
test_preds = []

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        # Predicted class is the index of the largest logit.
        _, preds = torch.max(outputs, 1)

        test_labels.extend(labels.cpu().numpy())
        test_preds.extend(preds.cpu().numpy())

test_accuracy = accuracy_score(test_labels, test_preds)
print('Test accuracy:', test_accuracy)

# Save only the weights (state_dict), not the whole model object.
torch.save(model.state_dict(), 'pneumonia_classifier.pth')

Test accuracy: 0.9354838709677419
