NEW

In [2]:
!pip install -q kaggle

# Upload kaggle.json if not uploaded already
from google.colab import files
files.upload()  # choose kaggle.json

# Set up Kaggle API
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# ✅ Download correct PneumoniaMNIST dataset
!kaggle datasets download -d rijulshr/pneumoniamnist
!unzip pneumoniamnist.zip -d /content/pneumoniamnist

Saving kaggle.json to kaggle.json
Dataset URL: https://www.kaggle.com/datasets/rijulshr/pneumoniamnist
License(s): MIT
Downloading pneumoniamnist.zip to /content
  0% 0.00/3.39M [00:00<?, ?B/s]
100% 3.39M/3.39M [00:00<00:00, 800MB/s]
Archive:  pneumoniamnist.zip
  inflating: /content/pneumoniamnist/pneumoniamnist.npz  


In [3]:
import numpy as np
data = np.load('/content/pneumoniamnist/pneumoniamnist.npz')
print(data.files)

['train_images', 'train_labels', 'val_images', 'val_labels', 'test_images', 'test_labels']


In [4]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import classification_report
from PIL import Image
import matplotlib.pyplot as plt


In [5]:
class PneumoniaSplitDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = Image.fromarray(self.images[idx].astype(np.uint8), mode='L').convert('RGB')
        label = torch.tensor(self.labels[idx], dtype=torch.float32)
        if self.transform:
            image = self.transform(image)
        return image, label

In [6]:
# Load the single .npz file
data = np.load('/content/pneumoniamnist/pneumoniamnist.npz')

# Define transforms
transform = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Create datasets
train_dataset = PneumoniaSplitDataset(data['train_images'], data['train_labels'], transform=transform)
val_dataset   = PneumoniaSplitDataset(data['val_images'], data['val_labels'], transform=transform)
test_dataset  = PneumoniaSplitDataset(data['test_images'], data['test_labels'], transform=transform)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader  = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# ✅ DETECT CLASS IMBALANCE AND COMPUTE pos_weight
import numpy as np

labels_np = data['train_labels']
unique, counts = np.unique(labels_np, return_counts=True)

# Print class distribution
for u, c in zip(unique, counts):
    print(f"Class {int(u)}: {c} samples")

# Compute pos_weight
neg_count = counts[0]  # Normal
pos_count = counts[1]  # Pneumonia
pos_weight = torch.tensor([neg_count / pos_count], dtype=torch.float32).to(device)

print(f"\nCalculated pos_weight for BCEWithLogitsLoss: {pos_weight.item():.4f}")

Class 0: 388 samples
Class 1: 3494 samples

Calculated pos_weight for BCEWithLogitsLoss: 0.1110


In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = models.inception_v3(pretrained=True)
model.aux_logits = False
model.fc = nn.Linear(model.fc.in_features, 1)

model = model.to(device)
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
optimizer = optim.Adam(model.parameters(), lr=0.0001)


Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth
100%|██████████| 104M/104M [00:00<00:00, 174MB/s] 


In [10]:
epochs = 5
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device).view(-1, 1)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}")


Epoch 1/5, Loss: 0.0536
Epoch 2/5, Loss: 0.0288
Epoch 3/5, Loss: 0.0225
Epoch 4/5, Loss: 0.0227
Epoch 5/5, Loss: 0.0167


In [11]:
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        outputs = model(inputs)
        preds = torch.sigmoid(outputs).cpu().numpy() > 0.5
        all_preds.extend(preds.astype(int))
        all_labels.extend(labels.numpy())

print("Classification Report:")
print(classification_report(all_labels, all_preds, target_names=["Normal", "Pneumonia"]))


Classification Report:
              precision    recall  f1-score   support

      Normal       0.95      0.79      0.86       234
   Pneumonia       0.88      0.97      0.93       390

    accuracy                           0.90       624
   macro avg       0.92      0.88      0.89       624
weighted avg       0.91      0.90      0.90       624



In [12]:
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# Convert predictions and labels to numpy arrays
y_true = np.array(all_labels)
y_pred = np.array(all_preds).flatten()

# Print metrics
print("🔍 Evaluation Metrics:")
print(f"Accuracy:  {accuracy_score(y_true, y_pred):.4f}")
print(f"Precision: {precision_score(y_true, y_pred):.4f}")
print(f"Recall:    {recall_score(y_true, y_pred):.4f}")
print(f"F1 Score:  {f1_score(y_true, y_pred):.4f}")

# Optional: detailed classification report
print("\nDetailed Classification Report:")
print(classification_report(y_true, y_pred, target_names=["Normal", "Pneumonia"]))


🔍 Evaluation Metrics:
Accuracy:  0.9038
Precision: 0.8837
Recall:    0.9744
F1 Score:  0.9268

Detailed Classification Report:
              precision    recall  f1-score   support

      Normal       0.95      0.79      0.86       234
   Pneumonia       0.88      0.97      0.93       390

    accuracy                           0.90       624
   macro avg       0.92      0.88      0.89       624
weighted avg       0.91      0.90      0.90       624

