In [4]:
# Imports & GPU check
import os
import copy
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from torchvision import transforms, datasets, models
from tqdm import tqdm

# Report the runtime environment up front: library version, whether CUDA is
# visible, and how many GPUs PyTorch can see.
for label, value in (
    ("Torch:", torch.__version__),
    ("CUDA available:", torch.cuda.is_available()),
    ("GPU count:", torch.cuda.device_count()),
):
    print(label, value)

Torch: 2.6.0+cu124
CUDA available: True
GPU count: 2


In [5]:
# Dataset paths
BASE_DIR = "/kaggle/input/chest-xray-pneumonia/chest_xray"

# One sub-directory of BASE_DIR per split, in train / val / test order.
TRAIN_PATH, VAL_PATH, TEST_PATH = (
    os.path.join(BASE_DIR, split_name)
    for split_name in ("train", "val", "test")
)

In [6]:
# Transforms & Dataloaders
IMG_SIZE = 224
BATCH_SIZE = 32

# Per-channel mean / std handed to Normalize in both pipelines.
# NOTE(review): these look like the usual ImageNet statistics, presumably
# chosen to match the pretrained backbone -- confirm.
NORM_MEAN = [0.485,0.456,0.406]
NORM_STD = [0.229,0.224,0.225]


def _tensor_and_normalize():
    """Return fresh ToTensor + Normalize steps shared by every pipeline."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(NORM_MEAN, NORM_STD),
    ]


def _make_loader(dataset, shuffle):
    """Wrap `dataset` in a DataLoader using the notebook-wide loader settings."""
    return DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
        num_workers=2,
        pin_memory=True,
    )


# Training pipeline: resize, random flip / small rotation, then the shared tail.
train_tfms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    *_tensor_and_normalize(),
])

# Evaluation pipeline: resize only, no random augmentation.
val_tfms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    *_tensor_and_normalize(),
])

# Validation and test both use the evaluation transforms.
train_ds = datasets.ImageFolder(TRAIN_PATH, transform=train_tfms)
val_ds = datasets.ImageFolder(VAL_PATH, transform=val_tfms)
test_ds = datasets.ImageFolder(TEST_PATH, transform=val_tfms)

# Only the training loader shuffles.
train_loader = _make_loader(train_ds, shuffle=True)
val_loader = _make_loader(val_ds, shuffle=False)
test_loader = _make_loader(test_ds, shuffle=False)

print("Classes:", train_ds.classes)

Classes: ['NORMAL', 'PNEUMONIA']


In [7]:
# Build ResNet-50 + multi-GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Start from torchvision's default pretrained ResNet-50 weights.
weights = models.ResNet50_Weights.DEFAULT
base_model = models.resnet50(weights=weights)

# Freeze every existing parameter in one call. The head assigned below is
# created afterwards, so it stays trainable.
base_model.requires_grad_(False)

# Swap the final fully-connected layer for a fresh 2-output classifier.
base_model.fc = nn.Linear(base_model.fc.in_features, 2)

# Wrap in DataParallel only when more than one GPU is visible.
gpu_count = torch.cuda.device_count()
model = base_model
if gpu_count > 1:
    print("Using DataParallel on", gpu_count, "GPUs")
    model = nn.DataParallel(base_model)

model = model.to(device)

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 194MB/s] 


Using DataParallel on 2 GPUs


In [8]:
# Training setup
criterion = nn.CrossEntropyLoss()

# Only the final FC layer is optimised. When the model is wrapped in
# DataParallel the real network (and its `fc`) lives under `.module`.
head = model.module.fc if isinstance(model, nn.DataParallel) else model.fc
params = head.parameters()

optimizer = optim.Adam(params, lr=1e-3)
# Multiply the learning rate by 0.1 every 3 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [10]:
# Training function + best model saving
def train_model(model, epochs, save_path="/kaggle/working/model.pth"):
    """Train `model` for `epochs` epochs, keeping the best-validation weights.

    Relies on notebook-level globals defined in earlier cells:
    `train_loader`, `val_loader`, `criterion`, `optimizer`, `scheduler`
    and `device`. Because `optimizer` and `scheduler` are shared globals,
    calling this function again continues from their current state
    (including any learning-rate decay already applied).

    Args:
        model: The network to train; may be wrapped in `nn.DataParallel`.
        epochs: Number of passes over `train_loader`.
        save_path: Where the best state dict is written each time the
            validation accuracy improves.

    Returns:
        The same `model` object, with the best-validation weights loaded.
    """
    # Always read / write the state dict on the unwrapped network so the
    # checkpoint can be loaded into a plain (non-DataParallel) model.
    net = model.module if isinstance(model, nn.DataParallel) else model

    # Guard against division by zero on an empty dataset.
    n_train = max(1, len(train_loader.dataset))
    n_val = max(1, len(val_loader.dataset))

    best_acc = 0
    best_weights = copy.deepcopy(net.state_dict())

    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}/{epochs}")
        print("-"*30)

        # ---- TRAIN ----
        model.train()
        total_loss, correct = 0.0, 0

        for x, y in tqdm(train_loader):
            x, y = x.to(device), y.to(device)

            optimizer.zero_grad()

            out = model(x)
            loss = criterion(out, y)

            loss.backward()
            optimizer.step()

            # criterion returns a batch mean; re-weight by batch size so the
            # epoch figure is a true per-sample average.
            total_loss += loss.item() * x.size(0)
            correct += (out.argmax(1) == y).sum().item()

        acc = correct / n_train
        print(f"Train Loss: {total_loss / n_train:.4f}")
        print(f"Train Acc: {acc:.4f}")

        # ---- VALIDATE ----
        model.eval()
        correct = 0

        with torch.no_grad():
            for x, y in tqdm(val_loader):
                x, y = x.to(device), y.to(device)
                out = model(x)
                correct += (out.argmax(1) == y).sum().item()

        val_acc = correct / n_val
        print(f"Val Acc: {val_acc:.4f}")

        # SAVE BEST MODEL (strict improvement only, so ties keep the earlier epoch)
        if val_acc > best_acc:
            best_acc = val_acc
            best_weights = copy.deepcopy(net.state_dict())
            torch.save(best_weights, save_path)
            print("Saved best model")

        scheduler.step()

    # Restore the best snapshot before handing the model back.
    net.load_state_dict(best_weights)
    print("Best Validation Acc:", round(best_acc,4))
    return model

In [17]:
# Run training
EPOCHS = 15  # number of passes over the training loader

# train_model loads the best-validation weights back into `model` in place;
# `trained_model` is bound to whatever the call returns.
trained_model = train_model(model, epochs=EPOCHS)


Epoch 1/15
------------------------------


100%|██████████| 163/163 [00:59<00:00,  2.76it/s]


Train Acc: 0.9444


100%|██████████| 1/1 [00:00<00:00,  2.20it/s]


Val Acc: 0.7500
Saved best model

Epoch 2/15
------------------------------


100%|██████████| 163/163 [00:59<00:00,  2.73it/s]


Train Acc: 0.9423


100%|██████████| 1/1 [00:00<00:00,  2.22it/s]


Val Acc: 0.7500

Epoch 3/15
------------------------------


100%|██████████| 163/163 [00:58<00:00,  2.80it/s]


Train Acc: 0.9448


100%|██████████| 1/1 [00:00<00:00,  2.13it/s]


Val Acc: 0.7500

Epoch 4/15
------------------------------


100%|██████████| 163/163 [00:59<00:00,  2.72it/s]


Train Acc: 0.9425


100%|██████████| 1/1 [00:00<00:00,  2.21it/s]


Val Acc: 0.6875

Epoch 5/15
------------------------------


100%|██████████| 163/163 [00:59<00:00,  2.75it/s]


Train Acc: 0.9434


100%|██████████| 1/1 [00:00<00:00,  2.11it/s]


Val Acc: 0.6875

Epoch 6/15
------------------------------


100%|██████████| 163/163 [00:59<00:00,  2.74it/s]


Train Acc: 0.9461


100%|██████████| 1/1 [00:00<00:00,  2.15it/s]


Val Acc: 0.7500

Epoch 7/15
------------------------------


100%|██████████| 163/163 [00:58<00:00,  2.79it/s]


Train Acc: 0.9423


100%|██████████| 1/1 [00:00<00:00,  2.13it/s]


Val Acc: 0.7500

Epoch 8/15
------------------------------


100%|██████████| 163/163 [00:57<00:00,  2.85it/s]


Train Acc: 0.9448


100%|██████████| 1/1 [00:00<00:00,  2.29it/s]


Val Acc: 0.6250

Epoch 9/15
------------------------------


100%|██████████| 163/163 [00:58<00:00,  2.79it/s]


Train Acc: 0.9423


100%|██████████| 1/1 [00:00<00:00,  2.23it/s]


Val Acc: 0.6875

Epoch 10/15
------------------------------


100%|██████████| 163/163 [00:59<00:00,  2.75it/s]


Train Acc: 0.9427


100%|██████████| 1/1 [00:00<00:00,  2.24it/s]


Val Acc: 0.7500

Epoch 11/15
------------------------------


100%|██████████| 163/163 [00:58<00:00,  2.79it/s]


Train Acc: 0.9446


100%|██████████| 1/1 [00:00<00:00,  2.32it/s]


Val Acc: 0.7500

Epoch 12/15
------------------------------


100%|██████████| 163/163 [00:57<00:00,  2.84it/s]


Train Acc: 0.9438


100%|██████████| 1/1 [00:00<00:00,  2.37it/s]


Val Acc: 0.6875

Epoch 13/15
------------------------------


100%|██████████| 163/163 [00:56<00:00,  2.88it/s]


Train Acc: 0.9427


100%|██████████| 1/1 [00:00<00:00,  2.34it/s]


Val Acc: 0.7500

Epoch 14/15
------------------------------


100%|██████████| 163/163 [00:57<00:00,  2.85it/s]


Train Acc: 0.9427


100%|██████████| 1/1 [00:00<00:00,  2.21it/s]


Val Acc: 0.7500

Epoch 15/15
------------------------------


100%|██████████| 163/163 [00:57<00:00,  2.83it/s]


Train Acc: 0.9423


100%|██████████| 1/1 [00:00<00:00,  2.35it/s]

Val Acc: 0.6250
Best Validation Acc: 0.75





In [20]:
from torchvision import models
import torch.nn as nn
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Rebuild the bare architecture with no pretrained download; every weight is
# overwritten by the checkpoint below. `weights=None` replaces the deprecated
# `pretrained=False` and matches the `weights=` API used when the model was
# first built.
model = models.resnet50(weights=None)
model.fc = nn.Linear(model.fc.in_features, 2)

# The checkpoint is a plain state dict of tensors, so restrict unpickling to
# tensors / primitive containers.
state_dict = torch.load(
    "/kaggle/working/model.pth",
    map_location=device,
    weights_only=True,
)
model.load_state_dict(state_dict)
model.to(device)
model.eval()  # inference mode for the evaluation that follows

trained_model = model

In [21]:
# Score `trained_model` on the test loader.
# NOTE(review): eval_on_loader is not defined in any cell shown here --
# confirm it exists in an earlier cell so Restart & Run All succeeds.
test_acc = eval_on_loader(trained_model, test_loader, device)
final_accuracy = round(test_acc, 4)
print("FINAL TEST ACCURACY:", final_accuracy)

FINAL TEST ACCURACY: 0.8446
