In [1]:
import os

max_files = 50  # number of files to print
count = 0


def _iter_input_files(root):
    """Yield the full path of every file found while walking ``root``."""
    for dirname, _, filenames in os.walk(root):
        for filename in filenames:
            yield os.path.join(dirname, filename)


# Print paths one at a time and stop as soon as the cap is reached, so the
# walk never continues past what is actually displayed.
for path in _iter_input_files('/kaggle/input'):
    print(path)
    count += 1
    if count >= max_files:
        break


/kaggle/input/data/LOG_CHESTXRAY.pdf
/kaggle/input/data/README_CHESTXRAY.pdf
/kaggle/input/data/BBox_List_2017.csv
/kaggle/input/data/test_list.txt
/kaggle/input/data/ARXIV_V5_CHESTXRAY.pdf
/kaggle/input/data/Data_Entry_2017.csv
/kaggle/input/data/train_val_list.txt
/kaggle/input/data/FAQ_CHESTXRAY.pdf
/kaggle/input/data/images_003/images/00006199_010.png
/kaggle/input/data/images_003/images/00004833_016.png
/kaggle/input/data/images_003/images/00006260_000.png
/kaggle/input/data/images_003/images/00004911_010.png
/kaggle/input/data/images_003/images/00004186_007.png
/kaggle/input/data/images_003/images/00004459_000.png
/kaggle/input/data/images_003/images/00005004_001.png
/kaggle/input/data/images_003/images/00005757_000.png
/kaggle/input/data/images_003/images/00004221_001.png
/kaggle/input/data/images_003/images/00004875_000.png
/kaggle/input/data/images_003/images/00005448_001.png
/kaggle/input/data/images_003/images/00005572_000.png
/kaggle/input/data/images_003/images/00005271_00

In [2]:
# # This Python 3 environment comes with many helpful analytics libraries installed
# # It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# # For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# # Input data files are available in the read-only "../input/" directory
# # For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# # You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# # You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from tqdm import tqdm
from sklearn.metrics import roc_auc_score, f1_score
import numpy as np
import warnings

In [4]:
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation warnings from the libraries used
# below — consider narrowing the filter if output noise is the only concern.
warnings.filterwarnings("ignore")

### Paths & Parameters

In [5]:
# All dataset files hang off a single root directory.
data_dir = "/kaggle/input/data"
csv_path = f"{data_dir}/Data_Entry_2017.csv"
train_val_list = f"{data_dir}/train_val_list.txt"
test_list = f"{data_dir}/test_list.txt"

# Class names; their order fixes the column order of the multi-hot targets.
all_labels = [
    "Atelectasis",
    "Cardiomegaly",
    "Effusion",
    "Infiltration",
    "Mass",
    "Nodule",
    "Pneumonia",
    "Pneumothorax",
    "Consolidation",
    "Edema",
    "Emphysema",
    "Fibrosis",
    "Pleural_Thickening",
    "Hernia",
]
NUM_CLASSES = len(all_labels)

# Training hyper-parameters.
BATCH_SIZE = 16
LR = 1e-4
NUM_EPOCHS = 15

# Use the GPU when one is visible, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

### Load metadata & encode labels

In [6]:
# Read the metadata table; later cells use its "Image Index",
# "Finding Labels" and "Patient ID" columns.
df = pd.read_csv(csv_path)

def encode_labels(label_string, classes=None, sep="|"):
    """Convert a delimited findings string into a multi-hot list.

    The string is split on ``sep`` and each token is compared for exact
    equality with the class names, so a class is never switched on merely
    because its name happens to be a substring of some other finding.

    Args:
        label_string: Findings joined by ``sep`` (e.g. ``"Mass|Nodule"``).
            Non-string values (such as a missing/NaN cell) are treated as
            "no findings".
        classes: Ordered class names; defaults to the module-level
            ``all_labels``.
        sep: Token separator. Defaults to ``"|"`` — assumes the
            "Finding Labels" column is pipe-separated; verify against the CSV.

    Returns:
        A list of 0/1 ints, one entry per class, in ``classes`` order.
    """
    if classes is None:
        classes = all_labels
    if not isinstance(label_string, str):
        return [0] * len(classes)
    # Strip each token so stray whitespace around separators cannot hide a match.
    findings = {token.strip() for token in label_string.split(sep)}
    return [1 if cls in findings else 0 for cls in classes]

# Image file name -> multi-hot label list, built by walking the two relevant
# columns side by side.
label_map = {
    image_name: encode_labels(findings)
    for image_name, findings in zip(df["Image Index"], df["Finding Labels"])
}

### Read train/val/test splits

In [7]:
def _read_image_names(list_path):
    """Return every line of ``list_path`` with surrounding whitespace removed."""
    with open(list_path, "r") as f:
        return [line.strip() for line in f]


train_val_imgs = _read_image_names(train_val_list)
test_imgs = _read_image_names(test_list)

# Metadata rows that belong to the train/val list
in_train_val = df["Image Index"].isin(train_val_imgs)
train_val_df = df[in_train_val]

# Split on patient IDs rather than images so that no patient ends up on
# both sides of the train/validation boundary.
patients = train_val_df["Patient ID"].unique()
train_patients, val_patients = train_test_split(patients, test_size=0.2, random_state=42)

patient_ids = train_val_df["Patient ID"]
train_df = train_val_df[patient_ids.isin(train_patients)]
val_df = train_val_df[patient_ids.isin(val_patients)]

train_imgs = train_df["Image Index"].tolist()
val_imgs = val_df["Image Index"].tolist()

# Keep only test images that have a metadata row; from here on `test_imgs`
# holds those names in dataframe order.
in_test = df["Image Index"].isin(test_imgs)
test_df = df[in_test]
test_imgs = test_df["Image Index"].tolist()

print(f"Number of training images: {len(train_imgs)}")
print(f"Number of validation images: {len(val_imgs)}")
print(f"Number of test images: {len(test_imgs)}")


Number of training images: 69625
Number of validation images: 16899
Number of test images: 25596


### Dataset

In [8]:
class ChestXrayDataset(Dataset):
    """Image/label dataset whose files live in ``<root_dir>/<part>/images/``.

    Args:
        img_list: Sequence of image file names; defines the dataset order.
        labels: Mapping from image file name to a sequence of label values.
        root_dir: Directory whose sub-folders each contain an ``images`` folder.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, img_list, labels, root_dir, transform=None):
        self.img_list = img_list
        self.labels = labels
        self.root_dir = root_dir
        self.transform = transform
        # Populated lazily by _image_dirs() so construction does no disk I/O.
        self._image_dirs_cache = None

    def __len__(self):
        return len(self.img_list)

    def _image_dirs(self):
        """Return (and cache) every ``<root_dir>/<part>/images`` directory.

        The listing is done once per dataset object instead of once per item;
        folders created after the first lookup are therefore not seen.
        """
        dirs = getattr(self, "_image_dirs_cache", None)
        if dirs is None:
            dirs = []
            for part in os.listdir(self.root_dir):
                candidate = os.path.join(self.root_dir, part, "images")
                # Plain files in root_dir (CSVs, PDFs, ...) are skipped here.
                if os.path.isdir(candidate):
                    dirs.append(candidate)
            self._image_dirs_cache = dirs
        return dirs

    def _resolve_path(self, img_name):
        """Return the path of ``img_name`` inside the first folder holding it.

        Raises:
            FileNotFoundError: If no ``images`` folder contains the file.
        """
        for images_dir in self._image_dirs():
            candidate = os.path.join(images_dir, img_name)
            if os.path.exists(candidate):
                return candidate
        raise FileNotFoundError(f"{img_name} not found in {self.root_dir}")

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``.

        The image is converted to RGB and passed through ``transform`` when
        one was given; the label is a float tensor built from ``labels``.
        """
        img_name = self.img_list[idx]
        img_path = self._resolve_path(img_name)
        # The context manager closes the underlying file as soon as the
        # pixel data has been converted, rather than waiting for GC.
        with Image.open(img_path) as raw:
            image = raw.convert("RGB")
        label = torch.FloatTensor(self.labels[img_name])
        if self.transform:
            image = self.transform(image)
        return image, label

### Transforms with additional augmentation methods & DataLoaders

In [9]:
# Per-channel normalisation statistics (the values conventionally used with
# ImageNet-pretrained torchvision models).
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std  = [0.229, 0.224, 0.225]

# Training pipeline: resize, then a stack of mild geometric/photometric
# augmentations, then tensor conversion and normalisation.
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    # `scale` is the crop area as a fraction of the source image area, so the
    # upper bound is capped at 1.0: a crop cannot be larger than the image,
    # and any sample above 1.0 could only be rejected by the sampler.
    transforms.RandomResizedCrop(224, scale=(0.9, 1.0)),  # random zoom & crop
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # random translations
    transforms.RandomPerspective(distortion_scale=0.1, p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

# Validation/test pipeline: deterministic resize + normalisation only.
val_test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

# One dataset per split; only the training split is augmented.
train_dataset = ChestXrayDataset(train_imgs, label_map, data_dir, transform=train_transform)
val_dataset = ChestXrayDataset(val_imgs, label_map, data_dir, transform=val_test_transform)
test_dataset = ChestXrayDataset(test_imgs, label_map, data_dir, transform=val_test_transform)


def _make_loader(dataset, shuffle):
    """Wrap ``dataset`` in a DataLoader with the notebook-wide batch settings."""
    return DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=shuffle, num_workers=4)


# Shuffle only while training so evaluation order stays fixed.
train_loader = _make_loader(train_dataset, shuffle=True)
val_loader = _make_loader(val_dataset, shuffle=False)
test_loader = _make_loader(test_dataset, shuffle=False)

### Focal Loss

In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class FocalLoss(nn.Module):
    """Element-wise focal loss computed from logits.

    Each element's binary cross-entropy is scaled by
    ``alpha * (1 - pt) ** gamma`` where ``pt = exp(-bce)``.

    Args:
        alpha: Constant multiplier applied to every element.
        gamma: Exponent of the ``(1 - pt)`` modulating factor.
        reduction: ``'mean'`` or ``'sum'`` to reduce to a scalar; any other
            value returns the unreduced per-element loss.
    """

    def __init__(self, alpha=1, gamma=2, reduction='mean'):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        """Return the focal loss of ``inputs`` (logits) against ``targets``."""
        per_element_bce = F.binary_cross_entropy_with_logits(
            inputs, targets, reduction='none'
        )
        # exp(-bce) recovers the probability assigned to the true label.
        prob_correct = torch.exp(-per_element_bce)
        modulating = (1 - prob_correct) ** self.gamma
        loss = self.alpha * modulating * per_element_bce

        if self.reduction == 'sum':
            return loss.sum()
        if self.reduction == 'mean':
            return loss.mean()
        return loss


### Model, Loss, Optimizer

In [11]:
# DenseNet-121 loaded with pretrained weights; its classifier is swapped for
# a linear layer that emits one logit per class.
model = models.densenet121(pretrained=True)
num_ftrs = model.classifier.in_features
model.classifier = nn.Linear(in_features=num_ftrs, out_features=NUM_CLASSES)
model = model.to(DEVICE)

criterion = FocalLoss(gamma=2, alpha=1)
optimizer = optim.Adam(params=model.parameters(), lr=LR, betas=(0.9, 0.999))
# Cut the learning rate by 10x once the monitored value (stepped with the
# validation loss in the training loop) stops decreasing for more than one epoch.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=1,
    verbose=True,
)

Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth
100%|██████████| 30.8M/30.8M [00:00<00:00, 139MB/s]


### Training & Validation

In [12]:
def train_one_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader``.

    Returns:
        The sum of per-batch losses weighted by batch size, divided by
        ``len(loader.dataset)``.
    """
    model.train()
    running_loss = 0
    progress = tqdm(loader, desc="Training", leave=False)
    for batch_images, batch_labels in progress:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(batch_images), batch_labels)
        batch_loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch is not over-counted.
        running_loss += batch_loss.item() * batch_images.size(0)
    return running_loss / len(loader.dataset)

def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Args:
        model: Network to evaluate; switched to eval mode.
        loader: Iterable of ``(images, labels)`` batches exposing ``.dataset``.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, mean_auc)`` where ``mean_loss`` is the batch-size
        weighted loss divided by ``len(loader.dataset)`` and ``mean_auc`` is
        the average AUROC over the label columns on which AUROC is defined
        (``0.0`` when there is no such column). AUROC is computed on the raw
        model outputs.
    """
    model.eval()
    total_loss = 0
    # Named *_batches so they do not shadow the module-level `all_labels`
    # list of class names.
    label_batches = []
    output_batches = []
    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Validation", leave=False):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            total_loss += loss.item() * images.size(0)
            label_batches.append(labels.cpu())
            output_batches.append(outputs.cpu())
    y_true = torch.cat(label_batches).numpy()
    y_score = torch.cat(output_batches).numpy()
    # AUROC per class
    aucs = []
    for i in range(y_true.shape[1]):
        column = y_true[:, i]
        # AUROC needs both classes present: skip columns with no positives
        # (as before) and also columns holding a single distinct value, for
        # which roc_auc_score has no defined result.
        if column.sum() > 0 and np.unique(column).size > 1:
            aucs.append(roc_auc_score(column, y_score[:, i]))
    mean_auc = sum(aucs) / len(aucs) if aucs else 0.0
    return total_loss / len(loader.dataset), mean_auc

### Run Training

In [13]:
best_auc = 0.0
for epoch in range(NUM_EPOCHS):
    print(f"\nEpoch {epoch+1}/{NUM_EPOCHS}")

    train_loss = train_one_epoch(model, train_loader, optimizer, criterion, DEVICE)
    val_loss, val_auc = validate(model, val_loader, criterion, DEVICE)

    # The scheduler watches validation loss; checkpointing watches AUROC.
    scheduler.step(val_loss)
    print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val AUROC: {val_auc:.4f}")

    # Nothing to save unless this epoch strictly beats the best AUROC so far.
    if not val_auc > best_auc:
        continue

    best_auc = val_auc
    torch.save(model.state_dict(), "best_chexnet.pth")
    print("Best model saved.")


Epoch 1/15


                                                               

Train Loss: 0.0415 | Val Loss: 0.0384 | Val AUROC: 0.7966
Best model saved.

Epoch 2/15


                                                               

Train Loss: 0.0388 | Val Loss: 0.0374 | Val AUROC: 0.8145
Best model saved.

Epoch 3/15


                                                               

Train Loss: 0.0380 | Val Loss: 0.0369 | Val AUROC: 0.8215
Best model saved.

Epoch 4/15


                                                               

Train Loss: 0.0373 | Val Loss: 0.0366 | Val AUROC: 0.8276
Best model saved.

Epoch 5/15


                                                               

Train Loss: 0.0368 | Val Loss: 0.0363 | Val AUROC: 0.8285
Best model saved.

Epoch 6/15


                                                               

Train Loss: 0.0363 | Val Loss: 0.0363 | Val AUROC: 0.8318
Best model saved.

Epoch 7/15


                                                               

Train Loss: 0.0358 | Val Loss: 0.0375 | Val AUROC: 0.8247

Epoch 8/15


                                                               

Train Loss: 0.0354 | Val Loss: 0.0368 | Val AUROC: 0.8308

Epoch 9/15


                                                               

Train Loss: 0.0337 | Val Loss: 0.0358 | Val AUROC: 0.8398
Best model saved.

Epoch 10/15


                                                               

Train Loss: 0.0330 | Val Loss: 0.0359 | Val AUROC: 0.8378

Epoch 11/15


                                                               

Train Loss: 0.0327 | Val Loss: 0.0363 | Val AUROC: 0.8363

Epoch 12/15


                                                               

Train Loss: 0.0324 | Val Loss: 0.0363 | Val AUROC: 0.8369

Epoch 13/15


                                                               

Train Loss: 0.0323 | Val Loss: 0.0364 | Val AUROC: 0.8370

Epoch 14/15


                                                               

Train Loss: 0.0323 | Val Loss: 0.0361 | Val AUROC: 0.8368

Epoch 15/15


                                                               

Train Loss: 0.0322 | Val Loss: 0.0361 | Val AUROC: 0.8361


### Test Evaluation

In [14]:
# Restore the checkpoint written by the training loop. `map_location` makes
# the load work even when the current session's DEVICE differs from the one
# the weights were saved from (e.g. GPU-trained, CPU-evaluated).
best_state = torch.load("best_chexnet.pth", map_location=DEVICE)
model.load_state_dict(best_state)
test_loss, test_auc = validate(model, test_loader, criterion, DEVICE)
print(f"\nFinal Test Loss: {test_loss:.4f} | Test AUROC: {test_auc:.4f}")

                                                               


Final Test Loss: 0.0553 | Test AUROC: 0.8143




In [15]:
def evaluate_metrics(model, dataloader, device, all_labels, threshold=0.5):
    """Compute per-class AUROC and F1 for ``model`` over ``dataloader``.

    Sigmoid is applied to the model outputs; F1 uses predictions binarised
    at ``threshold``. Classes without any positive target get NaN for both
    metrics, and the means ignore NaN entries.

    Returns:
        ``(per_class_auc, mean_auc, per_class_f1, mean_f1)`` where the
        per-class values are dicts keyed by the names in ``all_labels``.
    """
    model.eval()
    target_batches, prob_batches = [], []

    with torch.no_grad():
        for images, targets in dataloader:
            probs = torch.sigmoid(model(images.to(device)))
            target_batches.append(targets.cpu().numpy())
            prob_batches.append(probs.cpu().numpy())

    y_true = np.vstack(target_batches)
    y_pred = np.vstack(prob_batches)

    nan = float('nan')
    per_class_auc, per_class_f1 = {}, {}

    for col, cls in enumerate(all_labels):
        truth_col = y_true[:, col]
        prob_col = y_pred[:, col]

        # Neither metric is reported for a class with no positive example.
        if not truth_col.sum() > 0:
            per_class_auc[cls] = nan
            per_class_f1[cls] = nan
            continue

        try:
            per_class_auc[cls] = roc_auc_score(truth_col, prob_col)
        except ValueError:
            per_class_auc[cls] = nan

        hard_preds = (prob_col >= threshold).astype(int)
        per_class_f1[cls] = f1_score(truth_col, hard_preds, zero_division=0)

    mean_auc = np.nanmean(list(per_class_auc.values()))
    mean_f1 = np.nanmean(list(per_class_f1.values()))

    return per_class_auc, mean_auc, per_class_f1, mean_f1

In [16]:
per_class_auc, mean_auc, per_class_f1, mean_f1 = evaluate_metrics(
    model, test_loader, DEVICE, all_labels
)


def _print_scores(header, scores):
    """Print ``header`` followed by one ``name: value`` line per entry."""
    print(header)
    for disease, value in scores.items():
        print(f"{disease}: {value:.4f}")


_print_scores("Per-class AUCs:", per_class_auc)
_print_scores("\nPer-class F1 scores:", per_class_f1)

print(f"\nMean AUC (14 classes): {mean_auc:.4f}")
print(f"Mean F1 (14 classes): {mean_f1:.4f}")


Per-class AUCs:
Atelectasis: 0.7762
Cardiomegaly: 0.8921
Effusion: 0.8316
Infiltration: 0.6876
Mass: 0.8227
Nodule: 0.7609
Pneumonia: 0.7228
Pneumothorax: 0.8569
Consolidation: 0.7530
Edema: 0.8474
Emphysema: 0.9179
Fibrosis: 0.8282
Pleural_Thickening: 0.7738
Hernia: 0.9290

Per-class F1 scores:
Atelectasis: 0.2467
Cardiomegaly: 0.3027
Effusion: 0.4667
Infiltration: 0.3063
Mass: 0.2820
Nodule: 0.0954
Pneumonia: 0.0000
Pneumothorax: 0.2703
Consolidation: 0.0119
Edema: 0.0682
Emphysema: 0.4191
Fibrosis: 0.0304
Pleural_Thickening: 0.0331
Hernia: 0.4812

Mean AUC (14 classes): 0.8143
Mean F1 (14 classes): 0.2153
