In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score, f1_score

In [5]:
# 1. Config
# Input locations, resolved relative to the notebook's working directory.
# NOTE(review): the recorded output of the data cell shows a FileNotFoundError
# for this CSV path — confirm where the file lives before re-running.
DATA_CSV = "Data_Entry_2017.csv"
IMG_DIR = "../../Dataset/archive"

# Optimisation hyper-parameters.
LR = 0.0001
EPOCHS = 10
BATCH_SIZE = 32

# Side length (pixels) that every image is resized to.
IMG_SIZE = 224

# Run on the GPU when one is visible to torch, otherwise on the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [6]:
# 2. Dataset
class XrayDataset(Dataset):
    """Image dataset yielding ``(transformed image, multi-hot label vector)`` pairs.

    Each row of ``df`` must provide an ``'Image Index'`` column (file name inside
    ``img_dir``) and a ``'Finding Labels'`` column holding ``|``-separated labels.

    Args:
        df: DataFrame with one row per image.
        img_dir: Directory that contains the image files.
        transform: Callable applied to the RGB ``PIL.Image``; its return value is
            the first element of each sample.
        classes: Optional ordered sequence of label names defining the positions
            of the multi-hot target. Defaults to ``DEFAULT_CLASSES``.
    """

    # Label vocabulary used when `classes` is not supplied.
    # NOTE(review): assumed to be the 14 NIH ChestX-ray14 findings plus
    # "No Finding" (15 entries, matching a 15-unit classifier head) — confirm
    # against the values that actually occur in the CSV.
    DEFAULT_CLASSES = (
        "Atelectasis", "Cardiomegaly", "Effusion", "Infiltration", "Mass",
        "Nodule", "Pneumonia", "Pneumothorax", "Consolidation", "Edema",
        "Emphysema", "Fibrosis", "Pleural_Thickening", "Hernia", "No Finding",
    )

    def __init__(self, df, img_dir, transform, classes=None):
        self.df = df
        self.img_dir = img_dir
        self.transform = transform
        self.classes = list(self.DEFAULT_CLASSES if classes is None else classes)
        # Name -> column position lookup for building targets.
        self._class_to_idx = {name: pos for pos, name in enumerate(self.classes)}

    def __len__(self):
        """Return the number of rows (images) in the dataset."""
        return len(self.df)

    def _encode_labels(self, finding_labels):
        """Convert a ``|``-separated label string into a float32 target tensor.

        Strings whose tokens are all integers (e.g. ``"0|1|0"``) are returned as
        that numeric vector unchanged. Otherwise the tokens are treated as class
        names and encoded as a fixed-length multi-hot vector over ``self.classes``.

        Raises:
            ValueError: If a token is neither an integer nor a known class name.
        """
        tokens = [tok.strip() for tok in finding_labels.split("|")]
        try:
            # Backward compatible path: labels that are already numeric.
            return torch.tensor([int(tok) for tok in tokens], dtype=torch.float32)
        except ValueError:
            pass  # textual finding names — encode them below

        target = torch.zeros(len(self.classes), dtype=torch.float32)
        for name in tokens:
            if name not in self._class_to_idx:
                raise ValueError(
                    f"Unknown finding label {name!r}; expected one of {self.classes}"
                )
            target[self._class_to_idx[name]] = 1.0
        return target

    def __getitem__(self, i):
        """Load the ``i``-th image (by position) and return ``(image, labels)``."""
        row = self.df.iloc[i]
        # `convert` returns a new, fully loaded image, so the file handle opened
        # by `Image.open` can be released as soon as the `with` block ends.
        with Image.open(f"{self.img_dir}/{row['Image Index']}") as raw:
            img = raw.convert("RGB")
        img = self.transform(img)
        labels = self._encode_labels(row['Finding Labels'])
        return img, labels

In [7]:
# 3. Data
# Read the metadata table (one row per image).
df = pd.read_csv(DATA_CSV)

# Preprocessing shared by both splits: resize to a square, convert to a tensor,
# then normalise each channel with the given mean / std.
transform = transforms.Compose([
    transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Seeded 80/20 split: sample the training rows, the remaining rows form the test set.
# NOTE(review): rows are sampled individually; if the CSV holds several images per
# patient, the same patient can end up in both splits — confirm whether a
# patient-level split is required.
train_df = df.sample(frac=0.8, random_state=0)
test_df = df[~df.index.isin(train_df.index)]

# Only the training loader shuffles; the test loader keeps row order.
train_set = XrayDataset(train_df, IMG_DIR, transform)
test_set = XrayDataset(test_df, IMG_DIR, transform)
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

FileNotFoundError: [Errno 2] No such file or directory: 'Data_Entry_2017.csv'

In [None]:
# 4. Model
# Seed torch's global RNG so the initialisation of the new classifier head (and
# the shuffle order of loaders iterated afterwards) is repeatable between runs.
torch.manual_seed(0)

NUM_CLASSES = 15  # width of the multi-label output layer

# `pretrained=True` is deprecated in torchvision (since 0.13) in favour of the
# `weights=` argument. IMAGENET1K_V1 is the checkpoint the legacy flag maps to,
# so the loaded parameters are the same; torchvision releases that predate the
# weights enum fall back to the legacy flag.
_resnet18_weights = getattr(models, "ResNet18_Weights", None)
if _resnet18_weights is not None:
    model = models.resnet18(weights=_resnet18_weights.IMAGENET1K_V1)
else:
    model = models.resnet18(pretrained=True)

# Replace the final fully connected layer with a fresh NUM_CLASSES-way head.
model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
model = model.to(DEVICE)

# 5. Loss & Opt
# BCEWithLogitsLoss applies the sigmoid internally, so the model emits raw logits.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

# 6. Train
# Plain supervised loop: one full pass over `train_loader` per epoch.
for epoch in range(EPOCHS):
    model.train()  # put the network in training mode for this epoch
    for imgs, labels in train_loader:
        imgs = imgs.to(DEVICE)
        labels = labels.to(DEVICE)

        # Clear gradients left over from the previous step, then do
        # forward pass -> loss -> backward pass -> parameter update.
        optimizer.zero_grad()
        preds = model(imgs)
        loss = criterion(preds, labels)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}/{EPOCHS} complete")

# 7. Save
# Persist the trained parameters together with the metadata needed to rebuild
# the network (architecture name and number of output classes).
checkpoint = dict(
    state_dict=model.state_dict(),
    arch='resnet18',
    classes=15,
)
torch.save(checkpoint, "model.pth")

# 8. Evaluate
model.eval()  # switch the network to inference mode
all_preds, all_labels = [], []
with torch.no_grad():  # no gradients are needed for scoring
    for imgs, labels in test_loader:
        imgs = imgs.to(DEVICE)
        # The model outputs logits; sigmoid turns them into per-class probabilities.
        preds = torch.sigmoid(model(imgs)).cpu().numpy()
        all_preds.append(preds)
        all_labels.append(labels.numpy())
# Stack the per-batch arrays into (num_samples, num_classes) matrices.
all_preds = np.vstack(all_preds)
all_labels = np.vstack(all_labels)

# Metrics
n_classes = all_labels.shape[1]  # derive from the data instead of hard-coding 15
THRESHOLD = 0.5                  # probability cut-off used to binarise predictions for F1

aucs = []
for i in range(n_classes):
    y_true = all_labels[:, i]
    if np.unique(y_true).size < 2:
        # ROC AUC is undefined when only one label value occurs in the split.
        # Record NaN so the list stays index-aligned with the class columns.
        print(f"Class {i}: only one label value in the test split; AUC undefined")
        aucs.append(float("nan"))
    else:
        aucs.append(roc_auc_score(y_true, all_preds[:, i]))

f1s = [
    f1_score(all_labels[:, i], (all_preds[:, i] > THRESHOLD).astype(int))
    for i in range(n_classes)
]

# nanmean skips classes whose AUC could not be computed.
print("Mean AUC:", np.nanmean(aucs))
print("Mean F1:", np.mean(f1s))