In [1]:
from pathlib import Path
import random, json
import numpy as np
import torch, os
import torch.nn as nn
from torch.utils.data import DataLoader, Subset, WeightedRandomSampler
from torchvision import transforms, datasets
from PIL import Image
import timm
from tqdm import tqdm
from typing import List, Tuple, Dict, Optional
from sklearn.metrics import roc_auc_score, average_precision_score, roc_curve, precision_recall_curve, confusion_matrix, classification_report

# Grad-CAM is treated as an optional dependency: instead of failing the import
# cell, record in _HAS_GRADCAM whether the package could be loaded.
# NOTE(review): presumably later cells check _HAS_GRADCAM before using
# GradCAM / show_cam_on_image — confirm against the rest of the notebook.
try:
    from pytorch_grad_cam import GradCAM
    from pytorch_grad_cam.utils.image import show_cam_on_image
    # Reached only when both imports above succeeded.
    _HAS_GRADCAM = True
except Exception:
    # Any exception raised while importing is treated as "Grad-CAM unavailable".
    _HAS_GRADCAM = False

  from .autonotebook import tqdm as notebook_tqdm


Define the configuration constants in one place so they are easy to change later on.

In [16]:
# ---- Paths ----
DATA_ROOT = Path("data")   # <-- set path
TRAIN_DIR = DATA_ROOT / "train"
TEST_DIR = DATA_ROOT / "test"
MODEL_DIR = Path("models_ultrasound")
# Create the output directory up front; a no-op when it already exists.
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# ---- Model / input ----
BACKBONE = "efficientnet_b0"
IMG_SIZE = 224
BATCH_SIZE = 12

# ---- Training schedule ----
NUM_EPOCHS = 30
FREEZE_EPOCHS = 5
LR_HEAD = 1e-3
LR_BACKBONE = 1e-4
WEIGHT_DECAY = 1e-5
PATIENCE_ES = 6

# ---- Data split / reproducibility ----
VAL_SPLIT = 0.15      # fraction of the training folder held out for validation
RANDOM_SEED = 42

# ---- Labels / loading ----
# NOTE(review): presumably the index of the positive class in the class list — verify.
POS_LABEL_IDX = 0
NUM_WORKERS = 2

# ---- Hardware ----
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): enabled regardless of DEVICE — confirm the training code
# handles the CPU case.
USE_AMP = True

In [3]:
# Seed every random number generator used here so results can be reproduced.
def set_seed(seed=42):
    """Seed Python's ``random``, NumPy and PyTorch with the same value.

    Args:
        seed: Integer seed applied to each generator (default 42).

    When CUDA is available, all CUDA devices are seeded as well.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

# Seed from the shared config constant (instead of the function's hard-coded
# default) so that changing RANDOM_SEED actually changes the seeding.
set_seed(RANDOM_SEED)


Data transformations: augment the training images so the model is better prepared for training and generalizes to messy real-world data.

In [5]:
# The expected inputs are grayscale ultrasound scans; pretrained backbones
# expect three channels, so every image is converted to RGB first.
class Ensure3Channel:
    """Callable transform that converts a PIL image to RGB mode."""

    def __call__(self, img: Image.Image) -> Image.Image:
        """Return ``img`` converted to the "RGB" mode."""
        rgb_img = img.convert("RGB")
        return rgb_img

In [7]:
# Training-time augmentation pipeline, meant to reduce overfitting and make the
# model more robust to real-world input. Every perturbation is kept small so
# the augmented image stays close to the original.
train_tf = transforms.Compose([
    # Make sure the image has three channels.
    Ensure3Channel(),
    # Random crop covering 80-100% of the image area, resized to IMG_SIZE.
    transforms.RandomResizedCrop(size=IMG_SIZE, scale=(0.80, 1.0)),
    # Small random rotation (10 degrees), translation (5% per axis) and shear (5 degrees).
    transforms.RandomAffine(degrees=10, translate=(0.05, 0.05), shear=5),
    # Mirror left-to-right with probability 0.5.
    transforms.RandomHorizontalFlip(p=0.5),
    # Slight brightness / contrast jitter.
    transforms.ColorJitter(brightness=0.10, contrast=0.10),
    # Mild 3x3 blur to imitate low-resolution scans.
    transforms.GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 1.0)),
    # PIL image -> tensor.
    transforms.ToTensor(),
    # Normalize with the statistics the ImageNet-pretrained model expects.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Evaluation pipeline (validation and test): no random augmentation, only the
# channel conversion, a fixed resize and the same normalization as training.
val_tf = transforms.Compose([
    Ensure3Channel(),
    # Resize straight to a square IMG_SIZE x IMG_SIZE image.
    transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

Create the data loaders 

In [17]:
def build_dataloaders(train_dir, test_dir, seed=None):
    """Build the train, validation and test DataLoaders.

    Both directories are read with ``datasets.ImageFolder``, i.e. every
    sub-directory is treated as one class. The images under ``train_dir`` are
    split into a training subset (augmented with ``train_tf``) and a validation
    subset of ``VAL_SPLIT`` fraction (evaluated with ``val_tf``).

    Args:
        train_dir: Directory holding the training images, one folder per class.
        test_dir: Directory holding the test images, same layout.
        seed: Seed for the train/validation shuffle. ``None`` (the default)
            uses ``RANDOM_SEED``.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader, class_names,
        class_counts)`` where ``class_counts`` maps each class name to its
        number of samples in the training subset.
    """
    # Two views over the same files: one with the augmenting transform for
    # training, one with the deterministic transform for validation.
    train_full = datasets.ImageFolder(str(train_dir), transform=train_tf)
    train_val = datasets.ImageFolder(str(train_dir), transform=val_tf)
    class_names = train_full.classes

    # Shuffle the sample indices with a dedicated, seeded generator rather than
    # the global `random` state. The split is then the same every time this
    # function runs, so re-running the cell cannot move validation images into
    # the training subset.
    n = len(train_full)
    idx = list(range(n))
    split_rng = random.Random(RANDOM_SEED if seed is None else seed)
    split_rng.shuffle(idx)

    # The first VAL_SPLIT fraction of the shuffled indices is validation.
    val_n = int(n * VAL_SPLIT)
    val_idx, tr_idx = idx[:val_n], idx[val_n:]

    train_subset = Subset(train_full, tr_idx)
    val_subset = Subset(train_val, val_idx)

    # Count the training-subset samples per class in a single pass.
    tr_labels = [train_full.samples[i][1] for i in tr_idx]
    class_counts = {name: 0 for name in class_names}
    for y in tr_labels:
        class_counts[class_names[y]] += 1

    # The classes are imbalanced (roughly 7:11 infected vs. not infected), so
    # draw training samples with probability inversely proportional to their
    # class frequency. Skipped when a class has no training samples, since its
    # weight would be undefined.
    sampler = None
    if all(c > 0 for c in class_counts.values()):
        weights = np.array([1.0 / class_counts[class_names[y]] for y in tr_labels], dtype=np.float32)
        sampler = WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)

    # Pinned host memory only helps host-to-GPU copies; requesting it without
    # CUDA just triggers a warning.
    loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=NUM_WORKERS,
                         pin_memory=torch.cuda.is_available())

    # `shuffle` and `sampler` are mutually exclusive in DataLoader.
    train_loader = DataLoader(train_subset, shuffle=(sampler is None),
                              sampler=sampler, **loader_kwargs)
    val_loader = DataLoader(val_subset, shuffle=False, **loader_kwargs)
    test_ds = datasets.ImageFolder(str(test_dir), transform=val_tf)
    test_loader = DataLoader(test_ds, shuffle=False, **loader_kwargs)

    return train_loader, val_loader, test_loader, class_names, class_counts

# Build all loaders once and report the classes together with the
# per-class sample counts of the training subset.
(
    train_loader,
    val_loader,
    test_loader,
    class_names,
    class_counts,
) = build_dataloaders(train_dir=TRAIN_DIR, test_dir=TEST_DIR)
print("Classes:", class_names, "Train counts:", class_counts)

Classes: ['infected', 'notinfected'] Train counts: {'infected': 676, 'notinfected': 960}
