In [1]:
import albumentations as A 
import cv2 
import numpy as np
import os
import pandas as pd 
import torch

from collections import defaultdict
from importlib import import_module
from sklearn.metrics import roc_auc_score
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

from skp.toolbox.functions import load_model_from_config

  from .autonotebook import tqdm as notebook_tqdm
  check_for_updates()


In [2]:
def pad_to_aspect_ratio(img: np.ndarray, aspect_ratio: float) -> np.ndarray:
    """
    Zero-pad an image horizontally until height / width matches `aspect_ratio`.

    The aspect ratio is measured as height divided by width. Padding is only
    applied when the image is narrower than requested (height / width is
    greater than `aspect_ratio`); otherwise the input array itself is
    returned, unmodified and uncopied.

    Args:
        img: 2-D (H, W) or 3-D (H, W, C) array.
        aspect_ratio: target height / width.

    Returns:
        The padded array. The extra columns are split between the left and
        right sides, with the odd column (if any) going to the right.
    """
    height, width = img.shape[:2]
    if not height / width > aspect_ratio:
        # Already wide enough: nothing to do.
        return img

    extra_cols = round(height / aspect_ratio) - width
    pad_left = extra_cols // 2
    pad_right = extra_cols - pad_left

    # Rows are never padded; a trailing channel axis (if present) is left alone.
    pad_spec = [(0, 0), (pad_left, pad_right)]
    if img.ndim == 3:
        pad_spec.append((0, 0))
    return np.pad(img, pad_spec, mode="constant", constant_values=0)

In [3]:
class MammoDataset(Dataset):
    """
    Dataset that serves each mammogram at three resolutions.

    Every item contains the same grayscale image prepared three ways
    (`img1`: 2048 x 1024, `img2`: 1920 x 1280, `img3`: 1536 x 1536), plus the
    label and identifying columns of the corresponding row of `df`.

    Args:
        df: DataFrame with at least the columns `filename`, `cancer`,
            `patient_id`, `image_id`, `laterality`, `view` and `breast_id`.
            A copy is stored, so later changes to `df` do not affect the
            dataset.
        image_dir: directory that `filename` is relative to. Defaults to the
            location originally hard-coded in this notebook.
    """

    def __init__(self, df, image_dir="/mnt/stor/datasets/kaggle/rsna-breast-cancer-detection/train_cropped_png/"):
        self.df = df.copy()
        self.image_dir = image_dir

    def __len__(self):
        return len(self.df)

    def __getitem__(self, i):
        """
        Load image `i` and return a dict of tensors and metadata.

        Raises:
            FileNotFoundError: if the image file is missing or cannot be
                decoded (cv2.imread signals both by returning None).
        """
        row = self.df.iloc[i]
        path = os.path.join(self.image_dir, row.filename)
        # Flag 0 reads the image as single-channel grayscale.
        img = cv2.imread(path, 0)
        if img is None:
            # Without this check the failure surfaces as an AttributeError on
            # `img.copy()` with no hint of which file was at fault.
            raise FileNotFoundError(f"Could not read image (missing or unreadable): {path}")
        # 2048 x 1024
        img1 = pad_to_aspect_ratio(img.copy(), 2048 / 1024)
        img1 = A.Resize(2048, 1024)(image=img1)["image"]
        # 1920 x 1280
        img2 = pad_to_aspect_ratio(img.copy(), 1920 / 1280)
        img2 = A.Resize(1920, 1280)(image=img2)["image"]
        # 1536 x 1536: scale the longest side to 1536, then pad to a square.
        img3 = A.Compose([A.LongestMaxSize(1536), A.PadIfNeeded(1536, 1536)])(image=img.copy())["image"]
        return {
            # unsqueeze(0) adds a leading channel axis to each 2-D image.
            "img1": torch.from_numpy(img1).unsqueeze(0),
            "img2": torch.from_numpy(img2).unsqueeze(0),
            "img3": torch.from_numpy(img3).unsqueeze(0),
            "label": torch.tensor(row.cancer),
            "patient_id": row.patient_id,
            "image_id": row.image_id,
            "laterality": row.laterality,
            # `view` is accessed by key, unlike the other columns.
            "view": row["view"],
            "breast_id": row.breast_id,
            "filename": row.filename,
        }

In [4]:
def run_test(df, model_list, batch_size=32, num_workers=8, device="cuda:0"):
    """
    Run up to three models over every row of `df` and collect predictions.

    The i-th model in `model_list` is fed the i-th image variant produced by
    `MammoDataset` (`img1`, `img2`, `img3`), and its sigmoid output for
    logit column 0 is stored in column `y_pred{i}`.

    Args:
        df: DataFrame accepted by `MammoDataset`.
        model_list: models, each called as `model({"x": tensor})` and
            returning a mapping with a `"logits"` entry. At most three.
        batch_size: DataLoader batch size.
        num_workers: DataLoader worker processes.
        device: device the input batch is moved to before inference. The
            models themselves are not moved; they must already be there.

    Returns:
        DataFrame with one row per image: `y_pred0..`, `y_true`,
        `patient_id`, `image_id`, `laterality`, `view`, `breast_id`,
        `filename`.

    Raises:
        ValueError: if more models are passed than there are image variants.
    """
    image_keys = ("img1", "img2", "img3")
    if len(model_list) > len(image_keys):
        # Previously this only surfaced as KeyError('img4') after the first
        # batch had already been loaded.
        raise ValueError(
            f"run_test supports at most {len(image_keys)} models, got {len(model_list)}"
        )

    pred_df = defaultdict(list)
    dataset = MammoDataset(df)
    loader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=False, drop_last=False)
    for batch in tqdm(loader, total=len(loader)):
        # A single inference_mode context is sufficient; nesting a second one
        # inside it has no additional effect.
        with torch.inference_mode():
            for i, (each_img, each_model) in enumerate(zip(image_keys, model_list)):
                x = batch[each_img]
                out = each_model({"x": x.to(device)})["logits"].sigmoid()[:, 0]
                pred_df[f"y_pred{i}"].extend(list(out.cpu().numpy()))
        # label / patient_id / image_id are expected to be collated into
        # tensors (hence .numpy()); the remaining fields are used as-is.
        pred_df["y_true"].extend(list(batch["label"].numpy()))
        pred_df["patient_id"].extend(list(batch["patient_id"].numpy()))
        pred_df["image_id"].extend(list(batch["image_id"].numpy()))
        pred_df["laterality"].extend(list(batch["laterality"]))
        pred_df["view"].extend(list(batch["view"]))
        pred_df["breast_id"].extend(list(batch["breast_id"]))
        pred_df["filename"].extend(list(batch["filename"]))
    return pd.DataFrame(pred_df)

## Split 1

In [5]:
# Split 1 ensemble members: (config module name, checkpoint path suffix).
_split1_checkpoints = [
    ("mammo.cfg_ddsm_pretrained_aux_losses", "/6a3b30f7/fold0/checkpoints/ema_weights.pt"),
    ("mammo.cfg_ddsm_pretrained_aux_losses_v02", "/ddfeb3f6/fold1/checkpoints/ema_weights.pt"),
    ("mammo.cfg_ddsm_pretrained_aux_losses_v04", "/b386692c/fold2/checkpoints/ema_weights.pt"),
]

_split1_models = []
for cfg_name, _ckpt_suffix in _split1_checkpoints:
    cfg = import_module(f"skp.configs.{cfg_name}").cfg
    # Switch off the config's pretrained / gradient-checkpointing options;
    # the weights are taken from the checkpoint file loaded below.
    cfg.pretrained = False
    cfg.load_pretrained_backbone = False
    cfg.enable_gradient_checkpointing = False

    weights_path = cfg.save_dir + cfg_name + _ckpt_suffix
    _split1_models.append(
        load_model_from_config(cfg, weights_path, device="cuda:0", eval_mode=True)
    )

model1, model2, model3 = _split1_models

Loading weights from /home/ian/projects/SKP/experiments/mammo/mammo.cfg_ddsm_pretrained_aux_losses/6a3b30f7/fold0/checkpoints/ema_weights.pt ...
Loading weights from /home/ian/projects/SKP/experiments/mammo/mammo.cfg_ddsm_pretrained_aux_losses_v02/ddfeb3f6/fold1/checkpoints/ema_weights.pt ...
Loading weights from /home/ian/projects/SKP/experiments/mammo/mammo.cfg_ddsm_pretrained_aux_losses_v04/b386692c/fold2/checkpoints/ema_weights.pt ...


In [6]:
# Holdout set evaluated in the "Split 1" section.
holdout_v01_csv = "/mnt/stor/datasets/kaggle/rsna-breast-cancer-detection/holdout_test_v01.csv"
test_df = pd.read_csv(holdout_v01_csv)
test_df

Unnamed: 0,site_id,patient_id,image_id,laterality,view,age,cancer,biopsy,invasive,BIRADS,implant,density,machine_id,difficult_negative_case,filename,breast_id,breast_id_index,sampling_weight
0,1,10042,102733848,L,CC,51.0,0,0,0,1.0,0,2.0,93,0,10042/102733848.png,10042_L,8,1.0
1,1,10042,202939811,L,MLO,51.0,0,0,0,1.0,0,2.0,93,0,10042/202939811.png,10042_L,8,1.0
2,1,10042,1648588715,L,MLO,51.0,0,0,0,1.0,0,2.0,93,0,10042/1648588715.png,10042_L,8,1.0
3,1,10042,294481519,R,CC,51.0,0,0,0,1.0,0,2.0,93,0,10042/294481519.png,10042_R,9,1.0
4,1,10042,495770405,R,MLO,51.0,0,0,0,1.0,0,2.0,93,0,10042/495770405.png,10042_R,9,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5515,2,9968,568005453,R,CC,76.0,0,0,0,-1.0,0,-1.0,48,0,9968/568005453.png,9968_R,23819,1.0
5516,1,9973,1345265157,L,CC,43.0,0,0,0,1.0,0,2.0,49,0,9973/1345265157.png,9973_L,23820,1.0
5517,1,9973,1703611570,L,MLO,43.0,0,0,0,1.0,0,2.0,49,0,9973/1703611570.png,9973_L,23820,1.0
5518,1,9973,655941939,R,CC,43.0,0,0,0,1.0,0,2.0,49,0,9973/655941939.png,9973_R,23821,1.0


In [7]:
# Model order matters: model k is fed image variant img{k} by run_test.
pred_df1 = run_test(df=test_df, model_list=[model1, model2, model3])

100%|██████████| 173/173 [08:08<00:00,  2.82s/it]


In [9]:
# y_pred3 is the ensemble score: the plain sum of the three model outputs.
pred_df1["y_pred3"] = pred_df1["y_pred0"] + pred_df1["y_pred1"] + pred_df1["y_pred2"]
_score_cols = [f"y_pred{i}" for i in range(4)]

# Image-level AUC, one line per score column (three models, then ensemble).
for _col in _score_cols:
    auc = roc_auc_score(pred_df1["y_true"].to_numpy(), pred_df1[_col].to_numpy())
    print("AUC:", f"{auc:0.4f}")

# Breast-level AUC: average every numeric column over the images that share
# a breast_id, then score the averaged predictions.
pred_df1_grouped = pred_df1.groupby("breast_id").mean(numeric_only=True)
print("\n")

for _col in _score_cols:
    auc = roc_auc_score(pred_df1_grouped["y_true"].to_numpy(), pred_df1_grouped[_col].to_numpy())
    print("AUC:", f"{auc:0.4f}")

AUC: 0.8649
AUC: 0.8684
AUC: 0.8735
AUC: 0.8883


AUC: 0.9257
AUC: 0.9370
AUC: 0.9228
AUC: 0.9464


## Split 2

In [10]:
# Split 2: load the three ensemble members and evaluate them on the v02
# holdout CSV. This cell rebinds model1/model2/model3 and test_df one at a
# time, so each earlier object is dropped as soon as its replacement exists.

# Member 1: base config, fold0 checkpoint.
cfg_name = "mammo.cfg_ddsm_pretrained_aux_losses"
cfg = import_module(f"skp.configs.{cfg_name}").cfg
# Switch off the config's pretrained / gradient-checkpointing options; the
# weights are taken from the checkpoint file loaded below.
cfg.pretrained = False
cfg.load_pretrained_backbone = False
cfg.enable_gradient_checkpointing = False

weights_path = cfg.save_dir + cfg_name + "/c55fbaf7/fold0/checkpoints/ema_weights.pt"
model1 = load_model_from_config(cfg, weights_path, device="cuda:0", eval_mode=True)

# Member 2: v02 config, fold1 checkpoint.
cfg_name = "mammo.cfg_ddsm_pretrained_aux_losses_v02"
cfg = import_module(f"skp.configs.{cfg_name}").cfg
cfg.pretrained = False
cfg.load_pretrained_backbone = False
cfg.enable_gradient_checkpointing = False

weights_path = cfg.save_dir + cfg_name + "/fa6df1c9/fold1/checkpoints/ema_weights.pt"
model2 = load_model_from_config(cfg, weights_path, device="cuda:0", eval_mode=True)

# Member 3: v04 config, fold2 checkpoint.
cfg_name = "mammo.cfg_ddsm_pretrained_aux_losses_v04"
cfg = import_module(f"skp.configs.{cfg_name}").cfg
cfg.pretrained = False
cfg.load_pretrained_backbone = False
cfg.enable_gradient_checkpointing = False

weights_path = cfg.save_dir + cfg_name + "/cbbc1071/fold2/checkpoints/ema_weights.pt"
model3 = load_model_from_config(cfg, weights_path, device="cuda:0", eval_mode=True)

test_df = pd.read_csv("/mnt/stor/datasets/kaggle/rsna-breast-cancer-detection/holdout_test_v02.csv")

# Model order matters: run_test pairs the k-th model with image variant img{k}.
pred_df2 = run_test(test_df, [model1, model2, model3])

Loading weights from /home/ian/projects/SKP/experiments/mammo/mammo.cfg_ddsm_pretrained_aux_losses/c55fbaf7/fold0/checkpoints/ema_weights.pt ...
Loading weights from /home/ian/projects/SKP/experiments/mammo/mammo.cfg_ddsm_pretrained_aux_losses_v02/fa6df1c9/fold1/checkpoints/ema_weights.pt ...
Loading weights from /home/ian/projects/SKP/experiments/mammo/mammo.cfg_ddsm_pretrained_aux_losses_v04/cbbc1071/fold2/checkpoints/ema_weights.pt ...


100%|██████████| 172/172 [08:04<00:00,  2.82s/it]


In [11]:
# y_pred3 is the ensemble score: the plain sum of the three model outputs.
pred_df2["y_pred3"] = pred_df2["y_pred0"] + pred_df2["y_pred1"] + pred_df2["y_pred2"]
_score_cols = [f"y_pred{i}" for i in range(4)]

# Image-level AUC, one line per score column (three models, then ensemble).
for _col in _score_cols:
    auc = roc_auc_score(pred_df2["y_true"].to_numpy(), pred_df2[_col].to_numpy())
    print("AUC:", f"{auc:0.4f}")

# Breast-level AUC: average every numeric column over the images that share
# a breast_id, then score the averaged predictions.
pred_df2_grouped = pred_df2.groupby("breast_id").mean(numeric_only=True)
print("\n")

for _col in _score_cols:
    auc = roc_auc_score(pred_df2_grouped["y_true"].to_numpy(), pred_df2_grouped[_col].to_numpy())
    print("AUC:", f"{auc:0.4f}")

AUC: 0.8847
AUC: 0.8855
AUC: 0.8919
AUC: 0.9029


AUC: 0.9284
AUC: 0.9169
AUC: 0.9359
AUC: 0.9467


## Split 3

In [12]:
# Split 3: load the three ensemble members and evaluate them on the v03
# holdout CSV. This cell rebinds model1/model2/model3 and test_df one at a
# time, so each earlier object is dropped as soon as its replacement exists.

# Member 1: base config, fold0 checkpoint.
cfg_name = "mammo.cfg_ddsm_pretrained_aux_losses"
cfg = import_module(f"skp.configs.{cfg_name}").cfg
# Switch off the config's pretrained / gradient-checkpointing options; the
# weights are taken from the checkpoint file loaded below.
cfg.pretrained = False
cfg.load_pretrained_backbone = False
cfg.enable_gradient_checkpointing = False

weights_path = cfg.save_dir + cfg_name + "/6b00696a/fold0/checkpoints/ema_weights.pt"
model1 = load_model_from_config(cfg, weights_path, device="cuda:0", eval_mode=True)

# Member 2: v02 config, fold1 checkpoint.
cfg_name = "mammo.cfg_ddsm_pretrained_aux_losses_v02"
cfg = import_module(f"skp.configs.{cfg_name}").cfg
cfg.pretrained = False
cfg.load_pretrained_backbone = False
cfg.enable_gradient_checkpointing = False

weights_path = cfg.save_dir + cfg_name + "/387cf1fd/fold1/checkpoints/ema_weights.pt"
model2 = load_model_from_config(cfg, weights_path, device="cuda:0", eval_mode=True)

# Member 3: v04 config, fold2 checkpoint.
cfg_name = "mammo.cfg_ddsm_pretrained_aux_losses_v04"
cfg = import_module(f"skp.configs.{cfg_name}").cfg
cfg.pretrained = False
cfg.load_pretrained_backbone = False
cfg.enable_gradient_checkpointing = False

weights_path = cfg.save_dir + cfg_name + "/9c9a8feb/fold2/checkpoints/ema_weights.pt"
model3 = load_model_from_config(cfg, weights_path, device="cuda:0", eval_mode=True)

test_df = pd.read_csv("/mnt/stor/datasets/kaggle/rsna-breast-cancer-detection/holdout_test_v03.csv")

# Model order matters: run_test pairs the k-th model with image variant img{k}.
pred_df3 = run_test(test_df, [model1, model2, model3])

Loading weights from /home/ian/projects/SKP/experiments/mammo/mammo.cfg_ddsm_pretrained_aux_losses/6b00696a/fold0/checkpoints/ema_weights.pt ...
Loading weights from /home/ian/projects/SKP/experiments/mammo/mammo.cfg_ddsm_pretrained_aux_losses_v02/387cf1fd/fold1/checkpoints/ema_weights.pt ...
Loading weights from /home/ian/projects/SKP/experiments/mammo/mammo.cfg_ddsm_pretrained_aux_losses_v04/9c9a8feb/fold2/checkpoints/ema_weights.pt ...


100%|██████████| 172/172 [08:05<00:00,  2.82s/it]


In [13]:
# y_pred3 is the ensemble score: the plain sum of the three model outputs.
pred_df3["y_pred3"] = pred_df3["y_pred0"] + pred_df3["y_pred1"] + pred_df3["y_pred2"]
_score_cols = [f"y_pred{i}" for i in range(4)]

# Image-level AUC, one line per score column (three models, then ensemble).
for _col in _score_cols:
    auc = roc_auc_score(pred_df3["y_true"].to_numpy(), pred_df3[_col].to_numpy())
    print("AUC:", f"{auc:0.4f}")

# Breast-level AUC: average every numeric column over the images that share
# a breast_id, then score the averaged predictions.
pred_df3_grouped = pred_df3.groupby("breast_id").mean(numeric_only=True)
print("\n")

for _col in _score_cols:
    auc = roc_auc_score(pred_df3_grouped["y_true"].to_numpy(), pred_df3_grouped[_col].to_numpy())
    print("AUC:", f"{auc:0.4f}")

AUC: 0.8877
AUC: 0.8965
AUC: 0.8886
AUC: 0.9054


AUC: 0.9225
AUC: 0.9253
AUC: 0.9258
AUC: 0.9422
