## Import Modules

In [1]:

import ast
import os
import random

import numpy as np
import pandas as pd
import pydicom
import torch
import torch.nn as nn
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models import densenet121

In [6]:
# Reproducibility: push one shared seed into Python's `random`, NumPy and
# PyTorch (CPU generator plus every CUDA device).
SEED = 42
for _seed_rng in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_rng(SEED)
del _seed_rng

# cuDNN: deterministic kernels only, no benchmark-driven kernel selection.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [2]:
def load_dicom_as_pil(path):
    """Read a DICOM file and return its pixel data as an 8-bit PIL image.

    The pixel array is min-max scaled to [0, 1] (the 1e-8 term keeps the
    denominator non-zero for constant images), stretched to 0-255, rounded
    to the nearest integer and cast to ``uint8``.

    Args:
        path: Location of the DICOM file, handed straight to
            ``pydicom.dcmread``.

    Returns:
        The image produced by ``Image.fromarray`` from the rescaled
        ``uint8`` array.
    """
    pixels = pydicom.dcmread(path).pixel_array.astype(np.float32)
    lowest, highest = pixels.min(), pixels.max()
    unit_range = (pixels - lowest) / (highest - lowest + 1e-8)
    eight_bit = (unit_range * 255).round().astype(np.uint8)
    return Image.fromarray(eight_bit)
    

In [3]:
# Hard-coded normalisation statistics (named after the training set).
train_mean, train_std = 0.5037, 0.2510

# Preprocessing pipeline: resize to 224x224, convert to a tensor, then
# standardise with the statistics above.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(train_mean, train_std),
    ]
)

## Import Data + Model

Class labels:

- Class 0
- Class 1
- Class 2
- Class 3 == white

In [2]:
# ─── 1) REPRODUCIBILITY: SEED EVERY RNG ──────────────────────────────
SEED = 42

# Python, NumPy, PyTorch CPU generator, and all CUDA devices share one seed.
for _apply_seed in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
):
    _apply_seed(SEED)
del _apply_seed

# Deterministic cuDNN kernels; benchmark mode switched off.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# ─── 2) DEVICE ───────────────────────────────────────────────────────
# Preference order: Apple MPS first, then CUDA, otherwise fall back to CPU.
device = torch.device(
    "mps" if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)
print("Using device:", device)

# ─── 3) TRAINING-SET NORMALISATION CONSTANTS (HARD-CODED) ────────────
TRAIN_MEAN, TRAIN_STD = 0.5037, 0.2510

# ─── 4) PREPROCESSING FOR TEST IMAGES ────────────────────────────────
# Resize to 224x224, convert to a tensor, then standardise the single
# channel with the constants above.
test_transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[TRAIN_MEAN], std=[TRAIN_STD]),
    ]
)

# ─── 5) DATASET & DATALOADER ────────────────────────────────────────
class DicomDataset(Dataset):
    """Map-style dataset that reads DICOM files and yields ``(image, class_index)``.

    Args:
        items: Sequence of ``(pid, path)`` pairs; ``path`` is read with
            ``pydicom.dcmread``.
        labels_dict: Mapping keyed by the same ``(pid, path)`` pairs that
            gives each item's label.
        label2idx: Mapping from a label to the integer index returned as
            the target.
        transform: Callable applied to the 8-bit PIL image before it is
            returned.
    """

    def __init__(self, items, labels_dict, label2idx, transform):
        self.items, self.labels = items, labels_dict
        self.label2idx, self.transform = label2idx, transform

    def __len__(self):
        """Number of ``(pid, path)`` entries."""
        return len(self.items)

    def __getitem__(self, idx):
        """Load item ``idx`` and return ``(transformed_image, class_index)``."""
        pid, dicom_path = self.items[idx]
        pixels = pydicom.dcmread(dicom_path).pixel_array.astype(np.float32)

        # Min-max scale to [0, 1]; the epsilon keeps the denominator
        # non-zero when every pixel has the same value.
        unit_range = (pixels - pixels.min()) / (np.ptp(pixels) + 1e-6)

        # Cast (truncating, not rounding) to 8-bit, wrap as a PIL image and
        # run it through the transform pipeline.
        eight_bit = (unit_range * 255).astype(np.uint8)
        image = self.transform(Image.fromarray(eight_bit))

        label = self.labels[(pid, dicom_path)]
        return image, self.label2idx[label]

# ─── 6) LOAD TEST ITEMS & LABELS ────────────────────────────────────
# NOTE(review): machine-specific absolute path; point it at your local copy
# of the MIMIC-CXR 2.1.0 tree before running.
local_path_stem = "/Users/Kyra_1/Desktop/local_ADS_data/physionet.org/files/mimic-cxr/2.1.0/"
df_preds        = pd.read_csv("five_epoch_pred.csv")

# build label mapping: sorted unique labels -> consecutive class indices
# NOTE(review): the mapping is derived from this CSV alone, so it presumably
# matches the training mapping only if every class occurs here. TODO confirm.
all_labels    = df_preds['true_label']
unique_labels = sorted(all_labels.unique())
label2idx     = {lab: i for i, lab in enumerate(unique_labels)}

# Everything after the first occurrence of this marker in `dicom_path` is the
# path relative to `local_path_stem`.
VERSION_MARKER = '2.1.0'

# collect (pid, full_path) and labels_dict
test_items    = []
labels_dict   = {}
missing_paths = []
for _, row in df_preds.iterrows():
    pid        = row['subject_id']
    dicom_path = row['dicom_path']

    # partition() splits on the FIRST marker only, so a later "2.1.0" inside
    # the path cannot truncate it, and a missing marker is reported clearly
    # instead of surfacing as a bare IndexError.
    _, marker, other_path = dicom_path.partition(VERSION_MARKER)
    if not marker:
        raise ValueError(
            f"dicom_path {dicom_path!r} does not contain {VERSION_MARKER!r}"
        )

    full_path = os.path.join(local_path_stem, other_path.lstrip('/'))
    if not os.path.exists(full_path):
        # Best effort: files that are not available locally are skipped,
        # but remembered so the drop is visible below.
        missing_paths.append(full_path)
        continue
    test_items.append((pid, full_path))
    labels_dict[(pid, full_path)] = row['true_label']

if missing_paths:
    print(
        f"Skipped {len(missing_paths)} of {len(df_preds)} rows: "
        "DICOM file not found locally"
    )

# create DataLoader
# shuffle=False keeps batches in `test_items` order; the inference loop maps
# predictions back to items by position and relies on that.
# Pinned host memory only speeds up host -> CUDA copies; requesting it on
# CPU/MPS has no benefit and makes DataLoader emit a warning, so it is
# enabled for CUDA only.
test_loader = DataLoader(
    DicomDataset(test_items, labels_dict, label2idx, test_transform),
    batch_size=8,
    shuffle=False,
    num_workers=0,
    pin_memory=(device.type == "cuda"),
)

# ─── 7) BUILD MODEL & LOAD WEIGHTS ─────────────────────────────────
num_classes = len(unique_labels)

# Build the architecture only. load_state_dict() below runs in its default
# strict mode, so every parameter and buffer is replaced by the checkpoint;
# ImageNet weights would be downloaded just to be overwritten. `weights=None`
# also replaces the deprecated `pretrained=` flag.
model = densenet121(weights=None)

# swap first conv to accept 1 channel (same geometry as the original stem;
# its values come from the checkpoint, so no weight copy is needed here)
old = model.features.conv0
new = nn.Conv2d(
    1, old.out_channels,
    old.kernel_size, old.stride, old.padding,
    bias=(old.bias is not None)
)
model.features.conv0 = new

# replace classifier so the output width equals the number of labels
model.classifier = nn.Linear(model.classifier.in_features, num_classes)
model = model.to(device)

# load your trained weights
# NOTE(security): torch.load unpickles the file, so only load checkpoints you
# trust; pass weights_only=True if your installed torch version supports it.
model.load_state_dict(torch.load("reproduceable_densenet.pt", map_location=device))
model.eval()

# ─── 8) RUN INFERENCE & SAVE TO CSV ────────────────────────────────
results = []
batch_size = test_loader.batch_size

with torch.no_grad():
    for batch_idx, (imgs, labs) in enumerate(test_loader):
        probs = torch.softmax(model(imgs.to(device)), dim=1)
        preds = probs.argmax(dim=1)

        # The loader is not shuffled, so sample b of this batch is
        # test_items[batch_idx * batch_size + b].
        first = batch_idx * batch_size
        batch_items = test_items[first:first + imgs.size(0)]

        per_sample = zip(
            batch_items,
            labs.tolist(),
            preds.tolist(),
            probs.cpu().tolist(),
        )
        for (pid, path), true_idx, pred_idx, prob_vals in per_sample:
            record = {
                "patient_id":      pid,
                "full_path":       path,
                "true_label":      unique_labels[true_idx],
                "predicted_label": unique_labels[pred_idx],
            }
            # one prob_class_<j> column per class index
            record.update(
                {f"prob_class_{j}": p for j, p in enumerate(prob_vals)}
            )
            results.append(record)

# write out one row per evaluated image
out_df = pd.DataFrame(results)
out_df.to_csv("predictions.csv", index=False)
print(f"Saved {len(out_df)} predictions to predictions.csv")


Using device: cpu




Saved 19 predictions to predictions.csv


In [3]:
# Compare the probabilities stored in 'reproduceable_model_pred.csv' with the
# ones written to 'predictions.csv', one image at a time.
from_test = pd.read_csv('reproduceable_model_pred.csv')
# NOTE(review): machine-specific prefix of the paths stored in `from_test`.
jiwoo_stem = '/Users/jiwoo_noh/Downloads'

from_loading = pd.read_csv('predictions.csv')

# Every prob_class_<k> column in class order, however many classes the
# predictions file contains (instead of assuming exactly four).
prob_cols = sorted(
    (col for col in from_loading.columns if col.startswith('prob_class_')),
    key=lambda col: int(col.rsplit('_', 1)[1]),
)

for _, row in from_loading.iterrows():
    # keep everything after the first 'local_ADS_data' and re-root it
    dicom_id = row['full_path'].split('local_ADS_data', 1)[1]
    search_path = jiwoo_stem + dicom_id
    prob_list = [row[col] for col in prob_cols]

    found = from_test['dicom_path'] == search_path
    if not found.any():
        print(f"{search_path!r} not found in from_test")
        continue

    # grab the packed string out of the test set (first match wins)
    raw = from_test.loc[found, 'probabilities'].iloc[0]

    # if it's a string, turn it into a Python list
    if isinstance(raw, str):
        raw = ast.literal_eval(raw)

    # make numpy arrays
    a = np.array(raw, dtype=float)
    b = np.array(prob_list, dtype=float)

    # Fail loudly rather than let NumPy broadcast (or cryptically reject)
    # vectors with different numbers of classes.
    if a.shape != b.shape:
        raise ValueError(
            f"{search_path!r}: probability shapes differ, {a.shape} vs {b.shape}"
        )

    # element-wise difference: test − pipeline
    diffs = a - b
    abs_diffs = np.abs(diffs)

    # some simple metrics
    l1      = np.sum(abs_diffs)      # L1 norm: sum of absolute differences
    l2      = np.linalg.norm(diffs)  # L2 norm (Euclidean)
    max_abs = np.max(abs_diffs)      # maximum absolute difference
    mae     = np.mean(abs_diffs)     # mean absolute error (L1 / number of classes)

    print(f" L1   = {l1:.6f}, L2 = {l2:.6f}, max_abs = {max_abs:.6f}, MAE = {mae:.6f}")

    

 L1   = 0.000001, L2 = 0.000000, max_abs = 0.000000, MAE = 0.000000
 L1   = 0.000030, L2 = 0.000020, max_abs = 0.000015, MAE = 0.000007
 L1   = 0.000085, L2 = 0.000056, max_abs = 0.000043, MAE = 0.000021
 L1   = 0.000030, L2 = 0.000021, max_abs = 0.000015, MAE = 0.000008
 L1   = 0.000031, L2 = 0.000021, max_abs = 0.000016, MAE = 0.000008
 L1   = 0.000002, L2 = 0.000002, max_abs = 0.000001, MAE = 0.000001
 L1   = 0.000002, L2 = 0.000001, max_abs = 0.000001, MAE = 0.000001
 L1   = 0.000029, L2 = 0.000019, max_abs = 0.000014, MAE = 0.000007
 L1   = 0.000001, L2 = 0.000001, max_abs = 0.000000, MAE = 0.000000
 L1   = 0.000043, L2 = 0.000026, max_abs = 0.000022, MAE = 0.000011
 L1   = 0.000001, L2 = 0.000000, max_abs = 0.000000, MAE = 0.000000
 L1   = 0.000002, L2 = 0.000001, max_abs = 0.000001, MAE = 0.000000
 L1   = 0.000000, L2 = 0.000000, max_abs = 0.000000, MAE = 0.000000
 L1   = 0.000000, L2 = 0.000000, max_abs = 0.000000, MAE = 0.000000
 L1   = 0.000021, L2 = 0.000015, max_abs = 0.000

## Masks

Open question: the masks are kept simple for now — should they be made more complex later?

In [None]:
# Learnable 1x1x224x224 mask drawn uniformly from [0, 1). Each run of this
# cell draws a fresh random initialisation, which is how different
# solutions are meant to be explored.
mask = torch.nn.Parameter(torch.rand((1, 1, 224, 224)))
optimizer = torch.optim.Adam(params=[mask], lr=0.01)

In [None]:
# Collect one mask per random initialisation.
N_MASKS = 10    # number of independent initialisations to keep
N_STEPS = 500   # optimisation steps per initialisation

masks = []

for _ in range(N_MASKS):
    mask = torch.nn.Parameter(torch.rand(1, 1, 224, 224), requires_grad=True)
    optimizer = torch.optim.Adam([mask], lr=1e-2)

    for _step in range(N_STEPS):
        # TODO(review): the objective is not defined in this notebook yet.
        # Once it is, each step should be roughly:
        #     optimizer.zero_grad(); loss = ...; loss.backward(); optimizer.step()
        pass

    # Snapshot once per initialisation, after its optimisation loop. Appending
    # inside the inner loop stored N_STEPS identical copies of every mask
    # (5000 tensors, roughly 1 GB) instead of N_MASKS distinct ones.
    masks.append(mask.detach().clone())

NameError: name 'N' is not defined