## Import Modules

In [21]:
import torch
import torchvision.transforms as T
from PIL import Image
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
from torchvision.models import densenet121
import pydicom
import numpy as np
from PIL import Image
from torchvision import transforms
import pandas as pd
import os


In [61]:
def load_dicom_as_pil(path):
    """Read a DICOM file and return its pixel data as an 8-bit PIL image.

    The pixel array is cast to float32 and min-max scaled to the 0..1 range
    (a 1e-8 term in the denominator avoids division by zero for constant
    images), then stretched to 0..255, rounded and stored as uint8.

    Args:
        path: location of the DICOM file, passed straight to ``pydicom.dcmread``.

    Returns:
        A ``PIL.Image.Image`` built from the rescaled uint8 array.
    """
    pixels = pydicom.dcmread(path).pixel_array.astype(np.float32)
    scaled = (pixels - pixels.min()) / (pixels.max() - pixels.min() + 1e-8)
    as_uint8 = (scaled * 255).round().astype(np.uint8)
    return Image.fromarray(as_uint8)

In [62]:
# Normalisation statistics (presumably measured on the training set -- TODO confirm)
train_mean = 0.5037
train_std = 0.2510

# Preprocessing pipeline applied to every PIL image before it reaches the model:
# resize to 224x224, convert to a tensor, then standardise with the stats above.
# `T` is the torchvision.transforms alias imported at the top of the notebook.
transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize(mean=train_mean, std=train_std),
])

## Import Data + Model

- Class 0 == Asian
- Class 1 == Black/African American
- Class 2 == Hispanic/Latino
- Class 3 == White

In [63]:
# 2) Settings
device          = torch.device("cuda" if torch.cuda.is_available() else "cpu")
checkpoint_path = "best_densenet_ethnicity.pth"
num_classes     = 4     # ← change to match your len(unique_labels)
IMG_SIZE        = 224   # ← whatever size you trained with

# 3) Re-instantiate your model exactly as in training
# No pretrained weights are requested: densenet121() builds a randomly
# initialised network by default, which is what `pretrained=False` asked for,
# without using the keyword that newer torchvision releases deprecate.
model = densenet121()
# – swap in a 1-channel conv0 (the stock first conv expects 3-channel input)
model.features.conv0 = nn.Conv2d(
    in_channels=1,
    out_channels=64,
    kernel_size=7,
    stride=2,
    padding=3,
    bias=False
)
# – swap in your 4-way classifier, keeping the original input width
model.classifier = nn.Linear(
    in_features=model.classifier.in_features,
    out_features=num_classes
)
model = model.to(device)

# 4) Load the weights
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
state_dict = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(state_dict)

# 5) Switch to eval mode
model.eval()


# 7) Inference on a single image
def predict_dicom(dicom_path):
    """Classify a single DICOM image with the loaded model.

    The file is read through ``load_dicom_as_pil``, run through the
    module-level ``transform``, given a leading batch dimension and moved to
    ``device``. The forward pass and softmax run with gradients disabled.

    Args:
        dicom_path: path of the DICOM file to classify.

    Returns:
        A ``(pred_idx, probs)`` tuple: ``pred_idx`` is the index of the most
        probable class as a Python int, ``probs`` is a numpy array holding the
        softmax probabilities for the one-image batch.
    """
    # read & preprocess
    batch = transform(load_dicom_as_pil(dicom_path)).unsqueeze(0).to(device)
    # forward
    with torch.no_grad():
        probs = torch.softmax(model(batch), dim=1)
    return probs.argmax(dim=1).item(), probs.cpu().numpy()




In [64]:
# Re-run inference over every file listed in best_model_pred.csv (assuming all
# the available data is recorded there) and save the per-image probabilities.

# NOTE: machine-specific absolute path to the local MIMIC-CXR copy; adjust it
# before running this notebook on another machine.
local_path_stem = "/Users/Kyra_1/Desktop/local_ADS_data/physionet.org/files/mimic-cxr/2.1.0/"

successful_files = pd.read_csv("best_model_pred.csv")

all_labels = successful_files['true_label']
# recreate the label mapping from the training: sorted unique labels -> index
unique_labels = sorted(all_labels.unique())
label2idx     = {lab: i for i, lab in enumerate(unique_labels)}
print(label2idx)

og_image_predictions = []   # one record per successfully processed image
label_ambiguity_count = 0   # images whose new prediction differs from the CSV's
label_ambiguity_ids = []    # local paths of those images

for _, row in successful_files.iterrows():
    patient_id = row['subject_id']
    # Keep the part of the recorded path after the dataset version folder and
    # re-root it under the local copy. This is plain concatenation, so a
    # remainder starting with '/' leaves a '//' in full_path.
    other_path = row['dicom_path'].split('2.1.0')[1]
    full_path  = local_path_stem+ other_path
    # Despite the name, this is the CSV's 'predicted' column: the check below
    # compares the stored prediction with the one computed here.
    train_label = row['predicted']
    if not os.path.exists(full_path):
        continue

    try:
        idx, probs = predict_dicom(full_path)
        if label2idx[train_label] != idx:
            label_ambiguity_count += 1
            label_ambiguity_ids.append(full_path)

    except Exception as exc:
        # Best-effort: skip files that fail to load or predict (or whose label
        # is not in label2idx), but say why and let KeyboardInterrupt /
        # SystemExit propagate instead of swallowing them.
        print(f'Skipped: {patient_id} ({type(exc).__name__}: {exc})')
        continue

    record = {
        'patient_id': patient_id,
        'dicom_path': full_path,
        'class_idx':  idx,
    }
    # One prob_<k> column per class, in class-index order; works for any
    # number of classes rather than exactly four.
    for class_idx, class_prob in enumerate(probs.flatten().tolist()):
        record[f'prob_{class_idx}'] = class_prob

    og_image_predictions.append(record)

pred_df = pd.DataFrame(og_image_predictions)
print(pred_df.head())
pred_df.to_csv("ethnicity_preds.csv", index=False)
print(pred_df.shape)
print(label_ambiguity_count)
print(label_ambiguity_ids)

{'ASIAN': 0, 'BLACK/AFRICAN AMERICAN': 1, 'HISPANIC/LATINO': 2, 'WHITE': 3}
   patient_id                                         dicom_path  class_idx  \
0    15000485  /Users/Kyra_1/Desktop/local_ADS_data/physionet...          3   
1    15000485  /Users/Kyra_1/Desktop/local_ADS_data/physionet...          3   
2    15000485  /Users/Kyra_1/Desktop/local_ADS_data/physionet...          3   
3    15002678  /Users/Kyra_1/Desktop/local_ADS_data/physionet...          3   
4    15002678  /Users/Kyra_1/Desktop/local_ADS_data/physionet...          3   

     prob_0    prob_1    prob_2    prob_3  
0  0.005292  0.015935  0.001419  0.977354  
1  0.019677  0.301652  0.007453  0.671218  
2  0.028240  0.144241  0.011913  0.815606  
3  0.042485  0.099779  0.017487  0.840249  
4  0.057175  0.052740  0.151839  0.738246  
(19, 7)
1
['/Users/Kyra_1/Desktop/local_ADS_data/physionet.org/files/mimic-cxr/2.1.0//files/p15/p15002678/s51171473/a57016f6-7e508b1c-2ae93482-05af10fa-6222510b.dcm']


0.09404128	0.1083391085267067	0.4549783170223236	0.34264135360717773
0.011608587577939034, 0.057486627250909805, 0.23680849373340607, 0.6940963268280029

In [54]:
# Sanity check: expected to print False once model.eval() has been called.
print(model.training)

False


## Masks

TODO: the mask setup is deliberately simple for now; it can be made more complicated later.

In [None]:
# A single learnable 1x1x224x224 mask, initialised uniformly at random; different
# random initialisations are meant to capture different solutions.
# nn.Parameter tracks gradients by default, so requires_grad is left implicit.
mask = nn.Parameter(torch.rand(1, 1, 224, 224))
# Adam updates only the mask tensor.
optimizer = optim.Adam([mask], lr=1e-2)

In [None]:
# set the number of different sets (random initialisations) you would like
NUM_MASK_INITS = 10
# number of optimisation steps to run for each initialisation
NUM_OPT_STEPS = 500

masks = []  # one detached mask per random initialisation

for _ in range(NUM_MASK_INITS):
    mask = torch.nn.Parameter(torch.rand(1, 1, 224, 224), requires_grad=True)
    optimizer = torch.optim.Adam([mask], lr=1e-2)

    for step in range(NUM_OPT_STEPS):
        # TODO: the optimisation step is not implemented yet (zero_grad, loss,
        # backward, optimizer.step()); until it is, the mask keeps its random
        # initial values.
        pass

    # Store the mask once per initialisation. Appending inside the step loop
    # saved 500 identical copies per initialisation (5000 tensors in total).
    masks.append(mask.detach().clone())

NameError: name 'N' is not defined