In [1]:
# Enable IPython's autoreload extension in mode 2, so that imported modules are
# re-imported before each cell runs and edits to the project code take effect
# without restarting the kernel.
# (Comments are kept on their own lines: text after a line magic is passed to
# the magic as its argument.)
%load_ext autoreload
%autoreload 2

In [2]:
import os,sys
sys.path.insert(0,"..")
from glob import glob
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
import pandas as pd
import tqdm
import sklearn, sklearn.metrics
import torchxrayvision as xrv
from main.aims.aim1_1_taxonomy.utils_taxonomy import reading_user_input_arguments,AIM1_1_TorchXrayVision, LoadModelXRV, LoadChestXrayDatasets, Hierarchy

In [4]:
# Read the run configuration through the project helper, with the jupyter flag set.
config = reading_user_input_arguments(jupyter=True)

# Experiment object for this notebook, built from the config with seed 10.
aim1_1 = AIM1_1_TorchXrayVision(seed=10, config=config)

# Fetch the train split, the test split and the pre-trained model in one call
# (made on the class, not the instance) and attach all three to the experiment.
(
    aim1_1.train,
    aim1_1.test,
    aim1_1.model,
) = AIM1_1_TorchXrayVision.load_data_and_model(config)

{'Aortic Atheromatosis', 'Air Trapping', 'Support Devices', 'Granuloma', 'Bronchiectasis', 'Hilar Enlargement', 'Aortic Elongation', 'Flattened Diaphragm', 'Scoliosis', 'Tuberculosis', 'Hemidiaphragm Elevation', 'Costophrenic Angle Blunting', 'Tube'} will be dropped
Lung Lesion doesn't exist. Adding nans instead.
Lung Opacity doesn't exist. Adding nans instead.
Enlarged Cardiomediastinum doesn't exist. Adding nans instead.
Parent class: Lung Opacity is not in the dataset


In [14]:
# Read the run configuration again through the project helper (jupyter flag set).
config = reading_user_input_arguments(jupyter=True)

# Build the dataset loader from the config and the pathology list exposed by
# the already-loaded model, then trigger the actual load.
DT = LoadChestXrayDatasets(
    pathologies_in_model=aim1_1.model.pathologies,
    config=config,
)
DT.load()

# Last expression of the cell: display the loader's d_data attribute.
DT.d_data

Lung Lesion doesn't exist. Adding nans instead.
Fracture doesn't exist. Adding nans instead.
Lung Opacity doesn't exist. Adding nans instead.
Enlarged Cardiomediastinum doesn't exist. Adding nans instead.
Parent class: Lung Opacity is not in the dataset
Parent class: Enlarged Cardiomediastinum is not in the dataset
{'Atelectasis': {0.0: 27311, 1.0: 1557},
 'Cardiomegaly': {0.0: 28139, 1.0: 729},
 'Consolidation': {0.0: 28484, 1.0: 384},
 'Edema': {0.0: 28829, 1.0: 39},
 'Effusion': {0.0: 27599, 1.0: 1269},
 'Emphysema': {0.0: 28604, 1.0: 264},
 'Enlarged Cardiomediastinum': {0.0: 28139, 1.0: 729},
 'Fibrosis': {0.0: 28312, 1.0: 556},
 'Fracture': {},
 'Hernia': {0.0: 28787, 1.0: 81},
 'Infiltration': {0.0: 25595, 1.0: 3273},
 'Lung Lesion': {},
 'Lung Opacity': {0.0: 23951, 1.0: 4917},
 'Mass': {0.0: 27655, 1.0: 1213},
 'Nodule': {0.0: 27259, 1.0: 1609},
 'Pleural_Thickening': {0.0: 28123, 1.0: 745},
 'Pneumonia': {0.0: 28693, 1.0: 175},
 'Pneumothorax': {0.0: 28625, 1.0: 243}}


SubsetDataset num_samples=28868
└ of SubsetDataset num_samples=28868
  └ of SubsetDataset num_samples=28868
    └ of SubsetDataset num_samples=28868
      └ of NIH_Dataset num_samples=28868 views=['PA'] data_aug=None

In [None]:
# Display the `classifier` attribute of the pre-trained model loaded above.
aim1_1.model.classifier


In [None]:
# Use XRV transforms to crop and resize the images
transforms = torchvision.transforms.Compose([xrv.datasets.XRayCenterCrop(),
                                             xrv.datasets.XRayResizer(224)])

# Location of the NIH images. The original value was a hard-coded absolute path
# on one user's machine; it can now be overridden with the NIH_IMG_PATH
# environment variable. When the variable is unset the original path is used,
# so existing behaviour is unchanged.
nih_imgpath = os.environ.get("NIH_IMG_PATH", "/Users/ieee8023/Datasets/NIH/images-224")

# Load Google dataset and PyTorch dataloader
dataset = xrv.datasets.NIH_Google_Dataset(imgpath=nih_imgpath,
                                          transform=transforms)
# Batches of 8 samples, drawn in shuffled order.
dataloader = torch.utils.data.DataLoader(dataset, batch_size=8, shuffle=True)

# Load pre-trained model and erase classifier
model = xrv.models.DenseNet(weights="densenet121-res224-all")
model.op_threshs = None # prevent pre-trained model calibration
# New head: 1024 input features -> 1 output, replacing the pre-trained classifier.
model.classifier = torch.nn.Linear(1024,1) # reinitialize classifier

# Only the new classifier's parameters are handed to the optimizer, so the rest
# of the network is not updated by optimizer.step().
optimizer = torch.optim.Adam(model.classifier.parameters()) # only train classifier
# Loss takes raw (pre-sigmoid) outputs.
criterion = torch.nn.BCEWithLogitsLoss()



In [None]:
# training loop (can run on cpu)
# Runs over the first 21 batches of the dataloader (i = 0..20) and updates the
# parameters registered with the optimizer.
for i, batch in enumerate(dataloader):
    if i > 20: break
    # Reset gradients before computing this batch's gradients: backward() adds
    # into .grad, so without this every step would use the sum of the gradients
    # of all previous batches instead of the current one.
    optimizer.zero_grad()
    outputs = model(batch["img"])
    # Pick the "Lung Opacity" label column; indexing with None keeps a trailing
    # axis of size 1 so targets line up with the single-output classifier.
    targets = batch["lab"][:, dataset.pathologies.index("Lung Opacity"), None]
    loss = criterion(outputs, targets)
    print(i, loss.detach().cpu().numpy())
    loss.backward()
    optimizer.step()

In [None]:
# Take the first item of the dataset; later cells read its "img" entry.
sample = dataset[0]

In [None]:
# Wrap the sample's image array as a tensor, add a leading axis of size 1,
# run it through the model and map the raw output through a sigmoid.
out = torch.sigmoid(
    model(torch.from_numpy(sample["img"]).unsqueeze(0))
)

In [None]:
# Display the sigmoid output computed for the single sample.
out

In [None]:
# Collect (label, score) pairs for the first 20 dataset items; both lists are
# consumed by the ROC AUC cell that follows.
labels, preds = [], []
with torch.inference_mode():  # no autograd tracking while scoring
    for i in range(20):
        sample = dataset[i]
        # Ground-truth entry for the "Lung Opacity" pathology.
        label = sample["lab"][dataset.pathologies.index("Lung Opacity")]
        labels.append(label)
        # Model output for a batch of one, passed through a sigmoid.
        pred = torch.sigmoid(model(torch.from_numpy(sample["img"]).unsqueeze(0)))
        # Pull the single scalar out of the (1, 1)-indexed result.
        pred = pred.detach().numpy()[0, 0]
        preds.append(pred)
        print(label, pred)

In [None]:
# ROC AUC of the collected sigmoid scores against the collected labels.
# NOTE(review): roc_auc_score raises if `labels` contains only one class, which
# is plausible with just 20 samples — confirm both classes are present.
sklearn.metrics.roc_auc_score(labels, preds)