In [1]:
from exp.utils import *
from exp.models import *
from exp.losses import *
from tqdm.notebook import tqdm
from multiprocessing import Pool
from PIL import Image

import torch
import torch.nn as NN
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms

from matplotlib.colors import LinearSegmentedColormap
from captum.attr import visualization as viz
from captum.attr import IntegratedGradients
from captum.attr import GradientShap
from captum.attr import DeepLift
from captum.attr import Occlusion
from captum.attr import NoiseTunnel
from captum.attr import visualization as viz
from captum.attr import Saliency

from ipywidgets import interact

In [None]:
model_name,"_".join(model_name.split("_")[:-1])

In [None]:
get_insights_path(model_name).parent.ls()

In [2]:
# Default model inspected by the cells below (plain literal: nothing to interpolate).
model_name = "DenseNet121_v1_Atelectasis"

In [23]:


def get_insights_path(model_name):
    """Return the insights directory of ``model_name``, creating it if needed.

    The directory is ``get_working_dir()/"insights"/model_name``.

    Args:
        model_name: Name of the model; used verbatim as the directory name.

    Returns:
        Path of the (now existing) per-model insights directory.

    Raises:
        FileNotFoundError: If the working directory itself does not exist
            (parents are deliberately not created).
        FileExistsError: If one of the two paths exists but is not a directory.
    """
    insights_dir = get_working_dir()/"insights"
    model_dir = insights_dir/model_name
    # exist_ok=True makes the call idempotent. The previous check compared
    # directory *stems*, which drops everything after a dot in the name
    # (e.g. "Net_v1.5_X" -> "Net_v1") and therefore mis-detected existence.
    insights_dir.mkdir(mode=0o777, parents=False, exist_ok=True)
    model_dir.mkdir(mode=0o777, parents=False, exist_ok=True)
    return model_dir

def load_model_metrics(model_name):
    """Load the saved metrics of every model sharing ``model_name``'s stem.

    The stem is ``model_name`` without its last ``_``-separated token (the
    label). Every sibling directory of the model's insights directory whose
    name starts with ``"<stem>_"`` and that contains its own metrics CSV
    contributes its columns to the result.

    Args:
        model_name: Full model name, e.g. ``"<architecture>_<version>_<label>"``.

    Returns:
        A DataFrame with the metrics of all matching models side by side
        (concatenated along the columns), in sorted directory order.

    Raises:
        FileNotFoundError: If no matching directory holds a metrics file.
    """
    model_stem = "_".join(model_name.split("_")[:-1])
    # NOTE: get_insights_path creates the directory for `model_name` when it
    # is missing, so that directory may legitimately hold no metrics yet.
    parent_dir = get_insights_path(model_name).parent
    prefix = f"{model_stem}_"

    frames = []
    # Sorted so that the column order does not depend on directory listing order.
    for stem_dir in sorted(d for d in parent_dir.ls() if d.is_dir()):
        if not stem_dir.name.startswith(prefix):
            continue  # belongs to a different model family
        # save_metrics names the file after the directory, so use the directory
        # name directly; rebuilding it from the last "_" token breaks for labels
        # that contain an underscore.
        metrics_file = stem_dir/model_metrics_name(stem_dir.name)
        if not metrics_file.exists():
            continue  # directory exists but no metrics were saved yet
        frames.append(load_metrics(metrics_file))

    if not frames:
        raise FileNotFoundError(
            f"No saved metrics found for models starting with '{prefix}' in '{parent_dir}'"
        )
    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(frames, axis=1)

def load_metrics(model_name):
    """Load a metrics CSV and return it indexed by its first column.

    Args:
        model_name: Either a bare model name, in which case the file is looked
            up in that model's insights directory, or a path (string or
            path-like) containing a directory component, which is read as is.

    Returns:
        DataFrame whose index is the first CSV column (the metric names
        written by ``save_metrics``) and whose remaining columns are the
        stored values.
    """
    import os

    source = str(model_name)
    # Anything with a directory component is treated as a path to the CSV;
    # os.path also understands the platform's own separator.
    if os.path.dirname(source) != "":
        csv_path = source
    else:
        csv_path = str(get_insights_path(model_name)/model_metrics_name(model_name))
    # index_col=0 restores the index directly, whatever header the index
    # column was written with (no reliance on the "Unnamed: 0" placeholder).
    return pd.read_csv(csv_path, index_col=0)

def save_metrics(model_metrics, model_name):
    """Write ``model_metrics`` as CSV into the insights directory of ``model_name``.

    Args:
        model_metrics: Object exposing ``to_csv`` (the cells here pass a DataFrame).
        model_name: Model whose insights directory and metrics file name are used.

    Prints the name of the written file (file name only, not the full path).
    """
    out_dir = get_insights_path(model_name)
    file_name = model_metrics_name(model_name)
    model_metrics.to_csv(str(out_dir/file_name))
    print(f"Saved metrics to '{file_name}'")
    

In [21]:
 len(str("DenseNet121_v1_Atelectasis").split("/")) > 1

False

In [24]:
load_model_metrics(model_name)

Unnamed: 0,DenseNet121_v1_Atelectasis,DenseNet121_v1_Pneumonia,DenseNet121_v1_Cardiomegaly
loss,0.124008,0.03124,0.035327
acc,0.737295,0.482408,0.827209
auroc,0.694895,0.561636,0.874979
threshold,0.634645,0.506866,0.748586


In [None]:
load_metrics(model_name)

In [None]:
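# Scratch note (not code; running it as an expression raises NameError): a metrics path
# with the file name repeated and placed under another model's directory --
# DenseNet121_v1_Atelectasis/DenseNet121_v1_Pneumonia_metrics.csv/DenseNet121_v1_Pneumonia_metrics.csv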

In [3]:
picked_labels = ["Atelectasis", "Cardiomegaly", "Pneumonia"]

In [13]:
# Evaluation settings are the same for every label, so they are defined once
# instead of being rebuilt on each pass through the loop.
seed = 92
s = 224
image_size = (s, s)
labels = get_labels()
bs = 16
device = get_device()
sigmoid = NN.Sigmoid()  # not used by the loop; kept as a notebook-level name

for label in picked_labels:
    print(f"Computing metrics for '{label}'")

    model_name = f"DenseNet121_v1_{label}"
    print(model_name)

    model = load_model(model_name)
    model.eval()
    model = model.to(device)

    # Re-seed for every label so each evaluation starts from the same random state.
    seed_everything(seed=seed)

    train_df, _, test_df = get_dataframes(include_labels=labels,
                                          small=True, small_fraction=0.05)

    train_df = get_binary_df(label, train_df)
    test_df = get_binary_df(label, test_df)

    # Class weights for the loss come from the training split.
    train_label = train_df[[label]].values
    neg_weights, pos_weights = compute_class_freqs(train_label)
    neg_weights, pos_weights = torch.Tensor(neg_weights), torch.Tensor(pos_weights)

    _, test_tfs = get_transforms(image_size=image_size)

    test_ds = CRX8_Data(test_df, get_image_path(), label, image_size=image_size, transforms=test_tfs)

    test_dl = DataLoader(test_ds, batch_size=bs, shuffle=False)

    criterion = get_weighted_loss_with_logits(pos_weights.to(device),
                                              neg_weights.to(device))

    history = validate(model,
                       criterion,
                       test_dl,
                       model_name,
                       device=device)

    # Build the one-column summary from a fresh dict so that `history` keeps
    # the values returned by validate(); only the loss is reduced to its mean.
    summary = {name: [value] for name, value in history.items()}
    summary["loss"] = [history["loss"].mean()]
    model_metrics = pd.DataFrame(list(summary.values()),
                                 index=list(summary.keys()),
                                 columns=[model_name])
    save_metrics(model_metrics, model_name)

FERTIG()

Computing metrics for 'Atelectasis'
DenseNet121_v1_Atelectasis
Using the GPU!




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=80.0), HTML(value='')))


Val: Loss: 0.124, Acc: 0.737, AUROC: 0.695
Saved metrics to '/home/favi/work/crx8/insights/DenseNet121_v1_Atelectasis/DenseNet121_v1_Atelectasis_metrics.csv'
Computing metrics for 'Cardiomegaly'
DenseNet121_v1_Cardiomegaly
Using the GPU!


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=80.0), HTML(value='')))


Val: Loss: 0.035, Acc: 0.827, AUROC: 0.875
Saved metrics to '/home/favi/work/crx8/insights/DenseNet121_v1_Cardiomegaly/DenseNet121_v1_Cardiomegaly_metrics.csv'
Computing metrics for 'Pneumonia'
DenseNet121_v1_Pneumonia
Using the GPU!


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=80.0), HTML(value='')))


Val: Loss: 0.031, Acc: 0.482, AUROC: 0.562
Saved metrics to '/home/favi/work/crx8/insights/DenseNet121_v1_Pneumonia/DenseNet121_v1_Pneumonia_metrics.csv'
FERTIG! :D


In [None]:
model_metrics

In [None]:
save_metrics(model_metrics, model_name)

In [None]:
load_metrics(model_name)

In [None]:
[f.name for f in get_insights_path(model_name).ls()]

In [None]:
model_metrics_name(model_name)

In [None]:
seed = 92

In [None]:
# Settings for the single-label evaluation cells that follow.
label = "Atelectasis"
model_name = f"DenseNet121_v1_{label}"
s = 224
image_size = (s, s)  # i.e. (224, 224); passed to get_transforms and CRX8_Data below
labels = get_labels()
bs = 16  # batch size of the DataLoaders built in the next cell
device = get_device()

In [None]:
# Seed before building the splits.
# NOTE(review): presumably get_dataframes samples randomly, which is why the
# seed is set first - confirm against its implementation.
seed_everything(seed=seed)

train_df, valid_df, test_df = get_dataframes(include_labels=labels, 
                                             small=False)
print(train_df.shape, valid_df.shape, test_df.shape)
# Reduce every split to the binary problem for `label`.
train_df = get_binary_df(label, train_df)
valid_df = get_binary_df(label, valid_df)
test_df = get_binary_df(label, test_df)

# Double brackets keep a 2-D (n_rows, 1) array for compute_class_freqs.
train_label = train_df[[label]].values
# NOTE(review): the unpacking order (neg, pos) and the argument order used for
# the criterion below (pos, neg) rely on conventions of helpers defined
# elsewhere - verify against compute_class_freqs / get_weighted_loss_with_logits.
neg_weights, pos_weights = compute_class_freqs(train_label)
neg_weights, pos_weights = torch.Tensor(neg_weights), torch.Tensor(pos_weights)
print(neg_weights, pos_weights)

train_tfs, test_tfs = get_transforms(image_size=image_size)

# Validation and test data use the test-time transforms; only training data
# gets the training transforms.
train_ds = CRX8_Data(train_df, get_image_path(), label, image_size=image_size, transforms=train_tfs)
valid_ds = CRX8_Data(valid_df, get_image_path(), label, image_size=image_size, transforms=test_tfs)
test_ds  = CRX8_Data(test_df , get_image_path(), label, image_size=image_size, transforms=test_tfs)

# Only the training loader shuffles; evaluation order stays fixed.
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)
valid_dl = DataLoader(valid_ds, batch_size=bs, shuffle=False)
test_dl  = DataLoader(test_ds,  batch_size=bs, shuffle=False)
dataloaders = {
    "train": train_dl,
    "val": valid_dl,
    "test": test_dl
}

# Class-weighted loss; the weight tensors are moved to the evaluation device.
criterion = get_weighted_loss_with_logits(pos_weights.to(device), 
                                          neg_weights.to(device))

In [None]:
model = load_model(model_name)
model.eval();
model = model.to(device)

In [None]:
sigmoid = NN.Sigmoid()

In [None]:
images, probabilities, truths = [], [], []

In [None]:
# Start from empty buffers on every run of this cell. `images` used to be
# carried over from an earlier cell, so running this cell twice appended the
# same batch again and left `images` out of step with the labels.
images, running_loss, running_y_hat, running_y = [], [], [], []

with torch.no_grad():

    datalooper = tqdm(test_dl)
    for X, y in datalooper:
        X, y = X.to(device), y.to(device)
        logits = model(X)
        predictions = sigmoid(logits)

        # extend() appends the per-sample arrays in place instead of rebuilding
        # the whole list for every batch.
        images.extend(X.cpu().detach().numpy())
        running_y_hat.extend(predictions.cpu().detach().numpy())
        running_y.extend(y.cpu().detach().numpy())
        # NOTE(review): only the first batch is evaluated, so the scores below
        # describe `bs` samples, not the whole test set. Kept because the
        # inputs are stored in memory; remove the break (and stop storing
        # `images`) to score the full set.
        break

images = np.array(images)
running_y_hat = np.array(running_y_hat)
running_y = np.array(running_y)

# The threshold returned with the AUROC is reused to binarise the predictions.
auroc, threshold = auroc_score(running_y_hat, running_y, model_name)
acc = accuracy_score(running_y, (running_y_hat > threshold))

In [None]:
acc, threshold, auroc

In [None]:
# Flatten the per-sample arrays to 1-D vectors of length n_samples.
truths = running_y.reshape(running_y.shape[0])
thruths = truths  # misspelled name this cell used to assign; kept as an alias
probabilities = running_y_hat.reshape(running_y_hat.shape[0])

In [None]:
images = np.array(images)
images.shape

In [None]:
# Settings for the attribution cells below. Same values as the evaluation
# settings earlier in the notebook, except for the batch size and the device.
label = "Atelectasis"
model_name = f"DenseNet121_v1_{label}"
s = 224
image_size = (s, s)  # i.e. (224, 224)
labels = get_labels()
bs = 1  # the attribution loop unpacks each batch as exactly one image
device = get_cpu()

In [None]:
# Rebuild splits, datasets, loaders and loss with the attribution settings
# (bs and device changed in the cell above). The statements repeat the
# earlier data-setup cell.
# NOTE(review): `seed` must already be defined by an earlier cell.
seed_everything(seed=seed)

train_df, valid_df, test_df = get_dataframes(include_labels=labels, 
                                             small=False)
print(train_df.shape, valid_df.shape, test_df.shape)
# Reduce every split to the binary problem for `label`.
train_df = get_binary_df(label, train_df)
valid_df = get_binary_df(label, valid_df)
test_df = get_binary_df(label, test_df)

# Double brackets keep a 2-D (n_rows, 1) array for compute_class_freqs.
train_label = train_df[[label]].values
# NOTE(review): unpacking order (neg, pos) vs. criterion argument order
# (pos, neg) depends on helpers defined elsewhere - verify.
neg_weights, pos_weights = compute_class_freqs(train_label)
neg_weights, pos_weights = torch.Tensor(neg_weights), torch.Tensor(pos_weights)
print(neg_weights, pos_weights)

train_tfs, test_tfs = get_transforms(image_size=image_size)

# Validation and test data use the test-time transforms.
train_ds = CRX8_Data(train_df, get_image_path(), label, image_size=image_size, transforms=train_tfs)
valid_ds = CRX8_Data(valid_df, get_image_path(), label, image_size=image_size, transforms=test_tfs)
test_ds  = CRX8_Data(test_df , get_image_path(), label, image_size=image_size, transforms=test_tfs)

# Only the training loader shuffles.
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)
valid_dl = DataLoader(valid_ds, batch_size=bs, shuffle=False)
test_dl  = DataLoader(test_ds,  batch_size=bs, shuffle=False)
dataloaders = {
    "train": train_dl,
    "val": valid_dl,
    "test": test_dl
}

# Class-weighted loss; the weight tensors are moved to `device`.
criterion = get_weighted_loss_with_logits(pos_weights.to(device), 
                                          neg_weights.to(device))

In [None]:
default_cmap = LinearSegmentedColormap.from_list("orange",
                                                 [(0, '#000000'), (0.25, '#000000'), (1, '#fc7b02')], 
                                                 N=256)

In [None]:
# First five positive and first five negative test rows for `label`
# (the label column is split at 0.5).
pos_df = test_df[test_df[label] > 0.5].head(5)
neg_df = test_df[test_df[label] < 0.5].head(5)

# One dataset and one unshuffled loader per subset, built the same way for both.
pos_ds, neg_ds = (
    CRX8_Data(frame, get_image_path(), label, image_size=image_size, transforms=test_tfs)
    for frame in (pos_df, neg_df)
)
pos_dl, neg_dl = (
    DataLoader(subset, batch_size=bs, shuffle=False)
    for subset in (pos_ds, neg_ds)
)

In [None]:
pos_noise_tunnels, neg_noise_tunnels = [], []
pos_images, neg_images = [], []

In [None]:
model = model.to(device)

# Reset the result buffers so that re-running this cell does not append the
# same images a second time.
pos_noise_tunnels, pos_images = [], []

# The explainer only wraps the model, so it is built once, outside the loop.
nt = NoiseTunnel(IntegratedGradients(model))
n_repeats = 1  # number of attribution runs averaged per image

for X, y in tqdm(pos_dl):
    X = X.to(device)

    attrs = []
    for _ in range(n_repeats):
        model.zero_grad()
        attribution = nt.attribute(X,
                                   baselines=X * 0,
                                   nt_type='smoothgrad_sq',
                                   nt_samples=3, stdevs=0.2)
        # Drop only the batch axis (batch size is 1). A bare squeeze() followed
        # by [0] selected the first channel and broadcast it over all channels,
        # discarding the attributions of the remaining channels.
        attrs.append(attribution.squeeze(0))

    # Average over the repeats, then move channels last for plotting.
    attr_ig_nt = torch.stack(attrs).mean(dim=0).cpu().detach().numpy()
    attr_ig_nt = np.einsum("cwh -> whc", attr_ig_nt)

    pos_noise_tunnels.append(attr_ig_nt)
    # `*X...` unpacks the single image of the batch into the einsum operand.
    pos_images.append(np.einsum("cwh -> whc", *X.cpu().detach().numpy()))

In [None]:
pos_noise_tunnels = np.array(pos_noise_tunnels)
pos_images = np.array(pos_images)

In [None]:
pos_noise_tunnels.shape, pos_images.shape

In [None]:


# The image slider follows the number of computed attributions, so it cannot
# point past the end of the arrays when fewer than five images are available.
@interact(image=(0, max(len(pos_noise_tunnels) - 1, 0)), percentage=(1, 99))
def drawit(image, percentage):
    """Overlay the attribution heat map of one positive image on the image.

    Args:
        image: Index into ``pos_noise_tunnels`` / ``pos_images``.
        percentage: Passed to Captum as ``outlier_perc``.
    """
    _ = viz.visualize_image_attr(pos_noise_tunnels[image],
                                 pos_images[image],
                                 method="blended_heat_map",
                                 sign="absolute_value",
                                 outlier_perc=percentage,
                                 show_colorbar=True,
                                 cmap=default_cmap,
                                 title="Overlayed Integrated Gradients \n with SmoothGrad Squared")

In [None]:
[f.stem for f in get_working_dir().ls() if f.is_dir()]

In [None]:
def pos_images_name(m):
    """File name of the saved positive images for model ``m``."""
    return f"{m}_pos_images.npy"


def neg_images_name(m):
    """File name of the saved negative images for model ``m``."""
    return f"{m}_neg_images.npy"


def pos_nt_name(m):
    """File name of the saved positive noise-tunnel attributions for model ``m``."""
    # The spelling of the suffix is part of the on-disk name; left unchanged.
    return f"{m}_pos_noisetunnnels.npy"


def neg_nt_name(m):
    """File name of the saved negative noise-tunnel attributions for model ``m``."""
    return f"{m}_neg_noisetunnnels.npy"

def get_insights_path(model_name):
    """Return the insights directory of ``model_name``, creating it if needed.

    The directory is ``get_working_dir()/"insights"/model_name``. This
    function is also defined near the top of the notebook; this later
    definition is the one in effect once this cell has run.

    Args:
        model_name: Name of the model; used verbatim as the directory name.

    Returns:
        Path of the (now existing) per-model insights directory.

    Raises:
        FileNotFoundError: If the working directory itself does not exist
            (parents are deliberately not created).
        FileExistsError: If one of the two paths exists but is not a directory.
    """
    insights_dir = get_working_dir()/"insights"
    model_dir = insights_dir/model_name
    # exist_ok=True makes the call idempotent. Checking directory *stems*
    # dropped everything after a dot in the name and mis-detected existence.
    insights_dir.mkdir(mode=0o777, parents=False, exist_ok=True)
    model_dir.mkdir(mode=0o777, parents=False, exist_ok=True)
    return model_dir

def save_insights(pos_images, neg_images, pos_grads, neg_grads, model_name):
    """Save images and attributions as ``.npy`` files in the model's insights directory.

    Args:
        pos_images: Array saved under ``pos_images_name(model_name)``.
        neg_images: Array saved under ``neg_images_name(model_name)``.
        pos_grads: Array saved under ``pos_nt_name(model_name)``.
        neg_grads: Array saved under ``neg_nt_name(model_name)``.
        model_name: Model whose insights directory receives the files.
    """
    out_dir = get_insights_path(model_name)
    artefacts = (
        (pos_images_name, pos_images),
        (neg_images_name, neg_images),
        (pos_nt_name, pos_grads),
        (neg_nt_name, neg_grads),
    )
    # Same four writes, in the same order, driven by a (namer, data) table.
    for make_name, data in artefacts:
        np.save(out_dir/make_name(model_name), data)

In [None]:
mp = get_insights_path(model_name)
mp

In [None]:
(get_working_dir()/"skjdf").mkdir(mode=0o777, parents=False, exist_ok=False)

In [None]:
!ls

In [None]:
ipath = get_insights_path(model_name)
np.save(ipath/pos_images_name(model_name), pos_images)
ipath/pos_images_name(model_name)