# _Trial 10: Creating a DL Model for Atelectasis_

__Focus__: Utilizing U-Ignore Uncertainty Approach to create preliminary model then predict labels that were originally labeled uncertain

In [1]:
from capstone import data
from fastai.vision import *
from fastai.callbacks.hooks import *
from fastai.utils.mem import *

In [2]:
# import libraries
import pandas as pd
pd.options.display.max_columns = None
import numpy as np
import random
import os

# Matplotlib
%matplotlib inline
%config InlineBackend.figure_format='retina'
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('ggplot')

In [3]:
path = Config.data_path()
path

PosixPath('/home/jupyter/springboard-capstone-2/data')

In [34]:
chexpert_path = path/'CheXpert-v1.0-small'
(chexpert_path).ls()

[PosixPath('/home/jupyter/springboard-capstone-2/data/CheXpert-v1.0-small/valid'),
 PosixPath('/home/jupyter/springboard-capstone-2/data/CheXpert-v1.0-small/train'),
 PosixPath('/home/jupyter/springboard-capstone-2/data/CheXpert-v1.0-small/train.csv'),
 PosixPath('/home/jupyter/springboard-capstone-2/data/CheXpert-v1.0-small/valid.csv')]

In [4]:
# take 5% of data with a seed of 1
train_df, valid_df = data.data_processing(path, 'Atelectasis', 1, 0.05);

(223414, 19)
(234, 19)
Extracted Patient ID from Path column and created new column named Patient_id
Created function named seed_data to set seed for sample data
Seeded data.
Created sample dataframe with input seed.
Created training and validation dataframe with expressed competition task, replacing NaN's with 0 and resetting the index
Atelectasis
Training shape
(11576, 2)

Validation shape
(234, 2)


In [5]:
train_df.head()

Unnamed: 0,Path,Atelectasis
0,CheXpert-v1.0-small/train/patient00003/study1/...,0.0
1,CheXpert-v1.0-small/train/patient00015/study1/...,0.0
2,CheXpert-v1.0-small/train/patient00015/study2/...,-1.0
3,CheXpert-v1.0-small/train/patient00028/study1/...,-1.0
4,CheXpert-v1.0-small/train/patient00028/study2/...,1.0


In [6]:
valid_df.head()

Unnamed: 0,Path,Atelectasis
0,CheXpert-v1.0-small/valid/patient64541/study1/...,0.0
1,CheXpert-v1.0-small/valid/patient64542/study1/...,0.0
2,CheXpert-v1.0-small/valid/patient64542/study1/...,0.0
3,CheXpert-v1.0-small/valid/patient64543/study1/...,0.0
4,CheXpert-v1.0-small/valid/patient64544/study1/...,0.0


# _U-Ignore_: Drop -1* Values

(-1) = Uncertain labels

In [7]:
train_df['Atelectasis'].value_counts(normalize=True)

 0.0    0.698601
 1.0    0.152903
-1.0    0.148497
Name: Atelectasis, dtype: float64

In [8]:
train_df['Atelectasis'].value_counts()

 0.0    8087
 1.0    1770
-1.0    1719
Name: Atelectasis, dtype: int64

In [9]:
u_ignore_train_df = train_df[train_df['Atelectasis'] != -1].reset_index(drop=True)

In [10]:
type(u_ignore_train_df)

pandas.core.frame.DataFrame

In [11]:
u_ignore_train_df['Atelectasis'].value_counts(normalize=True)

0.0    0.820432
1.0    0.179568
Name: Atelectasis, dtype: float64

In [12]:
u_ignore_train_df.shape

(9857, 2)

In [27]:
u_ignore_train_df.head()

Unnamed: 0,Path,Atelectasis
0,CheXpert-v1.0-small/train/patient00003/study1/...,0.0
1,CheXpert-v1.0-small/train/patient00015/study1/...,0.0
2,CheXpert-v1.0-small/train/patient00028/study2/...,1.0
3,CheXpert-v1.0-small/train/patient00039/study1/...,0.0
4,CheXpert-v1.0-small/train/patient00039/study1/...,0.0


# DL Model Training: Set-Up

In [13]:
# transformations to be done to images (if needed)
tfms = get_transforms(do_flip=False, flip_vert=False)

In [35]:
# create imagelist from u_ignore_train_df
src = (ImageList.from_df(chexpert_path, u_ignore_train_df, folder='', suffix=''))

AttributeError: 'PosixPath' object has no attribute 'iloc'

In [15]:
src = src.split_by_rand_pct(0.1)

In [16]:
free = gpu_mem_get_free_no_cache()
# the max size of bs depends on the available GPU RAM
if free > 8200: 
    bs=32
else:           
    bs=16
print(f"using bs={bs}, have {free}MB of GPU RAM free")

using bs=16, have 7601MB of GPU RAM free


In [17]:
from sklearn.metrics import roc_auc_score

class AUC(Callback):
    "AUC score"
    def __init__(self):
        pass
    
    def on_epoch_begin(self, **kwargs): 
        self.outputs = []
        self.targets = []

    def on_batch_end(self, last_output, last_target, **kwargs):
        "expects binary output with data.c=2 "
        self.outputs += list(to_np(last_output)[:, 1])
        self.targets += list(to_np(last_target))

    def on_epoch_end(self, last_metrics, **kwargs): 
        return {'last_metrics': last_metrics + [roc_auc_score(self.targets, self.outputs)]}

auc = AUC()

In [18]:
def auroc_score(input, target):
    input, target = input.cpu().numpy()[:,1], target.cpu().numpy()
    return roc_auc_score(target, input)

class AUROC(Callback):
    _order = -20 #Needs to run before the recorder

    def __init__(self, learn, **kwargs): self.learn = learn
    def on_train_begin(self, **kwargs): self.learn.recorder.add_metric_names(['AUROC'])
    def on_epoch_begin(self, **kwargs): self.output, self.target = [], []
    
    def on_batch_end(self, last_target, last_output, train, **kwargs):
        if not train:
            self.output.append(last_output)
            self.target.append(last_target)
                
    def on_epoch_end(self, last_target, last_output, **kwargs):
        if len(self.output) > 0:
            output = torch.cat(self.output)
            target = torch.cat(self.target)
            preds = F.softmax(output, dim=1)
            metric = auroc_score(preds, target)
            self.learn.recorder.add_metrics([metric])
            
auroc = AUROC

In [19]:
def accuracy_1(input:Tensor, targs:Tensor)->Rank0Tensor:
    '''Compute accuracy with targs when input is bs * n_classes.'''
    targs = targs.view(-1).long()
    n = targs.shape[0]
    input = input.argmax(dim=-1).view(n,-1)
    targs = targs.view(n,-1)
    return (input==targs).float().mean()

# Create Data For Model

In [20]:
acc_02 = partial(accuracy_thresh, thresh=0.2)
f_score = partial(fbeta, thresh=0.2, beta=1)

In [21]:
data = (src.label_from_df(cols='Atelectasis')
        .transform(tfms, size=320)
        .databunch(bs = bs)
        .normalize(imagenet_stats))

In [24]:
data.c

1

In [25]:
data.classes

AttributeError: classes

In [None]:
from torch import nn

In [None]:
learn10 = cnn_learner(data=data, base_arch=models.densenet121, metrics=accuracy)

In [None]:
learn10.lr_find(start_lr=1e-7, end_lr=100)
learn10.recorder.plot(suggestion=True)

# Rd. 1 Training

In [None]:
lr = learn10.recorder.min_grad_lr
lr

In [None]:
learn10.fit_one_cycle(5, lr)