#### About

resnet50 with progressive resizing, first 224 then 512

#### Load libraries

In [None]:
# (re)load the autoreload extension so edited modules can be picked up without a kernel restart
%reload_ext autoreload
# mode 2: reload imported modules before each cell execution
%autoreload 2
# render matplotlib figures directly below the cell that produces them
%matplotlib inline

In [None]:
import fastai
from fastai.vision import *

In [None]:
# report the library and interpreter versions this notebook ran against
for label, version in (
    ('fastai:', fastai.__version__),
    ('pytorch:', torch.__version__),
    # keep only the part of the interpreter banner that precedes the first '|'
    ('python:', sys.version.split('|')[0]),
):
    print(label, version)

#### Examine dataset

In [None]:
# download the dataset
! mkdir -p /home/ubuntu/.fastai/data; 
! cd /home/ubuntu/.fastai/data; wget -nc https://data.mendeley.com/datasets/rscbjbr9sj/2/files/41d542e7-7f91-47f6-9ff2-dd8e5a5a7861/ChestXRay2017.zip; unzip -n -q ChestXRay2017.zip

In [None]:
# see what files we've got
# `path` is the dataset root used by every later cell (train/ and test/ are read from it)
# NOTE(review): absolute, machine-specific location — must match the download cell above
path=Path('/home/ubuntu/.fastai/data/chest_xray')
path.ls()

In [None]:
# list the entries of the training directory (`path/'train'` is already a Path)
(path/'train').ls()

In [None]:
# show the first five image paths found in the NORMAL training folder
normal_train_files = get_image_files(path/'train/NORMAL')
normal_train_files[:5]

In [None]:
# look at the shape of one image and visualize it
first_normal_file = get_image_files(path/'train/NORMAL')[0]
img = open_image(first_normal_file)
print(img.shape)
img.show()

In [None]:
# see how much data there is
# Count with get_image_files (as the sample-path cells do) rather than os.listdir:
# listdir counts every directory entry, so stray non-image files would inflate the totals.
for split in ('train', 'test'):
    for label in ('NORMAL', 'PNEUMONIA'):
        heading = '{}/{}:'.format(split, label)
        # pad headings to a common width so the counts line up in one column
        print(heading.ljust(16), len(get_image_files(path/split/label)))

#### Create datablock

In [None]:
# set hyperparameters
size = 224          # image size passed to the transform step below
bs = 64             # batch size passed to databunch()
num_workers = 4     # worker count passed to databunch()

# fix NumPy's global random seed
np.random.seed(42)

In [None]:
# define data augmentation transformations
# called with no arguments, so the library's default settings are used
tfms = get_transforms()

Because we have a labeled test set, we'll be using the `test` directory as the validation set.

In [None]:
# collect every image under `path`, split by top-level folder (train/ for training,
# test/ for validation), and label each image with the name of its parent folder
all_items = ImageItemList.from_folder(path)
split_items = all_items.split_by_folder(train='train', valid='test')
src = split_items.label_from_folder()

In [None]:
# apply the augmentations at the configured image size and batch the result ...
data = src.transform(tfms, size=size).databunch(bs=bs, num_workers=num_workers)
# ... then normalize with the ImageNet statistics
data = data.normalize(imagenet_stats)

#### Do sanity checks

In [None]:
# verify datasets loaded properly: training size first, then validation size
for dataset in (data.train_ds, data.valid_ds):
    print(len(dataset))

In [None]:
# display the class labels the data bunch discovered (taken from the folder names)
data.classes

In [None]:
# eyeball a 3-row grid of images with their labels to confirm loading and labelling look right
data.show_batch(rows=3, figsize=(7,6))

#### Create model

In [None]:
# define architecture
# kept in its own variable so the backbone can be swapped without touching the learner cell
arch = models.resnet50

In [None]:
# create learner
# builds a CNN learner from the data bunch and the chosen architecture; accuracy is
# the metric reported during training
learn = create_cnn(data, arch, metrics=accuracy)

In [None]:
# find ideal learning rate
# run the learning-rate finder, then plot what the recorder captured during that run
learn.lr_find()
learn.recorder.plot()

In [None]:
# learning rate used for the frozen-model training below
# NOTE(review): presumably read off the lr_find plot above — re-check if the data or batch size changes
lr = 9.1e-3

#### Train on frozen model

In [None]:
# first fit the head of the model: 8 epochs of one-cycle training capped at `lr`
learn.fit_one_cycle(8, max_lr=slice(lr))

In [None]:
# plot the losses recorded during the frozen-model training run
learn.recorder.plot_losses()

In [None]:
# checkpoint the learner after the frozen-model stage
# NOTE(review): the name says "size299" but this notebook trains with size = 224;
# confirm nothing loads this exact name before renaming it
learn.save('stage-1-size299')

#### Train on unfrozen model

In [None]:
# unfreeze the model so the following training run fits across the entire model
learn.unfreeze()

In [None]:
# find ideal learning rate
# repeated after unfreezing: the learning rate chosen for the frozen model is not reused below
learn.lr_find()
learn.recorder.plot()

In [None]:
# now fit across the entire model: 3 epochs, with the maximum learning rate given as a range
unfrozen_lr_range = slice(1e-6, 1e-4)
learn.fit_one_cycle(3, max_lr=unfrozen_lr_range)

In [None]:
# checkpoint the learner after the unfrozen-model stage
# NOTE(review): the name says "size299" but this notebook trains with size = 224;
# confirm nothing loads this exact name before renaming it
learn.save('stage-2-size299')

In [None]:
# plot the losses recorded during the unfrozen-model training run
learn.recorder.plot_losses()

#### Look at results

In [None]:
# build the interpretation helper from the trained learner
interp = ClassificationInterpretation.from_learner(learn)
losses, idxs = interp.top_losses()

# sanity check (displays True/False): one loss and one index per validation item
n_valid = len(data.valid_ds)
n_valid == len(losses) == len(idxs)

In [None]:
# show the 9 examples with the highest loss to see where the model is most wrong
interp.plot_top_losses(9, figsize=(15,11))

In [None]:
# plot the confusion matrix for the interpreted predictions
interp.plot_confusion_matrix(figsize=(6,6), dpi=60)

In [None]:
# list the most frequently confused class pairs, using a minimum count threshold of 2
interp.most_confused(min_val=2)

Calculate precision and recall.
* Precision is a measure of how often we were correct out of all the times we predicted pneumonia.  The error here is cases that were fine that we thought were pneumonia.
* Recall is a measure of how often we correctly predicted pneumonia out of all the actual pneumonia cases.  The error here is actual pneumonia cases that we failed to detect.  For these types of medical applications it's probably better to optimize for this number.

In [None]:
# flatten the 2x2 confusion matrix row by row into its four counts
# NOTE(review): assumes rows are actual classes, columns are predicted classes, and
# PNEUMONIA is the second (positive) class — confirm against data.classes
tn, fp, fn, tp = interp.confusion_matrix().ravel()

In [None]:
# precision: share of predicted-positive cases that were truly positive
predicted_positive = tp + fp
precision = tp / predicted_positive

# recall: share of actually-positive cases that were detected
actual_positive = tp + fn
recall = tp / actual_positive

In [None]:
# report both metrics to four decimal places
for template, value in (('Precision: {:.4f}', precision), ('Recall: {:.4f}', recall)):
    print(template.format(value))