In [1]:
import os
import sys
import time
import multiprocessing
import numpy as np
import pandas as pd
import mxnet as mx
from mxnet.io import DataDesc
from mxnet.gluon.model_zoo import vision as models
from sklearn.metrics.ranking import roc_auc_score
from sklearn.model_selection import train_test_split
from PIL import Image
from common.utils import *

%load_ext autoreload
%autoreload 2

In [2]:
print("OS: ", sys.platform)
print("Python: ", sys.version)
print("MXNet: ", mx.__version__)
print("Numpy: ", np.__version__)
print("GPU: ", get_gpu_name())
print(get_cuda_version())
print("CuDNN Version ", get_cudnn_version())
CPU_COUNT = multiprocessing.cpu_count()
print("CPUs: ", CPU_COUNT)

OS:  linux
Python:  3.5.4 |Anaconda custom (64-bit)| (default, Nov  3 2017, 20:01:27) 
[GCC 7.2.0]
MXNet:  0.12.0
Numpy:  1.13.3
GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']
CUDA Version 8.0.61
CuDNN Version  6.0.21
CPUs:  24


In [3]:
# User-set
# Note if NUM_GPUS > 1 then MULTI_GPU = True and ALL GPUs will be used
# Set below to affect batch-size
# E.g. 1 GPU = 64, 2 GPUs = 64*2, 4 GPUs = 64*4
# Note that the effective learning-rate will be decreased this way
NUM_GPUS = 4 # Scaling factor for batch
MULTI_GPU=NUM_GPUS>1

In [4]:
# Globals
CLASSES = 14
WIDTH = 224
HEIGHT = 224
CHANNELS = 3
LR = 0.0001  # Effective learning-rate will decrease as BATCHSIZE rises
EPOCHS = 5
BATCHSIZE = 64*NUM_GPUS
IMAGENET_RGB_MEAN = [0.485, 0.456, 0.406]
IMAGENET_RGB_SD = [0.229, 0.224, 0.225]
TOT_PATIENT_NUMBER = 30805  # From data

In [5]:
# Paths
CSV_DEST = "chestxray"
IMAGE_FOLDER = os.path.join(CSV_DEST, "images")
LABEL_FILE = os.path.join(CSV_DEST, "Data_Entry_2017.csv")
TRAIN_LST = os.path.join(CSV_DEST, "train.lst")
VALID_LST = os.path.join(CSV_DEST, "valid.lst")
TEST_LST = os.path.join(CSV_DEST, "test.lst")


In [6]:
%%time
# Download data
print("Please make sure to download")
print("https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-linux#download-and-install-azcopy")
download_data_chextxray(CSV_DEST)

Please make sure to download
https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-linux#download-and-install-azcopy
Data already exists
CPU times: user 873 ms, sys: 410 ms, total: 1.28 s
Wall time: 1.28 s


## Data prep
https://github.com/apache/incubator-mxnet/issues/1480


In [7]:
df = pd.read_csv(LABEL_FILE)
df.head()    

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,y],Unnamed: 11
0,00000001_000.png,Cardiomegaly,0,1,58,M,PA,2682,2749,0.143,0.143,
1,00000001_001.png,Cardiomegaly|Emphysema,1,1,58,M,PA,2894,2729,0.143,0.143,
2,00000001_002.png,Cardiomegaly|Effusion,2,1,58,M,PA,2500,2048,0.168,0.168,
3,00000002_000.png,No Finding,0,2,81,M,PA,2500,2048,0.171,0.171,
4,00000003_000.png,Hernia,0,3,81,F,PA,2582,2991,0.143,0.143,


In [8]:
# Split labels on unfiltered data
df_label = df['Finding Labels'].str.split('|', expand=False).str.join(sep='*').str.get_dummies(sep='*')
df_label['Image_path'] = IMAGE_FOLDER + os.path.sep + df['Image Index']
#df_label.drop('No Finding', axis=1, inplace=True)
df_label.head()

Unnamed: 0,Atelectasis,Cardiomegaly,Consolidation,Edema,Effusion,Emphysema,Fibrosis,Hernia,Infiltration,Mass,No Finding,Nodule,Pleural_Thickening,Pneumonia,Pneumothorax,Image_path
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,chestxray/images/00000001_000.png
1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,chestxray/images/00000001_001.png
2,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,chestxray/images/00000001_002.png
3,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,chestxray/images/00000002_000.png
4,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,chestxray/images/00000003_000.png


In [9]:
# Training / Valid / Test split (70% / 10% / 20%)
df_train, df_valid, df_test = split_train_val_test(df_label, val_size=0.1, test_size=0.2)
print(df_train.shape)
print(df_valid.shape)
print(df_test.shape)

(78484, 16)
(11212, 16)
(22424, 16)


In [10]:
df_train.to_csv(TRAIN_LST, sep='\t', header=False)
df_valid.to_csv(VALID_LST, sep='\t', header=False)
df_test.to_csv(TEST_LST, sep='\t', header=False)

In [11]:
#%tb
#%run ./common/im2rec.py $TRAIN_LST $PWD --resize 224 --center-crop --quality 90 --num-thread 24
#run not working!?
#python ./common/im2rec.py chestxray/train.lst $PWD --resize 250 --center-crop --quality 90 --num-thread 24
#python ./common/im2rec.py chestxray/valid.lst $PWD --resize 250 --center-crop --quality 90 --num-thread 24
#python ./common/im2rec.py chestxray/test.lst $PWD --resize 250 --center-crop --quality 90 --num-thread 24

Did not find and list file with prefix /home/hoaphumanoid/notebooks/repos/DeepLearningFrameworks/notebooks/$TRAIN_LST


## Data Loading
https://mxnet.incubator.apache.org/architecture/note_data_loading.html#mxnet-io-python-interface

https://github.com/miraclewkf/multilabel-MXNet/blob/master/train_multilabel.py

In [12]:
train = mx.io.ImageRecordIter(
    path_imgrec = "chestxray/train.rec",
    data_shape = (3,WIDTH,HEIGHT),
    path_imglist = "chestxray/train.lst",
    label_width = 15,
    batch_size = BATCHSIZE,
    shuffle = True,
    mean_r = IMAGENET_RGB_MEAN[0],
    mean_g = IMAGENET_RGB_MEAN[1],
    mean_b = IMAGENET_RGB_MEAN[2],
    std_r = IMAGENET_RGB_SD[0],
    std_g = IMAGENET_RGB_SD[1],
    std_b = IMAGENET_RGB_SD[2],
    rand_crop = 1,
    rand_mirror = 1, #flip horizontally
    max_rotate_angle = 10,
    preprocess_threads = 24
)

valid = mx.io.ImageRecordIter(
    path_imgrec = "chestxray/valid.rec",
    data_shape = (3,WIDTH,HEIGHT),
    path_imglist = "chestxray/valid.lst",
    label_width = 15,
    batch_size = BATCHSIZE,
    shuffle = False,
    rand_crop = 0,
    rand_mirror = 0, #flip horizontally
    preprocess_threads = 24
)


test = mx.io.ImageRecordIter(
    path_imgrec = "chestxray/test.rec",
    data_shape = (3,WIDTH,HEIGHT),
    path_imglist = "chestxray/test.lst",
    label_width = 15,
    batch_size = BATCHSIZE,
    shuffle = False,
    rand_mirror = 0, #flip horizontally
    preprocess_threads = 24
)


## Helper Functions

In [13]:
import os, sys

if sys.version_info[0] >= 3:
    from urllib.request import urlretrieve
else:
    from urllib import urlretrieve

def download(url):
    filename = url.split("/")[-1]
    if not os.path.exists(filename):
        urlretrieve(url, filename)

In [14]:
def get_model(prefix, epoch):
    download(prefix+'-symbol.json')
    download(prefix+'-%04d.params' % (epoch,))

In [15]:
#https://hackernoon.com/transfer-learning-with-mxnet-gluon-8203005afafe
#http://mxnet.incubator.apache.org/faq/finetune.html
def get_symbol(model_name='resnet50', out_features=CLASSES):
    if model_name == 'densenet121':
        pretrained = models.densenet121(pretrained=True)
        model = models.densenet121(classes=out_features)
        model.features = pretrained.features
    elif model_name == 'resnet50':
        get_model('http://data.mxnet.io/models/imagenet/resnet/50-layers/resnet-50', 0)
        model, arg_params, aux_params = mx.model.load_checkpoint('resnet-50', 0)
    else:
        raise ValueError("Unknown model-name")

    return model

In [16]:
def get_fine_tune_model(symbol, arg_params, num_classes, layer_name='flatten0'):
    """
    symbol: the pretrained network symbol
    arg_params: the argument parameters of the pretrained model
    num_classes: the number of classes for the fine-tune datasets
    layer_name: the layer name before the last fully-connected layer
    """
    all_layers = symbol.get_internals()
    net = all_layers[layer_name+'_output']
    net = mx.symbol.FullyConnected(data=net, num_hidden=num_classes, name='fc1')
    net = mx.symbol.SoftmaxOutput(data=net, name='softmax')
    new_args = dict({k:arg_params[k] for k in arg_params if 'fc1' not in k})
    return (net, new_args)

In [17]:
def fit(symbol, arg_params, aux_params, train, val, batch_size, num_gpus):
    devs = [mx.gpu(i) for i in range(num_gpus)]
    mod = mx.mod.Module(symbol=symbol, context=devs)
    mod.fit(train, val,
        num_epoch=8,
        arg_params=arg_params,
        aux_params=aux_params,
        allow_missing=True,
        batch_end_callback = mx.callback.Speedometer(batch_size, 10),
        #epoch_end_callback=mx.callback.Speedometer(batch_size, 10),
        kvstore='device',
        optimizer='sgd',
        optimizer_params={'learning_rate':0.01},
        initializer=mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2),
        eval_metric='acc')
    metric = mx.metric.Accuracy()
    return mod.score(val, metric)

In [18]:
get_model('http://data.mxnet.io/models/imagenet/resnet/50-layers/resnet-50', 0)
sym, arg_params, aux_params = mx.model.load_checkpoint('resnet-50', 0)
(new_sym, new_args) = get_fine_tune_model(sym, arg_params, 15)

In [19]:
mod_score = fit(new_sym, new_args, aux_params, train, valid, BATCHSIZE, NUM_GPUS)
mod_score

[('accuracy', 0.91647727272727275)]

In [55]:
def init_symbol(sym, lr=LR, gpus=NUM_GPUS, batch_size=BATCHSIZE, epochs=EPOCHS, num_examples=TOT_PATIENT_NUMBER, step=[5,10]):    
    devs = [mx.gpu(i) for i in range(gpus)]   
    model = mx.mod.Module(
        context       = devs,
        symbol        = sym
    )
    #model.bind(data_shapes=[DataDesc(name='data', shape=(batchs, maxl))],
    #         label_shapes=[DataDesc(name='softmax_label', shape=(batchs,))])
    model.bind(data_shapes=[('data', (1,1,224,224))], label_shapes=[('label', (1,15))])
    # Glorot-uniform initializer
    #model.init_params(initializer=mx.init.Xavier(rnd_type='uniform'))
    model.output.initialize(mx.init.Xavier(rnd_type='uniform'))
    model.init_optimizer(optimizer='Adam', 
                       optimizer_params=(('learning_rate', lr),
                                         ('beta1', 0.9),
                                         ('beta2', 0.999)))
    
    #Criterion
    def acc(label, pred, label_width = num_class):
        return float((label == np.round(pred)).sum()) / label_width / pred.shape[0]

    def loss(label, pred):
        loss_all = 0
        for i in range(len(pred)):
            loss = 0
            loss -= label[i] * np.log(pred[i] + 1e-6) + (1.- label[i]) * np.log(1. + 1e-6 - pred[i])
            loss_all += np.sum(loss)
        loss_all = float(loss_all)/float(len(pred) + 0.000001)
        return loss_all
    
    cri = list()
    cri.append(mx.metric.np(acc))
    cri.append(mx.metric.np(loss))
    
    #Scheduler
    def multi_factor_scheduler(begin_epoch, epoch_size, step=step, factor=0.1):
        step_ = [epoch_size * (x-begin_epoch) for x in step if x-begin_epoch > 0]
    return mx.lr_scheduler.MultiFactorScheduler(step=step_, factor=factor) if len(step_) else None

    epoch_size = max(int(num_examples / batch_size), 1)
    sch=multi_factor_scheduler(epochs, epoch_size)
    
    return model, cri, sch 

In [56]:
model = get_symbol(model_name='densenet121', out_features=CLASSES+1)
model, criterion, scheduler = init_symbol(model, num_examples=df_train.shape[0])

AttributeError: 'DenseNet' object has no attribute 'list_arguments'

In [None]:
# run
model.fit(train,
              num_epoch=EPOCHS,
              eval_data=valid,
              eval_metric=cri,
              #batch_end_callback=batch_end_callbacks,
              epoch_end_callback=batch_end_callback=mx.callback.Speedometer(BATCHSIZE, 50),
              allow_missing=True)

In [27]:
#####################################################################################################
## Test CheXNet

In [28]:
%%time
# Load model for testing
# I comment this out to create a fair test against Keras
#chexnet_sym_test = get_symbol()
#chkpt = torch.load("best_chexnet.pth.tar")
#chexnet_sym_test.load_state_dict(chkpt['state_dict'])

CPU times: user 4 µs, sys: 1e+03 ns, total: 5 µs
Wall time: 10.3 µs


In [29]:
%%time
## Evaluate
# AUC: 0.8095
#test_loss = valid_epoch(chexnet_sym_test, test_loader, criterion, -1, 'testing')
test_loss = valid_epoch(chexnet_sym, test_loader, criterion, -1, 'testing')

Testing epoch 0
Test-Dataset loss: 0.1540
Full AUC [0.8002874678587735, 0.8327232954883196, 0.7960755212055248, 0.886545471239617, 0.8805040798213019, 0.923330137938897, 0.7402972652710587, 0.8543066530487914, 0.6278403229432927, 0.8110967774108574, 0.7306166324926078, 0.7963945586737139, 0.7731040564373898, 0.8799044890256512]
Test-Dataset AUC: 0.8095
CPU times: user 11.4 s, sys: 10.6 s, total: 22 s
Wall time: 3min 16s


In [30]:
#####################################################################################################
## Extra: IO Experiment (time on numpy arrays)

In [63]:
def data_gen_to_numpy(data_gen, bs, ch=CHANNELS, wi=WIDTH, hi=HEIGHT, cl=CLASSES):
    x_dta = np.zeros((data_gen.__len__()*bs, ch, wi, hi), 
                   dtype=np.float32)
    y_dta = np.zeros((data_gen.__len__()*bs, cl),
                   dtype=np.int32)
    c = 0
    for x, y in data_gen:
        ln = len(y)
        x_dta[c*ln:(c+1)*ln] = x
        y_dta[c*ln:(c+1)*ln] = y
        c+= 1
    return x_dta, y_dta

In [60]:
%%time
x_train, y_train = data_gen_to_numpy(train_loader, BATCHSIZE)

CPU times: user 21.7 s, sys: 17.3 s, total: 39 s
Wall time: 3min 5s


In [64]:
%%time
x_val, y_val = data_gen_to_numpy(valid_loader, 16*BATCHSIZE)

CPU times: user 1.78 s, sys: 5.08 s, total: 6.86 s
Wall time: 46.6 s


In [67]:
print(x_train.shape, y_train.shape)
print(x_val.shape, y_val.shape)

(87424, 3, 224, 224) (87424, 14)
(8192, 3, 224, 224) (8192, 14)


In [72]:
train_loader = yield_mb(x_train, y_train, BATCHSIZE, shuffle=False)
valid_loader = yield_mb(x_val, y_val, BATCHSIZE, shuffle=False)

In [73]:
%%time
# Time for one epoch is 280s vs 340s with data-generator (60s of IO lag)
stime = time.time()
train_epoch(chexnet_sym, train_loader, optimizer, criterion, -1)
loss_val = valid_epoch(chexnet_sym, valid_loader, criterion, -1)
scheduler.step(loss_val)
etime = time.time()
print("Epoch time: {0:.0f} seconds".format(etime-stime))
print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

Training epoch 0
Training loss: 0.1426
Validating epoch 0
Validation loss: 0.1181
Full AUC [0.7325276556792604, 0.8143018460329929, 0.6344205025402343, 0.7513804306120363, 0.8940063333242, 0.9349639412633591, 0.8541304263464525, 0.9589464678562699, 0.5537374593754416, 0.8553994709891826, 0.7931636873466525, 0.7896508852649442, 0.7662702585788976, 0.8515950708399794]
Validation AUC: 0.7989
Epoch time: 280 seconds
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
CPU times: user 4min 37s, sys: 34.3 s, total: 5min 12s
Wall time: 4min 39s
