In [1]:
import logging
import multiprocessing
import os
import subprocess
import sys

import numpy as np
import pandas as pd

import mxnet as mx
from mxnet.io import DataDesc
from mxnet import nd, gluon, autograd
from mxnet.gluon.data import RecordFileDataset, ArrayDataset
from mxnet.gluon.data.vision.datasets import ImageRecordDataset
from mxnet.gluon.data.dataloader import DataLoader
from mxnet.gluon.model_zoo import vision as models
from mxnet import recordio

from sklearn.metrics.ranking import roc_auc_score
from sklearn.model_selection import train_test_split
from PIL import Image

from common.utils import *
from common.params_dense import *

# Deliberately kept after the wildcard imports so that `math` and `time`
# still win over any same-named symbol those modules may export.
import math
from time import time

%load_ext autoreload
%autoreload 2

  from ._conv import register_converters as _register_converters


In [2]:
# Report the software / hardware stack of this run. The GPU, CUDA and cuDNN
# helpers are not defined in this notebook (presumably they come from the
# wildcard import of common.utils).
print("OS: ", sys.platform)
print("Python: ", sys.version)
print("Numpy: ", np.__version__)
print("MXNet: ", mx.__version__)
print("GPU: ", get_gpu_name())
print(get_cuda_version())
print("CuDNN Version ", get_cudnn_version())

OS:  linux
Python:  3.5.4 |Anaconda custom (64-bit)| (default, Nov 20 2017, 18:44:38) 
[GCC 7.2.0]
Numpy:  1.14.1
MXNet:  1.3.0
GPU:  ['Tesla V100-PCIE-16GB', 'Tesla V100-PCIE-16GB', 'Tesla V100-PCIE-16GB', 'Tesla V100-PCIE-16GB']
CUDA Version 9.0.176
CuDNN Version  7.0.5


In [3]:
# Hardware available to this run: logical CPUs and the number of GPUs
# reported by get_gpu_name() (one list entry per device).
CPU_COUNT = multiprocessing.cpu_count()
GPU_COUNT = len(get_gpu_name())
# More than one GPU switches on the manual LR / batch-size scaling below.
MULTI_GPU = GPU_COUNT > 1
print("CPUs: ", CPU_COUNT)
print("GPUs: ", GPU_COUNT)

CPUs:  24
GPUs:  4


In [4]:
# Manually scale to multi-gpu
# NOTE: this cell is not idempotent - every execution multiplies the current
# LR and BATCHSIZE by GPU_COUNT once more, so run it exactly once per kernel.
if MULTI_GPU:
    LR, BATCHSIZE = LR * GPU_COUNT, BATCHSIZE * GPU_COUNT

## Data Download

In [5]:
# Paths: everything lives under CSV_DEST - the raw images, the label CSV and,
# for each split, the im2rec list file (.lst) and the packed RecordIO file (.rec).
CSV_DEST = "chestxray"
IMAGE_FOLDER, LABEL_FILE = (
    os.path.join(CSV_DEST, name) for name in ("images", "Data_Entry_2017.csv")
)
TRAIN_LST, VALID_LST, TEST_LST = (
    os.path.join(CSV_DEST, split + ".lst") for split in ("train", "valid", "test")
)
TRAIN_REC, VALID_REC, TEST_REC = (
    os.path.join(CSV_DEST, split + ".rec") for split in ("train", "valid", "test")
)

In [6]:
%%time
# Download data
print("Please make sure to download")
print("https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-linux#download-and-install-azcopy")
download_data_chextxray(CSV_DEST)

Please make sure to download
https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-linux#download-and-install-azcopy
Data already exists
CPU times: user 623 ms, sys: 233 ms, total: 856 ms
Wall time: 855 ms


  params = attr.ib(convert=attr.converters.optional(tuple))
  ids = attr.ib(default=None, convert=_ensure_immutable_ids)


## Data prep
https://github.com/apache/incubator-mxnet/issues/1480


In [7]:
# Load the per-image metadata table (one row per image: findings, patient ID, ...)
# and preview its first rows.
df = pd.read_csv(LABEL_FILE)
df.head()

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,y],Unnamed: 11
0,00000001_000.png,Cardiomegaly,0,1,58,M,PA,2682,2749,0.143,0.143,
1,00000001_001.png,Cardiomegaly|Emphysema,1,1,58,M,PA,2894,2729,0.143,0.143,
2,00000001_002.png,Cardiomegaly|Effusion,2,1,58,M,PA,2500,2048,0.168,0.168,
3,00000002_000.png,No Finding,0,2,81,M,PA,2500,2048,0.171,0.171,
4,00000003_000.png,Hernia,0,3,81,F,PA,2582,2991,0.143,0.143,


In [8]:
def data_prep(df, img_dir, patient_ids):
    """Build the multi-hot label table for the images of the given patients.

    Parameters
    ----------
    df : pandas.DataFrame
        Metadata table. Must provide the columns 'Finding Labels'
        ('|'-separated findings), 'Patient ID' and 'Image Index'.
        It is not modified.
    img_dir : str
        Directory prepended to every 'Image Index' file name.
    patient_ids : list-like
        Patient IDs whose rows are kept.

    Returns
    -------
    pandas.DataFrame
        One row per kept image (original index preserved), one 0/1 column per
        finding with the 'No Finding' indicator removed, followed by an
        'Image_path' column holding the joined image path.
    """
    # Encode the labels on the unfiltered data so that every split ends up
    # with the same label columns, whatever findings its own patients have.
    df_label = df['Finding Labels'].str.get_dummies(sep='|')

    # Keep only the rows that belong to the requested patients.
    selected = df['Patient ID'].isin(patient_ids)

    # Remove the unnecessary 'No Finding' indicator (an all-zero row already
    # encodes it). drop() returns a new frame, so the column added below is
    # never written into a slice of another frame; errors='ignore' keeps the
    # function usable on data that contains no 'No Finding' rows at all.
    df_label = df_label[selected].drop(['No Finding'], axis=1, errors='ignore')

    # Full image paths, in the same row order as the filtered labels.
    df_label['Image_path'] = [
        os.path.join(img_dir, im) for im in df.loc[selected, 'Image Index']
    ]
    return df_label

In [9]:
# Training / Valid / Test split (70% / 10% / 20%)
# The split is made on patient IDs, then each ID set is turned into its own
# label table.
train_set, valid_set, test_set = get_train_valid_test_split(TOT_PATIENT_NUMBER)
df_train, df_valid, df_test = (
    data_prep(df, IMAGE_FOLDER, ids) for ids in (train_set, valid_set, test_set)
)
print(df_train.shape, df_valid.shape, df_test.shape, sep="\n")

train:21563 valid:3080 test:6162
(87306, 15)
(7616, 15)
(17198, 15)


In [10]:
# Write one tab-separated list file per split, without a header row; the
# DataFrame index is written as the first column.
for split_frame, lst_path in ((df_train, TRAIN_LST),
                              (df_valid, VALID_LST),
                              (df_test, TEST_LST)):
    split_frame.to_csv(lst_path, sep='\t', header=False)

In [11]:
# Pack every split's .lst file into a RecordIO file with im2rec.
# Use the interpreter that runs this kernel instead of a machine-specific
# absolute path.
PY_PATH = sys.executable
REC_PY = "./common/im2rec.py"
IM2REC_ARGS = ['--resize', '224', '--center-crop', '--quality', '100',
               '--num-thread', str(CPU_COUNT), '--pack-label']

for lst_path, rec_path in ((TRAIN_LST, TRAIN_REC),
                           (VALID_LST, VALID_REC),
                           (TEST_LST, TEST_REC)):
    # Check each split on its own so that a missing valid/test file is
    # rebuilt even when train.rec already exists.
    if not os.path.isfile(rec_path):
        # check_call raises CalledProcessError on a non-zero exit status, so
        # a failed conversion is not silently ignored.
        subprocess.check_call([PY_PATH, REC_PY, lst_path, os.getcwd()] + IM2REC_ARGS)

## Data Loading
https://mxnet.incubator.apache.org/architecture/note_data_loading.html#mxnet-io-python-interface

https://github.com/miraclewkf/multilabel-MXNet/blob/master/train_multilabel.py

### Pre-processing / Data Augmentation transforms

In [12]:
def transform_test(image, label):
    """Deterministic pre-processing used for evaluation.

    Resizes the image with ``resize_short`` to WIDTH, scales the pixel values
    to float32 by dividing by 255, normalises with IMAGENET_RGB_MEAN /
    IMAGENET_RGB_SD and moves the last axis to the front.
    The label is returned unchanged.
    """
    resized = mx.image.resize_short(image, WIDTH)
    scaled = resized.astype(np.float32)/255.
    normalized = mx.image.color_normalize(
        scaled,
        mean=mx.nd.array(IMAGENET_RGB_MEAN),
        std=mx.nd.array(IMAGENET_RGB_SD))
    # (2,0,1): presumably HWC -> CHW, the layout the network consumes
    return normalized.transpose((2,0,1)), label
        


# Horizontal-flip augmenter constructed with p=0.5, shared by all calls
flipper = mx.image.HorizontalFlipAug(0.5)
def transform_aug(image, label):
    """Randomised pre-processing used for training.

    Resizes with ``resize_short`` to WIDTH+20, takes a random (WIDTH, HEIGHT)
    crop, scales the pixels to float32 by dividing by 255, normalises with
    IMAGENET_RGB_MEAN / IMAGENET_RGB_SD, applies ``flipper`` and finally
    moves the last axis to the front. The label is returned unchanged.
    """
    enlarged = mx.image.resize_short(image, WIDTH+20)
    # random_crop also returns the crop rectangle, which is not needed here
    cropped, _ = mx.image.random_crop(enlarged, (WIDTH, HEIGHT))
    scaled = cropped.astype(np.float32)/255.
    normalized = mx.image.color_normalize(
        scaled,
        mean=mx.nd.array(IMAGENET_RGB_MEAN, dtype=np.float32),
        std=mx.nd.array(IMAGENET_RGB_SD, dtype=np.float32))
    flipped = flipper(normalized)
    return flipped.transpose((2,0,1)), label



### Hot fixing DataLoader for multi-processing and RecordFileDataset

We hot-fix the MXNet `DataLoader` so that it supports multiprocessing with `ImageRecordDataset`.
See this issue: https://github.com/apache/incubator-mxnet/issues/9974
This is a hack that has not been tested thoroughly; use with caution.

In [13]:
# The dataset remembers its file name so that a fresh record handle can be
# opened later, once per dataloader worker process.

def reinit(self):
    """(Re)open the indexed record file named by ``self._filename``.

    The index file is expected next to it, with the extension replaced
    by '.idx'.
    """
    base_path, _ = os.path.splitext(self._filename)
    self._record = mx.recordio.MXIndexedRecordIO(base_path + '.idx', self._filename, 'r')

def __init__new(self, filename):
    """Replacement constructor: store the file name, then open the record file."""
    self._filename = filename
    self.reinit()

RecordFileDataset.__init__ = __init__new
RecordFileDataset.reinit = reinit

# We modify the dataloader worker_loop to reinit the dataset if possible
# and then call the original worker_loop.

# Save the pristine worker_loop only once: when this cell is executed again,
# `worker_loop` already points at our wrapper, and saving it a second time
# would make the wrapper call itself forever.
if not hasattr(mx.gluon.data.dataloader, 'worker_loop_'):
    mx.gluon.data.dataloader.worker_loop_ = mx.gluon.data.dataloader.worker_loop

def worker_loop(dataset, key_queue, data_queue, batchify_fn):
    """Worker entry point: reopen the dataset's file handle, then delegate.

    Datasets exposing ``reinit`` get it called first, so that the worker
    process opens its own record handle; all arguments are then forwarded
    unchanged to the saved original ``worker_loop_``.
    """
    if hasattr(dataset, 'reinit'):
        dataset.reinit()
    mx.gluon.data.dataloader.worker_loop_(dataset, key_queue, data_queue, batchify_fn)

mx.gluon.data.dataloader.worker_loop = worker_loop

### Creating the datasets

In [14]:
# Only the training split gets the random crop / flip augmentation. Both
# evaluation splits use the deterministic transform so that their metrics
# are reproducible from run to run.
train_dataset = ImageRecordDataset(TRAIN_REC, transform=transform_aug)
val_dataset = ImageRecordDataset(VALID_REC, transform=transform_test)
test_dataset = ImageRecordDataset(TEST_REC, transform=transform_test)

In [15]:
# Training batches are shuffled; an incomplete final batch is rolled over
# into the next epoch.
train_dataloader = DataLoader(
    train_dataset, batch_size=BATCHSIZE, shuffle=True,
    num_workers=CPU_COUNT, last_batch='rollover')
# Evaluation loaders keep the file order. NOTE: 'discard' drops an incomplete
# final batch, so those trailing samples are never scored.
val_dataloader = DataLoader(
    val_dataset, batch_size=BATCHSIZE, shuffle=False,
    num_workers=CPU_COUNT, last_batch='discard')
test_dataloader = DataLoader(
    test_dataset, batch_size=BATCHSIZE, shuffle=False,
    num_workers=CPU_COUNT, last_batch='discard')

## Creating the network

### Loading the symbols and weights of a pre-trained model and removing the last layer

In [16]:
def get_symbol():
    """Load the 'densenet-121' checkpoint (epoch 0) cut at the 'pool5' output.

    Returns
    -------
    tuple
        ``(symbol, arg_params, aux_params)`` where ``symbol`` is the internal
        'pool5_output' node of the loaded network, ``arg_params`` is a shallow
        copy of the checkpoint's argument parameters and ``aux_params`` is the
        checkpoint's auxiliary-state dictionary.
    """
    epoch = 0
    # Presumably fetches the checkpoint files into the working directory
    get_mxnet_model('https://migonzastorage.blob.core.windows.net/deep-learning/models/mxnet/densenet-121', epoch)
    full_sym, arg_params, aux_params = mx.model.load_checkpoint('densenet-121', epoch)
    # Everything after the 'pool5' layer is dropped
    truncated_sym = full_sym.get_internals()['pool5_output']
    return truncated_sym, dict(arg_params), aux_params

In [17]:
# Truncated pre-trained symbol plus the checkpoint's argument / auxiliary parameters
new_sym, arg_params, aux_params = get_symbol()

In [18]:
# One GPU context per detected device; batches are later split across this list
ctx = [mx.gpu(i) for i in range(GPU_COUNT)]

### Assigning pre-trained params to a Gluon Symbol block

In [19]:
# Wrap the truncated symbol in a Gluon block fed through the 'data' variable
pre_trained = gluon.nn.SymbolBlock(outputs=new_sym, inputs=mx.sym.var('data'))
net_params = pre_trained.collect_params()
# Copy every checkpoint value (arguments first, then auxiliary states) whose
# name exists in the block onto all devices in ctx; other names are skipped.
for checkpoint_params in (arg_params, aux_params):
    for name, value in checkpoint_params.items():
        if name in net_params:
            net_params[name]._load_init(value, ctx=ctx)

### Creating a new fully connected layer with CLASSES units

In [20]:
# New output layer with CLASSES units, Xavier-initialised on every device in ctx
dense = gluon.nn.Dense(CLASSES)
dense.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)

### Creating the new Gluon network

In [21]:
# Full model: pre-trained feature extractor followed by the new dense layer
net = gluon.nn.HybridSequential()
net.add(pre_trained, dense)
net.hybridize()

## Trainer

In [22]:
# Adam optimiser over all parameters of `net`, using the learning rate LR
trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': LR})

## Loss 

In [23]:
# Sigmoid + binary cross-entropy in one op; the training loops feed it the raw outputs of `net`
binary_cross_entropy = gluon.loss.SigmoidBinaryCrossEntropyLoss()

## Output

In [24]:
# Sigmoid activation applied to the raw outputs of `net` at evaluation time
sig = gluon.nn.Activation('sigmoid')

## Evaluation loop

In [25]:
def evaluate_accuracy(data_iterator, net):
    """Return the element-wise accuracy of ``net`` over ``data_iterator``.

    Each batch is split across the devices of the global ``ctx`` list; the
    network outputs go through the global ``sig`` activation, are rounded to
    0/1 and compared with the labels entry by entry.

    Parameters
    ----------
    data_iterator : iterable
        Yields ``(data, label)`` batches.
    net : callable
        Network applied to every per-device slice of ``data``.

    Returns
    -------
    float
        Matching entries divided by all compared entries, or ``nan`` when
        the iterator yields nothing.
    """
    n_correct = 0
    n_total = 0
    for data, label in data_iterator:
        data_split = gluon.utils.split_and_load(data, ctx)
        label_split = gluon.utils.split_and_load(label, ctx)
        # Issue the forward pass for every device before the first asnumpy()
        # call, which blocks until its result is available.
        outputs = [(sig(net(X)), Y) for X, Y in zip(data_split, label_split)]
        for output, target in outputs:
            predicted = np.round(output.asnumpy())
            n_correct += int((target.asnumpy() == predicted).sum())
            n_total += predicted.size
    # Counting entries (instead of dividing by the last batch index) gives the
    # right value for any number of batches and any slice size.
    return n_correct / n_total if n_total else float('nan')

## Training loop

In [26]:
%%time
n_batch = 50
for e in range(EPOCHS):
    tick = time()
    for i, (data, label) in enumerate(train_dataloader):        
        data_split = gluon.utils.split_and_load(data, ctx)
        label_split = gluon.utils.split_and_load(label, ctx)  
        
        # Printing the loss here to allow data to be loaded asynchronously on the GPU
        if (i%n_batch == 0 and i > 0):
            print('Batch {0}: Sigmoid Binary Cross Entropy Loss: {1:.4f}'.format(i,sum(losses).mean().asscalar()))            
            
        with autograd.record():
            losses = [binary_cross_entropy(net(X), Y) for X, Y in zip(data_split, label_split)]
        for l in losses:
            l.backward()
        trainer.step(data.shape[0])
    test_accuracy = evaluate_accuracy(val_dataloader, net)
    print('Epoch {0}, {1:.6f} test_accuracy after {2:.2f} seconds'.format(e, test_accuracy, time()-tick))

MXNetError: [11:35:49] /home/travis/build/dmlc/mxnet-distro/mxnet-build/3rdparty/mshadow/mshadow/./stream_gpu-inl.h:62: Check failed: e == cudaSuccess CUDA: an illegal memory access was encountered

Stack trace returned 10 entries:
[bt] (0) /anaconda/envs/py35/lib/python3.5/site-packages/mxnet/libmxnet.so(+0x34d2fa) [0x7ff0678672fa]
[bt] (1) /anaconda/envs/py35/lib/python3.5/site-packages/mxnet/libmxnet.so(+0x34d921) [0x7ff067867921]
[bt] (2) /anaconda/envs/py35/lib/python3.5/site-packages/mxnet/libmxnet.so(+0x2653c50) [0x7ff069b6dc50]
[bt] (3) /anaconda/envs/py35/lib/python3.5/site-packages/mxnet/libmxnet.so(+0x26c5a2a) [0x7ff069bdfa2a]
[bt] (4) /anaconda/envs/py35/lib/python3.5/site-packages/mxnet/libmxnet.so(+0x2629529) [0x7ff069b43529]
[bt] (5) /anaconda/envs/py35/lib/python3.5/site-packages/mxnet/libmxnet.so(+0x2632d24) [0x7ff069b4cd24]
[bt] (6) /anaconda/envs/py35/lib/python3.5/site-packages/mxnet/libmxnet.so(+0x2636bc3) [0x7ff069b50bc3]
[bt] (7) /anaconda/envs/py35/lib/python3.5/site-packages/mxnet/libmxnet.so(+0x2636e16) [0x7ff069b50e16]
[bt] (8) /anaconda/envs/py35/lib/python3.5/site-packages/mxnet/libmxnet.so(+0x2633434) [0x7ff069b4d434]
[bt] (9) /anaconda/envs/py35/bin/../lib/libstdc++.so.6(+0xafc5c) [0x7ff0b4361c5c]



## Evaluate

In [None]:
%%time
predictions = np.zeros((0, CLASSES))
labels = np.zeros((0, CLASSES))
for (data, label) in (test_dataloader):        
    data_split = gluon.utils.split_and_load(data, ctx)
    label_split = gluon.utils.split_and_load(label, ctx)  
    outputs = [sig(net(X)) for X in data_split]
    predictions = np.concatenate([predictions, np.concatenate([output.asnumpy() for output in outputs])])
    labels = np.concatenate([labels, np.concatenate([label.asnumpy() for label in label_split])])

In [None]:
# `labels` / `predictions` were gathered from test_dataloader, so this is the test-set score
print("Test AUC: {0:.4f}".format(compute_roc_auc(labels, predictions, CLASSES)))

## Synthetic Data (Pure Training)

In [None]:
%%time
# Test on fake-data -> no IO lag
batch_in_epoch = len(train_dataset)//BATCHSIZE
tot_num = batch_in_epoch * BATCHSIZE
fake_X = np.random.rand(tot_num, 3, 224, 224).astype(np.float32)
fake_y = np.random.rand(tot_num, CLASSES).astype(np.float32) 

In [None]:
# In-memory dataset over the synthetic arrays; shuffled, with an incomplete final batch discarded
train_dataset_synth = ArrayDataset(fake_X, fake_y)
train_dataloader_synth = DataLoader(train_dataset_synth, BATCHSIZE, shuffle=True, num_workers=CPU_COUNT, last_batch='discard')

In [None]:
%%time
n_batch = 50
for e in range(EPOCHS):
    tick = time()
    for i, (data, label) in enumerate(train_dataloader_synth):        
        data_split = gluon.utils.split_and_load(data, ctx)
        label_split = gluon.utils.split_and_load(label, ctx)  
        
        # Printing the loss here to allow data to be loaded asynchronously on the GPU
        if (i%n_batch == 0 and i > 0):
            print('Batch {0}: Sigmoid Binary Cross Entropy Loss: {1:.4f}'.format(i,sum(losses).mean().asscalar()))            
            
        with autograd.record():
            losses = [binary_cross_entropy(net(X), Y) for X, Y in zip(data_split, label_split)]
        for l in losses:
            l.backward()
        trainer.step(data.shape[0])
    

    print('Epoch {0}, {1:.2f} seconds, loss {2:.4f}'.format(e, time()-tick), sum(losses).mean().asscalar())
