# Prepare
- Install OpenVINO (Tested 2019R3.1 on this notebook)
- Install Intel-PyTorch following [here](https://software.intel.com/en-us/articles/getting-started-with-intel-optimization-of-pytorch)
- Clone [this repository](https://github.com/taneishi/CheXNet)
- Put this notebook into the root folder of the cloned repository as above.
- Download the dataset from [here](https://nihcc.app.box.com/v/ChestXray-NIHCC), unzip it, and put the images into "ChestX-ray14/images".

# Export to ONNX
First, we need to export the model to the ONNX format.

In [None]:
"""
The main CheXNet model implementation.
"""
import os
import sys
import numpy as np
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from read_data import ChestXrayDataSet
from sklearn.metrics import roc_auc_score
import timeit

# Checkpoint file that export_onnx() loads when it exists.
CKPT_PATH = './model.pth.tar'

# Number of model outputs and their labels, one label per output.
N_CLASSES = 14
CLASS_NAMES = [
    'Atelectasis',
    'Cardiomegaly',
    'Effusion',
    'Infiltration',
    'Mass',
    'Nodule',
    'Pneumonia',
    'Pneumothorax',
    'Consolidation',
    'Edema',
    'Emphysema',
    'Fibrosis',
    'Pleural_Thickening',
    'Hernia',
]

# Dataset locations relative to the repository root.
DATA_DIR = './ChestX-ray14/images'
TEST_IMAGE_LIST = './ChestX-ray14/labels/test_list.txt'

# Batch dimension of the dummy input used to trace the model for export.
BATCH_SIZE = 32

def export_onnx():
    """Export the CheXNet DenseNet121 model to ``densenet121.onnx``.

    The model is built on CUDA when available (CPU otherwise). If
    ``CKPT_PATH`` exists, its ``state_dict`` is loaded after renaming the
    keys to match the current module layout; if it does not exist, a message
    is printed and the model is exported with whatever weights
    ``DenseNet121`` was constructed with.

    The ONNX file is written to the current working directory with the
    batch axis of both ``input`` and ``output`` declared dynamic.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(device)

    # Build the plain (unwrapped) model. It is deliberately NOT wrapped in
    # DataParallel: the checkpoint keys below have their 'module.' prefix
    # stripped, so loading them into a wrapped model would fail on every key.
    model = DenseNet121(N_CLASSES).to(device)

    if os.path.isfile(CKPT_PATH):
        print("=> loading checkpoint")
        checkpoint = torch.load(CKPT_PATH, map_location=device)
        state_dict = {}
        for k, v in checkpoint['state_dict'].items():
            # Drop the DataParallel prefix and rewrite the dotted
            # 'norm.N' / 'conv.N' names to 'normN' / 'convN'.
            k = k.replace('module.', '')
            k = k.replace('norm.1', 'norm1')
            k = k.replace('norm.2', 'norm2')
            k = k.replace('conv.1', 'conv1')
            k = k.replace('conv.2', 'conv2')
            state_dict[k] = v
        model.load_state_dict(state_dict)
        print("=> loaded checkpoint")
    else:
        print("=> no model found")

    # Switch to inference mode before tracing.
    model.train(False)

    # The tracing input must live on the same device as the model.
    dummy_input = torch.randn(BATCH_SIZE, 3, 224, 224, device=device)
    torch.onnx.export(model,
                      dummy_input,
                      'densenet121.onnx',
                      export_params=True,
                      do_constant_folding=True,
                      input_names=['input'],
                      output_names=['output'],
                      dynamic_axes={'input': {0: 'batch_size'},
                                    'output': {0: 'batch_size'}},
                      verbose=True)



class DenseNet121(nn.Module):
    """DenseNet121 backbone with a sigmoid classification head.

    The network is torchvision's ``densenet121`` (constructed with
    ``pretrained=True``) whose ``classifier`` is replaced by a linear layer
    with ``out_size`` outputs followed by a sigmoid.
    """

    def __init__(self, out_size):
        super().__init__()
        backbone = torchvision.models.densenet121(pretrained=True)
        # Keep the backbone's feature width and swap in the new head.
        feature_dim = backbone.classifier.in_features
        head = nn.Sequential(nn.Linear(feature_dim, out_size), nn.Sigmoid())
        backbone.classifier = head
        self.densenet121 = backbone

    def forward(self, x):
        """Return the backbone's output for ``x``."""
        return self.densenet121(x)

In [None]:
# Run the export; this writes densenet121.onnx to the current directory.
export_onnx()

# Run OpenVINO's Model Optimizer
Then, we can run MO to convert the ONNX to IR.

In [None]:
!python /opt/intel/openvino/deployment_tools/model_optimizer/mo_onnx.py --input_model=densenet121.onnx --data_type=FP32 --batch=1

# Create annotation data
Then, we need to create an annotation file for calibration. Since we use the original dataset here, a custom conversion script is needed. You can download the script from [here](annotation.py).

Note: Maybe you should install some additional libraries as below.
- yamlloader
- nibabel 
- tqdm
- Shapely

In [None]:
# Folder to store annotation file is going to be created here.
!mkdir annotations

In [None]:
!python annotation.py chest_xray --annotation_file ChestX-ray14/labels/val_list.txt -ss 200 -o annotations -a chestx.pickle -m chestx.json --data_dir ChestX-ray14

# Run OpenVINO's Calibrator
Then, we can run the calibrator. We use a custom calibration script here because a custom adapter is needed.

Note: Maybe you should install some additional libraries as below.
- xmltodict
- progress 
- py-cpuinfo

In [None]:
# Note: This command may not work fine on Notebook. If so, you can run this on command line.
!python calibrate.py --config chestx.yml -d def.yml -M /opt/intel/openvino/deployment_tools/model_optimizer --models . --annotations annotations --batch_size 64

# Run inference (SYNC mode)
Finally, we can run inference with the INT8 model. This is synchronous mode.

In [None]:
"""
Synchronous OpenVINO inference and AUROC evaluation for the CheXNet INT8 model.
"""
import os
import sys
import numpy as np

from sklearn.metrics import roc_auc_score, accuracy_score
import timeit

import sys
import os
import numpy as np
import logging as log
from time import time
from openvino.inference_engine import IENetwork, IECore

import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from read_data import ChestXrayDataSet


# Number of model outputs and their labels, one label per output.
N_CLASSES = 14
CLASS_NAMES = [
    'Atelectasis',
    'Cardiomegaly',
    'Effusion',
    'Infiltration',
    'Mass',
    'Nodule',
    'Pneumonia',
    'Pneumothorax',
    'Consolidation',
    'Edema',
    'Emphysema',
    'Fibrosis',
    'Pleural_Thickening',
    'Hernia',
]

# Test images and the list file naming them.
DATA_DIR = './ChestX-ray14/images'
TEST_IMAGE_LIST = './ChestX-ray14/labels/test_list.txt'

# Images per DataLoader batch, and crops taken from each image; the network
# batch size is set to BATCH_SIZE * N_CROPS.
BATCH_SIZE = 32
N_CROPS = 10


def crop(img, top, left, height, width):
    """Crop ``img`` to the ``height`` x ``width`` box whose top-left corner is ``(left, top)``.

    The box is handed to ``img.crop`` as ``(left, upper, right, lower)``.
    """
    right = left + width
    bottom = top + height
    box = (left, top, right, bottom)
    return img.crop(box)

def five_crop(img, size):
    """Return the top-left and top-right corner crops of ``img``.

    Despite its name, this helper only produces the two top corner crops.

    Args:
        img: Image object exposing ``size`` as ``(width, height)`` and a
            ``crop((left, upper, right, lower))`` method.
        size: Crop size as ``(height, width)``, or a single int for a
            square crop.

    Returns:
        Tuple ``(top_left, top_right)`` of cropped images.
    """
    if isinstance(size, int):
        size = (size, size)
    # Only the width is needed: both crops are anchored at the top edge.
    image_width, _image_height = img.size
    crop_height, crop_width = size
    tl = img.crop((0, 0, crop_width, crop_height))
    tr = img.crop((image_width - crop_width, 0, image_width, crop_height))
    return (tl, tr)
    
def run_sync():
    """Evaluate the INT8 model on the test list with blocking inference.

    Reads ``densenet121_i8.xml`` / ``.bin`` from the current directory,
    runs every test batch through ``exec_net.infer`` on the CPU device,
    averages the per-crop outputs of each image, and prints the elapsed
    time followed by the average and per-class AUROC.
    """
    model_xml = "densenet121_i8.xml"
    model_bin = os.path.splitext(model_xml)[0] + ".bin"

    log.info("Creating Inference Engine")
    ie = IECore()
    net = IENetwork(model=model_xml, weights=model_bin)
    log.info("Preparing input blobs")
    input_blob = next(iter(net.inputs))
    out_blob = next(iter(net.outputs))
    # Each image contributes N_CROPS crops and all crops of a batch are fed
    # to the network at once.
    net.batch_size = BATCH_SIZE * N_CROPS

    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])

    # Resize, take ten crops per image, then convert and normalize each crop.
    test_dataset = ChestXrayDataSet(
        data_dir=DATA_DIR,
        image_list_file=TEST_IMAGE_LIST,
        transform=transforms.Compose([
            transforms.Resize(256),
            transforms.TenCrop(224),
            transforms.Lambda(
                lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
            transforms.Lambda(
                lambda crops: torch.stack([normalize(crop) for crop in crops])),
        ]))

    print(test_dataset)
    test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE,
                             shuffle=False, num_workers=10, pin_memory=False)

    gt = torch.FloatTensor()
    pred = torch.FloatTensor()

    log.info("Loading model to the plugin")
    exec_net = ie.load_network(network=net, device_name="CPU")

    now = timeit.default_timer()
    for inp, target in test_loader:
        gt = torch.cat((gt, target), 0)
        bs, n_crops, c, h, w = inp.size()
        # Fold the crop axis into the batch axis.
        images = inp.view(-1, c, h, w).numpy()
        if bs != BATCH_SIZE:
            # The network batch size is fixed, so zero-pad a short (last)
            # batch. Keep the input dtype rather than np.zeros' float64.
            padded = np.zeros((BATCH_SIZE * n_crops, c, h, w), dtype=images.dtype)
            padded[:bs * n_crops] = images
            images = padded
        res = exec_net.infer(inputs={input_blob: images})[out_blob]
        # Average the crop outputs of each image, then drop padding rows.
        res = np.mean(res.reshape(BATCH_SIZE, n_crops, -1), axis=1)
        res = res[:bs]
        pred = torch.cat((pred, torch.from_numpy(res)), 0)

    print('Elapsed time: %0.2f sec.' % (timeit.default_timer() - now))

    AUROCs = compute_AUCs(gt, pred)
    AUROC_avg = np.array(AUROCs).mean()
    print('The average AUROC is {AUROC_avg:.3f}'.format(AUROC_avg=AUROC_avg))
    for i in range(N_CLASSES):
        print('The AUROC of {} is {:.3f}'.format(CLASS_NAMES[i], AUROCs[i]))

def roc_auc_score_FIXED(y_true, y_pred):
    """Return ``roc_auc_score``, falling back to accuracy for one-valued labels.

    When ``y_true`` contains exactly one distinct value, the accuracy of the
    rounded predictions is returned instead of the ROC AUC.
    """
    distinct_labels = np.unique(y_true)
    if len(distinct_labels) != 1:
        return roc_auc_score(y_true, y_pred)
    rounded = np.rint(y_pred)
    return accuracy_score(y_true, rounded)

        
def compute_AUCs(gt, pred):
    """Score every class column of ``pred`` against ``gt``.

    Args:
        gt: Pytorch tensor indexed as [sample, class] holding the true
          labels; it is copied to the CPU before scoring.
        pred: Pytorch tensor indexed as [sample, class] holding the
          prediction scores; it is copied to the CPU before scoring.

    Returns:
        List with one ``roc_auc_score_FIXED`` value per class, in class order.
    """
    labels = gt.cpu().numpy()
    scores = pred.cpu().numpy()
    return [
        roc_auc_score_FIXED(labels[:, cls], scores[:, cls])
        for cls in range(N_CLASSES)
    ]

In [None]:
# Run the synchronous evaluation; prints the elapsed time and AUROC values.
run_sync()

# Run Inference (ASYNC mode)
This is asynchronous mode.

In [None]:
"""
Asynchronous OpenVINO inference and AUROC evaluation for the CheXNet INT8 model.
"""
import os
import sys
import numpy as np

from sklearn.metrics import roc_auc_score, accuracy_score
import timeit

import sys
import os
import numpy as np
import logging as log
from time import time
from openvino.inference_engine import IENetwork, IECore

import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from read_data import ChestXrayDataSet

# for async
#from openvino.tools.benchmark.utils.infer_request_wrap import InferRequestsQueue
# copy code from the file
from datetime import datetime
import threading


class InferReqWrap:
    """Wraps one infer request and accumulates its labels and predictions.

    Each wrapper keeps its own ground-truth and prediction tensors. A batch
    is recorded in both only once its request completes successfully, so
    the two tensors stay row-aligned for every batch that was submitted
    with a ground truth.

    Args:
        request: Infer request object to drive.
        req_id: Identifier passed back as user data to the callback.
        callback_queue: Callable ``(req_id, latency)`` invoked whenever the
            request becomes idle again.
        out_blob: Name of the output blob to read results from.
    """

    def __init__(self, request, req_id, callback_queue, out_blob):
        self.req_id = req_id
        self.request = request
        self.callbackQueue = callback_queue
        self.out_blob = out_blob
        self.__ground_truth = torch.FloatTensor()
        self.__pred = torch.FloatTensor()
        self.__bs = None
        self.__pending_truth = None
        # Register last so the callback can never see a half-built object.
        self.request.set_completion_callback(self.callback, self.req_id)

    def callback(self, status_code, user_data):
        """Record the finished batch and hand the request back to the queue.

        A non-zero ``status_code`` is reported and the batch is dropped
        (neither its output nor its ground truth is recorded).
        """
        try:
            if user_data != self.req_id:
                print('Request ID {} does not correspond to user data {}'.format(self.req_id, user_data))
            elif status_code:
                print('Request {} failed with status code {}'.format(self.req_id, status_code))

            pending_truth, self.__pending_truth = self.__pending_truth, None
            if not status_code:
                res = self.request.outputs[self.out_blob]
                # Average the crop outputs of each image.
                res = res.reshape(BATCH_SIZE, N_CROPS, -1)
                res = np.mean(res, axis=1)
                # Drop rows that belong to zero padding of a short batch.
                res = res[:self.__bs]
                self.__pred = torch.cat((self.__pred, torch.from_numpy(res)), 0)
                if pending_truth is not None:
                    self.__ground_truth = torch.cat((self.__ground_truth, pending_truth), 0)
        finally:
            # Always release the request; otherwise waiters on the queue
            # would block forever after an error in this callback.
            self.callbackQueue(self.req_id, self.request.latency)

    def start_async(self, input_data, bs, ground_truth=None):
        """Start an asynchronous inference for a batch of ``bs`` real images.

        ``ground_truth`` (optional) is held until the request completes and
        is appended to the accumulated labels only on success.
        """
        self.__pending_truth = ground_truth
        self.__bs = bs
        self.request.async_infer(input_data)

    def infer(self, input_data, ground_truth=None):
        """Run a blocking inference and release the request.

        This method does not itself record outputs or ``ground_truth``.
        """
        self.request.infer(input_data)
        self.callbackQueue(self.req_id, self.request.latency)

    def get_ground_truth(self):
        """Return the labels of all successfully completed async batches."""
        return self.__ground_truth

    def get_prediction(self):
        """Return the predictions of all successfully completed async batches."""
        return self.__pred


class InferRequestsQueue:
    """Pool of ``InferReqWrap`` objects with blocking hand-out of idle ones.

    Attributes:
        idleIds: Ids of the requests that are currently idle.
        requests: One ``InferReqWrap`` per underlying infer request.
        times: Latencies reported by finished requests.
        startTime: Earliest time a request was handed out.
        endTime: Latest time a request was returned.
    """

    def __init__(self, requests, out_blob):
        # Create the condition first: the wrappers receive put_idle_request
        # as their completion hook and that method relies on it.
        self.cv = threading.Condition()
        self.idleIds = []
        self.requests = []
        self.times = []
        for req_id, request in enumerate(requests):
            self.requests.append(InferReqWrap(request, req_id, self.put_idle_request, out_blob))
            self.idleIds.append(req_id)
        self.startTime = datetime.max
        self.endTime = datetime.min

    def reset_times(self):
        """Forget all recorded latencies."""
        self.times.clear()

    def get_duration_in_seconds(self):
        """Return the seconds between the first hand-out and the last return."""
        return (self.endTime - self.startTime).total_seconds()

    def put_idle_request(self, req_id, latency):
        """Mark ``req_id`` idle again, record its latency and wake one waiter."""
        with self.cv:
            self.times.append(latency)
            self.idleIds.append(req_id)
            self.endTime = max(self.endTime, datetime.now())
            self.cv.notify()

    def get_idle_request(self):
        """Block until a request is idle, then return its wrapper."""
        with self.cv:
            while len(self.idleIds) == 0:
                self.cv.wait()
            req_id = self.idleIds.pop()
            self.startTime = min(datetime.now(), self.startTime)
        return self.requests[req_id]

    def wait_all(self):
        """Block until every request has been returned to the idle list."""
        with self.cv:
            while len(self.idleIds) != len(self.requests):
                self.cv.wait()



# Number of model outputs and their labels, one label per output.
N_CLASSES = 14
CLASS_NAMES = [
    'Atelectasis',
    'Cardiomegaly',
    'Effusion',
    'Infiltration',
    'Mass',
    'Nodule',
    'Pneumonia',
    'Pneumothorax',
    'Consolidation',
    'Edema',
    'Emphysema',
    'Fibrosis',
    'Pleural_Thickening',
    'Hernia',
]

# Test images and the list file naming them.
DATA_DIR = './ChestX-ray14/images'
TEST_IMAGE_LIST = './ChestX-ray14/labels/test_list.txt'

# Images per DataLoader batch, and crops taken from each image; the network
# batch size is set to BATCH_SIZE * N_CROPS.
BATCH_SIZE = 32
N_CROPS = 10
# Number of infer requests created when the network is loaded.
NUM_REQUESTS = 8

def run_async():
    """Evaluate the INT8 model on the test list with asynchronous inference.

    Reads ``densenet121_i8.xml`` / ``.bin`` from the current directory and
    loads the network on the CPU device with ``NUM_REQUESTS`` infer
    requests. Each test batch is submitted to the next idle request; once
    all requests have finished, the per-request labels and predictions are
    concatenated and the elapsed time and AUROC values are printed.
    """
    model_xml = "densenet121_i8.xml"
    model_bin = os.path.splitext(model_xml)[0] + ".bin"

    log.info("Creating Inference Engine")
    ie = IECore()
    net = IENetwork(model=model_xml, weights=model_bin)
    log.info("Preparing input blobs")
    input_blob = next(iter(net.inputs))
    out_blob = next(iter(net.outputs))
    # Each image contributes N_CROPS crops and all crops of a batch are fed
    # to the network at once.
    net.batch_size = BATCH_SIZE * N_CROPS

    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])

    # Resize, take ten crops per image, then convert and normalize each crop.
    test_dataset = ChestXrayDataSet(
        data_dir=DATA_DIR,
        image_list_file=TEST_IMAGE_LIST,
        transform=transforms.Compose([
            transforms.Resize(256),
            transforms.TenCrop(224),
            transforms.Lambda(
                lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
            transforms.Lambda(
                lambda crops: torch.stack([normalize(crop) for crop in crops])),
        ]))

    print(test_dataset)
    test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE,
                             shuffle=False, num_workers=10, pin_memory=False)

    gt = torch.FloatTensor()
    pred = torch.FloatTensor()

    log.info("Loading model to the plugin")
    config = {"CPU_THROUGHPUT_STREAMS": "8"}
    exec_net = ie.load_network(network=net, device_name="CPU", config=config, num_requests=NUM_REQUESTS)
    infer_requests = exec_net.requests
    print("reqeuest len", len(infer_requests))
    request_queue = InferRequestsQueue(infer_requests, out_blob)

    now = timeit.default_timer()
    for inp, target in test_loader:
        bs, n_crops, c, h, w = inp.size()
        # Fold the crop axis into the batch axis.
        images = inp.view(-1, c, h, w).numpy()
        if bs != BATCH_SIZE:
            # The network batch size is fixed, so zero-pad a short (last)
            # batch. Keep the input dtype rather than np.zeros' float64.
            padded = np.zeros((BATCH_SIZE * n_crops, c, h, w), dtype=images.dtype)
            padded[:bs * n_crops] = images
            images = padded

        # Blocks until one of the requests is idle.
        infer_request = request_queue.get_idle_request()
        infer_request.start_async({input_blob: images}, bs, target)

    # Wait for the in-flight requests, then gather what each one collected.
    # Labels and predictions are read from the same wrapper in the same
    # order, so the concatenated tensors stay row-aligned.
    request_queue.wait_all()
    for wrapper in request_queue.requests:
        gt = torch.cat((gt, wrapper.get_ground_truth()), 0)
        pred = torch.cat((pred, wrapper.get_prediction()), 0)

    print('Elapsed time: %0.2f sec.' % (timeit.default_timer() - now))

    AUROCs = compute_AUCs(gt, pred)
    AUROC_avg = np.array(AUROCs).mean()
    print('The average AUROC is {AUROC_avg:.3f}'.format(AUROC_avg=AUROC_avg))
    for i in range(N_CLASSES):
        print('The AUROC of {} is {:.3f}'.format(CLASS_NAMES[i], AUROCs[i]))

def roc_auc_score_FIXED(y_true, y_pred):
    """Return ``roc_auc_score``, falling back to accuracy for one-valued labels.

    When ``y_true`` contains exactly one distinct value, the accuracy of the
    rounded predictions is returned instead of the ROC AUC.
    """
    distinct_labels = np.unique(y_true)
    if len(distinct_labels) != 1:
        return roc_auc_score(y_true, y_pred)
    rounded = np.rint(y_pred)
    return accuracy_score(y_true, rounded)

        
def compute_AUCs(gt, pred):
    """Score every class column of ``pred`` against ``gt``.

    Args:
        gt: Pytorch tensor indexed as [sample, class] holding the true
          labels; it is copied to the CPU before scoring.
        pred: Pytorch tensor indexed as [sample, class] holding the
          prediction scores; it is copied to the CPU before scoring.

    Returns:
        List with one ``roc_auc_score_FIXED`` value per class, in class order.
    """
    labels = gt.cpu().numpy()
    scores = pred.cpu().numpy()
    return [
        roc_auc_score_FIXED(labels[:, cls], scores[:, cls])
        for cls in range(N_CLASSES)
    ]

In [None]:
# Run the asynchronous evaluation; prints the elapsed time and AUROC values.
run_async()