In [2]:
%%time 

# Importing the libraries

%reload_ext autoreload
%autoreload 2

import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

import sys
sys.path.append('../../')

import funcs 
import load_data
import mlflow
import numpy as np
import pandas as pd
from tqdm import tqdm
import tensorflow as tf
import subprocess
from time import time
import git
import matplotlib.pyplot as plt
from scipy.special import bdtrc

import warnings

warnings.filterwarnings('ignore')

%reload_ext load_data
%reload_ext funcs

CPU times: user 25.9 ms, sys: 22.2 ms, total: 48.1 ms
Wall time: 47.8 ms


In [3]:
def setting_up_gpu():
    """Create a TF1-compat session with capped thread pools and hand it to Keras.

    Both the inter-op and intra-op parallelism thread counts are fixed at 5.
    Despite the name, nothing here selects or configures a GPU device; device
    visibility is controlled separately through CUDA_VISIBLE_DEVICES.

    Returns:
        The ``tf.compat.v1.Session`` registered as the Keras backend session.
    """
    thread_limits = tf.compat.v1.ConfigProto(
        inter_op_parallelism_threads=5,
        intra_op_parallelism_threads=5,
    )
    session = tf.compat.v1.Session(config=thread_limits)

    # Keras picks up this session for all subsequent graph execution.
    tf.compat.v1.keras.backend.set_session(session)
    return session


def save_artifacts(dataframe='' , name='', artifact_path=''):
    """Write a frame to CSV two directories up and log that file to the active MLflow run.

    Args:
        dataframe: Object exposing ``to_csv`` (the default ``''`` is only a
            placeholder and would fail on ``to_csv``).
        name: File name of the CSV; it is written to ``../../<name>``.
        artifact_path: Destination folder inside the run's artifact store.

    The local CSV is left on disk after logging, so a later call with the same
    ``name`` overwrites it.
    """
    csv_path = f'../../{name}'
    dataframe.to_csv(csv_path)
    mlflow.log_artifact(csv_path, artifact_path=artifact_path)

# <span style="color:red; font-family:PT Sans Narrow; font-size:1.3em"> 1. Dataset </span>

We used chest X-ray images for our ensemble experiment because they are among the most frequently used and publicly available radiological modalities. We conducted our experiments on the [CheXpert](https://arxiv.org/abs/1901.07031) data set, which contains 224 316 chest radiographs of 65 240 patients labeled for the presence of the 14 most frequently reported pathologies in clinical reports. A further reason for choosing this data set was to compare the performance of the proposed technique against our previous work, which used a single model. Finally, we compare the effectiveness of our confidence score with that of its counterpart in [Tao et al.](https://link.springer.com/article/10.1007/s10115-020-01475-y).
 
### <span style="color:Orange; font-family:PT Sans narrow; font-size:1.3em"> 1.1.1 Pathologies/Classes  </span>

![pathologies](media/chexpert.png)

### <span style="color:Orange; font-family:PT Sans narrow; font-size:1.3em"> 1.1.2 Setting the order of pathologies  </span>

In [4]:
# Dataset identifier and the directory that holds its files.
# NOTE(review): `dir` shadows the Python builtin of the same name; it is kept
# because other cells pass it on as `dir=dir`.
dataset = 'chexpert'
dir     = f'/groups/jjrodrig/projects/chest/dataset/{dataset}/'

# Fixed column order of the 14 labels; every per-pathology table follows it.
pathologies = [
    "No Finding",
    "Enlarged Cardiomediastinum",
    "Cardiomegaly",
    "Lung Opacity",
    "Lung Lesion",
    "Edema",
    "Consolidation",
    "Pneumonia",
    "Atelectasis",
    "Pneumothorax",
    "Pleural Effusion",
    "Pleural Other",
    "Fracture",
    "Support Devices",
]

# <span style="color:red; font-family:PT Sans Narrow; font-size:1.3em"> 2. MLflow </span>

## <span style="color:Green; font-family:PT Sans Narrow; font-size:1.3em"> 2.1 Creating an SSH tunnel to the server in the background </span>


### <span style="color:Orange; font-family:PT Sans narrow; font-size:1.3em"> 2.1.1 **Step1**: Save the ssh credentials  </span>

ssh-copy-id [username]@[server-ip] 



### <span style="color:Orange; font-family:PT Sans narrow; font-size:1.3em"> 2.1.2 **Step2**: Connect to the server in the background  </span>

ssh -N -L 5000:localhost:5432 [username]@[server-ip] 

In [5]:
# Forward local port 5000 to port 5432 on the server (no remote command: -N).
# Popen already returns without waiting, so no shell, `exec` or trailing `&`
# is needed. Launching ssh directly makes `ssh_session` the tunnel process
# itself, so `ssh_session.terminate()` actually closes the tunnel. With
# `exec ... &` under shell=True the handle pointed at a shell that exits
# immediately and the tunnel was orphaned.
# NOTE(review): the user/host are hard-coded; consider reading them from the
# environment before sharing this notebook.
command     = 'ssh -N -L 5000:localhost:5432 artinmajdi@data7-db1.cyverse.org'
ssh_session = subprocess.Popen(command.split(), stdout=subprocess.PIPE)

### <span style="color:Orange; font-family:PT Sans narrow; font-size:1.3em"> 2.1.3 `Mlflow IDs` for all trained models on Chexpert dataset </span>

In [6]:
# Name of the MLflow experiment.
# NOTE(review): this variable is not read in the visible cells; the MLflow
# setup cell declares the same literal again as `experiment_name`.
model_experiment_name = 'soft_weighted_MV_aim1_3'

# architecture name -> MLflow run id of the trained model
run_id_models = {   'ResNet50V2':       'e98f3c431281497e8155d7384b11cca9',
                    'InceptionV3':      '108673cc2258460d961a1da71942a30d',
                    'InceptionResNetV2':'500f8449a371444188ae9fdee950a0c5',
                    'EfficientNetB0':   'aa829e405f904b7e865b5cc8f621a0e4',
                    'DenseNet121':      'f857040aa1284bdb8b932aacd37379cb',
                    'MobileNetV2':      '24c9eb3e84c1407698dd08a174ae9008',
                    'ResNet101V2':      '5aed61485804409b8dd9ec5419f26697',
                    'DenseNet169':      'afc854bea43e49a08992a2cfb1d94c98',
                    'VGG16':            '255bf0aae1e74b228618ea5c3ce0efb5',
                    'DenseNet201':      '6f72b8f68de74ea5a7027c0f288e1e28'}

# architecture name -> MLflow run id used when data_mode == 'valid_full'
run_id_list_valid_full = {  'ResNet50V2':       '7c50e57cbc574a898f542ebd8603fa6b',
                            'InceptionV3':      'a35b54b6a74747df8388d67ba5f1966c',
                            'InceptionResNetV2':'00619d5cf0a84d82a68f7c97f4c5f575',
                            'EfficientNetB0':   '5969b09160af40339135257e17cc6744',
                            'DenseNet121':      '59eb1cb557af457f8846c7dca5e70090',
                            'MobileNetV2':      '193ad9cf68374a3db1fdbb4473e37bbd',
                            'ResNet101V2':      '3828ffa16106434c9d153341ba5647f3',
                            'DenseNet169':      '3aea8516a073408ba17ff1c4aca6d76a',
                            'VGG16':            'a6da14800fad4b659c5145ec4874b5ea',
                            'DenseNet201':      'c12fb1de7cbd4b5fb6be8bc2a9929a21'}

# architecture name -> MLflow run id used when data_mode == 'valid_142samples'
run_id_list_valid_small = { 'ResNet50V2':       'c17586d793a24d9588b4f591e9ec2583',
                            'InceptionV3':      '81cfa24dee7f489189c5459b33a96655',
                            'InceptionResNetV2':'20aa2cead9a848e596f0866bb7f70d02',
                            'EfficientNetB0':   '8306c39137d44296acf7c646b7696663',
                            'DenseNet121':      '1f3d5510c42b4db294d41133069e2a69',
                            'MobileNetV2':      'c65f33ccffeb4b1f870fb9d761d35ffc',
                            'ResNet101V2':      'ca5fc8e86eb34760a20e45cb11d04c23',
                            'DenseNet169':      '13dad77b9870462fbe449c511d374fae',
                            'VGG16':            '04cf287cb35145c495f1e6851b8ff118',
                            'DenseNet201':      '508afe976f5d4cb093acb4d98c9ce1bb'}

# architecture name -> MLflow run id used when data_mode == 'test'
run_id_list_test = { 'ResNet50V2':       '4b853d6dfdf44f73be4031161e8b714c',
                     'InceptionV3':      '5351397f046f42a0b698d664dd122a22',
                     'InceptionResNetV2':'06eabd82e54745aea0c8258fba710b51',
                     'EfficientNetB0':   'fe42bc598fae4f6d94f72bd664200cad',
                     'DenseNet121':      'adc231040ccc44f8a835e02d42dcca1d',
                     'MobileNetV2':      '4b7f4c085b6e45689ae1b36ef5ada964',
                     'ResNet101V2':      '93ba5a71d9aa45e9888f50d1bcefe449',
                     'DenseNet169':      '64911772e152421ca6ec01de20b39910',
                     'VGG16':            'd970926c29bf4716996204f3206e7a90',
                     'DenseNet201':      '5297e4094113432284a3c6f07c4efb1e'}

# Labelers (architectures) taking part in the ensemble, in declaration order.
model_names_list = list(run_id_models.keys())


# Which split to evaluate: 'valid_full', 'valid_142samples' or 'test'.
data_mode = 'test'

# data_mode -> (run-id table for that split, cap on the number of samples)
_DATA_MODE_SETTINGS = {
    'valid_full':       (run_id_list_valid_full,  1000000),
    'valid_142samples': (run_id_list_valid_small, 1000),
    'test':             (run_id_list_test,        1000000),
}

# Fail here with a clear message instead of leaving `run_id_list` and
# `max_sample` undefined and hitting a NameError several cells later.
if data_mode not in _DATA_MODE_SETTINGS:
    raise ValueError(
        f'Unknown data_mode {data_mode!r}; expected one of {sorted(_DATA_MODE_SETTINGS)}'
    )

run_id_list, max_sample = _DATA_MODE_SETTINGS[data_mode]

## <span style="color:Green; font-family:PT Sans Narrow; font-size:1.3em"> 2.2 Setting-up MLflow Config </span>

In [7]:
# mlflow server/artifact uri
# MLflow tracking-server URI and artifact location, as provided by the project
# helper; the first is used as the tracking URI, the second as the artifact
# location of a newly created experiment.
server, artifact = funcs.mlflow_settings()
mlflow.set_tracking_uri(server)


# Create the experiment on first use, then make it the active experiment.
client = mlflow.tracking.MlflowClient()
experiment_name = 'soft_weighted_MV_aim1_3'

if not client.get_experiment_by_name(experiment_name):
    mlflow.create_experiment(name=experiment_name, artifact_location=artifact)

mlflow.set_experiment(experiment_name=experiment_name)


# Parent run: an existing run is re-opened through its hard-coded id.
# The commented-out line is the call that would create a fresh parent instead.
# parent_run = mlflow.start_run(run_name='final results with bench mark comparisons')
parent_run = mlflow.start_run(run_id='525741547c3d4d30952b33b538bd6643')

# Store the run id in the run's note so it is visible in the MLflow UI.
mlflow.set_tag(f'mlflow.note.content',f'run_id: {parent_run.info.run_id}')

# Child run named after the evaluated split, nested under the parent run.
# NOTE(review): neither run is ended in this cell, so statement order matters
# and re-running the cell while these runs are still active may fail -- confirm.
run = mlflow.start_run(run_name=data_mode, nested=True)
mlflow.set_tag(f'mlflow.note.content',f'run_id: {run.info.run_id}')

### <span style="color:Orange; font-family:PT Sans Narrow; font-size:1.3em"> 2.2.1 Saving the Git commit  (only in Jupyter notebook) </span>

In [8]:
# Locate the enclosing git repository by searching the parent directories,
# read the hash of the object HEAD points to, and attach it to the active
# MLflow run so the results can be traced back to the code version.
repo = git.Repo(search_parent_directories=True)
git_commit_hash = repo.head.object.hexsha
print('git commit hash', git_commit_hash)
mlflow.set_tag('mlflow.source.git.commit', git_commit_hash)

git commit hash 449ea8197ed2e567f38d666cfc2049111e088db3


# <span style="color:red; font-family:PT Sans Narrow; font-size:1.3em"> 3. Downloading pretrained data </span>

## <span style="color:Green; font-family:PT Sans Narrow; font-size:1.3em"> 3.1 Loading the truth labels </span>

In [9]:
%%time

# NOTE(review): these two flags are not read anywhere in this cell; the split
# is chosen from `data_mode` below. They are kept in case other cells use them.
RUN_ON_VALIDATION = True
RUN_ON_TEST       = False

# Map the selected data mode onto the loader mode and onto the key of the
# matching dataframe. Unknown modes raise here instead of leaving `truth`
# undefined.
if 'valid' in data_mode:
    load_mode, split = 'valid_df', 'valid'
elif 'test' in data_mode:
    load_mode, split = 'test', 'test'
else:
    raise ValueError(
        f"Unsupported data_mode {data_mode!r}: expected a name containing 'valid' or 'test'"
    )

Data, Info = load_data.load_chest_xray(dir=dir, dataset=dataset, batch_size=40, mode=load_mode, max_sample=max_sample)

# Ground-truth labels as booleans: a label counts as positive when it is > 0.5.
truth = Data.dataframe[split][pathologies] > 0.5

before sample-pruning
train: (223414, 19)
test: (234, 19)

after sample-pruning
train (certain): (86920, 20)
train (uncertain): (52940, 20)
valid: (21730, 20)
test: (169, 20) 

Found 169 validated image filenames.
Found 169 validated image filenames.
CPU times: user 20.6 s, sys: 127 ms, total: 20.7 s
Wall time: 21.4 s


### <span style="color:orange; font-family:PT Sans Narrow; font-size:1.3em"> 3.1.1 Downloading the probability and uncertainty artifacts </span>

In [10]:
%%time 

# Scratch directory that receives the downloaded run artifacts.
# NOTE(review): absolute, user-specific path; consider making it configurable.
dir_main = '/home/u29/mohammadsmajdi/projects/chest_xray/temp_aim1_3'

# Remove any older folder. The path is passed as an argument list (no shell
# string), the call blocks until removal is finished, and a failure raises.
# The result is deliberately not stored in `ssh_session`, which holds the
# handle of the SSH tunnel.
if os.path.isdir(dir_main):
    subprocess.run(['rm', '-r', dir_main], check=True)

os.mkdir(dir_main)


# Download the artifacts of every model's run into <dir_main>/<model_name>.
# One client is enough for all downloads.
client = mlflow.tracking.MlflowClient()

for model_name in model_names_list:

    run_id    = run_id_list[model_name]
    local_dir = f'{dir_main}/{model_name}'
    os.makedirs(local_dir, exist_ok=True)

    full_path = client.download_artifacts(run_id=run_id, path='', dst_path=local_dir)


# Read the probability and uncertainty tables of each model.
uncertainty_all_models, prob_all_models, prob_all_models_binary = {}, {}, {}
for model_name in model_names_list:

    path_uncertainty = f'{dir_main}/{model_name}/uncertainty_{model_name}.csv'
    path_probability = f'{dir_main}/{model_name}/prob_{model_name}_orig.csv'

    # The CSV rows are re-labelled with the index of `truth` purely by position.
    # NOTE(review): this assumes the rows are stored in the same order as `truth` -- confirm.
    uncertainty_all_models[model_name] = pd.read_csv(path_uncertainty)[pathologies].set_index(truth.index)
    prob_all_models[model_name]        = pd.read_csv(path_probability)[pathologies].set_index(truth.index)

    # Hard label of each model: positive when the predicted probability is > 0.5.
    prob_all_models_binary[model_name] = (prob_all_models[model_name] > 0.5)

    # Both tables go through the same local file name; they are told apart by artifact_path.
    save_artifacts(dataframe=uncertainty_all_models[model_name], name=f'{model_name}.csv', artifact_path='steps/1_uncertainty')
    save_artifacts(dataframe=prob_all_models[model_name],        name=f'{model_name}.csv', artifact_path='steps/1_prob')

CPU times: user 2.7 s, sys: 352 ms, total: 3.05 s
Wall time: 31.2 s


# <span style="color:red; font-family:PT Sans Narrow; font-size:1.3em"> 4. Deleting the bad models </span>

**To-Do:**

- [ ] Report the results without these bad networks (perhaps try to find a case where Tao's method differs from MV).
- [ ] Then, in the discussion, mention that Tao's method improves for some classes when the bad models are added.

In [11]:
# Models excluded from the ensemble.
BAD_MODELS = ('ResNet50V2', 'DenseNet121', 'DenseNet169')

# Filtering instead of list.remove() keeps this cell idempotent: running it a
# second time no longer raises ValueError because a name is already gone.
model_names_list = [name for name in model_names_list if name not in BAD_MODELS]
model_names_list

['InceptionV3',
 'InceptionResNetV2',
 'EfficientNetB0',
 'MobileNetV2',
 'ResNet101V2',
 'VGG16',
 'DenseNet201']

# <span style="color:red; font-family:PT Sans Narrow; font-size:1.3em"> 5. Main Technique (weighted MV) </span>

## <span style="color:Green; font-family:PT Sans Narrow; font-size:1.3em"> 5.1 Measuring accuracy/avg-uncertainty for all labelers </span>

In [12]:
# Per-labeler accuracy: fraction of samples whose hard label matches the truth,
# one row per pathology and one column per model.
accuracy = pd.DataFrame({
    name: (truth == prob_all_models_binary[name]).mean(axis=0)
    for name in model_names_list
})

# Per-labeler average uncertainty for every pathology.
uncertainty_mean = pd.DataFrame({
    name: uncertainty_all_models[name].mean(axis=0)
    for name in model_names_list
})

# Conventional majority vote: each model contributes 1/L of a vote per sample.
prob_MV = sum(
    prob_all_models_binary[name].astype(float) / len(model_names_list)
    for name in model_names_list
)

# A label is positive when more than half of the models vote for it.
prob_MV_binary        = ( prob_MV > 0.5 )
accuracy['MV_binary'] = ( truth == prob_MV_binary ).mean(axis=0)

save_artifacts(dataframe=uncertainty_mean , name='mean.csv'           , artifact_path='steps/1_uncertainty')
save_artifacts(dataframe=prob_MV_binary   , name='prob_MV_binary.csv' , artifact_path='prob_MV')


accuracy.round(decimals=3)

Unnamed: 0,InceptionV3,InceptionResNetV2,EfficientNetB0,MobileNetV2,ResNet101V2,VGG16,DenseNet201,MV_binary
No Finding,0.071,0.071,0.071,0.071,0.071,0.071,0.071,0.071
Enlarged Cardiomediastinum,0.503,0.396,0.391,0.396,0.396,0.391,0.391,0.391
Cardiomegaly,0.692,0.645,0.609,0.627,0.639,0.615,0.633,0.621
Lung Opacity,0.751,0.716,0.355,0.521,0.775,0.645,0.84,0.751
Lung Lesion,0.911,0.746,0.994,0.994,0.876,0.994,0.959,0.976
Edema,0.71,0.734,0.757,0.787,0.68,0.757,0.633,0.74
Consolidation,0.817,0.811,0.811,0.811,0.817,0.811,0.817,0.811
Pneumonia,0.953,0.953,0.953,0.953,0.953,0.953,0.953,0.953
Atelectasis,0.586,0.58,0.58,0.58,0.58,0.58,0.58,0.58
Pneumothorax,0.84,0.805,0.97,0.959,0.663,0.97,0.574,0.888


## <span style="color:Green; font-family:PT Sans Narrow; font-size:1.3em"> 5.2 Measuring weights for each labeler </span>

### <span style="color:Orange; font-family:PT Sans Narrow; font-size:1.3em"> 5.2.1 1st METHOD: </span>

### $ T_{x,a,j} = 1 - u_{j} $

***
### <span style="color:Orange; font-family:PT Sans Narrow; font-size:1.3em"> 5.2.2 2nd METHOD: </span>

$ T_{x,a,j} = \left\{
    \begin{array}{ll}
        1 - u_{j} & y_{a,j} = y'_{j}   \\
        0 & y_{a,j} \neq y'_{j}
    \end{array}
\right. $

***
### $ \hat{w}_{a,j} = \frac {1}{N} \sum_{x} T_{x,a,j}$

In [13]:
# Display the majority-vote hard labels (one row per sample, one column per pathology).
prob_MV_binary

Unnamed: 0,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices
0,True,False,False,False,False,False,False,False,False,False,False,False,False,False
3,True,False,False,True,False,False,False,False,False,False,True,False,False,False
4,True,False,False,False,False,False,False,False,False,False,False,False,False,False
5,True,False,False,False,False,False,False,False,False,False,True,False,False,True
6,True,False,False,False,False,False,False,False,False,False,True,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,True,False,False,False,False,False,False,False,False,False,False,False,False,False
230,True,False,False,False,False,False,False,False,False,False,False,False,False,False
231,True,False,False,False,False,False,False,False,False,False,False,False,False,False
232,True,False,False,False,False,False,False,False,False,False,False,False,False,False


In [14]:
%%time

# Per-sample trust of every labeler.
#   T1 (method 1): 1 - uncertainty
#   T2 (method 2): same as T1, but set to 0 wherever the labeler's hard label
#                  disagrees with the conventional majority vote.
T1, T2 = {}, {}

for model_name in model_names_list:

    certainty         = 1 - uncertainty_all_models[model_name]
    disagrees_with_mv = prob_all_models_binary[model_name] != prob_MV_binary

    T1[model_name] = certainty
    T2[model_name] = certainty.mask(disagrees_with_mv, 0)

    save_artifacts(dataframe=T1[model_name], name=f'T1_{model_name}.csv', artifact_path='steps/2_T1')
    save_artifacts(dataframe=T2[model_name], name=f'T2_{model_name}.csv', artifact_path='steps/2_T2')


# Un-normalized weight of a labeler for a pathology: mean trust over all samples.
w_hat1 = pd.DataFrame({name: trust.mean(axis=0) for name, trust in T1.items()})
w_hat2 = pd.DataFrame({name: trust.mean(axis=0) for name, trust in T2.items()})

save_artifacts(dataframe=w_hat1, name='3_w_hat1.csv', artifact_path='steps')
save_artifacts(dataframe=w_hat2, name='3_w_hat2.csv', artifact_path='steps')

w_hat1.round(decimals=2)

CPU times: user 1.31 s, sys: 107 ms, total: 1.42 s
Wall time: 14.6 s


Unnamed: 0,InceptionV3,InceptionResNetV2,EfficientNetB0,MobileNetV2,ResNet101V2,VGG16,DenseNet201
No Finding,0.57,0.57,0.58,0.57,0.57,0.57,0.57
Enlarged Cardiomediastinum,0.91,0.95,0.92,0.97,0.93,0.95,0.92
Cardiomegaly,0.87,0.83,0.82,0.96,0.91,0.89,0.91
Lung Opacity,0.8,0.78,0.82,0.88,0.78,0.74,0.75
Lung Lesion,0.93,0.86,0.89,0.98,0.91,0.96,0.92
Edema,0.82,0.89,0.86,0.88,0.85,0.8,0.8
Consolidation,0.95,0.95,0.87,0.96,0.95,0.94,0.96
Pneumonia,0.97,0.92,0.96,0.99,0.94,0.96,0.96
Atelectasis,0.91,0.98,0.9,0.94,0.92,0.9,0.91
Pneumothorax,0.9,0.9,0.99,0.98,0.84,0.89,0.82


## <span style="color:Green; font-family:PT Sans Narrow; font-size:1.3em"> 5.3 Measuring average weight </span>


## $ w_{a,j} = \frac {\hat{w}_{a,j}} {\sum_{a=1}^{L} \hat{w}_{a,j}} $

In [15]:
# Normalize the weights so that, for every pathology (row), the weights of
# all labelers (columns) sum to 1.
w_hat1_row_total = w_hat1.sum(axis=1)
w_hat2_row_total = w_hat2.sum(axis=1)

weights1 = w_hat1.div(w_hat1_row_total, axis=0)
weights2 = w_hat2.div(w_hat2_row_total, axis=0)

save_artifacts(dataframe=weights1, name='4_final_weights1.csv', artifact_path='steps')
save_artifacts(dataframe=weights2, name='4_final_weights2.csv', artifact_path='steps')

weights1.round(decimals=3)

Unnamed: 0,InceptionV3,InceptionResNetV2,EfficientNetB0,MobileNetV2,ResNet101V2,VGG16,DenseNet201
No Finding,0.142,0.142,0.147,0.142,0.142,0.142,0.142
Enlarged Cardiomediastinum,0.139,0.145,0.14,0.148,0.142,0.145,0.14
Cardiomegaly,0.141,0.133,0.133,0.155,0.147,0.143,0.147
Lung Opacity,0.144,0.141,0.147,0.159,0.14,0.133,0.136
Lung Lesion,0.144,0.134,0.138,0.152,0.141,0.148,0.143
Edema,0.139,0.15,0.147,0.149,0.143,0.136,0.136
Consolidation,0.144,0.145,0.132,0.146,0.145,0.143,0.145
Pneumonia,0.145,0.138,0.144,0.147,0.14,0.143,0.143
Atelectasis,0.141,0.151,0.14,0.146,0.143,0.14,0.141
Pneumothorax,0.143,0.143,0.157,0.155,0.133,0.141,0.129


## <span style="color:Green; font-family:PT Sans Narrow; font-size:1.3em"> 5.4 Measuring weighted majority vote </span>

### <span style="color:orange; font-family:PT Sans Narrow; font-size:1.3em"> 5.4.1 Applying the weights to predicted probabilities: </span>

$ \hat{p}^{prob}_{j} = \sum_{a=1}^{L} p_{a,j} * w_{a,j} $



### <span style="color:orange; font-family:PT Sans Narrow; font-size:1.3em"> 5.4.2 Applying the weights to predicted labels: </span>

$ \hat{p}^{binary}_{j} = \sum_{a=1}^{L} y_{a,j} * w_{a,j}$ where $y_{a,j} = (p_{a,j} > 0.5) $

In [16]:
%%time

def measuring_weighted_mv(weights, model_names=None, probabilities=None):
    """Combine the labelers' predictions into weighted soft and hard votes.

    Args:
        weights: DataFrame indexed by pathology with one column per model,
            holding the weight of that model for that pathology.
        model_names: Models to combine. Defaults to the notebook-level
            ``model_names_list`` (the original behaviour).
        probabilities: Mapping from model name to that model's
            sample-by-pathology probability DataFrame. Defaults to the
            notebook-level ``prob_all_models`` (the original behaviour).

    Returns:
        Tuple ``(weighted_prob, weighted_binary)``: the weighted sum of the
        predicted probabilities, and the weighted sum of the hard labels
        (probability > 0.5). Both are ``0`` when no model is given.
    """
    # Fall back to the notebook globals only when the caller passes nothing,
    # so the function can also be used and tested without hidden state.
    if model_names is None:
        model_names = model_names_list
    if probabilities is None:
        probabilities = prob_all_models

    prob_MV_weighted_prob, prob_MV_weighted_binary = 0, 0

    for model_name in model_names:
        model_prob   = probabilities[model_name]
        model_weight = weights[model_name]   # Series indexed by pathology

        # frame * Series matches the Series index against the frame's columns,
        # i.e. every pathology column is scaled by that pathology's weight.
        prob_MV_weighted_prob   +=  model_prob        * model_weight
        prob_MV_weighted_binary += (model_prob > 0.5) * model_weight

    return prob_MV_weighted_prob, prob_MV_weighted_binary

# Weighted votes for both weighting methods (1: plain, 2: penalized).
prob_MV_weighted_prob1, prob_MV_weighted_binary1 = measuring_weighted_mv(weights=weights1)
prob_MV_weighted_prob2, prob_MV_weighted_binary2 = measuring_weighted_mv(weights=weights2)

# Log all four tables under the same artifact folder, in a fixed order.
weighted_vote_tables = [
    (prob_MV_weighted_prob1,   'prob_MV_weighted_prob1.csv'),
    (prob_MV_weighted_binary1, 'prob_MV_weighted_binary1.csv'),
    (prob_MV_weighted_prob2,   'prob_MV_weighted_prob2.csv'),
    (prob_MV_weighted_binary2, 'prob_MV_weighted_binary2.csv'),
]
for vote_table, file_name in weighted_vote_tables:
    save_artifacts(dataframe=vote_table, name=file_name, artifact_path='prob_MV')

prob_MV_weighted_prob1.round(decimals=3)

CPU times: user 302 ms, sys: 28.8 ms, total: 331 ms
Wall time: 3.92 s


Unnamed: 0,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices
0,0.995,0.057,0.148,0.278,0.081,0.181,0.085,0.051,0.099,0.095,0.218,0.031,0.078,0.243
3,0.992,0.119,0.253,0.608,0.215,0.366,0.172,0.111,0.193,0.221,0.440,0.090,0.209,0.509
4,0.990,0.048,0.094,0.147,0.054,0.113,0.065,0.027,0.067,0.042,0.143,0.031,0.053,0.158
5,0.994,0.139,0.298,0.470,0.201,0.377,0.137,0.107,0.178,0.217,0.393,0.117,0.219,0.450
6,0.994,0.086,0.157,0.432,0.090,0.291,0.096,0.057,0.140,0.160,0.387,0.048,0.119,0.395
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,0.990,0.057,0.108,0.194,0.061,0.161,0.073,0.034,0.089,0.070,0.177,0.034,0.068,0.208
230,0.996,0.049,0.101,0.197,0.055,0.149,0.065,0.032,0.081,0.064,0.168,0.026,0.059,0.196
231,0.994,0.081,0.197,0.327,0.082,0.275,0.082,0.059,0.111,0.115,0.232,0.036,0.084,0.294
232,0.996,0.043,0.088,0.145,0.048,0.109,0.059,0.025,0.061,0.044,0.140,0.024,0.050,0.158


## <span style="color:Green; font-family:PT Sans Narrow; font-size:1.3em"> 5.5 Measuring the new accuracies </span>

In [17]:
# Accuracy of every weighted vote: threshold at 0.5, compare with the truth and
# average over the samples. The insertion order fixes the order of the new columns.
weighted_votes = {
    'MV_weighted_prob1':   prob_MV_weighted_prob1,
    'MV_weighted_binary1': prob_MV_weighted_binary1,
    'MV_weighted_prob2':   prob_MV_weighted_prob2,
    'MV_weighted_binary2': prob_MV_weighted_binary2,
}

for column_name, votes in weighted_votes.items():
    predicted_label = votes > 0.5
    accuracy[column_name] = (predicted_label == truth).mean(axis=0)

save_artifacts(dataframe=accuracy, name='accuracy.csv', artifact_path='')

accuracy.round(decimals=2)

Unnamed: 0,InceptionV3,InceptionResNetV2,EfficientNetB0,MobileNetV2,ResNet101V2,VGG16,DenseNet201,MV_binary,MV_weighted_prob1,MV_weighted_binary1,MV_weighted_prob2,MV_weighted_binary2
No Finding,0.07,0.07,0.07,0.07,0.07,0.07,0.07,0.07,0.07,0.07,0.07,0.07
Enlarged Cardiomediastinum,0.5,0.4,0.39,0.4,0.4,0.39,0.39,0.39,0.39,0.39,0.39,0.39
Cardiomegaly,0.69,0.64,0.61,0.63,0.64,0.62,0.63,0.62,0.65,0.62,0.63,0.62
Lung Opacity,0.75,0.72,0.36,0.52,0.78,0.64,0.84,0.75,0.73,0.75,0.73,0.75
Lung Lesion,0.91,0.75,0.99,0.99,0.88,0.99,0.96,0.98,0.99,0.98,0.99,0.98
Edema,0.71,0.73,0.76,0.79,0.68,0.76,0.63,0.74,0.77,0.74,0.77,0.74
Consolidation,0.82,0.81,0.81,0.81,0.82,0.81,0.82,0.81,0.81,0.81,0.81,0.81
Pneumonia,0.95,0.95,0.95,0.95,0.95,0.95,0.95,0.95,0.95,0.95,0.95,0.95
Atelectasis,0.59,0.58,0.58,0.58,0.58,0.58,0.58,0.58,0.58,0.58,0.58,0.58
Pneumothorax,0.84,0.8,0.97,0.96,0.66,0.97,0.57,0.89,0.95,0.89,0.96,0.89


In [18]:
# Accuracy columns that go into the report, rounded to two decimals and renamed:
#   MV_weighted_prob1 -> proposed
#   MV_weighted_prob2 -> proposed_penalized
#   MV_binary         -> MV
reported_results = (
    accuracy[['MV_weighted_prob1' , 'MV_weighted_prob2' , 'MV_binary']]
    .round(decimals=2)
    .rename(columns={'MV_weighted_prob1':'proposed' , 'MV_weighted_prob2':'proposed_penalized' , 'MV_binary':'MV'})
)
reported_results

Unnamed: 0,method1,method2,MV
No Finding,0.07,0.07,0.07
Enlarged Cardiomediastinum,0.39,0.39,0.39
Cardiomegaly,0.65,0.63,0.62
Lung Opacity,0.73,0.73,0.75
Lung Lesion,0.99,0.99,0.98
Edema,0.77,0.77,0.74
Consolidation,0.81,0.81,0.81
Pneumonia,0.95,0.95,0.95
Atelectasis,0.58,0.58,0.58
Pneumothorax,0.95,0.96,0.89


In [24]:
# Display the reported accuracies.
# NOTE(review): the output saved under this cell has only the `proposed` and
# `MV` columns, which matches the commented-out drop/rename chain rather than
# the bare expression -- the saved output looks stale; re-run to confirm.
reported_results # .drop(columns=['proposed']).rename(columns={'proposed_penalized':'proposed'})

Unnamed: 0,proposed,MV
No Finding,0.07,0.07
Enlarged Cardiomediastinum,0.39,0.39
Cardiomegaly,0.63,0.62
Lung Opacity,0.73,0.75
Lung Lesion,0.99,0.98
Edema,0.77,0.74
Consolidation,0.81,0.81
Pneumonia,0.95,0.95
Atelectasis,0.58,0.58
Pneumothorax,0.96,0.89


# <span style="color:red; font-family:PT Sans Narrow; font-size:1.3em"> 6. Confidence Score </span>
## <span style="color:Green; font-family:PT Sans Narrow; font-size:1.3em"> 6.1 Confidence score: _Weighted-soft-MV_ </span>

In [18]:
# reversing the disease/model order
delta = { disease: pd.DataFrame() for disease in pathologies }

for disease in pathologies:
    for model_name in model_names_list:

        delta[disease][model_name] = prob_all_models_binary[model_name][disease]

# delta[disease]

### <span style="color:orange; font-family:PT Sans Narrow; font-size:1.3em"> 6.1.1 measuring the certainty score of majority class $ P_{x,j} $ </span>

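In the actual formula, the numerator is divided by `weights.sum(axis=1)`. Because the normalized weights sum to 1, this denominator equals 1 (or $L$ once the weights are rescaled by the number of labelers $L$, as in the code below).

pandas also automatically converts the boolean values in delta\[disease\] to float before the multiplication.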

## $ P_{x,j} = \frac { \sum_{a=1}^{L} {\omega_{a,j} \delta(y_{a,j},+)} } { \sum_{a=1}^{L} {\omega_{a,j} \delta(y_{a,j},+)}  +  \sum_{a=1}^{L} {\omega_{a,j} \delta(y_{a,j},-)} }$

In [19]:
# For L labelers, weights needs to sum to L. therefore we multiply the weights by L.
L = len(model_names_list)

delta_weighted_pos1 = { disease:     delta[disease]   * weights1.T[disease] * L for disease in pathologies}
delta_weighted_neg1 = { disease: ( ~ delta[disease] ) * weights1.T[disease] * L for disease in pathologies}

delta_weighted_pos2 = { disease:     delta[disease]   * weights2.T[disease] * L for disease in pathologies}
delta_weighted_neg2 = { disease: ( ~ delta[disease] ) * weights2.T[disease] * L for disease in pathologies}

P1, P2 = pd.DataFrame(), pd.DataFrame()
for disease in pathologies:

    denominator = L # weights.sum(axis=1) 
    P1[disease] = delta_weighted_pos1[disease].sum(axis=1) / denominator
    P2[disease] = delta_weighted_pos2[disease].sum(axis=1) / denominator

P1.round(decimals=3)

Unnamed: 0,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices
0,1.0,0.087,0.085,0.361,0.090,0.178,0.0,0.000,0.000,0.000,0.367,0.000,0.083,0.469
3,1.0,0.087,0.180,0.682,0.185,0.291,0.0,0.000,0.000,0.375,0.682,0.093,0.181,0.566
4,1.0,0.000,0.000,0.181,0.000,0.000,0.0,0.000,0.000,0.000,0.200,0.000,0.000,0.197
5,1.0,0.087,0.276,0.565,0.290,0.477,0.0,0.000,0.000,0.371,0.669,0.000,0.276,0.668
6,1.0,0.087,0.085,0.565,0.090,0.373,0.0,0.000,0.000,0.183,0.669,0.000,0.181,0.668
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,1.0,0.000,0.000,0.361,0.000,0.087,0.0,0.000,0.000,0.000,0.378,0.000,0.000,0.373
230,1.0,0.000,0.000,0.361,0.000,0.178,0.0,0.000,0.092,0.000,0.367,0.000,0.000,0.373
231,1.0,0.087,0.181,0.462,0.100,0.178,0.0,0.091,0.000,0.086,0.467,0.000,0.000,0.469
232,1.0,0.087,0.085,0.269,0.000,0.087,0.0,0.000,0.000,0.000,0.278,0.000,0.000,0.281


### <span style="color:orange; font-family:PT Sans Narrow; font-size:1.3em"> 6.1.2 certainty of majority class for both positive & negative labels </span>

$F_{x,j} = max \Big(P_{x,j} , 1-P_{x,j} \Big)$

In [20]:
# Certainty of the winning class: keep P where it is at least 0.5, otherwise
# use 1 - P (the share of the negative votes).
# Note: F1/F2 are the confidence scores of weighting methods 1 and 2, not F-scores.
F1 = P1.mask(P1 < 0.5, 1 - P1)
F2 = P2.mask(P2 < 0.5, 1 - P2)

F1.round(decimals=3)

Unnamed: 0,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices
0,1.0,0.913,0.915,0.639,0.910,0.822,1.0,1.000,1.000,1.000,0.633,1.000,0.917,0.531
3,1.0,0.913,0.820,0.682,0.815,0.709,1.0,1.000,1.000,0.625,0.682,0.907,0.819,0.566
4,1.0,1.000,1.000,0.819,1.000,1.000,1.0,1.000,1.000,1.000,0.800,1.000,1.000,0.803
5,1.0,0.913,0.724,0.565,0.710,0.523,1.0,1.000,1.000,0.629,0.669,1.000,0.724,0.668
6,1.0,0.913,0.915,0.565,0.910,0.627,1.0,1.000,1.000,0.817,0.669,1.000,0.819,0.668
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,1.0,1.000,1.000,0.639,1.000,0.913,1.0,1.000,1.000,1.000,0.622,1.000,1.000,0.627
230,1.0,1.000,1.000,0.639,1.000,0.822,1.0,1.000,0.908,1.000,0.633,1.000,1.000,0.627
231,1.0,0.913,0.819,0.538,0.900,0.822,1.0,0.909,1.000,0.914,0.533,1.000,1.000,0.531
232,1.0,0.913,0.915,0.731,1.000,0.913,1.0,1.000,1.000,1.000,0.722,1.000,1.000,0.719


### <span style="color:orange; font-family:PT Sans Narrow; font-size:1.3em"> 6.1.3 Average confidence score: _Weighted-soft-MV_ </span>

In [21]:
# Average Weighted-soft-MV confidence per pathology (mean of F1 over the samples,
# rounded to two decimals), stored next to the reported accuracies.
reported_results['F1'] = F1.mean().round(decimals=2)
reported_results['F1']

No Finding                    1.00
Enlarged Cardiomediastinum    0.94
Cardiomegaly                  0.85
Lung Opacity                  0.74
Lung Lesion                   0.90
Edema                         0.76
Consolidation                 1.00
Pneumonia                     0.99
Atelectasis                   0.99
Pneumothorax                  0.82
Pleural Effusion              0.75
Pleural Other                 0.96
Fracture                      0.88
Support Devices               0.75
Name: F1, dtype: float64

## <span style="color:Green; font-family:PT Sans Narrow; font-size:1.3em"> 6.2 Confidence score: _Beta-soft-MV_ </span>

> **Note:** This is measured only for METHOD1 since it has a higher accuracy

### <span style="color:orange; font-family:PT Sans Narrow; font-size:1.3em"> 6.2.1  measuring the certainty score of majority class  $f_{x,j}^{\pm}$ </span>

$f^{+}_{x,j} := 1+\sum_{a=1}^{L} \omega_{a,j}  \delta \big( y_{a,j},+ \big) $

$f^{-}_{x,j} := 1+\sum_{a=1}^{L} \omega_{a,j}  \delta \big( y_{a,j},- \big) $

In [22]:
# f+ / f-: one plus the total (L-scaled, method-1) weight of the models that
# voted positive / negative, per sample and pathology.
f_pos = pd.DataFrame({disease: 1 + delta_weighted_pos1[disease].sum(axis=1) for disease in pathologies})
f_neg = pd.DataFrame({disease: 1 + delta_weighted_neg1[disease].sum(axis=1) for disease in pathologies})

# The weights are normalized in floating point, so a total that is
# mathematically an integer can come out as e.g. 8.999999999999998 and would be
# floored one step too low. Rounding away that noise first keeps the integer
# parts stable; genuinely fractional totals are unaffected.
FLOAT_NOISE_DECIMALS = 9

# Integer parts used as the (k, n) arguments of the binomial tail below.
k_df = f_neg.round(decimals=FLOAT_NOISE_DECIMALS).floordiv(1)
n_df = (f_neg + f_pos).round(decimals=FLOAT_NOISE_DECIMALS).floordiv(1) - 1

### <span style="color:orange; font-family:PT Sans Narrow; font-size:1.3em"> 6.2.2 Measuring the regularized incomplete beta function </span>


$I_{x} (\alpha,\beta)=F(x;\alpha,\beta)=\frac{ B(x;\alpha,\beta) }{B(\alpha,\beta)} $

$ bdtrc(k,n,p) = I_{p} \Big( \lfloor {k} \rfloor + 1 , n - \lfloor {k} \rfloor \Big) = \sum_{j = \lfloor {k} \rfloor + 1} ^ {n} \binom {n}{j}p^{j}(1-p)^{n-j} $

[bdtrc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.bdtrc.html)

In [23]:
# Binomial survival function bdtrc(k, n, p) for every sample and pathology, with p = 0.5.
# bdtrc is a NumPy ufunc, so the whole table is evaluated in one call instead
# of one `.loc` lookup per cell; this also works when the index has duplicate labels.
p = 0.5

I = k_df.copy()
I[pathologies] = bdtrc(
    k_df[pathologies].to_numpy(dtype=float),
    n_df[pathologies].to_numpy(dtype=float),
    p,
)

I.round(decimals=3)

Unnamed: 0,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices
0,0.989,0.000,0.000,0.113,0.000,0.001,0.0,0.0,0.0,0.000,0.113,0.0,0.000,0.274
3,0.989,0.000,0.006,0.623,0.006,0.011,0.0,0.0,0.0,0.113,0.726,0.0,0.006,0.500
4,0.989,0.000,0.000,0.006,0.000,0.000,0.0,0.0,0.0,0.000,0.033,0.0,0.000,0.006
5,0.989,0.000,0.033,0.377,0.033,0.172,0.0,0.0,0.0,0.055,0.726,0.0,0.033,0.726
6,0.989,0.000,0.000,0.377,0.000,0.055,0.0,0.0,0.0,0.006,0.726,0.0,0.006,0.726
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,0.989,0.000,0.000,0.113,0.000,0.000,0.0,0.0,0.0,0.000,0.113,0.0,0.000,0.113
230,0.989,0.000,0.000,0.113,0.000,0.001,0.0,0.0,0.0,0.000,0.113,0.0,0.000,0.113
231,0.989,0.000,0.006,0.274,0.000,0.001,0.0,0.0,0.0,0.000,0.274,0.0,0.000,0.274
232,0.989,0.000,0.000,0.033,0.000,0.000,0.0,0.0,0.0,0.000,0.011,0.0,0.000,0.033


### <span style="color:orange; font-family:PT Sans Narrow; font-size:1.3em"> 6.2.3 Confidence score </span>

$F_{x,j} = max(I_{p} , 1-I_{p})$

In [24]:
# Beta-soft-MV confidence: keep I where it is at least 0.5, otherwise use 1 - I.
F = I.mask(I < 0.5, 1 - I)

F.round(decimals=3)

Unnamed: 0,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices
0,0.989,1.000,1.000,0.887,1.000,0.999,1.0,1.0,1.0,1.000,0.887,1.0,1.000,0.726
3,0.989,1.000,0.994,0.623,0.994,0.989,1.0,1.0,1.0,0.887,0.726,1.0,0.994,0.500
4,0.989,1.000,1.000,0.994,1.000,1.000,1.0,1.0,1.0,1.000,0.967,1.0,1.000,0.994
5,0.989,1.000,0.967,0.623,0.967,0.828,1.0,1.0,1.0,0.945,0.726,1.0,0.967,0.726
6,0.989,1.000,1.000,0.623,1.000,0.945,1.0,1.0,1.0,0.994,0.726,1.0,0.994,0.726
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,0.989,1.000,1.000,0.887,1.000,1.000,1.0,1.0,1.0,1.000,0.887,1.0,1.000,0.887
230,0.989,1.000,1.000,0.887,1.000,0.999,1.0,1.0,1.0,1.000,0.887,1.0,1.000,0.887
231,0.989,1.000,0.994,0.726,1.000,0.999,1.0,1.0,1.0,1.000,0.726,1.0,1.000,0.726
232,0.989,1.000,1.000,0.967,1.000,1.000,1.0,1.0,1.0,1.000,0.989,1.0,1.000,0.967


### <span style="color:orange; font-family:PT Sans Narrow; font-size:1.3em"> 6.2.4 Average confidence score: _Beta-soft-MV_ </span>

In [25]:
# Average Beta-soft-MV confidence per pathology (mean of F over the samples,
# rounded to two decimals).
# NOTE(review): the column is called 'F2' but it is filled from `F`, not from
# the variable `F2` (the method-2 Weighted-soft-MV confidence) -- easy to mix up.
reported_results['F2'] = F.mean().round(decimals=2)
reported_results['F2']

No Finding                    0.99
Enlarged Cardiomediastinum    1.00
Cardiomegaly                  0.94
Lung Opacity                  0.87
Lung Lesion                   0.98
Edema                         0.89
Consolidation                 1.00
Pneumonia                     1.00
Atelectasis                   1.00
Pneumothorax                  0.93
Pleural Effusion              0.87
Pleural Other                 1.00
Fracture                      0.96
Support Devices               0.87
Name: F2, dtype: float64

## <span style="color:Green; font-family:PT Sans Narrow; font-size:1.3em"> 6.3 Final results to be reported </span>

In [26]:
# Display the summary table collected so far (rows: pathologies, columns: methods / confidence scores).
reported_results

Unnamed: 0,method1,method2,MV,F1,F2
No Finding,0.07,0.07,0.07,1.0,0.99
Enlarged Cardiomediastinum,0.39,0.39,0.39,0.94,1.0
Cardiomegaly,0.67,0.66,0.64,0.85,0.94
Lung Opacity,0.82,0.8,0.8,0.74,0.87
Lung Lesion,0.99,0.99,0.98,0.9,0.98
Edema,0.72,0.72,0.74,0.76,0.89
Consolidation,0.81,0.81,0.81,1.0,1.0
Pneumonia,0.95,0.95,0.95,0.99,1.0
Atelectasis,0.58,0.58,0.58,0.99,1.0
Pneumothorax,0.93,0.93,0.88,0.82,0.93


# <span style="color:red; font-family:PT Sans Narrow; font-size:1.3em"> 7. Benchmark </span>

## <span style="color:Green; font-family:PT Sans Narrow; font-size:1.3em"> 7.1 Overall quality of different workers </span>

Estimating the overall qualities of different workers is not a new research topic in the crowdsourcing learning community. To the best of the authors’ knowledge, there exist many state-of-the-art algorithms, such as Dawid–Skene [1], ZenCrowd , KOS [9], and DEW [15, 23]. However, none of them exploit feature vectors of instances, which makes it impossible to take full advantage of the statistical characteristics of the available data when evaluating the label qualities. According to the observation by [30], in traditional supervised learning, there exists a schema to exhibit the relationship between data features and the ground-truth labels. For example, suppose there exists a high-quality worker; the data schema will be well-inherited in their labels, because the difference between their labels and ground-truth labels is small. Meanwhile, suppose there exists a low-quality worker, the data schema may be broken because their labels will be very different from the ground-truth labels. Therefore, we can estimate the overall quality of a worker by evaluating how well the schema is inherited in their labels. Specifically, we can first extract all training instances’ feature vectors and the corresponding crowd labels provided by the jth worker to form a new single-label data set. Then, we use tenfold cross-validation to evaluate the classification accuracy of a classifier. In theory, this classifier can be any classifier. Finally, we define the overall quality of the jth worker as the classification accuracy of the built classifier. The detailed formula can be expressed as

### $ \tau_{a} = \frac {\sum_{i=1}^{n} \delta \Big( f_{a}(x_{i}) , I_{i,a}  \Big)}{n} $
where $n$ is the size of the extracted data set and $f_{a}(x_{i})$ is the class label of the feature vector $x_{i}$ predicted by the classifier built for worker $a$.

In [27]:
# Overall quality of each labeler: the accuracy table restricted to the
# columns of the individual models.
tau = accuracy.loc[:, model_names_list]

## <span style="color:Green; font-family:PT Sans Narrow; font-size:1.3em"> 7.2 Specific quality of the $a^{th}$ worker for instance $x$ ($s_{x,a}$) </span>

## $ s_{x,a} = \sum_{a'=1,\ a' \neq a}^{L} \delta \Big( l_{x,a},l_{x,a'} \Big) $

In [28]:
# number of labelers
M = len(model_names_list)

# number of true and false labels for each class and sample
true_counts  = pd.DataFrame({ disease: delta[disease].sum(axis=1) for disease in pathologies })
false_counts = M - true_counts


# Measuring the "specific quality of instances": for every sample and labeler,
# the number of OTHER labelers that gave the same label.
#   label is True  -> (number of True labels)  - 1
#   label is False -> (number of False labels) - 1
# Computed with integer arithmetic on whole frames so the result keeps a numeric
# dtype (per-column Series.where on boolean data mixes bools and ints in one column).
s = {}
for disease in pathologies:
    votes = delta[disease][model_names_list].astype(int)   # 1 where the label is True, 0 otherwise

    peers_if_true  = true_counts[disease]  - 1
    peers_if_false = false_counts[disease] - 1
    agreement = votes.mul(peers_if_true, axis=0) + (1 - votes).mul(peers_if_false, axis=0)

    # Start from a copy of delta so any column outside model_names_list is carried over unchanged.
    s[disease] = delta[disease].copy()
    s[disease][model_names_list] = agreement

## $ \gamma_{x,a} = \tau_{a}(1 + s_{x,a}^{2}) $

In [29]:
# gamma combines the two quality measures: each labeler's overall quality for the
# disease (a row of tau) scales 1 + s^2, broadcast across that labeler's column.
gamma = {
    disease: (1 + s[disease] ** 2) * tau.loc[disease]
    for disease in pathologies
}

## $ w'_{x,a} = \frac {1} {1 + e^{-\gamma_{x,a}} } $

In [30]:
%%time 

W_hat_Tao_et_al = {}
for disease in pathologies:

    W_hat_Tao_et_al[disease] = gamma[disease].applymap(lambda x: 1/(1 + np.exp(-x)) )

# W_hat_Tao_et_al[disease]

CPU times: user 69.8 ms, sys: 2.97 ms, total: 72.8 ms
Wall time: 74 ms


### $ Z = \frac {1}{L} \sum_{a=1}^{L}w'_{x,a}  $
Z is a normalization constant, which ensures that the sum of all crowd label weights for instance $x$ is still equal to $L$, the number of workers.

In [31]:
# Normalization constant per sample and disease: the mean of the unnormalized
# weights across all labelers.
mean_weight_per_disease = {}
for disease in pathologies:
    mean_weight_per_disease[disease] = W_hat_Tao_et_al[disease].mean(axis=1)

z = pd.DataFrame(mean_weight_per_disease)

z.round(decimals=2)

Unnamed: 0,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices
0,1.0,0.97,0.97,1.00,0.96,0.94,1.0,1.00,1.00,1.00,1.00,1.00,0.96,1.00
3,1.0,0.97,0.96,0.98,0.96,0.98,1.0,1.00,1.00,1.00,0.99,0.97,0.96,1.00
4,1.0,1.00,1.00,0.96,1.00,1.00,1.0,1.00,1.00,1.00,0.94,1.00,1.00,0.95
5,1.0,0.97,0.99,1.00,0.99,1.00,1.0,1.00,1.00,1.00,0.99,1.00,0.99,0.98
6,1.0,0.97,0.97,1.00,0.96,1.00,1.0,1.00,1.00,0.95,0.99,1.00,0.96,0.98
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,1.0,1.00,1.00,1.00,1.00,0.96,1.0,1.00,1.00,1.00,1.00,1.00,1.00,1.00
230,1.0,1.00,1.00,1.00,1.00,0.94,1.0,1.00,0.97,1.00,1.00,1.00,1.00,1.00
231,1.0,0.97,0.96,1.00,0.97,0.94,1.0,0.97,1.00,0.96,1.00,1.00,1.00,1.00
232,1.0,0.97,0.97,0.99,1.00,0.96,1.0,1.00,1.00,1.00,0.97,1.00,1.00,0.98


## $ w_{x,a} = \frac {1}{Z} w'_{x,a}  $

In [32]:
%%time 

w_Tao_et_al = {}
for disease in pathologies:
    w_Tao_et_al[disease] = W_hat_Tao_et_al[disease].divide(z[disease] , axis=0)

w_Tao_et_al[disease].round(decimals=2)

CPU times: user 6.18 ms, sys: 1.02 ms, total: 7.2 ms
Wall time: 7.22 ms


Unnamed: 0,ResNet50V2,InceptionV3,InceptionResNetV2,EfficientNetB0,DenseNet121,MobileNetV2,ResNet101V2,DenseNet169,VGG16,DenseNet201
0,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00
3,1.00,1.00,0.99,1.00,1.00,1.00,1.00,1.00,1.00,1.00
4,1.05,1.05,1.05,0.79,1.05,1.05,1.05,1.05,0.79,1.05
5,1.02,0.96,0.93,1.02,1.02,0.94,1.02,1.02,1.02,1.02
6,1.02,0.96,0.93,1.02,1.02,0.94,1.02,1.02,1.02,1.02
...,...,...,...,...,...,...,...,...,...,...
229,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00
230,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00
231,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00
232,1.02,1.02,1.02,0.96,1.02,1.02,1.02,0.96,0.96,1.02


## <span style="color:Green; font-family:PT Sans Narrow; font-size:1.3em"> 7.3 Measuring accuracy </span>

In [33]:
# Weighted vote per sample and disease: mean over labelers of (label * weight).
prob_MV_binary_Tao_et_al = pd.DataFrame({
    disease: (delta[disease] * w_Tao_et_al[disease]).mean(axis=1)
    for disease in pathologies
})

# A weighted vote above 0.5 counts as a positive prediction; accuracy is the
# per-disease fraction of samples where that prediction equals the ground truth.
tao_prediction = prob_MV_binary_Tao_et_al > 0.5
reported_results['Tao'] = (tao_prediction == truth).mean(axis=0)

# saving the final accuracies
accuracy['Tao'] = reported_results['Tao']
save_artifacts(dataframe=accuracy, name='accuracy.csv', artifact_path='')

accuracy['Tao']

No Finding                    0.071006
Enlarged Cardiomediastinum    0.390533
Cardiomegaly                  0.656805
Lung Opacity                  0.857988
Lung Lesion                   0.976331
Edema                         0.745562
Consolidation                 0.810651
Pneumonia                     0.952663
Atelectasis                   0.579882
Pneumothorax                  0.875740
Pleural Effusion              0.715976
Pleural Other                 0.994083
Fracture                      0.970414
Support Devices               0.556213
Name: Tao, dtype: float64

# <span style="color:red; font-family:PT Sans Narrow; font-size:1.3em"> 8. Final Results </span>

## <span style="color:Green; font-family:PT Sans Narrow; font-size:1.3em"> 8.1 Accuracy comparison </span>

In [34]:
# Keep only the columns to be reported, in presentation order.
# NOTE(review): the saved outputs of this notebook show columns named 'method1' / 'method2'
# where this cell selects 'proposed' / 'proposed_penalized' — confirm the names match
# what the earlier cells actually store before re-running.
reported_results = reported_results[['F1','F2','proposed','proposed_penalized','Tao','MV']]

# Remove the pathologies that are excluded from the final comparison.
# Rows are dropped directly (no double transpose) and errors='ignore' makes the cell
# safe to re-run once the rows are already gone.
reported_results = reported_results.drop(
    index=['No Finding' , 'Fracture' , 'Support Devices' , 'Enlarged Cardiomediastinum'],
    errors='ignore',
)
reported_results.round(decimals=2)

Unnamed: 0,F1,F2,method1,method2,Tao,MV
Cardiomegaly,0.85,0.94,0.67,0.66,0.66,0.64
Lung Opacity,0.74,0.87,0.82,0.8,0.86,0.8
Lung Lesion,0.9,0.98,0.99,0.99,0.98,0.98
Edema,0.76,0.89,0.72,0.72,0.75,0.74
Consolidation,1.0,1.0,0.81,0.81,0.81,0.81
Pneumonia,0.99,1.0,0.95,0.95,0.95,0.95
Atelectasis,0.99,1.0,0.58,0.58,0.58,0.58
Pneumothorax,0.82,0.93,0.93,0.93,0.88,0.88
Pleural Effusion,0.75,0.87,0.76,0.75,0.72,0.72
Pleural Other,0.96,1.0,0.99,0.99,0.99,0.99


In [35]:
# reported_results[['method1','method2','MV','Tao']].plot()

# <span style="color:red; font-family:PT Sans Narrow; font-size:1.3em"> 9. Closing the ssh tunnel and mlflow </span>

In [36]:
# Tear-down: end both mlflow runs, then terminate the ssh tunnel.
# The tunnel is killed in `finally` so it does not stay alive if ending a run raises.
try:
    # closing the child mlflow session
    mlflow.end_run()

    # closing the parent mlflow session
    mlflow.end_run()
finally:
    # closing the ssh session
    ssh_session.kill()