# Importing the libraries

In [2]:
%reload_ext autoreload
%autoreload 2

import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

import sys
sys.path.append('../')

import funcs
import load_data
import mlflow
import numpy as np
import pandas as pd
from tqdm import tqdm
import tensorflow as tf
import subprocess
from time import time
import git
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings('ignore')

%reload_ext load_data
%reload_ext funcs

In [2]:
def running_evaluation(dataset='valid', pathologies=None, Data='', model='', number_augmentation=3):
    """Evaluate one data split with test-time augmentation and log the results to MLflow.

    Calls ``funcs.apply_technique_aim_1_2`` with the plain and the augmented
    generator of ``dataset`` and then writes five CSV files (original
    probabilities, augmentation-averaged probabilities, uncertainties and two
    accuracy tables), each of which is uploaded as an artifact of the active
    MLflow run.

    Args:
        dataset: Key of the split. ``Data.generator[dataset]``,
            ``Data.generator[dataset + '_aug']`` and ``Data.dataframe[dataset]``
            are read.
        pathologies: Label column names. ``None`` (the default) is replaced by
            ``['pathologies']``, the historical default value.
        Data: Object exposing ``generator`` and ``dataframe`` mappings.
        model: Model forwarded unchanged to ``funcs.apply_technique_aim_1_2``.
        number_augmentation: Forwarded unchanged to
            ``funcs.apply_technique_aim_1_2``.

    Returns:
        None. The results are only written to disk and logged to MLflow.
    """
    # Avoid a mutable default argument while keeping the original default value.
    if pathologies is None:
        pathologies = ['pathologies']

    def log_results(dataframe, probs_2d_orig, pathologies, MA, dataset):
        """Write the result CSV files for ``dataset`` and upload each to MLflow."""

        def add_dataframe_info_columns(df_info, probs_2d, pathologies):
            """Return ``df_info`` with its label columns replaced by ``probs_2d``."""
            df = df_info.drop(pathologies, axis=1)
            # Use the argument (not the enclosing variable) so the helper is self-contained.
            df_temp = pd.DataFrame(probs_2d, columns=pathologies).set_index(df.index)
            df[pathologies] = df_temp[pathologies]
            return df

        def save_and_log(frame, path, artifact_path):
            """Write ``frame`` to ``path`` as CSV and upload that file as an artifact."""
            frame.to_csv(path)
            mlflow.log_artifact(path, artifact_path=artifact_path)

        def accuracy_table(probs_2d):
            """Per-label accuracy in percent (0.5 threshold), truncated to one decimal."""
            matches = (MA.truth > 0.5) == (probs_2d > 0.5)
            accuracy = np.floor(1000 * np.mean(matches, axis=0)) / 10
            table = pd.DataFrame({'accuracy': accuracy, 'pathologies': pathologies})
            return table.set_index('pathologies')

        # Original (non-augmented) probabilities next to the sample information.
        save_and_log(frame         = add_dataframe_info_columns(df_info=dataframe, probs_2d=probs_2d_orig, pathologies=pathologies),
                     path          = f'../../prob_{dataset}.csv',
                     artifact_path = f'probabilities/{dataset}/')

        # Probabilities averaged over the augmentations.
        # The `pathologies` argument is used instead of the global `Info.pathologies`,
        # so the function no longer depends on notebook state.
        save_and_log(frame         = pd.DataFrame(MA.probs_avg_2d, columns=pathologies),
                     path          = f'../../prob_aug_avg_{dataset}.csv',
                     artifact_path = f'probabilities/{dataset}/')

        # Spread over the augmentations (uncertainty_type='std' is requested below).
        save_and_log(frame         = pd.DataFrame(MA.probs_std_2d, columns=pathologies),
                     path          = f'../../uncertainty_{dataset}.csv',
                     artifact_path = f'uncertainties/{dataset}/')

        # Accuracy of the original and of the augmentation-averaged predictions.
        save_and_log(frame         = accuracy_table(probs_2d_orig),
                     path          = f'../../accuracy_orig_{dataset}.csv',
                     artifact_path = f'accuracies/{dataset}/')

        save_and_log(frame         = accuracy_table(MA.probs_avg_2d),
                     path          = f'../../accuracy_aug_{dataset}.csv',
                     artifact_path = f'accuracies/{dataset}/')

    # `final_results` is returned by the helper but not used here.
    probs_2d_orig, final_results, MA = funcs.apply_technique_aim_1_2( how_to_treat_nans   = 'ignore',
                                                                      data_generator      = Data.generator[dataset],
                                                                      data_generator_aug  = Data.generator[dataset + '_aug'],
                                                                      model               = model,
                                                                      uncertainty_type    = 'std',
                                                                      number_augmentation = number_augmentation)

    log_results(dataframe     = Data.dataframe[dataset],
                probs_2d_orig = probs_2d_orig,
                pathologies   = pathologies,
                MA            = MA,
                dataset       = dataset)


def setting_up_gpu():
    """Create a TF1-compat session and register it as the Keras backend session.

    The session is configured with 5 inter-op and 5 intra-op parallelism
    threads.

    Returns:
        The ``tf.compat.v1.Session`` that was created.
    """
    # Optional knobs intentionally left disabled:
    #   device_count={"GPU": 1, "CPU": 10}
    #   gpu_options.allow_growth = True
    #   log_device_placement = True
    session = tf.compat.v1.Session(
        config=tf.compat.v1.ConfigProto(
            inter_op_parallelism_threads=5,
            intra_op_parallelism_threads=5,
        )
    )
    tf.compat.v1.keras.backend.set_session(session)
    return session


def mlflow_setting_up():
    """Point MLflow at the tracking server, select the experiment and start a run.

    The server URI and the artifact location come from
    ``funcs.mlflow_settings()``. The experiment is created with that artifact
    location if it does not exist yet; otherwise the existing one is reused.

    Returns:
        The run object returned by ``mlflow.start_run()``.
    """
    server, artifact = funcs.mlflow_settings()
    mlflow.set_tracking_uri(server)

    experiment_name = 'soft_weighted_MV_aim1_3'

    # Create the experiment explicitly so it gets the remote artifact location;
    # letting set_experiment() create it would use local artifact storage.
    # The client is built here (after the tracking URI is set) instead of
    # relying on a notebook-global `client` that may not exist yet.
    client = mlflow.tracking.MlflowClient()
    if client.get_experiment_by_name(experiment_name) is None:
        mlflow.create_experiment(name=experiment_name, artifact_location=artifact)

    mlflow.set_experiment(experiment_name=experiment_name)

    # Starting the MLflow run (start_run also accepts run_name / run_id)
    run = mlflow.start_run()

    return run

### Order of pathologies

In [3]:
# Label names, in the column order used throughout this notebook.
pathologies = [
    "No Finding",
    "Enlarged Cardiomediastinum",
    "Cardiomegaly",
    "Lung Opacity",
    "Lung Lesion",
    "Edema",
    "Consolidation",
    "Pneumonia",
    "Atelectasis",
    "Pneumothorax",
    "Pleural Effusion",
    "Pleural Other",
    "Fracture",
    "Support Devices",
]

### Creating an SSH tunnel to the server in the background

In [4]:
# Forward local port 5000 to port 5432 on the remote host (-N: no remote command).
command     = 'ssh -N -L 5000:localhost:5432 artinmajdi@data7-db1.cyverse.org'

# Popen already runs the child in the background. Launching ssh directly
# (no shell, no trailing '&') makes `ssh_session` the ssh process itself, so a
# later ssh_session.kill() really closes the tunnel instead of signalling a
# shell that has already exited.
ssh_session = subprocess.Popen(command.split(), stdout=subprocess.PIPE)

In [5]:
# Name of the MLflow experiment the run ids below belong to.
model_experiment_name = 'soft_weighted_MV_aim1_3'

# MLflow run id per architecture: runs used as the parent (model) runs.
run_id_models = {
    'ResNet50V2': 'e98f3c431281497e8155d7384b11cca9',
    'InceptionV3': '108673cc2258460d961a1da71942a30d',
    'InceptionResNetV2': '500f8449a371444188ae9fdee950a0c5',
    'EfficientNetB0': 'aa829e405f904b7e865b5cc8f621a0e4',
    'DenseNet121': 'f857040aa1284bdb8b932aacd37379cb',
    'MobileNetV2': '24c9eb3e84c1407698dd08a174ae9008',
    'ResNet101V2': '5aed61485804409b8dd9ec5419f26697',
    'DenseNet169': 'afc854bea43e49a08992a2cfb1d94c98',
    'VGG16': '255bf0aae1e74b228618ea5c3ce0efb5',
    'DenseNet201': '6f72b8f68de74ea5a7027c0f288e1e28',
}

# MLflow run id per architecture: runs whose artifacts are downloaded for the 'valid' statistics.
run_id_stats_valid = {
    'ResNet50V2': '7c50e57cbc574a898f542ebd8603fa6b',
    'InceptionV3': 'a35b54b6a74747df8388d67ba5f1966c',
    'InceptionResNetV2': '00619d5cf0a84d82a68f7c97f4c5f575',
    'EfficientNetB0': '5969b09160af40339135257e17cc6744',
    'DenseNet121': '59eb1cb557af457f8846c7dca5e70090',
    'MobileNetV2': '193ad9cf68374a3db1fdbb4473e37bbd',
    'ResNet101V2': '3828ffa16106434c9d153341ba5647f3',
    'DenseNet169': '3aea8516a073408ba17ff1c4aca6d76a',
    'VGG16': 'a6da14800fad4b659c5145ec4874b5ea',
    'DenseNet201': 'c12fb1de7cbd4b5fb6be8bc2a9929a21',
}

# MLflow run id per architecture: the 'test' counterpart of the table above.
run_id_stats_test = {
    'ResNet50V2': '4b853d6dfdf44f73be4031161e8b714c',
    'InceptionV3': '5351397f046f42a0b698d664dd122a22',
    'InceptionResNetV2': '06eabd82e54745aea0c8258fba710b51',
    'EfficientNetB0': 'fe42bc598fae4f6d94f72bd664200cad',
    'DenseNet121': 'adc231040ccc44f8a835e02d42dcca1d',
    'MobileNetV2': '4b7f4c085b6e45689ae1b36ef5ada964',
    'ResNet101V2': '93ba5a71d9aa45e9888f50d1bcefe449',
    'DenseNet169': '64911772e152421ca6ec01de20b39910',
    'VGG16': 'd970926c29bf4716996204f3206e7a90',
    'DenseNet201': '5297e4094113432284a3c6f07c4efb1e',
}

# Architecture names in the insertion order of `run_id_models`.
model_names_list = [*run_id_models]

### Setting up mlflow config

In [6]:
# Experiment under which the runs of this notebook are filed
experiment_name = 'soft_weighted_MV_aim1_3'

# Tracking-server URI and artifact location, as provided by the project helper
server, artifact = funcs.mlflow_settings()

# Point MLflow at the server first, then select the experiment on it
mlflow.set_tracking_uri(server)
mlflow.set_experiment(experiment_name=experiment_name)

In [7]:
# Master switch: the MLflow-run, data-loading, optimisation and evaluation
# cells only do work when this flag is True.
MEASURING_UNCERTAINTY_FOR_EACH_LABELERS = False

if MEASURING_UNCERTAINTY_FOR_EACH_LABELERS:

    # Parent session: resume the existing run of the selected architecture
    j              = 1
    model_name     = model_names_list[j]
    run_id_parent  = run_id_models[model_name]
    session_parent = mlflow.start_run(run_id=run_id_parent)

    # Child session: a new run nested under the parent, named after the mode
    mode_dataset  = 'train_val'
    session_child = mlflow.start_run(run_name=mode_dataset, nested=True)

    # Record the child's run id both as the run note and as a plain tag
    child_run_id = session_child.info.run_id
    mlflow.set_tag('mlflow.note.content', f'run_id {child_run_id}')
    mlflow.set_tag('run_id', child_run_id)

### Saving the Git commit (Jupyter notebook only)

In [8]:
if MEASURING_UNCERTAINTY_FOR_EACH_LABELERS:

    # Locate the enclosing repository by walking up from the working directory
    repo            = git.Repo(search_parent_directories=True)
    git_commit_hash = repo.head.object.hexsha

    # Show the hash of HEAD and attach it to the active MLflow run
    print(f'git commit hash {git_commit_hash}')
    mlflow.set_tag(key='mlflow.source.git.commit', value=git_commit_hash)


### Reading Terminal Inputs

In [9]:
# When True the settings are read through funcs.reading_terminal_inputs();
# otherwise the fixed notebook values below are used.
GETTING_INPUTS_VIA_TERMINAL = False

(epochs, batch_size, max_sample, architecture_name, number_augmentation) = (
    funcs.reading_terminal_inputs()
    if GETTING_INPUTS_VIA_TERMINAL
    else (3, 40, 1000000, 'DenseNet121', 3)
)


### Loading data

In [10]:
# Dataset to use ('nih' or 'chexpert') and the directory it lives in.
# NOTE: `dir` shadows the builtin; the name is kept because later cells read it.
dataset = 'chexpert'
dir     = f'/groups/jjrodrig/projects/chest/dataset/{dataset}/'

if MEASURING_UNCERTAINTY_FOR_EACH_LABELERS:

    RUNNING_NEW_RUN = False

    if not RUNNING_NEW_RUN:
        # Existing run: load in 'valid' mode, nothing is logged
        Data, Info = load_data.load(dir=dir, dataset=dataset, batch_size=batch_size, mode='valid', max_sample=max_sample)

    else:
        # New run: load in 'train_val' mode and record the data settings in MLflow
        Data, Info = load_data.load(dir=dir, dataset=dataset, batch_size=batch_size, mode='train_val', max_sample=max_sample)

        mlflow.log_param(key='dataset',     value=dataset)
        mlflow.log_param(key='max_sample',  value=max_sample)
        mlflow.log_param(key='train count', value=len(Data.generator['train'].filenames))
        mlflow.log_param(key='valid count', value=len(Data.generator['valid'].filenames))
        mlflow.log_param(key='batch size',  value=batch_size)


### Optimization

In [11]:
if MEASURING_UNCERTAINTY_FOR_EACH_LABELERS:

    OPTIMIZE_MODEL = False

    if not OPTIMIZE_MODEL:
        # Reuse the model stored under the child run instead of training.
        # NOTE: "session_child" might be the "session_parent"
        model_uri = f'runs:/{session_child.info.run_id}/model'
        model     = mlflow.keras.load_model(model_uri=model_uri, compile=False)

        # The model is loaded uncompiled, so compile it here.
        # (tf.keras.losses.binary_crossentropy is the unweighted alternative loss.)
        model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=0.001),
                      loss      = funcs.weighted_bce_loss(Info.class_weights),
                      metrics   = [tf.keras.metrics.binary_accuracy])

    else:
        # Train through the project helper
        model = funcs.optimize(train_dataset     = Data.data_tf['train'],
                               valid_dataset     = Data.data_tf['valid'],
                               architecture_name = architecture_name,
                               epochs            = epochs,
                               Info              = Info,
                               dir               = dir)

### Evaluation

In [12]:
EVALUATE = True

if EVALUATE and MEASURING_UNCERTAINTY_FOR_EACH_LABELERS:

    RUN_ON_VALIDATION = False
    RUN_ON_TEST       = True

    # Same load-then-evaluate sequence for every enabled split, validation first
    for split_enabled, split_name in ((RUN_ON_VALIDATION, 'valid'), (RUN_ON_TEST, 'test')):

        if not split_enabled:
            continue

        Data, Info = load_data.load(dir=dir, dataset=dataset, batch_size=batch_size, mode=split_name, max_sample=max_sample)

        running_evaluation(dataset             = split_name,
                           pathologies         = Info.pathologies,
                           Data                = Data,
                           model               = model,
                           number_augmentation = number_augmentation)

In [None]:
# Download every artifact of each model's 'valid' statistics run into its own
# local directory.
# NOTE(review): the loading cell further down reads from
# '/home/u29/mohammadsmajdi/projects/chest_xray/temp_aim1_3_<model>', not from
# the 'temp2_aim1_3_<model>' directories written here — confirm which is intended.

# One client is enough for all downloads
client = mlflow.tracking.MlflowClient()

for model_name in model_names_list:
    run_id        = run_id_stats_valid[model_name]
    session_stats = mlflow.get_run(run_id=run_id)

    local_dir = f'../../temp2_aim1_3_{model_name}'
    # exist_ok lets the cell be re-run without failing on an existing directory
    os.makedirs(local_dir, exist_ok=True)
    full_path = client.download_artifacts(run_id=run_id, path='', dst_path=local_dir)

In [33]:
# Load, per model, the uncertainty table and the original-probability table
# from the previously downloaded artifact directories.
std_stats, prob_stats = {}, {}
for model_name in model_names_list:

    stats_dir = f'/home/u29/mohammadsmajdi/projects/chest_xray/temp_aim1_3_{model_name}'
    path_std  = f'{stats_dir}/uncertainty_{model_name}.csv'
    path_prob = f'{stats_dir}/prob_{model_name}_orig.csv'

    std_stats[model_name]  = pd.read_csv(path_std)
    # Read the probability file here; previously the uncertainty file was read
    # a second time, so prob_stats was just a copy of std_stats.
    prob_stats[model_name] = pd.read_csv(path_prob)

In [46]:
# T[model]     : 1 - uncertainty table, indexed by the CSV's first ('Unnamed: 0') column
# w_hat[model] : column-wise mean of T[model]
T     = {}
w_hat = {}
for j, model_name in enumerate(model_names_list):

    complement        = 1 - std_stats[model_name].set_index('Unnamed: 0')
    T[model_name]     = complement
    w_hat[model_name] = complement.mean(axis=0)

In [None]:
# Inspect the ResNet50V2 entry of T (1 minus the loaded uncertainty table).
T['ResNet50V2']

In [None]:
# Show the ResNet50V2 entry of w_hat as a single-column table.
pd.DataFrame(w_hat['ResNet50V2'])

In [None]:
# Combine the per-model w_hat series into one table (one column per model name)
# and display it.
df = pd.DataFrame(w_hat)
df

In [101]:
# Normalise each row of `df` so that the weights of all models sum to 1.
row_totals = df.sum(axis=1)
w_sum      = row_totals.to_numpy()

# Row totals repeated across the columns; kept for any cell that still reads it.
w_sum_2d    = np.zeros(df.shape)
w_sum_2d[:] = w_sum[:, np.newaxis]

# Divide row-wise by the totals using df's own index. The previous version
# relabelled the totals with the external `pathologies` list, which silently
# pairs rows with the wrong totals whenever the row order of `df` differs
# from that list.
weights = df.div(row_totals, axis=0)



In [None]:
# weights
# pd.DataFrame()


In [15]:
# closing the child mlflow session
mlflow.end_run()

# closing the parent mlflow session
mlflow.end_run()

# closing the ssh session: kill the child, reap it so it does not linger as a
# zombie, and release the stdout pipe that was opened for it
ssh_session.kill()
ssh_session.wait()
if ssh_session.stdout is not None:
    ssh_session.stdout.close()