In [7]:
import os
import glob
import pandas as pd
from pathlib import Path
from scipy import stats
from sklearn.metrics import balanced_accuracy_score, f1_score, roc_auc_score
import numpy as np
from hydra import initialize, compose

In [56]:
def run_test_analysis(base_path, single_test=False):
    paths = list(
        filter(lambda x: x is not None,
        [path if path.is_dir() else None for path in Path(base_path).iterdir()]))

    if single_test:
        paths = [Path(base_path)]
    
    file = pd.read_csv(paths[0] / 'pred.csv').file

    cols = ["file"]
    cols.extend([f"prognosis_{ind}" for ind in range(len(list(paths)))])
    preds = pd.DataFrame(columns=cols)

    preds["file"] = file

    for ind, path_dir in enumerate(paths):
        try:
            pred = pd.read_csv(path_dir / 'pred.csv')
            preds[f"prognosis_{ind}"] = pred.prognosis.values
            preds[f"prognosis_{ind}"].replace('SEVERE', 1, inplace=True)
            preds[f"prognosis_{ind}"].replace('MILD', 0, inplace=True)
            
            preds["prognosis_real"] = pred["prognosis_real"].values
            preds["prognosis_real"].replace('SEVERE', 1, inplace=True)
            preds["prognosis_real"].replace('MILD', 0, inplace=True)
        except Exception:
            continue
    
    modes = stats.mode(preds[[f"prognosis_{ind}" for ind, _ in enumerate(paths)]].values,
                       axis=1)[0]
    preds["prognosis_mode"] = modes
        
    ba_scores = []
    f1_scores = []
    auc_scores = []
    
    if single_test:
        for _ in range(100):
            indicies = np.random.choice(len(preds["prognosis_real"].values),
                                        size=len(preds["prognosis_real"].values),
                                        replace=True)
            _reals = preds["prognosis_real"][indicies]
            _preds = preds["prognosis_0"][indicies]
            ba_scores.append(balanced_accuracy_score(_reals, _preds))
            f1_scores.append(f1_score(_reals, _preds))
            auc_scores.append(roc_auc_score(_reals, _preds))
        
        print('CV analysis results')
        print('$', np.around(np.mean(ba_scores), 3), '\pm', np.around(np.std(ba_scores), 3), '$',
             "\tBA score (full train, on test | BOOTSTRAPPED)")
        print('$', np.around(np.mean(f1_scores), 3), '\pm', np.around(np.std(f1_scores), 3), '$',
             "\tF1-score score (full train, on test | BOOTSTRAPPED)")
        print('$', np.around(np.mean(auc_scores), 3), '\pm', np.around(np.std(auc_scores), 3), '$',
             "\tAUC score (full train, on test | BOOTSTRAPPED)")
    
        return
        
    for ind, path_dir in enumerate(paths):
        if float == type(preds[f"prognosis_{ind}"].values[0]):
            continue
        ba_scores.append(balanced_accuracy_score(preds["prognosis_real"].values.astype(int),
                                              preds[f"prognosis_{ind}"].values.astype(int)))
        f1_scores.append(f1_score(preds["prognosis_real"].values.astype(int),
                                              preds[f"prognosis_{ind}"].values.astype(int)))
        auc_scores.append(roc_auc_score(preds["prognosis_real"].values.astype(int),
                                              preds[f"prognosis_{ind}"].values.astype(int)))   
        
    bs_ba_scores = []
    for _ in range(100):
            indicies = np.random.choice(len(preds["prognosis_real"].values),
                                        size=10,
                                        replace=True)
            _reals = preds["prognosis_real"][indicies].values.astype(int)
            _preds = preds["prognosis_mode"][indicies].values.astype(int)
            bs_ba_scores.append(balanced_accuracy_score(_reals, _preds))

    print('Test analysis results')
    space = " "
    print('$', np.around(np.mean(bs_ba_scores), 3), '\pm', np.around(np.std(bs_ba_scores), 3), '$',
          f'{10*space}BA score (CV test | MODE, BOOTSTRAPPED)')

    print('$', np.around(np.mean(ba_scores),3), '\pm', np.around(np.std(ba_scores), 3), '$',
          f'{10*space}Balanced accuracy score (CV test)')
    print('$', np.around(np.mean(f1_scores), 3), '\pm', np.around(np.std(f1_scores), 3), '$',
         f"{10*space}F1-score score (CV test)")
    print('$', np.around(np.mean(auc_scores), 3), '\pm', np.around(np.std(auc_scores), 3), '$',
         f"{10*space}AUC score (CV test)")
    
    return preds

In [57]:
def run_cv_analysis(base_path):
    paths = list(filter(lambda x: x is not None, [path if path.is_dir() else None for path in\
         Path(base_path).iterdir()]))

    file = pd.read_csv(paths[0] / 'pred_valid.csv').file

    cols = ["file"]
    cols.extend([f"prognosis_{ind}"for ind in range(len(list(paths)))])
    preds = pd.DataFrame(columns=cols)

    preds["file"] = file

    for ind, path_dir in enumerate(paths):
        try:
            pred = pd.read_csv(path_dir / 'pred_valid.csv')
            preds[f"prognosis_{ind}"] = pred.prognosis
            preds[f"prognosis_{ind}_real"] = pred["prognosis_real"]
            preds[f"prognosis_{ind}"].replace('SEVERE', 1, inplace=True)
            preds[f"prognosis_{ind}"].replace('MILD', 0, inplace=True)
            preds[f"prognosis_{ind}_real"].replace('SEVERE', 1, inplace=True)
            preds[f"prognosis_{ind}_real"].replace('MILD', 0, inplace=True)
        except Exception:
            continue
    
    ba_scores = []
    f1_scores = []
    auc_scores = []

    for ind, path_dir in enumerate(paths):
        ba_scores.append(balanced_accuracy_score(preds[f"prognosis_{ind}_real"].values.astype(int),
                                              preds[f"prognosis_{ind}"].values.astype(int)))
        f1_scores.append(f1_score(preds[f"prognosis_{ind}_real"].values.astype(int),
                                              preds[f"prognosis_{ind}"].values.astype(int)))
        auc_scores.append(roc_auc_score(preds[f"prognosis_{ind}_real"].values.astype(int),
                                              preds[f"prognosis_{ind}"].values.astype(int)))

    print('CV analysis results')
    space = " "
    print('$', np.around(np.mean(ba_scores),3), '\pm', np.around(np.std(ba_scores), 3), '$',
          f'{10*space}Balanced accuracy score (CV)')
    print('$', np.around(np.mean(f1_scores), 3), '\pm', np.around(np.std(f1_scores), 3), '$',
         f"{10*space}F1-score score (CV)")
    print('$', np.around(np.mean(auc_scores), 3), '\pm', np.around(np.std(auc_scores), 3), '$',
         f"{10*space}AUC score (CV)")
    
    return preds

In [58]:
## ORIGINAL COMPETITION SUBMISSION --> trained on full training set, many more models to reduce variance in pred.
_ = run_test_analysis(base_path='/mnt/ncshare/ai4covid_hackathon/raw_output/checkpoints/ENSEMBLE/')
_ = run_cv_analysis('/mnt/ncshare/ai4covid_hackathon/raw_output/checkpoints/ENSEMBLE/')

Test analysis results
$ 0.749 \pm 0.165 $           BA score (CV test | MODE, BOOTSTRAPPED)
$ 0.708 \pm 0.021 $           Balanced accuracy score (CV test)
$ 0.636 \pm 0.032 $           F1-score score (CV test)
$ 0.708 \pm 0.021 $           AUC score (CV test)
CV analysis results
$ 0.865 \pm 0.019 $           Balanced accuracy score (CV)
$ 0.864 \pm 0.021 $           F1-score score (CV)
$ 0.865 \pm 0.019 $           AUC score (CV)




In [59]:
## PAPER CHKPT, with early stopping enabled --> on a single model basis, better than the first one
_ = run_test_analysis('/mnt/ncshare/ai4covid_hackathon/raw_output/cross_val/checkpoints/CROSS_VAL_paper_v1_w_ES/')
_ = run_cv_analysis('/mnt/ncshare/ai4covid_hackathon/raw_output/cross_val/checkpoints/CROSS_VAL_paper_v1_w_ES/')

Test analysis results
$ 0.751 \pm 0.128 $           BA score (CV test | MODE, BOOTSTRAPPED)
$ 0.724 \pm 0.018 $           Balanced accuracy score (CV test)
$ 0.663 \pm 0.022 $           F1-score score (CV test)
$ 0.724 \pm 0.018 $           AUC score (CV test)
CV analysis results
$ 0.764 \pm 0.02 $           Balanced accuracy score (CV)
$ 0.774 \pm 0.03 $           F1-score score (CV)
$ 0.764 \pm 0.02 $           AUC score (CV)




In [60]:
_ = run_test_analysis('/mnt/ncshare/ai4covid_hackathon/raw_output/cross_val/checkpoints_image_only/')
_ = run_cv_analysis('/mnt/ncshare/ai4covid_hackathon/raw_output/cross_val/checkpoints_image_only/')

Test analysis results
$ 0.631 \pm 0.155 $           BA score (CV test | MODE, BOOTSTRAPPED)
$ 0.575 \pm 0.073 $           Balanced accuracy score (CV test)
$ 0.552 \pm 0.052 $           F1-score score (CV test)
$ 0.575 \pm 0.073 $           AUC score (CV test)
CV analysis results
$ 0.596 \pm 0.065 $           Balanced accuracy score (CV)
$ 0.665 \pm 0.035 $           F1-score score (CV)
$ 0.596 \pm 0.065 $           AUC score (CV)


In [61]:
_ = run_test_analysis('/mnt/ncshare/ai4covid_hackathon/raw_output/checkpoints/')

Test analysis results
$ 0.753 \pm 0.135 $           BA score (CV test | MODE, BOOTSTRAPPED)
$ 0.705 \pm 0.04 $           Balanced accuracy score (CV test)
$ 0.64 \pm 0.055 $           F1-score score (CV test)
$ 0.705 \pm 0.04 $           AUC score (CV test)




In [62]:
_ = run_test_analysis('/mnt/ncshare/ai4covid_hackathon/raw_output/checkpoints_image_only/')

Test analysis results
$ 0.677 \pm 0.163 $           BA score (CV test | MODE, BOOTSTRAPPED)
$ 0.631 \pm 0.049 $           Balanced accuracy score (CV test)
$ 0.582 \pm 0.048 $           F1-score score (CV test)
$ 0.631 \pm 0.049 $           AUC score (CV test)


