In [1]:
import json
from sklearn.metrics import roc_auc_score, balanced_accuracy_score, f1_score
import numpy as np

# Destruction

# ISIC        
## trad       361-370
## only skin  371-380
## bbox       381-390
## bbox70     391-400

# Atlas
## trad       311-320
## only skin  321-330
## bbox       331-340
## bbox70     341-350

# Cross
## trad       291-300
## only skin  351-360
## bbox       421-425 301-305
## bbox70     426-430 286-290

# Construction

# ISIC
## trad       361-370
## label      401-410
## masked     411-420
## rgbm       441-450 

In [75]:
# Load the GTs

# Atlas
atlas_gt = {}
atlas_gt_csv = open('../atlas-csv/atlas-dermato-all.csv', 'r')
atlas_gt_csv.readline()
for line in atlas_gt_csv.readlines():
    sep = line.strip().split(';')
    if len(sep) > 1:
        image = sep[2]
        diag = int(sep[16])
        atlas_gt[image] = diag
assert(len(atlas_gt) == 872)



def get_atlas_gt(split):
    # Atlas
    atlas_gt_fold = {}
    split = str(split)
    isic_gt_csv = open('../atlas-csv/10fold/atlas-dermato-test-all-' + split + '.csv', 'r')
    isic_gt_csv.readline()
    for line in isic_gt_csv.readlines():
        sep = line.strip().split(';')
        image = sep[2]
        diag = int(sep[16])
        atlas_gt_fold[image] = diag
    return atlas_gt_fold



def get_isic_gt(split):
    # ISIC
    isic_gt = {}
    split = str(split)
    isic_gt_csv = open('../isic-csv/10fold/isic-test-' + split + '.csv', 'r')
    isic_gt_csv.readline()
    for line in isic_gt_csv.readlines():
        sep = line.strip().split(';')
        if len(sep) > 1:
            if sep[2].startswith('ISIC'):
                image = sep[2]
            else:
                image = sep[14]
            diag = int(sep[16])
            isic_gt[image] = diag
    return isic_gt


In [113]:
# exp, auc, acc
#directory = '361'

for directory in range(291, 300):
    directory = str(directory)
    res = directory + '/auc_test_best_all.txt'
    

    # Check the split number
    with open(directory + '/config.json') as json_file:
        json_load = json.load(json_file)
        split = json_load['train_csv'].strip().split('/')[-1][-5]
        #print('split:', split)

    # Check the experiment name
    with open(directory + '/run.json') as json_file:
        json_load = json.load(json_file)
        exp_name = json_load['experiment']['name']
        #print('exp_name', exp_name)

    # Get ground truth -> choose one, atlas or isic
    #gt = get_isic_gt(split)  # isic
    #gt = get_atlas_gt(split) # atlas
    gt = atlas_gt             # cross
    
    read_csv = open(res, 'r')
    preds = {}
    for i in range(5):
        read_csv.readline()

    arr_line = []
    for line in read_csv.readlines():
        sep = line.strip().split(',')
        if len(sep) > 1:
            preds[sep[0]] = float(sep[1])
            
    assert(len(preds) == len(gt))
    

    y_pred = []
    y_true = []

    for key, value in gt.items():
        y_true.append(value)
        y_pred.append(preds[key])

    # AUC
    auc = roc_auc_score(y_true, y_pred)
    
    # balance Accuracy
    y_pred_thresh = [np.around(value) for value in y_pred]
    bAcc = balanced_accuracy_score(y_true, y_pred_thresh)
    f1 = f1_score(y_true, y_pred_thresh)
    
    #print('auc:', auc)
    #print('bAcc:', bAcc)
    print('{},{},{},{}'.format(exp_name, split, auc, bAcc))
    print(bAcc)
    
    # Search for best cut point
    #best_t = -1
    #best_acc = -1
    #for t in np.arange(0, 1, 0.01):
    #    y_pred_t = [1 if pred > t else 0 for pred in y_pred]
    #    bAcc_t = balanced_accuracy_score(y_true, y_pred_t)
    #    if bAcc_t > best_acc:
    #        best_acc = bAcc_t
    #        best_t = t
    #print(best_t, best_acc)

sacred-atlas-rgb-crossdataset-noduplicates,l,0.8351190476190476,0.7239375320020481
0.23 0.7660778289810548
sacred-atlas-rgb-crossdataset-noduplicates,l,0.834389400921659,0.7191628264208909
0.19 0.7516257040450589
sacred-atlas-rgb-crossdataset-noduplicates,l,0.8268689196108551,0.7063364055299539
0.09 0.7593061955965181
sacred-atlas-rgb-crossdataset-noduplicates,l,0.8474846390168972,0.7147081413210445
0.16 0.7809651817716334
sacred-atlas-rgb-crossdataset-noduplicates,l,0.851715309779826,0.7166282642089093
0.12 0.7758064516129033
sacred-atlas-rgb-crossdataset-noduplicates,l,0.825742447516641,0.666410650281618
0.05 0.7645161290322581
sacred-atlas-rgb-crossdataset-noduplicates,l,0.8384920634920634,0.7068356374807988
0.09 0.7611111111111111
sacred-atlas-rgb-crossdataset-noduplicates,l,0.8359062980030723,0.7267921146953404
0.3 0.7623655913978494
sacred-atlas-rgb-crossdataset-noduplicates,l,0.8230926779313875,0.6993343573988735
0.19 0.7592037890424987


In [156]:
# exp, auc, acc
#directory = '361'
# DEBIAS JOURNAL ACC EVALUATION
import glob

def read_preds(path, gt):
    preds = {}
    read_csv = open(path, 'r')
    #print(path)
    for i in range(4):
        read_csv.readline()
    for line in read_csv.readlines():
        sep = line.strip().split(',')
        if len(sep) > 1:
            preds[sep[0]] = float(sep[1])
    
    
    try:
        assert(len(preds) == len(gt))
    except:
        print("ASSERTION ERROR")
        print(len(preds), len(gt))
        
    
    
    y_pred = []
    y_true = []

    for key, value in gt.items():
        y_true.append(value)
        y_pred.append(preds[key])

    return np.array(y_true), np.array(y_pred)

for factor in ['0', '0.3', '0.5', '0.7', '0.9', '1']:
    for split in range(1, 11):
        split = str(split)
        #directory = '/deconstructing-bias-skin-lesion/results-comet-gans/debias.train_trap_journal-rgb.inceptionv4.factor'+str(factor)+'.split' + str(split)
        directory = '/group_DRO/results-comet/
        #res = directory + '/auc_test_best_all.txt'

        # Check the split number
        #split = directory.split('.')[-1].split('split')[-1]
        #factor = directory.split('.')[-2].split('factor')[-1]


        # Check the experiment name
        exp_name = directory.split('/')[-1]

        # Get ground truth -> choose one, atlas or isic                              
        # ISIC 2019
        isic2019_gt = {}
        test_file = '/deconstructing-bias-skin-lesion/isic-trap-csv/paper2021/test_bias_'+factor+'_'+split+'.csv'
        isic2019_gt_csv = open(test_file, 'r')
        isic2019_gt_csv.readline()
        for line in isic2019_gt_csv.readlines():
            sep = line.strip().split(',')
            image = sep[1]
            diag = int(sep[-2])
            isic2019_gt[image] = diag


        # Read predictions
        # ISIC 2019
        #print(directory)
        y_true_isic19, y_pred_isic19 = read_preds(directory + '/test_trapinferred_isic2019.txt', isic2019_gt)
        auc_isic19 = roc_auc_score(y_true_isic19, y_pred_isic19)

        y_pred_isic19_thresh = [np.around(value) for value in y_pred_isic19]
        acc_isic19 = balanced_accuracy_score(y_true_isic19, y_pred_isic19_thresh)
        #print(acc_isic19)

        # ISIC 2019 NormCrop
        y_true_isic19nc, y_pred_isic19nc = read_preds(directory + '/test_trapinferred_isic2019_normcrophull.txt', isic2019_gt)
        auc_isic19nc = roc_auc_score(y_true_isic19nc, y_pred_isic19nc)

        y_pred_isic19nc_thresh = [np.around(value) for value in y_pred_isic19nc]
        acc_isic19nc = balanced_accuracy_score(y_true_isic19nc, y_pred_isic19nc_thresh)
        print(acc_isic19nc)


        '''
        # Search for best cut point
        #best_t = -1
        #best_acc = -1
        #for t in np.arange(0, 1, 0.01):
        #    y_pred_t = [1 if pred > t else 0 for pred in y_pred]
        #    bAcc_t = balanced_accuracy_score(y_true, y_pred_t)
        #    if bAcc_t > best_acc:
        #        best_acc = bAcc_t
        #        best_t = t
        #print(best_t, best_acc)
        '''

0.7617673517476478
0.7536571857934113
0.7519325338421861
0.7815709908942863
0.7652706813249492
0.7629986109756942
0.7645351939559206
0.7696506971126379
0.7568365259070949
0.7669122255453915
0.7736046665994325
0.7584548046820474
0.7610374092450873
0.7350875249088048
0.7519457364205182
0.7328752872079726
0.7694367864009923
0.7464866537917276
0.7608183062112105
0.7694166420015687
0.74449700469876
0.7544898818915593
0.723251470013628
0.7215064122793546
0.7238085883846359
0.7179918536281846
0.765538231250326
0.7541622580957683
0.7283748971199326
0.730130573981387
0.7107316564758199
0.7276732038057658
0.7035575147225501
0.719377559715853
0.7518123214416526
0.7177755575291099
0.7248639375786838
0.7277804446374594
0.7005095595781345
0.7206077482095582
0.7106727822237358
0.6548659385172293
0.6817852128732957
0.6875389840827857
0.6563623378899405
0.684858721769064
0.6691349134287234
0.6560016554686341
0.6568579494732136
0.6986635398114109
0.6230536110591758
0.6864946319928368
0.6611106357702125
