In [1]:
import torch
import numpy as np
import pandas as pd
#import seaborn as sns
from sklearn.metrics import confusion_matrix, roc_curve, auc, balanced_accuracy_score
#import matplotlib.pyplot as plt
import torchvision
import shutil
import os

# DDI

In [13]:
def cal_metrics(csv_path, type_indices, is_binary=False, num=2):
    '''
    Calculate average accuracy, accuracy per skin-type group, PQD, DPM and EOM
    from a validation-results CSV (DDI variant: 3 skin-type groups).

    The CSV must provide the columns 'label', 'prediction' and 'fitzpatrick',
    plus 'prediction_probability' when is_binary is True. Every row is
    assigned to the group (fitzpatrick - 1) // num, which must fall in 0..2.

    Parameters
    ----------
    csv_path : path of the results CSV.
    type_indices : list of group indices (rows of the 3 x n_classes count
        tables) that take part in the metrics.
    is_binary : when True, an AUC is also computed with sklearn's
        roc_curve/auc; otherwise 'AUC' is -1.
    num : divisor used to bucket fitzpatrick values into groups.

    Returns
    -------
    dict with keys
        'acc_avg'      : correct / total over the selected groups,
        'acc_per_type' : array with one accuracy per selected group,
        'PQD'          : min(acc_per_type) / max(acc_per_type),
        'DPM'          : mean over classes of min/max (across groups) of the
                         share of a group's predictions given to that class,
        'EOM'          : mean over classes of min/max (across groups) of the
                         per-class fraction of correctly predicted labels,
        'AUC'          : area under the ROC curve, or -1.

    Raises
    ------
    IndexError : if a fitzpatrick value maps to a group outside 0..2.

    Notes
    -----
    A group/class cell without samples gives 0/0 = NaN, which propagates into
    the affected metric (NumPy emits a RuntimeWarning); this is unchanged
    from the loop-based implementation.
    '''
    n_groups = 3
    df = pd.read_csv(csv_path)

    labels = df['label'].to_numpy().astype(int)
    predictions = df['prediction'].to_numpy().astype(int)
    groups = ((df['fitzpatrick'].to_numpy() - 1) // num).astype(int)

    # A negative group index would silently wrap around to another group's row
    # through NumPy negative indexing, so reject anything outside the table.
    if groups.size and (groups.min() < 0 or groups.max() >= n_groups):
        raise IndexError(
            "'fitzpatrick' values map to skin-type groups outside 0..{} "
            "(num={})".format(n_groups - 1, num))

    # The table must hold every class index that occurs in either column.
    # Counting unique labels alone is too small when a prediction falls
    # outside the observed labels or when the labels are not contiguous.
    n_classes = len(df['label'].unique())
    if labels.size:
        n_classes = max(n_classes, int(labels.max()) + 1, int(predictions.max()) + 1)

    labels_array = np.zeros((n_groups, n_classes))
    correct_array = np.zeros((n_groups, n_classes))
    predictions_array = np.zeros((n_groups, n_classes))

    # np.add.at accumulates repeated (group, class) pairs, unlike `arr[idx] += 1`.
    np.add.at(labels_array, (groups, labels), 1)
    np.add.at(predictions_array, (groups, predictions), 1)
    hit = predictions == labels
    np.add.at(correct_array, (groups[hit], labels[hit]), 1)

    correct_array = correct_array[type_indices]
    labels_array = labels_array[type_indices]
    predictions_array = predictions_array[type_indices]

    # avg acc, acc per type (sum over the class axis)
    acc_array = np.sum(correct_array, axis=1) / np.sum(labels_array, axis=1)
    avg_acc = np.sum(correct_array) / np.sum(labels_array)

    # PQD
    PQD = acc_array.min() / acc_array.max()

    # DPM
    demo_array = predictions_array / np.sum(predictions_array, axis=1, keepdims=True)
    DPM = np.mean(demo_array.min(axis=0) / demo_array.max(axis=0))

    # EOM
    eo_array = correct_array / labels_array
    EOM = np.mean(np.min(eo_array, axis=0) / np.max(eo_array, axis=0))

    # if is binary classification, output AUC
    if is_binary:
        # Score of class 1: the stored probability when class 1 was predicted,
        # its complement when class 0 was predicted (presumably the column
        # holds the confidence of the predicted class - confirm with the
        # code that writes the CSV).
        probability = df['prediction_probability'].to_numpy()
        positive_scores = np.where(predictions == 0, 1.0 - probability, probability)
        fpr, tpr, _ = roc_curve(df['label'], positive_scores, drop_intermediate=True)
        AUC = auc(fpr, tpr)
    else:
        AUC = -1

    return {'acc_avg': avg_acc, 'acc_per_type': acc_array, 'PQD': PQD, 'DPM': DPM, 'EOM': EOM, 'AUC': AUC}

In [2]:
# In Domain
# Settings for the DDI evaluation; model_name, epoch, label and holdout_set
# together form the name of the results CSV that is read below.
epoch = 20
label = 'high'
# In Domain: 'random_holdout', Out Domain: 'a12', 'a34', 'a56'
holdout_set = 'random_holdout'
model_name = '' # Put the model Name Here
type_indices = [0,1,2] # Remove the skin type on which the model is trained for out domain, eg: a12->[1,2]
csv_folder_list = ['S36']#, 'S37', 'S38', 'S39', 'S40']
# This flag is forwarded to cal_metrics. The call used to hard-code
# is_binary=False and ignore this setting, so False reproduces the previous
# output (AUC reported as -1). Set it to True to get a real AUC; cal_metrics
# then reads the 'prediction_probability' column and calls roc_curve.
is_binary = False

# One result slot per entry of csv_folder_list.
num_runs = len(csv_folder_list)
avg_array = np.zeros(num_runs)
acc_per_type_array = np.zeros((num_runs, len(type_indices)))
PQD_array = np.zeros(num_runs)
DPM_array = np.zeros(num_runs)
EOM_array = np.zeros(num_runs)
AUC_array = np.zeros(num_runs)

# Bucket width handed to cal_metrics as `num`: 2 for the in-domain holdout, 1 otherwise.
group_width = 2 if holdout_set == 'random_holdout' else 1

for i in range(num_runs):
    # NOTE(review): csv_path does not depend on i, so every entry of
    # csv_folder_list evaluates the same file unless the per-folder path
    # below is used instead.
    csv_path = 'results_{}_{}_{}_{}.csv'.format(model_name, epoch,label,holdout_set)
    #csv_path = 'results/{}/results_{}_{}_{}_{}.csv'.format(csv_folder_list[i], model_name, epoch,label,holdout_set)
    dic = cal_metrics(csv_path, type_indices, is_binary=is_binary, num=group_width)
    avg_array[i] = dic['acc_avg']
    acc_per_type_array[i, :] = dic['acc_per_type']
    PQD_array[i] = dic['PQD']
    DPM_array[i] = dic['DPM']
    EOM_array[i] = dic['EOM']
    AUC_array[i] = dic['AUC']


# Report every metric as a title line followed by its per-run array.
for title, values in (
    ('acc_avg array', avg_array),
    ('acc per type', acc_per_type_array),
    ('PQD', PQD_array),
    ('DPM', DPM_array),
    ('EOM', EOM_array),
    ('AUC', AUC_array),
):
    print(title)
    print(values)

# FITZPATRICK

In [3]:
def cal_metrics(csv_path, type_indices, is_binary=False):
    '''
    Calculate average accuracy, accuracy per skin type, PQD, DPM and EOM
    from a validation-results CSV (Fitzpatrick variant: 6 skin types).

    NOTE: this definition replaces any `cal_metrics` defined earlier in the
    notebook once this cell has been executed.

    The CSV must provide the columns 'label', 'prediction' and 'fitzpatrick',
    plus 'prediction_probability' when is_binary is True. Every row is
    assigned to the skin-type index fitzpatrick - 1, which must fall in 0..5.

    Parameters
    ----------
    csv_path : path of the results CSV.
    type_indices : list of skin-type indices (rows of the 6 x n_classes count
        tables) that take part in the metrics.
    is_binary : when True, an AUC is also computed with sklearn's
        roc_curve/auc; otherwise 'AUC' is -1.

    Returns
    -------
    dict with keys
        'acc_avg'      : correct / total over the selected skin types,
        'acc_per_type' : array with one accuracy per selected skin type,
        'PQD'          : min(acc_per_type) / max(acc_per_type),
        'DPM'          : mean over classes of min/max (across skin types) of
                         the share of a type's predictions given to that class,
        'EOM'          : mean over classes of min/max (across skin types) of
                         the per-class fraction of correctly predicted labels,
        'AUC'          : area under the ROC curve, or -1.

    Raises
    ------
    IndexError : if a fitzpatrick value maps to an index outside 0..5.

    Notes
    -----
    A skin-type/class cell without samples gives 0/0 = NaN, which propagates
    into the affected metric (NumPy emits a RuntimeWarning); this is
    unchanged from the loop-based implementation.
    '''
    n_types = 6
    df = pd.read_csv(csv_path)

    labels = df['label'].to_numpy().astype(int)
    predictions = df['prediction'].to_numpy().astype(int)
    skin_types = (df['fitzpatrick'].to_numpy() - 1).astype(int)

    # A negative index (e.g. fitzpatrick == -1) would silently wrap around to
    # another skin type's row through NumPy negative indexing, so reject
    # anything outside the table.
    if skin_types.size and (skin_types.min() < 0 or skin_types.max() >= n_types):
        raise IndexError(
            "'fitzpatrick' values map to skin-type indices outside 0..{}".format(n_types - 1))

    # The table must hold every class index that occurs in either column.
    # Counting unique labels alone is too small when a prediction falls
    # outside the observed labels or when the labels are not contiguous.
    n_classes = len(df['label'].unique())
    if labels.size:
        n_classes = max(n_classes, int(labels.max()) + 1, int(predictions.max()) + 1)

    labels_array = np.zeros((n_types, n_classes))
    correct_array = np.zeros((n_types, n_classes))
    predictions_array = np.zeros((n_types, n_classes))

    # np.add.at accumulates repeated (type, class) pairs, unlike `arr[idx] += 1`.
    np.add.at(labels_array, (skin_types, labels), 1)
    np.add.at(predictions_array, (skin_types, predictions), 1)
    hit = predictions == labels
    np.add.at(correct_array, (skin_types[hit], labels[hit]), 1)

    correct_array = correct_array[type_indices]
    labels_array = labels_array[type_indices]
    predictions_array = predictions_array[type_indices]

    # avg acc, acc per type (sum over the class axis)
    acc_array = np.sum(correct_array, axis=1) / np.sum(labels_array, axis=1)
    avg_acc = np.sum(correct_array) / np.sum(labels_array)

    # PQD
    PQD = acc_array.min() / acc_array.max()

    # DPM
    demo_array = predictions_array / np.sum(predictions_array, axis=1, keepdims=True)
    DPM = np.mean(demo_array.min(axis=0) / demo_array.max(axis=0))

    # EOM
    eo_array = correct_array / labels_array
    EOM = np.mean(np.min(eo_array, axis=0) / np.max(eo_array, axis=0))

    # if is binary classification, output AUC
    if is_binary:
        # Score of class 1: the stored probability when class 1 was predicted,
        # its complement when class 0 was predicted (presumably the column
        # holds the confidence of the predicted class - confirm with the
        # code that writes the CSV).
        probability = df['prediction_probability'].to_numpy()
        positive_scores = np.where(predictions == 0, 1.0 - probability, probability)
        fpr, tpr, _ = roc_curve(df['label'], positive_scores, drop_intermediate=True)
        AUC = auc(fpr, tpr)
    else:
        AUC = -1

    return {'acc_avg': avg_acc, 'acc_per_type': acc_array, 'PQD': PQD, 'DPM': DPM, 'EOM': EOM, 'AUC': AUC}

In [4]:
# Settings for the Fitzpatrick evaluation; model_name, epoch, label and
# holdout_set together form the name of the results CSV that is read below.
epoch = 20
label = 'high'
# In Domain: 'random_holdout', Out Domain: 'a12', 'a34', 'a56'
holdout_set = 'a56'
model_name = 'patchmotp_fitz_a56_2_2' # Put the model name here
# NOTE(review): holdout_set is 'a56' but all six skin types are still listed;
# confirm this is intended given the instruction in the comment below.
type_indices = [0,1,2,3,4,5]  # Remove the skin type on which the model is trained for out domain, eg: a12->[2,3,4,5]
csv_folder_list = ['S36', 'S37', 'S38', 'S39', 'S40']
# This flag is forwarded to cal_metrics. The call used to hard-code
# is_binary=False and ignore this setting, so False reproduces the previous
# output (AUC reported as -1). Set it to True only if the labels are binary
# and the CSV has a 'prediction_probability' column; cal_metrics then calls
# roc_curve.
is_binary = False

# One result slot per entry of csv_folder_list.
num_runs = len(csv_folder_list)
avg_array = np.zeros(num_runs)
acc_per_type_array = np.zeros((num_runs, len(type_indices)))
PQD_array = np.zeros(num_runs)
DPM_array = np.zeros(num_runs)
EOM_array = np.zeros(num_runs)
AUC_array = np.zeros(num_runs)

for i in range(num_runs):
    # NOTE(review): csv_path does not depend on i, so every entry of
    # csv_folder_list evaluates the same file unless the per-folder path
    # below is used instead.
    csv_path = 'results_{}_{}_{}_{}.csv'.format(model_name, epoch,label,holdout_set)
    #csv_path = 'results/{}/results_{}_{}_{}_{}.csv'.format(csv_folder_list[i], model_name, epoch,label,holdout_set)
    dic = cal_metrics(csv_path, type_indices, is_binary=is_binary)
    avg_array[i] = dic['acc_avg']
    acc_per_type_array[i, :] = dic['acc_per_type']
    PQD_array[i] = dic['PQD']
    DPM_array[i] = dic['DPM']
    EOM_array[i] = dic['EOM']
    AUC_array[i] = dic['AUC']


# Report every metric as a title line followed by its per-run array.
for title, values in (
    ('acc_avg array', avg_array),
    ('acc per type', acc_per_type_array),
    ('PQD', PQD_array),
    ('DPM', DPM_array),
    ('EOM', EOM_array),
    ('AUC', AUC_array),
):
    print(title)
    print(values)
# Comma-separated summary of the first run; only the first two per-type
# accuracies are included.
print(f'{avg_array[0]},{acc_per_type_array[0][0]},{acc_per_type_array[0][1]},{PQD_array[0]},{DPM_array[0]},{EOM_array[0]}')