In [1]:
import os
from pathlib import Path
from glob import glob

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import RocCurveDisplay, auc
from scipy.stats import rankdata


In [2]:
# Root directory used to build every path below: the grandparent of the
# current working directory (so it depends on where the kernel was started).
PATH_ROOT = Path.cwd().parent.parent
print(PATH_ROOT)

/home/lukec/workspace/applicabilityDomain


In [3]:
# Short names of the classifiers.
CLF_alias = [
    'KNN',
    'NN',
    'RF',
    'SVM',
]

# Short names of the compared methods; this order is also used as the column
# order of the tables built from them.
AD_alias = [
    'DM-δ', 'DM-γ', 'DM-κ',
    'BB',
    'Prob.',
    'Magnet', 'FS', 'RC',
]

# The last three entries of AD_alias.
# NOTE(review): what distinguishes this subset is not visible here - confirm.
AL = [
    'Magnet',
    'FS',
    'RC',
]

# Dataset names.
DATASETS = [
    'Ames',
    'BBBP',
    'Cancer',
    'CYP1A2',
    'hERG',
    'HIV',
    'Liver',
]

In [4]:
# Load the AUC table: one row per (Dataset, Classifier, Method) with its AUC
# and Rank.
# NOTE(review): the displayed frame has an 'Unnamed: 0' column, so the CSV
# was presumably written with its index - consider index_col=0 if no other
# cell relies on that column.
path_aut = os.path.join(
    PATH_ROOT,
    'plot_results', 'roc', 'auc.csv',
)
df_auc = pd.read_csv(filepath_or_buffer=path_aut)

df_auc

Unnamed: 0,Dataset,Classifier,Method,AUC,Rank
0,Ames,KNN,DM-δ,0.586356,7
1,Ames,KNN,DM-γ,0.601921,5
2,Ames,KNN,DM-κ,0.587643,6
3,Ames,KNN,BB,0.528490,8
4,Ames,KNN,Prob.,0.705766,2
...,...,...,...,...,...
219,Liver,SVM,BB,0.480144,8
220,Liver,SVM,Prob.,0.654888,1
221,Liver,SVM,Magnet,0.644450,2
222,Liver,SVM,FS,0.634226,3


In [5]:
# Reshape to one row per (Dataset, Classifier) and one column of ranks per
# method; the trailing selection puts the columns in AD_alias order.
df_auc_pivot = df_auc.pivot(
    index=['Dataset', 'Classifier'],
    columns='Method',
    values='Rank',
)[AD_alias]

df_auc_pivot

Unnamed: 0_level_0,Method,DM-δ,DM-γ,DM-κ,BB,Prob.,Magnet,FS,RC
Dataset,Classifier,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Ames,KNN,7,5,6,8,2,4,1,3
Ames,NN,6,5,7,8,2,4,1,3
Ames,RF,8,5,6,7,1,4,3,2
Ames,SVM,7,5,6,8,2,4,1,3
BBBP,KNN,5,6,7,8,3,4,2,1
BBBP,NN,3,5,6,7,2,4,1,8
BBBP,RF,4,7,6,8,1,5,2,3
BBBP,SVM,5,6,7,8,3,4,1,2
CYP1A2,KNN,5,6,7,8,1,4,2,3
CYP1A2,NN,5,6,7,8,1,4,2,3


In [6]:
# Export the per-(Dataset, Classifier) rank table to an Excel workbook.
path_output_auc_rank = os.path.join(
    PATH_ROOT,
    'plot_results', 'roc', 'auc_pivot.xlsx',
)
df_auc_pivot.to_excel(excel_writer=path_output_auc_rank)

In [7]:
# For every dataset, average each method's rank over that dataset's rows of
# df_auc_pivot (one row per classifier), then rank those means so that the
# method with the smallest mean rank gets 1 (ties share the lowest rank).
#
# The rows are collected in a list and the frame is built once:
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies it on every iteration.
rank_rows = []
for d in DATASETS:
    # Boolean mask selecting the rows that belong to dataset `d`.
    indices_by_dataset = df_auc_pivot.index.get_level_values('Dataset') == d
    # Mean rank per method (column-wise mean over the selected rows).
    rank_means = df_auc_pivot[indices_by_dataset].mean(axis=0)
    # Ranking of the means. The values are positional, so the columns of
    # df_auc_pivot must already be in AD_alias order.
    rank_rows.append(rankdata(rank_means, method='min'))

df_auc_rank = (
    pd.DataFrame(
        rank_rows,
        columns=AD_alias,
        index=pd.Index(DATASETS, name='Dataset'),
    )
    .astype(int)
    # Sort index. The sort is case-sensitive, so upper case goes 1st
    # (e.g. 'hERG' ends up after 'Liver').
    .sort_index()
)

In [8]:
# Export the per-dataset ranking of mean ranks to an Excel workbook.
path_output_auc_rank_mean = os.path.join(
    PATH_ROOT,
    'plot_results', 'roc', 'auc_mean.xlsx',
)
df_auc_rank.to_excel(excel_writer=path_output_auc_rank_mean)