In [None]:
# Enable IPython's autoreload extension in mode 2, so that edited modules are
# re-imported automatically before each cell runs.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import pandas as pd
import numpy as np
import statistics
from tqdm import tqdm

In [None]:
# Directory two levels above the current working directory (presumably the
# repository root -- confirm against where the notebook is launched from).
DELQSAR_ROOT = os.getcwd() + '/../../'

# Output directory, relative to the working directory, for every saved figure.
PLOT_DIR = 'fixed_threshold_bin_plots'

# exist_ok=True makes this cell safe to re-run and avoids the race between a
# separate "does it exist?" check and the directory creation.
os.makedirs(PLOT_DIR, exist_ok=True)


def pathify(fname):
    """Return the path of ``fname`` inside the plot output directory.

    Args:
        fname: File name (e.g. ``'plot.png'``) to place under ``PLOT_DIR``.

    Returns:
        ``os.path.join(PLOT_DIR, fname)`` as a string.
    """
    return os.path.join(PLOT_DIR, fname)

In [None]:
import matplotlib
import matplotlib.pyplot as plt
# The bundled seaborn style sheets were renamed with a 'seaborn-v0_8-' prefix
# in matplotlib 3.6 and the old names were removed in a later release. Use the
# new name when this matplotlib ships it, otherwise fall back to the old one.
_paper_style = ('seaborn-v0_8-paper'
                if 'seaborn-v0_8-paper' in plt.style.available
                else 'seaborn-paper')
plt.style.use(_paper_style)

matplotlib.rc('font', family='sans-serif')
# Arial is a sans-serif face: with family='sans-serif' matplotlib consults the
# 'font.sans-serif' list, not 'font.serif', so Arial has to be registered
# there to take effect. The existing entries are kept as fallbacks for
# machines where Arial is not installed.
_sans_fallbacks = [name for name in matplotlib.rcParams['font.sans-serif'] if name != 'Arial']
matplotlib.rcParams['font.sans-serif'] = ['Arial'] + _sans_fallbacks
# Plain matplotlib text rendering (no LaTeX); a real boolean instead of the
# string 'false'.
matplotlib.rc('text', usetex=False)

In [None]:
def get_avg_AUCs_stdevs(dataset, model_type, metric, data=None):
    """Mean and sample standard deviation of one metric for one dataset/model pair.

    Rows are selected where the 'model type' column equals ``str(model_type)``
    and the 'dataset' column equals ``str(dataset)``; the statistics are then
    computed over the ``metric`` column of those rows.

    Args:
        dataset: Value to match in the 'dataset' column.
        model_type: Value to match in the 'model type' column.
        metric: Name of the column to summarise (e.g. ``'PR AUC'``).
        data: Optional DataFrame to read from. When omitted, the
            notebook-level ``df_data`` is used, which must already be loaded.

    Returns:
        ``(avg_aucs, stdevs)``: two one-element lists holding the mean and the
        sample standard deviation of the selected values.

    Raises:
        statistics.StatisticsError: If fewer than two rows match, since a
            sample standard deviation is undefined in that case.
    """
    if data is None:
        # Backward-compatible default: fall back to the notebook-level table.
        data = df_data

    selected = (data['model type'] == str(model_type)) & (data['dataset'] == str(dataset))
    values = data.loc[selected, str(metric)].tolist()

    if len(values) < 2:
        # Same exception type statistics.mean/stdev would raise, but with a
        # message that says which selection came back (nearly) empty.
        raise statistics.StatisticsError(
            "need at least two '{}' values for dataset={!r}, model type={!r}; found {}".format(
                metric, dataset, model_type, len(values)))

    return [statistics.mean(values)], [statistics.stdev(values)]

In [None]:
def make_plot_AUC(img_name, dataset, y_lb, y_ub, metric, title=True, legend=False, ylabel=False,
                          barWidth=0.25, eLineWidth=0.5, capSize=1, capThick=0.5):
    """Draw a bar chart of AUC values with error bars, save it and show it.

    The bar heights and error bars are not passed in. They are read from the
    notebook-level variables ``OH_FFNN_bin`` / ``OH_FFNN_bin_stdevs``,
    ``FP_FFNN_bin``, ``D_MPNN_bin``, ``OH_FFNN``, ``FP_FFNN``, ``D_MPNN``,
    ``OH_FFNN_pt``, ``FP_FFNN_pt``, ``D_MPNN_pt`` and ``random_guess`` (each
    with its ``*_stdevs`` partner), which must be assigned before the call.

    Args:
        img_name: File name of the saved image; the path is built by ``pathify``.
        dataset: Used as the plot title; a name containing ``'sEH'`` selects a
            slightly narrower figure.
        y_lb: Lower y-axis limit.
        y_ub: Upper y-axis limit.
        metric: Used as the y-axis label; a name containing ``'ROC'`` selects
            a slightly narrower figure.
        title: If true, show ``dataset`` as the axes title.
        legend: If true, use the wide figure size, draw the legend below the
            axes and save with a tight bounding box that includes it.
        ylabel: If true, show ``metric`` as the y-axis label.
        barWidth: Width of each bar and the spacing between bar positions.
        eLineWidth: Line width of the error bars.
        capSize: Cap size of the error bars.
        capThick: Cap thickness of the error bars.
    """
    # Figure size depends on which decorations the panel carries.
    if legend:
        fig_size = (7, 2.33)
    elif ylabel:
        fig_size = (2.33, 1.9)
    elif 'sEH' in dataset or 'ROC' in metric:
        fig_size = (2.1, 1.9)
    else:
        fig_size = (2.15, 1.9)
    fig, ax = plt.subplots(figsize=fig_size, dpi=300)

    # (legend label, bar heights, error bars, colour), in left-to-right order.
    series = [
        ('OH-FFNN bin', OH_FFNN_bin, OH_FFNN_bin_stdevs, "#DD8452"),
        ('FP-FFNN bin', FP_FFNN_bin, FP_FFNN_bin_stdevs, "#937860"),
        ('D-MPNN bin', D_MPNN_bin, D_MPNN_bin_stdevs, "#DA8BC3"),
        ('OH-FFNN', OH_FFNN, OH_FFNN_stdevs, "#4878D0"),
        ('FP-FFNN', FP_FFNN, FP_FFNN_stdevs, "#6ACC64"),
        ('D-MPNN', D_MPNN, D_MPNN_stdevs, "#D65F5F"),
        ('OH-FFNN pt', OH_FFNN_pt, OH_FFNN_pt_stdevs, "#956CB4"),
        ('FP-FFNN pt', FP_FFNN_pt, FP_FFNN_pt_stdevs, "#D5BB67"),
        ('D-MPNN pt', D_MPNN_pt, D_MPNN_pt_stdevs, "#82C6E2"),
        ('random guess', random_guess, random_guess_stdevs, "#797979"),
    ]
    error_style = {'elinewidth': eLineWidth, 'capsize': capSize, 'capthick': capThick}

    # Bars sit side by side: each one starts barWidth to the right of the last.
    x_pos = 0
    for label, heights, errors, color in series:
        # A fresh dict per call, in case the plotting call adds defaults to it.
        ax.bar([x_pos], heights, yerr=errors, error_kw=dict(error_style),
               color=color, width=barWidth, label=label, zorder=2)
        x_pos = x_pos + barWidth

    leg = None
    if legend:
        leg = ax.legend(loc='lower center', bbox_to_anchor=(0.5, -0.5), numpoints=1, fontsize=7, ncol=5)

    # Kept from the original: force a render pass before adjusting the layout.
    fig.canvas.draw()
    ax.grid(zorder=1)  # grid below the bars (bars use zorder=2)
    if ylabel:
        ax.set_ylabel(str(metric), fontsize=8)
    if title:
        ax.set_title(str(dataset), fontsize=8)
    ax.set_ylim([y_lb, y_ub])
    # The x axis carries no information (one bar per model): hide ticks and labels.
    ax.tick_params(axis='x', length=0)
    ax.tick_params(labelsize=8, bottom=False, labelbottom=False)

    fig.tight_layout()
    if leg is not None:
        # The legend sits outside the axes; include it in the saved bounding box.
        fig.savefig(pathify(img_name), bbox_extra_artists=(leg,), bbox_inches='tight')
    else:
        fig.savefig(pathify(img_name))
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

In [None]:
# AUC table read by get_avg_AUCs_stdevs; located under <DELQSAR_ROOT>/experiments.
df_data = pd.read_csv(
    filepath_or_buffer=os.path.join(DELQSAR_ROOT, 'experiments', 'bin_AUCs.csv'),
)

In [None]:
# Mean / standard deviation of PR AUC on DD1S CAIX for every model type.
# make_plot_AUC reads these notebook-level names when it draws the bars.
(
    (OH_FFNN_bin, OH_FFNN_bin_stdevs),
    (FP_FFNN_bin, FP_FFNN_bin_stdevs),
    (D_MPNN_bin, D_MPNN_bin_stdevs),
    (OH_FFNN, OH_FFNN_stdevs),
    (FP_FFNN, FP_FFNN_stdevs),
    (D_MPNN, D_MPNN_stdevs),
    (OH_FFNN_pt, OH_FFNN_pt_stdevs),
    (FP_FFNN_pt, FP_FFNN_pt_stdevs),
    (D_MPNN_pt, D_MPNN_pt_stdevs),
    (random_guess, random_guess_stdevs),
) = [
    get_avg_AUCs_stdevs('DD1S CAIX', model_name, 'PR AUC')
    for model_name in (
        'OH-FFNN bin', 'FP-FFNN bin', 'D-MPNN bin',
        'OH-FFNN', 'FP-FFNN', 'D-MPNN',
        'OH-FFNN pt', 'FP-FFNN pt', 'D-MPNN pt',
        'Random guess',
    )
]

In [None]:
# PR AUC panel for DD1S CAIX, with the y-axis label shown.
make_plot_AUC(
    img_name='DD1S_CAIX_bin_PR_AUCs.png',
    dataset='DD1S CAIX',
    y_lb=0,
    y_ub=0.1,
    metric='PR AUC',
    ylabel=True,
)

In [None]:
# Mean / standard deviation of PR AUC on triazine sEH for every model type.
# make_plot_AUC reads these notebook-level names when it draws the bars.
(
    (OH_FFNN_bin, OH_FFNN_bin_stdevs),
    (FP_FFNN_bin, FP_FFNN_bin_stdevs),
    (D_MPNN_bin, D_MPNN_bin_stdevs),
    (OH_FFNN, OH_FFNN_stdevs),
    (FP_FFNN, FP_FFNN_stdevs),
    (D_MPNN, D_MPNN_stdevs),
    (OH_FFNN_pt, OH_FFNN_pt_stdevs),
    (FP_FFNN_pt, FP_FFNN_pt_stdevs),
    (D_MPNN_pt, D_MPNN_pt_stdevs),
    (random_guess, random_guess_stdevs),
) = [
    get_avg_AUCs_stdevs('triazine sEH', model_name, 'PR AUC')
    for model_name in (
        'OH-FFNN bin', 'FP-FFNN bin', 'D-MPNN bin',
        'OH-FFNN', 'FP-FFNN', 'D-MPNN',
        'OH-FFNN pt', 'FP-FFNN pt', 'D-MPNN pt',
        'Random guess',
    )
]

In [None]:
# Wide variant of the triazine sEH PR AUC plot, rendered with the legend.
make_plot_AUC(
    img_name='triazine_sEH_bin_PR_AUCs_legend.png',
    dataset='triazine sEH',
    y_lb=0.25,
    y_ub=0.5,
    metric='PR AUC',
    legend=True,
)

In [None]:
# PR AUC panel for triazine sEH (title only, no y-axis label or legend).
make_plot_AUC(
    img_name='triazine_sEH_bin_PR_AUCs.png',
    dataset='triazine sEH',
    y_lb=-0.01,
    y_ub=0.5,
    metric='PR AUC',
)

In [None]:
# Mean / standard deviation of PR AUC on triazine SIRT2 for every model type.
# make_plot_AUC reads these notebook-level names when it draws the bars.
(
    (OH_FFNN_bin, OH_FFNN_bin_stdevs),
    (FP_FFNN_bin, FP_FFNN_bin_stdevs),
    (D_MPNN_bin, D_MPNN_bin_stdevs),
    (OH_FFNN, OH_FFNN_stdevs),
    (FP_FFNN, FP_FFNN_stdevs),
    (D_MPNN, D_MPNN_stdevs),
    (OH_FFNN_pt, OH_FFNN_pt_stdevs),
    (FP_FFNN_pt, FP_FFNN_pt_stdevs),
    (D_MPNN_pt, D_MPNN_pt_stdevs),
    (random_guess, random_guess_stdevs),
) = [
    get_avg_AUCs_stdevs('triazine SIRT2', model_name, 'PR AUC')
    for model_name in (
        'OH-FFNN bin', 'FP-FFNN bin', 'D-MPNN bin',
        'OH-FFNN', 'FP-FFNN', 'D-MPNN',
        'OH-FFNN pt', 'FP-FFNN pt', 'D-MPNN pt',
        'Random guess',
    )
]

In [None]:
# PR AUC panel for triazine SIRT2 (title only, no y-axis label or legend).
make_plot_AUC(
    img_name='triazine_SIRT2_bin_PR_AUCs.png',
    dataset='triazine SIRT2',
    y_lb=-0.003,
    y_ub=0.15,
    metric='PR AUC',
)

In [None]:
# Mean / standard deviation of ROC AUC on DD1S CAIX for every model type.
# make_plot_AUC reads these notebook-level names when it draws the bars.
(
    (OH_FFNN_bin, OH_FFNN_bin_stdevs),
    (FP_FFNN_bin, FP_FFNN_bin_stdevs),
    (D_MPNN_bin, D_MPNN_bin_stdevs),
    (OH_FFNN, OH_FFNN_stdevs),
    (FP_FFNN, FP_FFNN_stdevs),
    (D_MPNN, D_MPNN_stdevs),
    (OH_FFNN_pt, OH_FFNN_pt_stdevs),
    (FP_FFNN_pt, FP_FFNN_pt_stdevs),
    (D_MPNN_pt, D_MPNN_pt_stdevs),
    (random_guess, random_guess_stdevs),
) = [
    get_avg_AUCs_stdevs('DD1S CAIX', model_name, 'ROC AUC')
    for model_name in (
        'OH-FFNN bin', 'FP-FFNN bin', 'D-MPNN bin',
        'OH-FFNN', 'FP-FFNN', 'D-MPNN',
        'OH-FFNN pt', 'FP-FFNN pt', 'D-MPNN pt',
        'Random guess',
    )
]

In [None]:
# ROC AUC panel for DD1S CAIX, with the y-axis label shown and no title.
make_plot_AUC(
    img_name='DD1S_CAIX_bin_ROC_AUCs.png',
    dataset='DD1S CAIX',
    y_lb=0.4,
    y_ub=1,
    metric='ROC AUC',
    ylabel=True,
    title=False,
)

In [None]:
# Mean / standard deviation of ROC AUC on triazine sEH for every model type.
# make_plot_AUC reads these notebook-level names when it draws the bars.
(
    (OH_FFNN_bin, OH_FFNN_bin_stdevs),
    (FP_FFNN_bin, FP_FFNN_bin_stdevs),
    (D_MPNN_bin, D_MPNN_bin_stdevs),
    (OH_FFNN, OH_FFNN_stdevs),
    (FP_FFNN, FP_FFNN_stdevs),
    (D_MPNN, D_MPNN_stdevs),
    (OH_FFNN_pt, OH_FFNN_pt_stdevs),
    (FP_FFNN_pt, FP_FFNN_pt_stdevs),
    (D_MPNN_pt, D_MPNN_pt_stdevs),
    (random_guess, random_guess_stdevs),
) = [
    get_avg_AUCs_stdevs('triazine sEH', model_name, 'ROC AUC')
    for model_name in (
        'OH-FFNN bin', 'FP-FFNN bin', 'D-MPNN bin',
        'OH-FFNN', 'FP-FFNN', 'D-MPNN',
        'OH-FFNN pt', 'FP-FFNN pt', 'D-MPNN pt',
        'Random guess',
    )
]

In [None]:
# ROC AUC panel for triazine sEH, drawn without a title.
make_plot_AUC(
    img_name='triazine_sEH_bin_ROC_AUCs.png',
    dataset='triazine sEH',
    y_lb=0.4,
    y_ub=1.01,
    metric='ROC AUC',
    title=False,
)

In [None]:
# Mean / standard deviation of ROC AUC on triazine SIRT2 for every model type.
# make_plot_AUC reads these notebook-level names when it draws the bars.
(
    (OH_FFNN_bin, OH_FFNN_bin_stdevs),
    (FP_FFNN_bin, FP_FFNN_bin_stdevs),
    (D_MPNN_bin, D_MPNN_bin_stdevs),
    (OH_FFNN, OH_FFNN_stdevs),
    (FP_FFNN, FP_FFNN_stdevs),
    (D_MPNN, D_MPNN_stdevs),
    (OH_FFNN_pt, OH_FFNN_pt_stdevs),
    (FP_FFNN_pt, FP_FFNN_pt_stdevs),
    (D_MPNN_pt, D_MPNN_pt_stdevs),
    (random_guess, random_guess_stdevs),
) = [
    get_avg_AUCs_stdevs('triazine SIRT2', model_name, 'ROC AUC')
    for model_name in (
        'OH-FFNN bin', 'FP-FFNN bin', 'D-MPNN bin',
        'OH-FFNN', 'FP-FFNN', 'D-MPNN',
        'OH-FFNN pt', 'FP-FFNN pt', 'D-MPNN pt',
        'Random guess',
    )
]

In [None]:
# ROC AUC panel for triazine SIRT2, drawn without a title.
make_plot_AUC(
    img_name='triazine_SIRT2_bin_ROC_AUCs.png',
    dataset='triazine SIRT2',
    y_lb=0.4,
    y_ub=1.01,
    metric='ROC AUC',
    title=False,
)