In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before each cell runs.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import pandas as pd
import numpy as np
import statistics
from tqdm import tqdm

In [None]:
import matplotlib
import matplotlib.pyplot as plt
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases dropped the old aliases, so use whichever name this installation offers.
plt.style.use('seaborn-v0_8-paper' if 'seaborn-v0_8-paper' in plt.style.available
              else 'seaborn-paper')

In [None]:
# Two directory levels above the kernel's working directory (presumably the
# repository root -- confirm against the project layout).
DELQSAR_ROOT = '{}/../../'.format(os.getcwd())

In [None]:
def getAvgTestLossesStdevs(model_type, multiTask=False, data=None):
    """Summarise the test losses of one model type for every data split.

    Parameters
    ----------
    model_type
        Value compared (after ``str()``) with the ``'model type'`` column.
    multiTask : bool, default False
        When False the eight split columns ``'random'``, ``'cycle1'``,
        ``'cycle2'``, ``'cycle3'``, ``'cycle12'``, ``'cycle13'``, ``'cycle23'``
        and ``'cycle123'`` are summarised; when True only ``'random'`` and
        ``'cycle123'``.
    data : pandas.DataFrame, optional
        Table to summarise.  When omitted, the notebook-level ``df_data`` is
        used, which is how every existing call in this notebook works.

    Returns
    -------
    tuple of (list, list)
        ``(avg_losses, stdevs)``: the mean and the sample standard deviation
        (``statistics.stdev``) of each split column, in the column order above.

    Raises
    ------
    statistics.StatisticsError
        If no row matches ``model_type``, or if only one row matches (the
        sample standard deviation needs at least two values).
    """
    if data is None:
        # Backward-compatible default: read whichever table was loaded last.
        data = df_data

    if multiTask:
        split_columns = ['random', 'cycle123']
    else:
        split_columns = ['random', 'cycle1', 'cycle2', 'cycle3',
                         'cycle12', 'cycle13', 'cycle23', 'cycle123']

    # Select the rows once instead of re-filtering the frame for every column.
    rows = data[data['model type'] == str(model_type)]
    if rows.empty:
        # Same exception type statistics.mean would raise on empty input,
        # but with a message that names the actual problem.
        raise statistics.StatisticsError(
            "no rows with model type {!r} in the loss table".format(str(model_type)))

    all_losses = np.array([rows[column] for column in split_columns])
    avg_losses = [statistics.mean(losses) for losses in all_losses]
    stdevs = [statistics.stdev(losses) for losses in all_losses]
    return avg_losses, stdevs

In [None]:
def make_plot_test_losses(img_name, y_lb, y_ub, multiTask=False, xlabels=True,
                          barWidth=0.25, eLineWidth=0.5, capSize=1, capThick=0.5):
    """Draw, save and show a grouped bar chart of average test losses.

    Bar heights and error bars are not passed in: they are read from
    notebook-level lists that must exist when the function is called
    (``OH_FFNN``/``OH_FFNN_stdevs`` ... ``D_MPNN_pt``/``D_MPNN_pt_stdevs`` in the
    default layout, the ``*_mt``/``*_st`` lists when ``multiTask`` is True).
    The figure is written to ``pathify(img_name)``, i.e. into the directory of
    whichever ``pathify`` definition was executed most recently.

    Parameters
    ----------
    img_name : str
        Output file name.  It also steers the layout of the default plot: a
        name containing ``'SIRT2'`` gives a taller figure and a name containing
        ``'CAIX'`` fixes the y ticks at 0.9, 1.0, 1.1, 1.2.
    y_lb, y_ub : float
        Lower and upper y-axis limits.
    multiTask : bool, default False
        Select the two-group multi-task/single-task comparison layout instead
        of the eight-group layout.
    xlabels : bool, default True
        Only used when ``multiTask`` is False: draw the split names under the
        groups, or hide the x tick labels altogether.
    barWidth : float, default 0.25
        Width of one bar; the six bars of a group sit side by side.
    eLineWidth, capSize, capThick : float
        Error-bar line width, cap size and cap thickness.
    """
    if not multiTask:
        # Figures whose file name contains 'SIRT2' are drawn taller.
        fig_height = 2.2 if 'SIRT2' in img_name else 1.5
        fig = plt.figure(figsize=(7, fig_height), dpi=300)
        series = [
            ('OH-FFNN', OH_FFNN, OH_FFNN_stdevs),
            ('FP-FFNN', FP_FFNN, FP_FFNN_stdevs),
            ('D-MPNN', D_MPNN, D_MPNN_stdevs),
            ('OH-FFNN_pt', OH_FFNN_pt, OH_FFNN_pt_stdevs),
            ('FP-FFNN_pt', FP_FFNN_pt, FP_FFNN_pt_stdevs),
            ('D-MPNN_pt', D_MPNN_pt, D_MPNN_pt_stdevs),
        ]
        legend_fontsize = 7
    else:
        fig = plt.figure(figsize=(3.33, 2.5), dpi=300)
        series = [
            ('OH-FFNN\nmulti-task', OH_FFNN_mt, OH_FFNN_mt_stdevs),
            ('FP-FFNN\nmulti-task', FP_FFNN_mt, FP_FFNN_mt_stdevs),
            ('D-MPNN\nmulti-task', D_MPNN_mt, D_MPNN_mt_stdevs),
            ('OH-FFNN\nsingle-task', OH_FFNN_st, OH_FFNN_st_stdevs),
            ('FP-FFNN\nsingle-task', FP_FFNN_st, FP_FFNN_st_stdevs),
            ('D-MPNN\nsingle-task', D_MPNN_st, D_MPNN_st_stdevs),
        ]
        legend_fontsize = 6

    colors = ["#4878D0", "#6ACC64", "#D65F5F",
              "#956CB4", "#D5BB67", "#82C6E2"]

    # Groups start two x-units apart; each series is shifted one bar width to
    # the right of the previous one.
    n_groups = len(series[0][1])
    positions = np.arange(2*n_groups, step=2)
    for (label, heights, errors), color in zip(series, colors):
        # A fresh error_kw dict per call, in case matplotlib fills in defaults in place.
        plt.bar(positions, heights, yerr=errors,
                error_kw={'elinewidth': eLineWidth, 'capsize': capSize, 'capthick': capThick},
                color=color, width=barWidth, label=label, zorder=2)
        positions = positions + barWidth

    leg = plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), numpoints=1,
                     fontsize=legend_fontsize)

    fig.canvas.draw() # required to get tick labels
    ax = plt.gca()
    ax.grid(zorder=1)
    ax.set_ylabel('Average test loss', fontsize=8)
    ax.set_ylim([y_lb, y_ub])

    # Centre of each six-bar group: first bar centre + 2.5 bar widths.
    group_centers = [start + 2.5*barWidth for start in np.arange(2*n_groups, step=2)]
    if not multiTask:
        if 'CAIX' in img_name:
            ax.set_yticks(np.arange(0.9, 1.25, step=0.1))
        ax.set_xticks(group_centers)
        ax.tick_params(axis='x', length=0)
        if xlabels:
            ax.set_xticklabels(['Random', 'Cycle 1', 'Cycle 2', 'Cycle 3', 'Cycle 1+2', 'Cycle 1+3',
                                'Cycle 2+3', 'Cycle 1+2+3'], rotation=60, ha='center')
        else:
            ax.tick_params(labelsize=8, bottom=False, labelbottom=False)
    else:
        ax.set_yticks(np.arange(y_lb, y_ub+0.01, step=0.05))
        ax.tick_params(labelsize=8)
        ax.set_xticks(group_centers)
        ax.set_xticklabels(['Random', 'Cycle\n1+2+3'], ha='center')
        ax.tick_params(axis='x', length=0)

    plt.tight_layout()
    # The legend sits outside the axes, so it is passed explicitly to the tight bounding box.
    plt.savefig(pathify(img_name), bbox_extra_artists=(leg,), bbox_inches='tight')
    plt.show()

# DD1S CAIX

In [None]:
# Create the output directory if needed; exist_ok avoids the check-then-create race.
os.makedirs('DD1S_CAIX_test_loss_bar_graphs', exist_ok=True)

def pathify(fname):
    """Return the path of `fname` inside the DD1S CAIX bar-graph output directory."""
    return os.path.join('DD1S_CAIX_test_loss_bar_graphs', fname)

## Test losses

In [None]:
# Load the DD1S CAIX test-loss table from DELQSAR_ROOT/experiments and display it.
# This rebinds the notebook-level df_data that getAvgTestLossesStdevs reads.
df_data = pd.read_csv(os.path.join(DELQSAR_ROOT, 'experiments', 'DD1S_CAIX_test_losses.csv'))
df_data

In [None]:
# Per-split mean test loss and sample standard deviation for each of the six
# model types, computed from the currently loaded df_data.
(
    (OH_FFNN, OH_FFNN_stdevs),
    (FP_FFNN, FP_FFNN_stdevs),
    (D_MPNN, D_MPNN_stdevs),
    (OH_FFNN_pt, OH_FFNN_pt_stdevs),
    (FP_FFNN_pt, FP_FFNN_pt_stdevs),
    (D_MPNN_pt, D_MPNN_pt_stdevs),
) = (
    getAvgTestLossesStdevs(model_name)
    for model_name in ('OH-FFNN', 'FP-FFNN', 'D-MPNN',
                       'OH-FFNN_pt', 'FP-FFNN_pt', 'D-MPNN_pt')
)

In [None]:
# Display the per-split mean losses, each model followed by its `_pt` counterpart.
OH_FFNN, OH_FFNN_pt, FP_FFNN, FP_FFNN_pt, D_MPNN, D_MPNN_pt

In [None]:
# Display the matching standard deviations in the same model order.
OH_FFNN_stdevs, OH_FFNN_pt_stdevs, FP_FFNN_stdevs, FP_FFNN_pt_stdevs, D_MPNN_stdevs, D_MPNN_pt_stdevs

In [None]:
# Labelled variant: saved under its own file name, because the unlabelled variant
# plotted next writes 'DD1S_CAIX_test_losses_bar_graph.png' and would overwrite it.
# The name still contains 'CAIX', which make_plot_test_losses uses to pick the y ticks.
make_plot_test_losses('DD1S_CAIX_test_losses_bar_graph_with_xlabels.png', 0.9, 1.25)

In [None]:
# Variant with the x tick labels hidden; written to
# 'DD1S_CAIX_test_losses_bar_graph.png' in the directory of the current pathify.
make_plot_test_losses('DD1S_CAIX_test_losses_bar_graph.png', 0.9, 1.25, xlabels=False)

# Triazine sEH

In [None]:
# Create the output directory if needed; exist_ok avoids the check-then-create race.
os.makedirs('triazine_sEH_test_loss_bar_graphs', exist_ok=True)

def pathify(fname):
    """Return the path of `fname` inside the triazine sEH bar-graph output directory."""
    return os.path.join('triazine_sEH_test_loss_bar_graphs', fname)

## Test losses

In [None]:
# Load the triazine sEH test-loss table from DELQSAR_ROOT/experiments and display it.
# This rebinds the notebook-level df_data that getAvgTestLossesStdevs reads.
df_data = pd.read_csv(os.path.join(DELQSAR_ROOT, 'experiments', 'triazine_sEH_test_losses.csv'))
df_data

In [None]:
# Recompute the per-split means and sample standard deviations for the six
# model types from the table now bound to df_data.
(
    (OH_FFNN, OH_FFNN_stdevs),
    (FP_FFNN, FP_FFNN_stdevs),
    (D_MPNN, D_MPNN_stdevs),
    (OH_FFNN_pt, OH_FFNN_pt_stdevs),
    (FP_FFNN_pt, FP_FFNN_pt_stdevs),
    (D_MPNN_pt, D_MPNN_pt_stdevs),
) = (
    getAvgTestLossesStdevs(model_name)
    for model_name in ('OH-FFNN', 'FP-FFNN', 'D-MPNN',
                       'OH-FFNN_pt', 'FP-FFNN_pt', 'D-MPNN_pt')
)

In [None]:
# Display the per-split mean losses, each model followed by its `_pt` counterpart.
OH_FFNN, OH_FFNN_pt, FP_FFNN, FP_FFNN_pt, D_MPNN, D_MPNN_pt

In [None]:
# Display the matching standard deviations in the same model order.
OH_FFNN_stdevs, OH_FFNN_pt_stdevs, FP_FFNN_stdevs, FP_FFNN_pt_stdevs, D_MPNN_stdevs, D_MPNN_pt_stdevs

In [None]:
# Plot the sEH losses on a 0.4-1.0 y-axis with the x tick labels hidden.
make_plot_test_losses('triazine_sEH_test_losses_bar_graph.png', 0.4, 1.0, xlabels=False)

# Triazine SIRT2

In [None]:
# Create the output directory if needed; exist_ok avoids the check-then-create race.
os.makedirs('triazine_SIRT2_test_loss_bar_graphs', exist_ok=True)

def pathify(fname):
    """Return the path of `fname` inside the triazine SIRT2 bar-graph output directory."""
    return os.path.join('triazine_SIRT2_test_loss_bar_graphs', fname)

## Test losses

In [None]:
# Load the triazine SIRT2 test-loss table from DELQSAR_ROOT/experiments and display it.
# This rebinds the notebook-level df_data that getAvgTestLossesStdevs reads.
df_data = pd.read_csv(os.path.join(DELQSAR_ROOT, 'experiments', 'triazine_SIRT2_test_losses.csv'))
df_data

In [None]:
# Recompute the per-split means and sample standard deviations for the six
# model types from the table now bound to df_data.
(
    (OH_FFNN, OH_FFNN_stdevs),
    (FP_FFNN, FP_FFNN_stdevs),
    (D_MPNN, D_MPNN_stdevs),
    (OH_FFNN_pt, OH_FFNN_pt_stdevs),
    (FP_FFNN_pt, FP_FFNN_pt_stdevs),
    (D_MPNN_pt, D_MPNN_pt_stdevs),
) = (
    getAvgTestLossesStdevs(model_name)
    for model_name in ('OH-FFNN', 'FP-FFNN', 'D-MPNN',
                       'OH-FFNN_pt', 'FP-FFNN_pt', 'D-MPNN_pt')
)

In [None]:
# Display the per-split mean losses, each model followed by its `_pt` counterpart.
OH_FFNN, OH_FFNN_pt, FP_FFNN, FP_FFNN_pt, D_MPNN, D_MPNN_pt

In [None]:
# Display the matching standard deviations in the same model order.
OH_FFNN_stdevs, OH_FFNN_pt_stdevs, FP_FFNN_stdevs, FP_FFNN_pt_stdevs, D_MPNN_stdevs, D_MPNN_pt_stdevs

In [None]:
# Plot the SIRT2 losses on a 0.4-0.9 y-axis with x tick labels; the 'SIRT2' in the
# file name makes make_plot_test_losses use its taller figure size.
make_plot_test_losses('triazine_SIRT2_test_losses_bar_graph.png', 0.4, 0.9)

# Triazine sEH + SIRT2

In [None]:
# Create the output directory if needed; exist_ok avoids the check-then-create race.
os.makedirs('triazine_multitask_sEH_SIRT2_test_loss_bar_graphs', exist_ok=True)

def pathify(fname):
    """Return the path of `fname` inside the multi-task sEH/SIRT2 bar-graph output directory."""
    return os.path.join('triazine_multitask_sEH_SIRT2_test_loss_bar_graphs', fname)

## Test losses for sEH

In [None]:
# Load the multi-task sEH test-loss table from DELQSAR_ROOT/experiments and display it.
# This rebinds the notebook-level df_data that getAvgTestLossesStdevs reads.
df_data = pd.read_csv(os.path.join(DELQSAR_ROOT, 'experiments', 'triazine_multitask_sEH_test_losses.csv'))
df_data

In [None]:
# Means and sample standard deviations over the 'random' and 'cycle123' splits
# for the multi-task (`_mt`) and single-task (`_st`) variant of each model.
(
    (OH_FFNN_mt, OH_FFNN_mt_stdevs),
    (FP_FFNN_mt, FP_FFNN_mt_stdevs),
    (D_MPNN_mt, D_MPNN_mt_stdevs),
    (OH_FFNN_st, OH_FFNN_st_stdevs),
    (FP_FFNN_st, FP_FFNN_st_stdevs),
    (D_MPNN_st, D_MPNN_st_stdevs),
) = (
    getAvgTestLossesStdevs(model_name, multiTask=True)
    for model_name in (
        'OH-FFNN_multi-task', 'FP-FFNN_multi-task', 'D-MPNN_multi-task',
        'OH-FFNN_single-task', 'FP-FFNN_single-task', 'D-MPNN_single-task',
    )
)

In [None]:
# Display the mean losses, pairing each model's multi-task and single-task results.
OH_FFNN_mt, OH_FFNN_st, FP_FFNN_mt, FP_FFNN_st, D_MPNN_mt, D_MPNN_st

In [None]:
# Display the matching standard deviations in the same model order.
OH_FFNN_mt_stdevs, OH_FFNN_st_stdevs, FP_FFNN_mt_stdevs, FP_FFNN_st_stdevs, D_MPNN_mt_stdevs, D_MPNN_st_stdevs

In [None]:
# Plot the sEH comparison in the two-group multi-task layout on a 0.5-0.7 y-axis.
make_plot_test_losses('triazine_multitask_sEH_test_losses_bar_graph.png', 0.5, 0.7, multiTask=True)

## Test losses for SIRT2

In [None]:
# Load the multi-task SIRT2 test-loss table from DELQSAR_ROOT/experiments and display it.
# This rebinds the notebook-level df_data that getAvgTestLossesStdevs reads.
df_data = pd.read_csv(os.path.join(DELQSAR_ROOT, 'experiments', 'triazine_multitask_SIRT2_test_losses.csv'))
df_data

In [None]:
# Recompute the 'random' / 'cycle123' means and sample standard deviations for
# the multi-task (`_mt`) and single-task (`_st`) variants from the new df_data.
(
    (OH_FFNN_mt, OH_FFNN_mt_stdevs),
    (FP_FFNN_mt, FP_FFNN_mt_stdevs),
    (D_MPNN_mt, D_MPNN_mt_stdevs),
    (OH_FFNN_st, OH_FFNN_st_stdevs),
    (FP_FFNN_st, FP_FFNN_st_stdevs),
    (D_MPNN_st, D_MPNN_st_stdevs),
) = (
    getAvgTestLossesStdevs(model_name, multiTask=True)
    for model_name in (
        'OH-FFNN_multi-task', 'FP-FFNN_multi-task', 'D-MPNN_multi-task',
        'OH-FFNN_single-task', 'FP-FFNN_single-task', 'D-MPNN_single-task',
    )
)

In [None]:
# Display the mean losses, pairing each model's multi-task and single-task results.
OH_FFNN_mt, OH_FFNN_st, FP_FFNN_mt, FP_FFNN_st, D_MPNN_mt, D_MPNN_st

In [None]:
# Display the matching standard deviations in the same model order.
OH_FFNN_mt_stdevs, OH_FFNN_st_stdevs, FP_FFNN_mt_stdevs, FP_FFNN_st_stdevs, D_MPNN_mt_stdevs, D_MPNN_st_stdevs

In [None]:
# Plot the SIRT2 comparison in the two-group multi-task layout on a 0.45-0.6 y-axis.
make_plot_test_losses('triazine_multitask_SIRT2_test_losses_bar_graph.png', 0.45, 0.6, multiTask=True)