In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell runs and local code edits take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import pandas as pd
import numpy as np
import statistics
from tqdm import tqdm

In [None]:
import matplotlib
import matplotlib.pyplot as plt
# The bundled seaborn styles were renamed with a 'seaborn-v0_8-' prefix in
# matplotlib 3.6 and the old names were removed afterwards. Use whichever
# name this installation provides so the cell runs on old and new versions.
paper_style = 'seaborn-paper' if 'seaborn-paper' in plt.style.available else 'seaborn-v0_8-paper'
plt.style.use(paper_style)

matplotlib.rc('font', family='sans-serif')
# NOTE(review): with family='sans-serif', the 'font.serif' list set here is
# presumably never consulted; the intent was likely
# matplotlib.rc('font', **{'sans-serif': 'Arial'}). Left unchanged so that
# regenerated figures keep their current appearance -- confirm before changing.
matplotlib.rc('font', serif='Arial')
# Boolean instead of the string 'false' (matplotlib validates both to False).
matplotlib.rc('text', usetex=False)

In [None]:
# Path two directory levels above the current working directory; later cells
# join 'experiments/...' onto it to locate the result CSV files.
DELQSAR_ROOT = f'{os.getcwd()}/../../'

In [None]:
# Directory (relative to the working directory) that receives every saved figure.
HISTOGRAM_DIR = 'DD1S_CAIX_hyperparameter_histograms'

# exist_ok=True makes this cell safe to re-run and avoids the separate
# "check, then create" steps.
os.makedirs(HISTOGRAM_DIR, exist_ok=True)


def pathify(fname):
    """Return the path of `fname` inside the histogram output directory."""
    return os.path.join(HISTOGRAM_DIR, fname)

In [None]:
def make_histogram_for_hyperparam(df_data, model_types, hyperparam, hyperparam_label, img_name, ytick_step_sizes):
    """Plot, save and show how often each value of one hyperparameter occurs.

    Parameters
    ----------
    df_data : pandas.DataFrame
        Table with a 'model type' column and a column named after `hyperparam`.
    model_types : list-like of str
        Only rows whose 'model type' is in this collection are counted.
    hyperparam : str
        One of 'depth', 'hidden size', 'FFN num layers' or 'dropout'.
    hyperparam_label : str
        Text used as the x-axis label.
    img_name : str
        File name handed to `pathify` to build the output path.
    ytick_step_sizes : sequence of two numbers
        y-tick spacing for the 'hidden size' plot (index 0) and for the
        'dropout' plot (index 1). The other plots keep the default y-ticks.

    Raises
    ------
    ValueError
        If `hyperparam` is not one of the supported names.
    """
    # Grid of values shown on the x-axis and the bar width, per hyperparameter.
    grids = {
        'depth': (list(range(2, 7)), 0.95),
        'hidden size': (list(range(300, 2401, 100)), 0.9),
        'FFN num layers': (list(range(1, 4)), 0.95),
        'dropout': ([round(0.05 * k, 2) for k in range(11)], 0.95),
    }
    # Fail early and clearly instead of hitting an unbound `vals` further down.
    if hyperparam not in grids:
        raise ValueError(
            f'Unsupported hyperparameter {hyperparam!r}; expected one of {sorted(grids)}'
        )
    vals, bar_width = grids[hyperparam]

    selected_rows = df_data[df_data['model type'].isin(model_types)]
    # Materialise the column once rather than once per grid value.
    observed = list(selected_rows[str(hyperparam)])
    if hyperparam == 'dropout':
        # Snap to the grid precision so values carrying floating-point noise
        # (e.g. 0.15000000000000002) are still counted under 0.15.
        observed = [round(value, 2) for value in observed]
    counts = [observed.count(val) for val in vals]

    if hyperparam == 'dropout':
        # ':g' drops trailing zeros: 0.0 -> '0', 0.1 -> '0.1', 0.15 -> '0.15'.
        val_labels = [f'{val:g}' for val in vals]
    else:
        val_labels = [str(val) for val in vals]

    fig, ax = plt.subplots(figsize=(2.33, 1.5), dpi=300)
    ax.bar(val_labels, counts, width=bar_width, zorder=2)

    # Draw once before the tick labels are overridden below (order kept from
    # the original implementation).
    fig.canvas.draw()
    ax.tick_params(labelsize=7)

    # Grid lines sit below the bars (zorder 1 < 2).
    ax.grid(zorder=1)
    if hyperparam == 'hidden size':
        # Label only every multiple of 300 to keep the crowded axis readable.
        sparse_labels = [str(val) if val % 300 == 0 else '' for val in vals]
        ax.set_xticklabels(sparse_labels, rotation=90)
        ax.set_yticks(np.arange(min(counts), max(counts) + 1, ytick_step_sizes[0]))
    elif hyperparam == 'dropout':
        # Label every second grid value: 0, 0.1, ..., 0.5.
        sparse_labels = [label if idx % 2 == 0 else '' for idx, label in enumerate(val_labels)]
        ax.set_xticklabels(sparse_labels)
        ax.set_yticks(np.arange(min(counts), max(counts) + 1, ytick_step_sizes[1]))
    ax.set_xlabel(str(hyperparam_label), fontsize=8)
    ax.set_ylabel('frequency', fontsize=8)

    fig.tight_layout()
    fig.savefig(pathify(img_name))
    plt.show()
    # Release the figure; this function is called in loops that create many.
    plt.close(fig)

# Regression models

In [None]:
# Read the hyperparameter optimization results for the regression models.
regression_results_csv = os.path.join(
    DELQSAR_ROOT, 'experiments', 'DD1S_CAIX_hyperparameter_optimization_results.csv'
)
df_data = pd.read_csv(regression_results_csv)
df_data

In [None]:
# Maps each hyperparameter column name to the x-axis label used in its plot.
hyperparams = {'depth': 'depth', 'FFN num layers': 'number of FFN layers',
               'hidden size': 'hidden layer size', 'dropout': 'dropout rate'}
# Plain nested list: these groups have unequal lengths, and wrapping them in
# np.array builds a ragged array, which NumPy >= 1.24 rejects with ValueError.
model_types = [['D-MPNN'], ['D-MPNN_pt'], ['D-MPNN', 'D-MPNN_pt']]
# One histogram per (hyperparameter, model-type group) pair.
for h, h_label in hyperparams.items():
    for m in model_types:
        make_histogram_for_hyperparam(df_data, m, h, h_label,
                                      f'DD1S_CAIX_{h}_histogram_{"_".join(m)}.png', [2, 5])

# Binary classifiers

In [None]:
# Read the hyperparameter optimization results for the binary classifiers.
binary_results_csv = os.path.join(
    DELQSAR_ROOT, 'experiments', 'bin_DD1S_CAIX_hyperparameter_optimization_results.csv'
)
df_data = pd.read_csv(binary_results_csv)
df_data

In [None]:
# Maps each hyperparameter column name to the x-axis label used in its plot.
hyperparams = {
    'depth': 'depth',
    'FFN num layers': 'number of FFN layers',
    'hidden size': 'hidden layer size',
    'dropout': 'dropout rate',
}
# One histogram per hyperparameter, restricted to the 'D-MPNN' rows.
for h, axis_label in hyperparams.items():
    make_histogram_for_hyperparam(
        df_data, ['D-MPNN'], h, axis_label,
        f'bin_DD1S_CAIX_{h}_histogram_D-MPNN.png', [1, 1],
    )