In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules before
# each cell is executed, so edits to helper modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import pandas as pd
import numpy as np
import statistics
from tqdm import tqdm

In [None]:
import matplotlib
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old aliases, so prefer the new name and fall back to the
# legacy one on older installations.
_PAPER_STYLE = 'seaborn-v0_8-paper'
if _PAPER_STYLE not in plt.style.available:
    _PAPER_STYLE = 'seaborn-paper'
plt.style.use(_PAPER_STYLE)

matplotlib.rc('font', family='sans-serif')
# NOTE(review): this sets 'font.serif' although the active family is
# 'sans-serif', so it presumably has no visible effect. If Arial was meant to
# be the figure font, this should set 'font.sans-serif' instead. Left unchanged
# so that regenerated figures stay identical -- confirm intent.
matplotlib.rc('font', serif='Arial')
# Render text with Matplotlib's own engine rather than an external LaTeX.
matplotlib.rc('text', usetex=False)

In [None]:
# Base directory for locating input files: two levels above the current
# working directory (kept un-normalised, with a trailing separator).
DELQSAR_ROOT = f'{os.getcwd()}/../../'

In [None]:
# Create the output folder for the saved histograms. exist_ok=True makes this
# a no-op when the folder is already present (e.g. on re-running the cell) and
# avoids the gap between checking for the folder and creating it.
os.makedirs('DD1S_CAIX_KNN_k_histograms', exist_ok=True)
def pathify(fname):
    """Return the path of ``fname`` inside the histogram output folder."""
    output_dir = 'DD1S_CAIX_KNN_k_histograms'
    return os.path.join(output_dir, fname)

In [None]:
def make_histogram_for_hyperparam(df_data, model_type, metric, metric_name, img_name,
                                  k_values=(1, 3, 5, 7, 9)):
    """Plot, save and show a bar chart of how often each k value occurs.

    Rows of ``df_data`` whose 'model type' is in ``model_type`` and whose
    'metric' is in ``metric`` are selected, and the occurrences of each
    candidate value in their 'k' column are counted. Values of 'k' that are
    not listed in ``k_values`` do not appear in the chart.

    Parameters
    ----------
    df_data : pandas.DataFrame
        Table providing the columns 'model type', 'metric' and 'k'.
    model_type : str or list-like
        Model type(s) to keep; a single string is treated as a one-item list.
    metric : str or list-like
        Metric(s) to keep; a single string is treated as a one-item list.
    metric_name : str
        Human-readable metric name used in the x-axis label.
    img_name : str
        File name of the saved figure; resolved through ``pathify``.
    k_values : sequence, optional
        Candidate k values, one bar each, in plotting order.
        Defaults to ``(1, 3, 5, 7, 9)``.

    Returns
    -------
    None
        The figure is written to disk and displayed as a side effect.
    """
    import warnings

    # ``Series.isin`` rejects a bare string, so accept one as a convenience.
    if isinstance(model_type, str):
        model_type = [model_type]
    if isinstance(metric, str):
        metric = [metric]

    selected_rows = df_data['model type'].isin(model_type) & df_data['metric'].isin(metric)
    # Materialise the column once so counting each candidate does not have to
    # convert it again.
    best_ks = df_data.loc[selected_rows, 'k'].tolist()
    if not best_ks:
        # An all-zero chart usually means the filter values do not match the table.
        warnings.warn(
            f'No rows match model type {list(model_type)} and metric {list(metric)}; '
            f'the histogram saved as {img_name!r} will be empty.'
        )

    counts = [best_ks.count(k) for k in k_values]
    # String labels make the x-axis categorical, giving evenly spaced bars.
    labels = [str(k) for k in k_values]

    fig, ax = plt.subplots(figsize=(2.33, 1.6), dpi=300)
    ax.bar(labels, counts, width=0.9, zorder=2)
    ax.tick_params(labelsize=7)

    # Lower zorder than the bars so the grid is drawn behind them.
    ax.grid(zorder=1)
    ax.set_xlabel(f'best k\n(by {metric_name})', fontsize=8)
    ax.set_ylabel('frequency', fontsize=8)

    fig.tight_layout()
    fig.savefig(pathify(img_name))
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures on
    # backends that do not close them after showing.
    plt.close(fig)

In [None]:
# Load the k-optimization results table and display it.
df_data = pd.read_csv(
    os.path.join(
        DELQSAR_ROOT,
        'experiments',
        'DD1S_CAIX_KNN_k_optimization_results.csv',
    )
)
df_data

In [None]:
# OH-KNN, NLL metric.
make_histogram_for_hyperparam(
    df_data,
    model_type=['OH-KNN'],
    metric=['NLL'],
    metric_name='NLL loss',
    img_name='DD1S_CAIX_OH-KNN_NLL_k_histogram.png',
)

In [None]:
# OH-KNN, MSE metric.
make_histogram_for_hyperparam(
    df_data,
    model_type=['OH-KNN'],
    metric=['MSE'],
    metric_name='MSE loss',
    img_name='DD1S_CAIX_OH-KNN_MSE_k_histogram.png',
)

In [None]:
# OH-KNN, rank correlation coefficient metric.
make_histogram_for_hyperparam(
    df_data,
    model_type=['OH-KNN'],
    metric=['rank corr coeff'],
    metric_name='rank correlation coefficient',
    img_name='DD1S_CAIX_OH-KNN_rcc_k_histogram.png',
)

In [None]:
# FP-KNN, NLL metric.
make_histogram_for_hyperparam(
    df_data,
    model_type=['FP-KNN'],
    metric=['NLL'],
    metric_name='NLL loss',
    img_name='DD1S_CAIX_FP-KNN_NLL_k_histogram.png',
)

In [None]:
# FP-KNN, MSE metric.
make_histogram_for_hyperparam(
    df_data,
    model_type=['FP-KNN'],
    metric=['MSE'],
    metric_name='MSE loss',
    img_name='DD1S_CAIX_FP-KNN_MSE_k_histogram.png',
)

In [None]:
# FP-KNN, rank correlation coefficient metric.
make_histogram_for_hyperparam(
    df_data,
    model_type=['FP-KNN'],
    metric=['rank corr coeff'],
    metric_name='rank correlation coefficient',
    img_name='DD1S_CAIX_FP-KNN_rcc_k_histogram.png',
)