In [None]:
import os
from glob import glob

fig_dir = "figures/crossvalidation"
os.makedirs(fig_dir, exist_ok=True)

# hyperparameters encoded in a crossvalidation folder name, in the order in
# which their values appear
CV_PARAMETER_NAMES = ["first_layer_channels", "dropout_rate", "learning_rate", "fold"]


def parse_cv_folder_name(folder):
    """Parse a crossvalidation folder path into its model name and parameters.

    Only the final path component is parsed. It is split on "-": field 0 is
    the model name and fields 2, 4, 6, ... are the parameter values, which are
    matched in order with ``CV_PARAMETER_NAMES``.

    Parameters
    ----------
    folder : str
        Path (or bare name) of a crossvalidation folder.

    Returns
    -------
    dict
        ``{"model": <str>, <parameter name>: <float>, ...}``.

    Raises
    ------
    ValueError
        If a value field cannot be converted to ``float``.
    """
    # Work on the folder's own name only, so that path separators or dashes in
    # the parent directories cannot shift the fields.
    fields = os.path.basename(os.path.normpath(folder)).split("-")
    # NOTE(review): a value that itself contains "-" (e.g. "1e-05") would be
    # split into two fields -- confirm the naming scheme never produces one.
    details = {"model": fields[0]}
    for name, value in zip(CV_PARAMETER_NAMES, fields[2::2]):
        details[name] = float(value)
    return details


def last_model_dir(folder):
    """Return the alphabetically last ``model*`` entry inside ``folder``.

    Raises
    ------
    FileNotFoundError
        If ``folder`` contains no entry matching ``model*``.
    """
    candidates = sorted(glob(os.path.join(folder, "model*")))
    if not candidates:
        raise FileNotFoundError(
            f"no 'model*' entry found in crossvalidation folder {folder!r}"
        )
    return candidates[-1]


# set up the crossvalidation folders (sorted, because glob returns matches in
# arbitrary order and the row order should be reproducible)
crossvalidation_folders = sorted(
    glob(
        os.path.join(
            "..",
            "..",
            "scripts",
            "segmentation_training",
            "*fold*",
        )
    )
)

# parse the crossvalidation folders into parameters
cv_details = [
    {"folder": last_model_dir(folder), **parse_cv_folder_name(folder)}
    for folder in crossvalidation_folders
]

In [None]:
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator


# map each result key to the TensorBoard scalar tag it is read from
scalar_tags = [("train_loss", "Loss/train"), ("val_loss", "Loss/validation")]
scalar_tags += [
    (name, f"Metrics/{name}")
    for name in ["auprc", "recall", "precision", "f1", "accuracy"]
]

# attach the logged scalar series to every crossvalidation run
for config in cv_details:
    # pick the first event file matched inside the run's log directory
    event_pattern = os.path.join(config["folder"], "log", "events.out*")
    event_file = glob(event_pattern)[0]

    # read the event file into memory
    event_acc = EventAccumulator(event_file)
    event_acc.Reload()

    # keep only the values of each scalar series, stored under its result key
    for key, tag in scalar_tags:
        config[key] = [event.value for event in event_acc.Scalars(tag)]

In [None]:
import pandas as pd
import numpy as np


# one row per crossvalidation run, built from the parsed folder details and
# the loaded scalar series
df_results = pd.DataFrame.from_dict(cv_details)

# the parameters were parsed as floats; channel counts and fold indices are
# whole numbers, so store them as integers
df_results["first_layer_channels"] = df_results["first_layer_channels"].astype(int)
# cast "fold" itself -- writing the result into "folder" would overwrite the
# model directory path stored in that column
df_results["fold"] = df_results["fold"].astype(int)

# summarise each validation curve by its lowest loss and the position in the
# series at which that loss was reached
df_results["min_validation_loss"] = df_results["val_loss"].apply(min)
df_results["idx_min_validation_loss"] = df_results["val_loss"].apply(np.argmin)
df_results.head()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# fonttype 42 makes the PDF/PS backends embed text as TrueType fonts
import matplotlib

matplotlib.rcParams.update({"pdf.fonttype": 42, "ps.fonttype": 42})

# font sizes shared by every figure element
SMALL_SIZE = 14
MEDIUM_SIZE = 16
BIGGER_SIZE = 18

plt.rcParams.update(
    {
        "font.family": "serif",
        "font.size": SMALL_SIZE,  # default text size
        "axes.titlesize": SMALL_SIZE,  # axes title
        "axes.labelsize": MEDIUM_SIZE,  # x and y labels
        "xtick.labelsize": SMALL_SIZE,  # x tick labels
        "ytick.labelsize": SMALL_SIZE,  # y tick labels
        "legend.fontsize": SMALL_SIZE,  # legend entries
        "figure.titlesize": BIGGER_SIZE,  # figure title
    }
)

# one colour per hue level in the plots
palette = ["teal", "plum", "goldenrod"]

# Best validation loss against learning rate, one line per first-layer size
f, ax = plt.subplots(figsize=(6, 4))
ax.set_xscale("log")
sns.lineplot(
    x="learning_rate",
    y="min_validation_loss",
    hue="first_layer_channels",
    data=df_results,
    palette=palette,
    linewidth=3,
    ax=ax,
)
ax.set(xlabel="Learning Rate", ylabel="Best Validation Loss")
ax.legend(title="Network First Layer Size")
ax.grid(axis="y")
f.tight_layout()

# save the figure once per output format before displaying it
fig_file = os.path.join(fig_dir, "best_validation_loss.{}")
for extension in ("png", "pdf"):
    f.savefig(fig_file.format(extension))
plt.show()