In [None]:
import os
from pathlib import Path

import pandas as pd
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
import matplotlib.pyplot as plt

In [None]:
# TensorBoard event file inspected by the cells below.
event_file = '/home/tolhsadum/events.out.tfevents.1691579244.pea2.dartmoor.3370.1'

# Create the accumulator and call Reload() before querying its tags.
event_acc = EventAccumulator(event_file)
event_acc.Reload()
scalar_tags = event_acc.Tags()['scalars']

# Show the name of every scalar tag.
print("Scalar Tags:")
for tag in scalar_tags:
    print(tag)

# Scalar tag names this notebook works with:
#   epoch_train_loss
#   epoch_train_ctr_loss
#   val_mean_dice
#   ctr_val_loss
#   ctr_val_volume
#   val_distance
#   val_best_mean_dice
#   val_best_mean_loss
#   val_best_mean_distance
#
# Abbreviations: train = training, ctr = controls, val = validation

# Human-readable label for each scalar tag.
translate_dict = {
    'epoch_train_loss': 'Training Loss',
    'epoch_train_ctr_loss': 'Training Controls Loss',
    'val_mean_dice': 'Validation Mean Dice',
    'ctr_val_loss': 'Validation Controls Loss',
    'ctr_val_volume': 'Validation Controls Volume',
    'val_distance': 'Validation Distance',
    'val_best_mean_dice': 'Validation Best Mean Dice',
    'val_best_mean_loss': 'Validation Best Mean Loss',
    'val_best_mean_distance': 'Validation Best Mean Distance',
}

# Hand-picked per-epoch scalars plotted by the following cells.
filtered_tags = [
    'epoch_train_loss',
    'epoch_train_ctr_loss',
    'val_mean_dice',
    'val_distance',
    'ctr_val_volume',
]

# Every scalar tag whose name contains 'best'.
best_tags = [name for name in scalar_tags if 'best' in name]

In [None]:
def plot_scalar_stack(event_acc, tags, figsize=(10, 10)):
    """
    Plot each scalar of `tags` in its own subplot, stacked in a single column.

    Parameters:
    event_acc: The loaded accumulator; `event_acc.Scalars(tag)` must return events with `step` and `value`.
    tags (list): The scalar tag names to plot, one subplot per tag.
    figsize (tuple, optional): The figure size passed to `plt.subplots`. Defaults to (10, 10).
    """
    if not tags:
        # plt.subplots() cannot build a grid with zero rows, so there is nothing to draw.
        print('No tags to plot.')
        return

    # squeeze=False keeps `axs` two-dimensional even when there is a single tag;
    # with the default squeeze a single subplot is returned as a bare Axes and cannot be indexed.
    fig, axs = plt.subplots(len(tags), 1, figsize=figsize, squeeze=False)
    for ax, tag in zip(axs[:, 0], tags):
        x, y = zip(*[(s.step, s.value) for s in event_acc.Scalars(tag)])
        ax.plot(x, y)
        ax.set_title(tag)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(tag)
    fig.tight_layout()
    plt.show()


# plot each hand-picked scalar in a different subplot
plot_scalar_stack(event_acc, filtered_tags)
# do the same for the tags containing 'best'
plot_scalar_stack(event_acc, best_tags)



In [None]:
# do the same for both tags lists but point to the best value in the plot and display its value
def pick_best_func(tag):
    """
    Return the function selecting the best value of a scalar.

    For each tag containing "loss", "distance" or "volume" the best value is the lowest (min);
    for every other tag it is the highest (max).
    """
    return min if 'loss' in tag or 'distance' in tag or 'volume' in tag else max


# lists parallel to the tags lists, holding the best value function to apply to each tag
best_func_filtered_tags = [pick_best_func(tag) for tag in filtered_tags]
best_func_best_tags = [pick_best_func(tag) for tag in best_tags]


def plot_scalar_stack_with_best(event_acc, tags):
    """
    Plot each scalar of `tags` in its own subplot and mark its best value with a red dot.

    The best value function is derived from the tag name with `pick_best_func`, and the best
    value is shown in the subplot title.

    Parameters:
    event_acc: The loaded accumulator; `event_acc.Scalars(tag)` must return events with `step` and `value`.
    tags (list): The scalar tag names to plot, one subplot per tag.
    """
    if not tags:
        # plt.subplots() cannot build a grid with zero rows, so there is nothing to draw.
        print('No tags to plot.')
        return

    # squeeze=False keeps `axs` two-dimensional even when there is a single tag.
    fig, axs = plt.subplots(len(tags), 1, figsize=(10, len(tags) * 5), squeeze=False)
    for ax, tag in zip(axs[:, 0], tags):
        x, y = zip(*[(s.step, s.value) for s in event_acc.Scalars(tag)])
        ax.plot(x, y)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(tag)
        # find the best value and the step at which it first occurs
        best_value = pick_best_func(tag)(y)
        best_epoch = x[y.index(best_value)]
        ax.plot(best_epoch, best_value, 'ro')
        ax.set_title(f'{tag} (Best value: {best_value:.4f})')
    fig.tight_layout()
    plt.show()


plot_scalar_stack_with_best(event_acc, filtered_tags)
plot_scalar_stack_with_best(event_acc, best_tags)

In [None]:
# plot each scalar in a different figure and save each plot as a separate image file
output_folder = Path('/data/Dropbox (GIN)/result_ucl_lesseg')  # replace with your output folder path
# savefig does not create missing directories, so make sure the folder exists first
output_folder.mkdir(parents=True, exist_ok=True)

# the hand-picked tags first, then the 'best' tags
for tag in list(filtered_tags) + list(best_tags):
    x, y = zip(*[(s.step, s.value) for s in event_acc.Scalars(tag)])
    # The best value is the lowest for tags containing "loss", "distance" or "volume" and the
    # highest otherwise. It is derived from the tag name here instead of being looked up by
    # position in best_func_filtered_tags / best_func_best_tags, which a later cell rebinds to dicts.
    best_value_func = min if 'loss' in tag or 'distance' in tag or 'volume' in tag else max
    best_value = best_value_func(y)
    best_epoch = x[y.index(best_value)]

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(x, y)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(tag)
    ax.plot(best_epoch, best_value, 'ro')
    ax.set_title(f'Best value: {best_value:.4f}')
    fig.tight_layout()
    fig.savefig(output_folder / f'{tag}.png')
    plt.close(fig)  # close the figure

In [None]:
def load_tensorboard_file(event_file, verbose=False):
    """
    Load a TensorBoard event file into an EventAccumulator.

    Parameters:
    event_file (str): The path handed to EventAccumulator.
    verbose (bool, optional): When True, print "Tags:" followed by every scalar tag. Defaults to False.

    Returns:
    EventAccumulator: The accumulator, after Reload() has been called on it.
    """
    accumulator = EventAccumulator(event_file)
    accumulator.Reload()

    # Quiet mode: nothing to report, hand the accumulator back right away.
    if not verbose:
        return accumulator

    print("Tags:")
    for scalar_tag in accumulator.Tags()['scalars']:
        print(scalar_tag)
    return accumulator

def create_and_save_plots(event_acc, best_func_dict=None, output_folder=None, display_plot=True):
    """
    Create one plot per scalar tag of an accumulator, marking the best value with a red dot.

    Each plot shows the scalar values against their steps; the best value is shown in the title.
    Tags without any recorded event are skipped.

    Parameters:
    event_acc: The loaded accumulator; it must provide `Tags()['scalars']` and `Scalars(tag)`.
    best_func_dict (dict, optional): A dictionary mapping tag names to the function computing the best value
        (e.g. min or max). Tags missing from the dictionary use min. Defaults to None (min for every tag).
    output_folder (str or Path, optional): The folder where the plots are saved as <tag>.png. It is created
        if it doesn't exist. Defaults to None (nothing is saved).
    display_plot (bool, optional): Whether to display each plot. Defaults to True.
    """
    scalar_tags = event_acc.Tags()['scalars']

    # No dictionary means "min for every tag", which is also the fallback of the lookup below
    if best_func_dict is None:
        best_func_dict = {}

    if output_folder is not None:
        # Accept plain strings as well as Path objects, and make sure the folder exists
        output_folder = Path(output_folder)
        output_folder.mkdir(parents=True, exist_ok=True)

    for tag in scalar_tags:
        events = event_acc.Scalars(tag)
        if not events:
            continue  # nothing to plot, and no best value can be computed
        x = [event.step for event in events]
        y = [event.value for event in events]

        # Find the best value and the step at which it first occurs
        best_value = best_func_dict.get(tag, min)(y)
        best_epoch = x[y.index(best_value)]

        fig, ax = plt.subplots(figsize=(10, 5))
        try:
            ax.plot(x, y)
            ax.set_xlabel('Epoch')
            ax.set_ylabel(tag)
            ax.plot(best_epoch, best_value, 'ro')
            ax.set_title(f'Best value: {best_value:.4f}')
            fig.tight_layout()
            if output_folder is not None:
                target = output_folder / f'{tag}.png'
                # A tag containing '/' maps to a sub-folder, which has to exist before saving
                target.parent.mkdir(parents=True, exist_ok=True)
                fig.savefig(target)
            if display_plot:
                plt.show()
        finally:
            plt.close(fig)  # always release the figure, even if plotting or saving failed



In [None]:
def load_all_event_files(model_folder):
    """
    Load the latest event file from each subfolder in the given model folder.

    This function iterates over the subfolders of the model folder in sorted order, searches each subfolder
    recursively for files named 'events.out.tfevents.*', keeps the most recently modified one and loads it with
    `load_tensorboard_file`. Subfolders without any event file are left out of the result.

    Parameters:
    model_folder (str or Path): The path to the model folder containing the subfolders with event files.

    Returns:
    dict: A dictionary where the keys are the fold names (subfolder names, in sorted order) and the values are the
    objects returned by `load_tensorboard_file` for the latest event file in each subfolder.
    """
    model_folder = Path(model_folder)
    fold_data = {}

    # iterdir() yields entries in arbitrary order; sorting keeps the fold order (and therefore the
    # legend and colour assignment of the per-fold plots) stable between runs.
    for fold_folder in sorted(model_folder.iterdir()):
        if not fold_folder.is_dir():
            continue
        # Keep regular files only: a directory matching the pattern is not an event file
        event_files = [path for path in fold_folder.rglob('events.out.tfevents.*') if path.is_file()]
        if not event_files:
            continue
        latest_event_file = max(event_files, key=os.path.getmtime)
        fold_data[fold_folder.name] = load_tensorboard_file(str(latest_event_file))

    return fold_data


def plot_scalar_per_fold(fold_data, scalar_name, output_path=None, display_plot=True, best_func=None):
    """
    Plot the values of a scalar per fold and optionally save the plot to a file.

    Every fold is first checked for the scalar, so nothing is drawn when one of them lacks it. The values of the
    scalar are then plotted on a single figure, one line per fold, with the fold name as the label.

    If output_path is provided, the function will save the plot to the specified path. If output_path is an existing
    directory, or a path without a file extension, it is treated as a directory: the directory is created if it
    doesn't exist and the plot is saved as scalar_name_folds_plot.png in it. Otherwise output_path is treated as the
    image file and its parent directory is created if needed.

    If best_func is provided, the function will compute the best value for each fold, signal it with a red dot on the
    plot, and add the step and value of the best point next to the fold name in the legend.

    Parameters:
    fold_data (dict): A dictionary where the keys are the fold names and the values are the EventAccumulator objects.
    scalar_name (str): The name of a scalar.
    output_path (str or Path, optional): The path where the plot will be saved (file or directory, see above).
        Defaults to None.
    display_plot (bool, optional): Whether to display the plot. Defaults to True.
    best_func (function or dict, optional): A function to compute the best value for each scalar, or a dictionary mapping
        scalar names to such functions. Defaults to None.

    Raises:
    ValueError: If an EventAccumulator object does not contain the scalar.
    KeyError: If best_func is a dictionary without an entry for scalar_name.
    """
    # Validate before drawing, so a missing scalar cannot leave a half-drawn figure behind
    for fold_name, event_acc in fold_data.items():
        if scalar_name not in event_acc.Tags()['scalars']:
            raise ValueError(f"The scalar '{scalar_name}' is not found in the fold '{fold_name}'.")

    # Dedicated figure: lines left on pyplot's current figure by other code cannot leak into this plot
    fig, ax = plt.subplots()
    try:
        for fold_name, event_acc in fold_data.items():
            x, y = zip(*[(s.step, s.value) for s in event_acc.Scalars(scalar_name)])

            label = fold_name
            if best_func is not None:
                # If best_func is a dictionary, get the function for the current scalar_name
                # Otherwise, use best_func directly
                best_func_scalar = best_func[scalar_name] if isinstance(best_func, dict) else best_func
                best_value = best_func_scalar(y)
                best_epoch = x[y.index(best_value)]
                ax.plot(best_epoch, best_value, 'ro')
                label = f'{fold_name}, best at {best_epoch}: {best_value:.4f}'

            ax.plot(x, y, label=label)

        ax.set_xlabel('Epoch')
        ax.set_ylabel(scalar_name)
        ax.legend()

        if output_path is not None:
            output_path = Path(output_path)
            if output_path.is_dir() or not output_path.suffix:
                # Directory target: a directory that does not exist yet can only be recognised
                # by its missing file extension
                output_path.mkdir(parents=True, exist_ok=True)
                output_path = output_path / f'{scalar_name}_folds_plot.png'
            else:
                output_path.parent.mkdir(parents=True, exist_ok=True)
            fig.savefig(output_path)

        if display_plot:
            plt.show()
    finally:
        plt.close(fig)  # always release the figure, even if plotting or saving failed

In [None]:
# /data/Dropbox (GIN)/UCL_Data/events_files contains several subfolders, each containing subfolders with event files
# For each sub-subfolder, load the latest event file and create and save the plots in
# /data/Dropbox (GIN)/UCL_Data/events_files/plots/subfolder_name/subsubfolder_name

def best_func_for_tag(tag):
    """Return min for tags containing 'loss', 'distance' or 'volume' (lower is better), max otherwise."""
    return min if 'loss' in tag or 'distance' in tag or 'volume' in tag else max


best_func_filtered_tags = {tag: best_func_for_tag(tag) for tag in filtered_tags}
best_func_best_tags = {tag: best_func_for_tag(tag) for tag in best_tags}
print(best_func_filtered_tags)
root_folder = Path('/data/Dropbox (GIN)/UCL_Data/events_files')
for subfolder in sorted(root_folder.iterdir()):
    # root_folder / 'plots' should not be processed, and stray files cannot be iterated
    if subfolder.name == 'plots' or not subfolder.is_dir():
        continue
    print(f'Processing {subfolder.name}')
    # for each subfolder in subfolder
    for subsubfolder in sorted(subfolder.iterdir()):
        if not subsubfolder.is_dir():
            continue
        # Load the event file
        event_file_list = list(subsubfolder.rglob('events.out.tfevents.*'))
        if not event_file_list:
            # max() of an empty list raises, so report the folder and move on
            print(f'No event file found in {subsubfolder.name}, skipping')
            continue
        # only keep the latest event file using the date of modification
        event_file = max(event_file_list, key=os.path.getmtime)
        event_acc = load_tensorboard_file(str(event_file))
        # Create the output folder subfolder_name/subsubfolder_name
        output_folder = root_folder / 'plots' / subfolder.name / subsubfolder.name
        output_folder.mkdir(parents=True, exist_ok=True)
        # create_and_save_plots plots every scalar of the file and falls back to min for tags missing
        # from the dictionary, so build the dictionary from all the scalars of this file; passing only
        # the 'best' tags would mark the minimum of higher-is-better scalars such as val_mean_dice
        best_func_all_tags = {tag: best_func_for_tag(tag) for tag in event_acc.Tags()['scalars']}
        # Create and save the plots
        create_and_save_plots(event_acc, best_func_all_tags, output_folder=output_folder, display_plot=False)

In [None]:
root_folder = Path('/data/Dropbox (GIN)/UCL_Data/events_files')
# for each subfolder in root_folder call plot_scalar_per_fold to save each scalar plot in
# root_folder/plots/subfolder_name/global_plots/
# the best value is the lowest for tags containing "loss", "distance" or "volume", the highest otherwise
best_func_best_tags = {
    tag: min if 'loss' in tag or 'distance' in tag or 'volume' in tag else max
    for tag in best_tags
}
for subfolder in sorted(root_folder.iterdir()):
    # root_folder / 'plots' should not be processed, and load_all_event_files cannot iterate a stray file
    if subfolder.name == 'plots' or not subfolder.is_dir():
        continue
    print(f'Processing {subfolder.name}')
    # Load the event files
    fold_data = load_all_event_files(subfolder)
    if not fold_data:
        # nothing to compare: avoid writing empty plots
        print(f'No event file found in {subfolder.name}, skipping')
        continue
    # Create the output folder subfolder_name/global_plots
    output_folder = root_folder / 'plots' / subfolder.name / 'global_plots'
    output_folder.mkdir(parents=True, exist_ok=True)
    # Create and save the plots
    for tag in best_tags:
        plot_scalar_per_fold(fold_data, tag, output_path=output_folder, display_plot=False, best_func=best_func_best_tags)