In [None]:
import os
import pandas as pd
from tensorboard.backend.event_processing import event_accumulator


In [None]:
def extract_and_transform_tensorboard_data(base_path, selected_metrics):
    """Collect the last logged value of TensorBoard scalars into a wide table.

    The directory layout walked is ``base_path/<experiment_id>/<subfolder>/``.
    Every ``<subfolder>`` that is a directory is passed to
    ``EventAccumulator`` as a log directory. For each scalar tag that is kept,
    the row with the highest ``step`` is taken, rounded to 2 decimals, and
    labelled with its tag and the experiment id.

    Parameters
    ----------
    base_path : str
        Directory whose sub-directories are treated as experiments. Entries
        that are not directories are ignored.
    selected_metrics : container of str or None
        Scalar tags to keep. ``None`` keeps every scalar tag found.

    Returns
    -------
    pandas.DataFrame
        One row per experiment: an ``experiment_id`` column plus one column
        per collected metric. When several log subfolders of one experiment
        provide the same metric, the value from the first subfolder in sorted
        name order is used. If no scalar was collected at all, an empty frame
        holding only the ``experiment_id`` column is returned.
    """
    experiments_data = []

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # aggfunc='first' choice below reproducible between runs and machines.
    for experiment_id in sorted(os.listdir(base_path)):
        exp_path = os.path.join(base_path, experiment_id)
        if not os.path.isdir(exp_path):
            # Stray files next to the experiment folders would otherwise make
            # the inner os.listdir() raise NotADirectoryError.
            continue

        for subfolder in sorted(os.listdir(exp_path)):
            log_dir = os.path.join(exp_path, subfolder)
            if not os.path.isdir(log_dir):
                continue

            ea = event_accumulator.EventAccumulator(log_dir)
            ea.Reload()

            for tag in ea.Tags()['scalars']:
                if selected_metrics is not None and tag not in selected_metrics:
                    continue

                metrics = pd.DataFrame(ea.Scalars(tag), columns=['wall_time', 'step', 'value'])

                # Row with the highest step, i.e. the last point logged for this tag.
                last_metric = round(metrics.sort_values('step', ascending=False).iloc[0], 2)
                last_metric['metric'] = tag
                last_metric['experiment_id'] = experiment_id

                experiments_data.append(last_metric)

    if not experiments_data:
        # Without any rows there is no 'value' column to pivot on, so
        # pivot_table() would raise; return an empty wide table instead.
        return pd.DataFrame(columns=['experiment_id'])

    full_data = pd.DataFrame(experiments_data)
    wide_data = full_data.pivot_table(index=['experiment_id'],
                                      columns='metric',
                                      values='value',
                                      aggfunc='first').reset_index()

    return wide_data

# Root directory that is scanned for experiment sub-directories.
base_path = '/data1/elisa/audio/NEURIPS_RESULTS_LINEAR'

# Scalar tags to keep when building the wide table.
selected_metrics = [
    'train_acc_micro_epoch',
    'val_auc',
    'val_precision_recall_auc',
    'val_f1',
    'val_f1_weighted',
    'val_acc_macro_epoch',
    'val_precision',
    'val_recall',
    'val_acc',
    'test_auc',
    'test_precision',
    'test_recall',
    'test_precision_recall_auc',
    'test_f1',
    'test_f1_weighted',
    'test_acc',
]

# Wide table built from the logs under base_path, restricted to the tags above.
data = extract_and_transform_tensorboard_data(
    base_path,
    selected_metrics=selected_metrics,
)

In [None]:
# Work on an independent copy so later steps leave `data` untouched.
df = data.copy(deep=True)

In [None]:
import re

def extract_annotator(metric):
    """Return the token found between ``Wav2Vec2_`` and ``_audio_fold_<digits>``.

    The pattern is searched anywhere inside ``metric``. The captured token
    cannot contain an underscore. ``None`` is returned when the pattern does
    not occur.
    """
    found = re.search(r'Wav2Vec2_([^_]+)_audio_fold_\d+', metric)
    if not found:
        return None
    return found.group(1)

# Same table without the experiment_id identifier column.
df_numeric = df.drop(columns='experiment_id')

In [None]:
# Per-column mean and sample standard deviation (ddof=1) of df_numeric.
mean_df = df_numeric.mean(axis=0)
std_df = df_numeric.std(axis=0, ddof=1)