In [None]:
import os
import pandas as pd
from tensorboard.backend.event_processing import event_accumulator


In [None]:
def extract_and_transform_tensorboard_data(base_path, selected_metrics):
    """Collect the final value of TensorBoard scalar tags for every experiment.

    Every directory directly under ``base_path`` is treated as one experiment
    and every directory inside it is loaded as a TensorBoard event log. For
    each scalar tag of interest, the event with the highest ``step`` is kept
    and its numeric fields are rounded to 2 decimals.

    Args:
        base_path: Directory whose sub-directories are the experiments; each
            sub-directory name is used as ``experiment_id``.
        selected_metrics: Container of scalar tag names to keep, or ``None``
            to keep every scalar tag found in the logs.

    Returns:
        A wide ``pandas.DataFrame`` with an ``experiment_id`` column plus one
        column per metric. When several log sub-directories of the same
        experiment report the same tag, the value from the first one (in
        sorted name order) is used. If no scalar is collected at all, an
        empty frame containing only the ``experiment_id`` column is returned.
    """
    records = []

    # Sorted so the outcome of aggfunc='first' below does not depend on the
    # arbitrary ordering of os.listdir.
    for experiment_id in sorted(os.listdir(base_path)):
        exp_path = os.path.join(base_path, experiment_id)
        if not os.path.isdir(exp_path):
            # A stray file next to the experiment folders would otherwise
            # make the inner os.listdir raise NotADirectoryError.
            continue

        for subfolder in sorted(os.listdir(exp_path)):
            log_dir = os.path.join(exp_path, subfolder)
            if not os.path.isdir(log_dir):
                continue

            ea = event_accumulator.EventAccumulator(log_dir)
            ea.Reload()

            for tag in ea.Tags()['scalars']:
                # None means "keep everything"; otherwise keep listed tags only.
                if selected_metrics is not None and tag not in selected_metrics:
                    continue

                metrics = pd.DataFrame(ea.Scalars(tag), columns=['wall_time', 'step', 'value'])
                if metrics.empty:
                    continue

                # Row with the highest step, i.e. the last logged value.
                last_metric = metrics.sort_values('step', ascending=False).iloc[0].round(2)

                record = last_metric.to_dict()
                record['metric'] = tag
                record['experiment_id'] = experiment_id
                records.append(record)

    if not records:
        # pivot_table would raise KeyError on a frame without these columns.
        return pd.DataFrame(columns=['experiment_id'])

    full_data = pd.DataFrame(records)
    wide_data = full_data.pivot_table(index=['experiment_id'],
                                      columns='metric',
                                      values='value',
                                      aggfunc='first').reset_index()

    return wide_data

# Directory that is scanned for experiment folders.
base_path = '/Results'

# Scalar tags to extract, one per line, grouped by their name prefix.
selected_metrics = [
    # train_*
    'train_acc_micro_epoch',
    # val_*
    'val_auc',
    'val_precision_recall_auc',
    'val_f1_micro',
    'val_f1_weighted',
    'val_acc_macro_epoch',
    'val_precision',
    'val_recall',
    # test_*
    'test_auc',
    'test_precision',
    'test_recall',
    'test_precision_recall_auc',
    'test_f1_micro',
    'test_f1_weighted',
    'test_acc_macro_epoch',
]

# Wide table: one row per experiment, one column per selected metric.
data = extract_and_transform_tensorboard_data(base_path, selected_metrics)

In [None]:
# Inspect which columns the extracted results table contains.
data.columns

In [None]:
# Work on a copy so that columns added to `df` later do not modify `data`.
df = data.copy()

In [None]:
import re
# Experiment ids look like "DistillRoberta_[NO_|Partial_]FN_<annotator>".
_EXPERIMENT_ID_PATTERN = re.compile(r"DistillRoberta_(NO_?|Partial_?)?FN_(.+)")


def extract_experiment_and_annotator(exp_id):
    """Split an experiment id into its experiment type and annotator part.

    Args:
        exp_id: Experiment identifier, e.g. ``"DistillRoberta_NO_FN_ann1"``.

    Returns:
        A ``(experiment_type, annotator)`` tuple. ``experiment_type`` is
        ``"DistillRoberta_FN"`` or ``"DistillRoberta_<variant>FN"`` where
        ``<variant>`` is the matched ``NO``/``NO_``/``Partial``/``Partial_``
        prefix; ``annotator`` is everything that follows ``"FN_"``.
        ``(None, None)`` is returned when ``exp_id`` is not a string or does
        not match the expected naming scheme.
    """
    # Missing ids (None / NaN) would make the regex call raise TypeError.
    if not isinstance(exp_id, str):
        return None, None

    match = _EXPERIMENT_ID_PATTERN.match(exp_id)
    if match is None:
        return None, None

    variant = match.group(1)
    experiment_type = f"DistillRoberta_{variant}FN" if variant else "DistillRoberta_FN"
    annotator = match.group(2)  # captures everything after the prefix and "FN_"
    return experiment_type, annotator


# Parse every experiment id into an (experiment, annotator) pair, then
# transpose the pairs into two parallel sequences, one per new column.
parsed_ids = df['experiment_id'].apply(extract_experiment_and_annotator)
experiment_labels, annotator_labels = zip(*parsed_ids)
df['experiment'] = experiment_labels
df['annotator'] = annotator_labels


In [None]:
# Drop the raw id; grouping uses the parsed 'experiment' / 'annotator' columns.
df_numeric = df.drop(columns=['experiment_id'])

# Step 1: mean of every remaining column per (experiment, annotator) pair.
grouped_numeric = (
    df_numeric
    .groupby(['experiment', 'annotator'])
    .agg(['mean'])
)

# Step 2: mean and standard deviation of those per-pair means, per experiment.
grouped_numeric_agg = (
    grouped_numeric
    .groupby(['experiment'])
    .agg(['mean', 'std'])
)

In [None]:
# Print a summary (columns, dtypes, non-null counts) of the frame used for grouping.
df_numeric.info()

In [None]:
# Show the per-experiment aggregates for the 'test_auc' metric.
grouped_numeric_agg['test_auc']

In [None]:
# Show the per-experiment aggregates for the 'test_recall' metric.
grouped_numeric_agg['test_recall']

In [None]:
# Export the per-experiment aggregates to an Excel workbook, including the index.
# NOTE(review): the output path is absolute and user-specific; consider a
# configurable, relative output directory so the notebook runs elsewhere.
grouped_numeric_agg.to_excel('/home/ancarani/tractive_models/speech_classification/groupedResults.xlsx', index=True)


In [None]:
# Display the per-(experiment, annotator) means.
grouped_numeric

In [None]:
# Save the per-experiment aggregates as CSV in the current working directory.
grouped_numeric_agg.to_csv('results_agg.csv')

In [None]:
# Save the per-(experiment, annotator) means as a semicolon-separated CSV.
grouped_numeric.to_csv('grouped_results_FINAL.csv', sep=";")