### 0) Utils

In [36]:
import pandas as pd
import numpy as np
import re
import os

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.cm import get_cmap

In [37]:
def load_metrics_from_txt(file_path):

    data_dict = dict()

    with open(file_path, 'r') as file:
        
        for line in file:

            line = line.strip()
            
            metric_name = re.search(r'(?i)^([a-z\_]+)', line)[0]
            metric_value = float(re.search(r'(\d+\.\d+)', line)[0])

            data_dict[metric_name] = metric_value

    return data_dict

In [38]:
def create_bar_plot(metric_mean_name, metric_std_name,
                    xlabel, ylabel, title):

    global data_dict

    # Sample data
    categories = [key for key in data_dict.keys()]
    values = [data_dict[key][metric_mean_name] for key in data_dict.keys()]
    std_devs = [data_dict[key][metric_std_name] for key in data_dict.keys()]

    # Create bar plot
    plt.figure(figsize=(6,4))

    plt.bar(categories, values, yerr=std_devs, capsize=5)

    # Add values on top of each bar
    for i in range(len(categories)):
        plt.text(i + 0.25, values[i], f'{values[i]:.2f}', ha='center')

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.show()

In [39]:
def create_grouped_bar_plot(metric_mean_names, metric_std_names,
                            xlabel, ylabel, title,
                            colormap='Spectral'):

    global data_dict

    num_metrics = len(metric_mean_names)

    # Prepare data
    categories = [key for key in data_dict.keys()]
    bar_width = 0.27  # Width of each bar

    # Set up subplots
    fig, ax = plt.subplots(figsize=(12, 6))

    # Calculate the width for each group
    total_bar_width = bar_width * num_metrics
    bar_positions = np.arange(len(categories))  # x-axis locations for the groups

    cmap = get_cmap(colormap)
    colors = [cmap(i) for i in np.linspace(0.3, 0.9, num_metrics)]  # Get colors from the colormap


    for i in range(num_metrics):
        metric_values = [data_dict[key][metric_mean_names[i]] for key in data_dict.keys()]
        metric_std_devs = [data_dict[key][metric_std_names[i]] for key in data_dict.keys()]

        # Shift the x position for each group
        x_pos = [pos + i * bar_width - total_bar_width / 2 for pos in bar_positions]

        # Create bars
        bars = ax.bar(x_pos, metric_values, bar_width, label=['Train', 'Dev', 'Test'][i], yerr=metric_std_devs,
                      capsize=5, color=colors[i])

        # Add values on top of each bar
        for j, bar in enumerate(bars):
            ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() / 2, f'{metric_values[j]:.2f}', ha='center')

    # Set labels and title
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.set_xticks(bar_positions)
    ax.set_xticklabels(categories)
    ax.legend()

    plt.tight_layout()
    plt.show()

In [40]:
def create_grouped_bar_plot_relative_improvement(metric_mean_names,
                            xlabel, ylabel, title,
                            colormap='Spectral'):

    global data_dict

    num_metrics = len(metric_mean_names)

    # Prepare data
    categories = [key for key in data_dict.keys() if 'text_only' not in key]
    bar_width = 0.27  # Width of each bar

    # Set up subplots
    fig, ax = plt.subplots(figsize=(12, 6))

    # Calculate the width for each group
    total_bar_width = bar_width * num_metrics
    bar_positions = np.arange(len(categories))  # x-axis locations for the groups

    cmap = get_cmap(colormap)
    colors = [cmap(i) for i in np.linspace(0.3, 0.9, num_metrics)]  # Get colors from the colormap


    for i in range(num_metrics):
        metric_values = [data_dict[key][metric_mean_names[i]] for key in data_dict.keys() if 'text_only' not in key]

        # Shift the x position for each group
        x_pos = [pos + i * bar_width - total_bar_width / 2 for pos in bar_positions]

        # Create bars
        bars = ax.bar(x_pos, metric_values, bar_width, label=['Train', 'Dev', 'Test'][i],
                      capsize=5, color=colors[i])

        # Add values on top of each bar
        for j, bar in enumerate(bars):
            ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() / 2, f'{metric_values[j]:.2f}', ha='center')

    # Set labels and title
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.set_xticks(bar_positions)
    ax.set_xticklabels(categories)
    ax.legend()

    plt.tight_layout()
    plt.show()

### 1) Load data

In [41]:
data_dict = dict()

data_dict['Architecture 1_Text Only'] = load_metrics_from_txt('C:\\Users\\danie\\Documents\\tfm\\tfm\\notebooks\\results\\correct_transcript\\avg_metrics\\text_only_output.txt')

data_dict['Architecture 2_Wav2Vec2.0'] = load_metrics_from_txt('C:\\Users\\danie\\Documents\\tfm\\tfm\\notebooks\\results\\correct_transcript\\avg_metrics\\wav2vec2_output.txt')
data_dict['Architecture 2_HuBERT'] = load_metrics_from_txt('C:\\Users\\danie\\Documents\\tfm\\tfm\\notebooks\\results\\correct_transcript\\avg_metrics\\hubert_output.txt')
data_dict['Architecture 2_UniSpeechSAT'] = load_metrics_from_txt('C:\\Users\\danie\\Documents\\tfm\\tfm\\notebooks\\results\\correct_transcript\\avg_metrics\\uniSpeechSat_output.txt')

In [42]:
df = pd.DataFrame(data_dict).T
df

Unnamed: 0,mean_train_loss,std_train_loss,mean_dev_loss,std_dev_loss,mean_test_loss,std_test_loss,mean_train_accuracy,std_train_accuracy,mean_dev_accuracy,std_dev_accuracy,...,std_test_urs,mean_train_loss_improvement,mean_dev_loss_improvement,mean_test_loss_improvement,mean_train_accuracy_improvement,mean_dev_accuracy_improvement,mean_test_accuracy_improvement,mean_train_urs_improvement,mean_dev_urs_improvement,mean_test_urs_improvement
Architecture 1_Text Only,0.703904,0.004725,0.920081,0.010215,3.477648,0.104656,76.40669,0.102719,71.314664,0.405847,...,0.454511,,,,,,,,,
Architecture 2_Wav2Vec2.0,0.629902,0.013954,0.891169,0.009306,2.929761,0.04369,80.143512,0.355618,71.473486,0.245782,...,0.52038,10.513015,3.142412,15.754547,4.8907,0.222707,0.530659,0.779702,1.510768,0.997608
Architecture 2_HuBERT,0.621426,0.014666,0.886532,0.007638,2.968183,0.033487,80.401833,0.382857,71.706859,0.277238,...,0.799805,11.717226,3.646305,14.6497,5.228786,0.54995,0.323702,1.156824,1.210758,0.376648
Architecture 2_UniSpeechSAT,0.629697,0.014004,0.89108,0.009232,2.939998,0.042839,80.164486,0.354908,71.548036,0.285159,...,0.617532,10.54226,3.151999,15.460169,4.918151,0.327243,0.543925,0.758555,1.382192,0.875452


In [43]:
# Remove unnecessary columns
cols_to_drop = df.columns[df.columns.str.contains('improvement')]
df = df.drop(cols_to_drop, axis=1)
df

Unnamed: 0,mean_train_loss,std_train_loss,mean_dev_loss,std_dev_loss,mean_test_loss,std_test_loss,mean_train_accuracy,std_train_accuracy,mean_dev_accuracy,std_dev_accuracy,mean_test_accuracy,std_test_accuracy,mean_train_urs,std_train_urs,mean_dev_urs,std_dev_urs,mean_test_urs,std_test_urs
Architecture 1_Text Only,0.703904,0.004725,0.920081,0.010215,3.477648,0.104656,76.40669,0.102719,71.314664,0.405847,47.733577,1.263322,86.536667,0.305541,89.089347,0.911895,86.799205,0.454511
Architecture 2_Wav2Vec2.0,0.629902,0.013954,0.891169,0.009306,2.929761,0.04369,80.143512,0.355618,71.473486,0.245782,47.480274,0.789904,87.211395,0.351636,90.435281,0.561166,87.66512,0.52038
Architecture 2_HuBERT,0.621426,0.014666,0.886532,0.007638,2.968183,0.033487,80.401833,0.382857,71.706859,0.277238,47.888091,0.762009,87.537744,0.391422,90.168003,0.606279,87.126132,0.799805
Architecture 2_UniSpeechSAT,0.629697,0.014004,0.89108,0.009232,2.939998,0.042839,80.164486,0.354908,71.548036,0.285159,47.473942,0.795228,87.193095,0.343608,90.320733,0.568426,87.55909,0.617532


In [44]:
# Keep only until the 3rh decimal
df = df.round(3).astype(str)
df

Unnamed: 0,mean_train_loss,std_train_loss,mean_dev_loss,std_dev_loss,mean_test_loss,std_test_loss,mean_train_accuracy,std_train_accuracy,mean_dev_accuracy,std_dev_accuracy,mean_test_accuracy,std_test_accuracy,mean_train_urs,std_train_urs,mean_dev_urs,std_dev_urs,mean_test_urs,std_test_urs
Architecture 1_Text Only,0.704,0.005,0.92,0.01,3.478,0.105,76.407,0.103,71.315,0.406,47.734,1.263,86.537,0.306,89.089,0.912,86.799,0.455
Architecture 2_Wav2Vec2.0,0.63,0.014,0.891,0.009,2.93,0.044,80.144,0.356,71.473,0.246,47.48,0.79,87.211,0.352,90.435,0.561,87.665,0.52
Architecture 2_HuBERT,0.621,0.015,0.887,0.008,2.968,0.033,80.402,0.383,71.707,0.277,47.888,0.762,87.538,0.391,90.168,0.606,87.126,0.8
Architecture 2_UniSpeechSAT,0.63,0.014,0.891,0.009,2.94,0.043,80.164,0.355,71.548,0.285,47.474,0.795,87.193,0.344,90.321,0.568,87.559,0.618


In [45]:
# Extract arquitecture
df = df.reset_index(drop=False)\
       .rename({'index': 'Modelo'}, axis=1)
df['Architecture'] = df['Modelo'].str.extract(r'^(Architecture \d)')
df['Modelo'] = df['Modelo'].str.extract(r'^Architecture \d_(.*)')
df = df.rename({'Modelo': 'Audio_embedding'}, axis=1)

df = df[df.columns[[-1]].tolist() + df.columns[[0]].tolist() + df.columns[1:-1].tolist()]

df

Unnamed: 0,Architecture,Audio_embedding,mean_train_loss,std_train_loss,mean_dev_loss,std_dev_loss,mean_test_loss,std_test_loss,mean_train_accuracy,std_train_accuracy,mean_dev_accuracy,std_dev_accuracy,mean_test_accuracy,std_test_accuracy,mean_train_urs,std_train_urs,mean_dev_urs,std_dev_urs,mean_test_urs,std_test_urs
0,Architecture 1,Text Only,0.704,0.005,0.92,0.01,3.478,0.105,76.407,0.103,71.315,0.406,47.734,1.263,86.537,0.306,89.089,0.912,86.799,0.455
1,Architecture 2,Wav2Vec2.0,0.63,0.014,0.891,0.009,2.93,0.044,80.144,0.356,71.473,0.246,47.48,0.79,87.211,0.352,90.435,0.561,87.665,0.52
2,Architecture 2,HuBERT,0.621,0.015,0.887,0.008,2.968,0.033,80.402,0.383,71.707,0.277,47.888,0.762,87.538,0.391,90.168,0.606,87.126,0.8
3,Architecture 2,UniSpeechSAT,0.63,0.014,0.891,0.009,2.94,0.043,80.164,0.355,71.548,0.285,47.474,0.795,87.193,0.344,90.321,0.568,87.559,0.618


In [46]:
# Create train excel sheet
columns_train = ['Architecture', 'Audio_embedding'] + df.columns[df.columns.str.contains('train')].tolist()
df_train = df[columns_train]

df_train['Loss'] = df_train['mean_train_loss'] + ' ± ' + df_train['std_train_loss']
df_train['Accuracy'] = df_train['mean_train_accuracy'] + ' ± ' + df_train['std_train_accuracy']
df_train['URS'] = df_train['mean_train_urs'] + ' ± ' + df_train['std_train_urs']

final_cols = ['Architecture', 'Audio_embedding', 'Loss', 'Accuracy', 'URS']
df_train = df_train[final_cols]
df_train

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train['Loss'] = df_train['mean_train_loss'] + ' ± ' + df_train['std_train_loss']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train['Accuracy'] = df_train['mean_train_accuracy'] + ' ± ' + df_train['std_train_accuracy']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train['URS'] = df_trai

Unnamed: 0,Architecture,Audio_embedding,Loss,Accuracy,URS
0,Architecture 1,Text Only,0.704 ± 0.005,76.407 ± 0.103,86.537 ± 0.306
1,Architecture 2,Wav2Vec2.0,0.63 ± 0.014,80.144 ± 0.356,87.211 ± 0.352
2,Architecture 2,HuBERT,0.621 ± 0.015,80.402 ± 0.383,87.538 ± 0.391
3,Architecture 2,UniSpeechSAT,0.63 ± 0.014,80.164 ± 0.355,87.193 ± 0.344


In [47]:
# Create dev excel sheet
columns_dev = ['Architecture', 'Audio_embedding'] + df.columns[df.columns.str.contains('dev')].tolist()
df_dev = df[columns_dev]

df_dev['Loss'] = df_dev['mean_dev_loss'] + ' ± ' + df_dev['std_dev_loss']
df_dev['Accuracy'] = df_dev['mean_dev_accuracy'] + ' ± ' + df_dev['std_dev_accuracy']
df_dev['URS'] = df_dev['mean_dev_urs'] + ' ± ' + df_dev['std_dev_urs']

final_cols = ['Architecture', 'Audio_embedding', 'Loss', 'Accuracy', 'URS']
df_dev = df_dev[final_cols]
df_dev

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_dev['Loss'] = df_dev['mean_dev_loss'] + ' ± ' + df_dev['std_dev_loss']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_dev['Accuracy'] = df_dev['mean_dev_accuracy'] + ' ± ' + df_dev['std_dev_accuracy']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_dev['URS'] = df_dev['mean_dev_urs'] + ' ± 

Unnamed: 0,Architecture,Audio_embedding,Loss,Accuracy,URS
0,Architecture 1,Text Only,0.92 ± 0.01,71.315 ± 0.406,89.089 ± 0.912
1,Architecture 2,Wav2Vec2.0,0.891 ± 0.009,71.473 ± 0.246,90.435 ± 0.561
2,Architecture 2,HuBERT,0.887 ± 0.008,71.707 ± 0.277,90.168 ± 0.606
3,Architecture 2,UniSpeechSAT,0.891 ± 0.009,71.548 ± 0.285,90.321 ± 0.568


In [48]:
# Create test excel sheet
columns_test = ['Architecture', 'Audio_embedding'] + df.columns[df.columns.str.contains('test')].tolist()
df_test = df[columns_test]

df_test['Loss'] = df_test['mean_test_loss'] + ' ± ' + df_test['std_test_loss']
df_test['Accuracy'] = df_test['mean_test_accuracy'] + ' ± ' + df_test['std_test_accuracy']
df_test['URS'] = df_test['mean_test_urs'] + ' ± ' + df_test['std_test_urs']

final_cols = ['Architecture', 'Audio_embedding', 'Loss', 'Accuracy', 'URS']
df_test = df_test[final_cols]
df_test

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test['Loss'] = df_test['mean_test_loss'] + ' ± ' + df_test['std_test_loss']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test['Accuracy'] = df_test['mean_test_accuracy'] + ' ± ' + df_test['std_test_accuracy']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test['URS'] = df_test['mean_test

Unnamed: 0,Architecture,Audio_embedding,Loss,Accuracy,URS
0,Architecture 1,Text Only,3.478 ± 0.105,47.734 ± 1.263,86.799 ± 0.455
1,Architecture 2,Wav2Vec2.0,2.93 ± 0.044,47.48 ± 0.79,87.665 ± 0.52
2,Architecture 2,HuBERT,2.968 ± 0.033,47.888 ± 0.762,87.126 ± 0.8
3,Architecture 2,UniSpeechSAT,2.94 ± 0.043,47.474 ± 0.795,87.559 ± 0.618


In [49]:
with pd.ExcelWriter('table_comparison_correct_transcript.xlsx') as writer:
    # Write each dataframe to a separate worksheet
    df_train.to_excel(writer, sheet_name='Train', index=False)
    df_dev.to_excel(writer, sheet_name='Dev', index=False)
    df_test.to_excel(writer, sheet_name='Test', index=False)