In [92]:
import tensorboard as tb
tb.__version__
import os
from copy import deepcopy

In [93]:
def convert_tb_data(root_dir, sort_by=None):
    """Convert local TensorBoard data into Pandas DataFrame.
    
    Function takes the root directory path and recursively parses
    all events data.    
    If the `sort_by` value is provided then it will use that column
    to sort values; typically `wall_time` or `step`.
    
    *Note* that the whole data is converted into a DataFrame.
    Depending on the data size this might take a while. If it takes
    too long then narrow it to some sub-directories.
    
    Paramters:
        root_dir: (str) path to root dir with tensorboard data.
        sort_by: (optional str) column name to sort by.
    
    Returns:
        pandas.DataFrame with [wall_time, name, step, value] columns.
    
    """
    import os
    import pandas as pd
    from tensorflow.python.summary.summary_iterator import summary_iterator

    def convert_tfevent(filepath):
        return pd.DataFrame([
            parse_tfevent(e) for e in summary_iterator(filepath) if len(e.summary.value)
        ])

    def parse_tfevent(tfevent):
        return dict(
            wall_time=tfevent.wall_time,
            name=tfevent.summary.value[0].tag,
            step=tfevent.step,
            value=float(tfevent.summary.value[0].simple_value),
        )
    
    columns_order = ['wall_time', 'name', 'step', 'value']
    
    out = []
    for (root, _, filenames) in os.walk(root_dir):
        for filename in filenames:
            if "events.out.tfevents" not in filename:
                continue
            file_full_path = os.path.join(root, filename)
            out.append(convert_tfevent(file_full_path))
    # Concatenate (and sort) all partial individual dataframes
    all_df = pd.concat(out)[columns_order]
    if sort_by is not None:
        all_df = all_df.sort_values(sort_by)

    dict_out = dict()
    for name, group in all_df.groupby('name'):
        dict_out[name] = group.reset_index(drop=True)
        
    return dict_out

def all_df(path):
    out = dict()
    for folder in os.listdir(path):
        p = os.path.join(path, folder)
        out[folder] = convert_tb_data(p)
    return out

def do_it(path, isString = False):
    to_get = ['eval/1_precision', 'eval/1_recall', 'eval/1_f1-score', 'eval/2_precision', 'eval/2_recall', 'eval/2_f1-score', 'eval/weighted avg_f1-score']
    if isString:
        to_get = ['test/B-AC_precision', 'test/B-AC_recall', 'test/B-AC_f1-score', 'test/B-LF_precision', 'test/B-LF_recall', 'test/B-LF_f1-score', 'test/weighted avg_f1-score']
    all_dfs = all_df(path)
    o = []
    for key, value in all_dfs.items():
        add_to_o = []
        add_to_o.append(key)
        vs = []
        for k in to_get:
            add_to_o.append(round(value[k].tail(1).value.values[0],3))
        o.append(add_to_o)
    out = deepcopy(o)
    for i in range(len(o)):
        for j in range(len(o[i])):
            if isinstance(o[i][j], str):
                print(o[i][j], end=' ')
            else:
                to_compare_to = [o[k][j] for k in range(len(o))]
                if o[i][j] == max(to_compare_to):
                    out[i][j] = "\\" + 'textbf{' + str(out[i][j]) + '0' * (5 - len(str(out[i][j]))) + '}'
                else :
                    out[i][j] = str(out[i][j]) + '0' * (5 - len(str(out[i][j])))
                print('&', end=' ')
                print(out[i][j], end=' ')
        print('\\\\')


In [94]:
do_it('runs/finetuned/')

stanford & \textbf{0.827} & 0.869 & 0.847 & \textbf{0.842} & \textbf{0.892} & \textbf{0.866} & \textbf{0.951} \\
bert & 0.803 & \textbf{0.920} & \textbf{0.858} & 0.785 & 0.817 & 0.801 & 0.929 \\
distiledgpt2 & 0.613 & 0.897 & 0.728 & 0.467 & 0.627 & 0.535 & 0.866 \\
distilbert & 0.800 & 0.879 & 0.838 & 0.699 & 0.803 & 0.747 & 0.927 \\


In [95]:
do_it('runs/batch_size')

32 & 0.741 & 0.865 & 0.798 & 0.708 & 0.830 & 0.764 & 0.921 \\
16 & \textbf{0.801} & \textbf{0.867} & \textbf{0.833} & \textbf{0.761} & \textbf{0.837} & \textbf{0.797} & \textbf{0.930} \\
8 & 0.790 & \textbf{0.867} & 0.826 & 0.755 & 0.810 & 0.781 & 0.928 \\


In [96]:
do_it('runs/lr')

1e-5 & 0.720 & 0.892 & 0.797 & 0.668 & \textbf{0.841} & 0.744 & 0.917 \\
2e-4 & 0.719 & \textbf{0.893} & 0.797 & 0.710 & 0.737 & 0.723 & 0.913 \\
2e-5 & \textbf{0.801} & 0.867 & \textbf{0.833} & \textbf{0.761} & 0.837 & \textbf{0.797} & \textbf{0.930} \\


In [97]:
do_it('runs/vectorization', True)

word2vec & 0.477 & 0.403 & 0.437 & 0.438 & 0.329 & 0.375 & 0.859 \\
fasttext & \textbf{0.743} & \textbf{0.639} & \textbf{0.687} & \textbf{0.590} & \textbf{0.530} & \textbf{0.558} & \textbf{0.907} \\
glove & 0.505 & 0.608 & 0.552 & 0.565 & 0.523 & 0.544 & 0.899 \\


In [98]:
do_it('runs/loss_functions', True)

hinge_loss & 0.514 & \textbf{0.753} & 0.611 & 0.484 & \textbf{0.617} & \textbf{0.543} & 0.891 \\
cross_entropy & \textbf{0.572} & 0.696 & \textbf{0.628} & \textbf{0.551} & 0.503 & 0.526 & \textbf{0.901} \\


In [99]:
do_it('runs/weight_comp')

weird_w & \textbf{0.833} & 0.806 & 0.819 & \textbf{0.762} & 0.699 & 0.729 & 0.925 \\
big_w & 0.699 & \textbf{0.918} & 0.794 & 0.711 & \textbf{0.841} & 0.770 & 0.918 \\
normal_w & 0.801 & 0.867 & \textbf{0.833} & 0.761 & 0.837 & \textbf{0.797} & \textbf{0.930} \\
no_w & 0.808 & 0.847 & 0.827 & 0.729 & 0.827 & 0.775 & 0.927 \\
w & 0.786 & 0.872 & 0.827 & 0.741 & 0.820 & 0.778 & 0.929 \\


In [101]:
do_it('runs/optimizer', True)

AdamW & \textbf{0.694} & 0.646 & \textbf{0.669} & \textbf{0.593} & 0.490 & \textbf{0.537} & \textbf{0.913} \\
sgd & 0.398 & \textbf{0.878} & 0.547 & 0.450 & \textbf{0.604} & 0.516 & 0.867 \\
adam & 0.572 & 0.696 & 0.628 & 0.551 & 0.503 & 0.526 & 0.901 \\


In [102]:
do_it('runs/preprocessing', True)

stemmizer & 0.620 & 0.650 & 0.635 & 0.547 & 0.544 & 0.545 & 0.904 \\
lemmatized & 0.715 & 0.658 & 0.685 & 0.567 & \textbf{0.570} & 0.569 & \textbf{0.911} \\
nothing & \textbf{0.743} & 0.639 & \textbf{0.687} & 0.590 & 0.530 & 0.558 & 0.907 \\
lowercase & 0.595 & \textbf{0.677} & 0.633 & \textbf{0.617} & 0.530 & \textbf{0.570} & 0.908 \\
