In [1]:
import jsonlines
import os
import tqdm
import numpy as np
import pandas as pd
import yaml
# Load the notebook configuration from config.yaml in the working directory.
# NOTE(review): yaml.FullLoader can construct more than plain YAML types; if
# config.yaml only holds plain mappings/scalars, yaml.safe_load would be the
# safer choice — confirm before switching.
with open("config.yaml", "r") as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)

# Never truncate cell text when DataFrames are displayed.
pd.set_option('display.max_colwidth', None)

In [13]:
# Every file in the results directory whose name starts with "metrics_".
results_dir = config["results_dir"]
fnames = [entry for entry in os.listdir(results_dir) if entry.startswith("metrics_")]

# Maps (metric, dataset, topp) -> value; dataset and topp come from the file name.
values_map = {}
for fname in fnames:
    # Files are read without a header row and given two column names.
    m = pd.read_csv(os.path.join(results_dir, fname), header=None)
    m.columns = ["metric", "value"]
    # Second "_"-separated field is the dataset, second-to-last is the top-p tag.
    parts = fname.split("_")
    dataset, topp = parts[1], parts[-2]
    for metric, value in zip(m["metric"].values, m["value"].values):
        key = (metric, dataset, topp)
        # A (metric, dataset, topp) triple may only be defined once across all files.
        assert key not in values_map
        values_map[key] = value

# Rows of the results table, in display order. Each entry is
# (metric key, top-p tag, human-readable description): the first two fields
# select an entry of `values_map`, the third is shown in the "Method" column.
order = [
    ("prr_wer_hypo_entropy", "topp1.0",
     "Joint-sequence, length-normalized entropy from importance weighting beam search with beam of 20"),
    ("prr_wer_samp_entropy", "topp1.0",
     "Joint-sequence, length normalized entropy from 100 ancestral samples"),
    ("prr_wer_samp_entropy", "topp0.9",
     "Joint-sequence, length normalized entropy from 100 nucleus samples (p=0.9)"),
    ("prr_wer_mean_samp_wers", "topp1.0",
     "Average word error rate between hypothesis and 100 ancestral samples"),
    ("prr_wer_mean_samp_wers", "topp0.9",
     "Average word error rate between hypothesis and 100 nucleus samples (p=0.9)"),
    # The two metrics below are computed from token *log*-probabilities (see the
    # metric keys), so the descriptions say so explicitly.
    ("prr_wer_neg_mean_token_logprobs", "topp1.0",
     "Negative mean of the token log-probabilities of the predicted transcription"),
    ("prr_wer_neg_sum_token_logprobs", "topp1.0",
     "Negative sum of the token log-probabilities of the predicted transcription")
]

# (dataset identifier, short column header) pairs, in table column order.
# The identifier is the dataset component of the `values_map` keys.
datasets = list({
    "librispeechclean": "LTC",
    "librispeechother": "LTO",
    "tedlium": "TED",
    "wmt": "WMT",
}.items())

# One row per entry of `order`: the description under "Method", followed by one
# column per dataset holding the matching metric value rounded to 4 decimals.
table = [
    {
        "Method": desc,
        **{
            name: np.round(values_map[(metric, dataset, topp)], 4)
            for dataset, name in datasets
        },
    }
    for metric, topp, desc in order
]
df = pd.DataFrame(table)
df

Unnamed: 0,Method,LTC,LTO,TED,WMT
0,"Joint-sequence, length-normalized entropy from importance weighting beam search with beam of 20",0.7042,0.7279,0.6794,0.2874
1,"Joint-sequence, length normalized entropy from 100 ancestral samples",0.6497,0.6517,0.5419,0.1136
2,"Joint-sequence, length normalized entropy from 100 nucleus samples (p=0.9)",0.699,0.7215,0.6496,0.2224
3,Average word error rate between hypothesis and 100 ancestral samples,0.5818,0.6671,0.7238,0.2
4,Average word error rate between hypothesis and 100 nucleus samples (p=0.9),0.7061,0.7468,0.7187,0.331
5,Negative mean of the token probabilities of the predicted transcription,0.7015,0.7263,0.6701,0.2914
6,Negative sum of the token probabilities of the predicted transcription,0.2964,0.3465,0.1697,0.0604


In [14]:
# Show the stored "corpus_bleu" value for dataset "wmt" with tag "topp1.0".
bleu_key = ("corpus_bleu", "wmt", "topp1.0")
values_map[bleu_key]

0.3685743159761693