In [35]:
import jsonlines
import os
import tqdm
import numpy as np
import pandas as pd
import yaml
# Show full cell contents so the long method descriptions in the results table are not truncated.
pd.set_option("display.max_colwidth", None)

# Notebook settings (e.g. "results_dir") are read from config.yaml in the working directory.
# NOTE(review): FullLoader can build more than plain scalars/lists/maps; if config.yaml
# only holds plain data, yaml.safe_load would be the stricter choice — confirm before switching.
with open("config.yaml") as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)

In [39]:
# Pick up every file in the results directory whose name starts with "metrics_".
results_dir = config["results_dir"]
fnames = [entry for entry in os.listdir(results_dir) if entry.startswith("metrics_")]

# Maps (metric, dataset, topp) -> value; dataset and topp are taken from the filename.
values_map = {}
for fname in fnames:
    # Read with header=None so the first row is treated as data; the two
    # columns are then labelled explicitly.
    metrics = pd.read_csv(os.path.join(results_dir, fname), header=None)
    metrics.columns = ["metric", "value"]

    # The filename is split on "_": field 1 is used as the dataset key and the
    # second-to-last field as the sampling setting (looked up as e.g. "topp1.0").
    name_parts = fname.split("_")
    dataset, topp = name_parts[1], name_parts[-2]

    for metric, value in zip(metrics["metric"].values, metrics["value"].values):
        # "mean_wer" rows are not stored.
        if metric != "mean_wer":
            key = (metric, dataset, topp)
            # Each (metric, dataset, topp) combination may appear only once across all files.
            assert key not in values_map
            values_map[key] = value

# One entry per table row, in display order: (metric key, sampling setting, description).
# NOTE(review): the last two metric keys end in "logprobs" while their descriptions
# say "probabilities" — confirm which wording is intended.
order = [
    (
        "prr_hypo_entropy",
        "topp1.0",
        "Joint-sequence, length-normalized entropy from importance weighting beam search with beam of 20",
    ),
    (
        "prr_samp_entropy",
        "topp1.0",
        "Joint-sequence, length normalized entropy from 100 ancestral samples",
    ),
    (
        "prr_samp_entropy",
        "topp0.9",
        "Joint-sequence, length normalized entropy from 100 nucleus samples (p=0.9)",
    ),
    (
        "prr_mean_samp_wers",
        "topp1.0",
        "Average word error rate between hypothesis and 100 ancestral samples",
    ),
    (
        "prr_mean_samp_wers",
        "topp0.9",
        "Average word error rate between hypothesis and 100 nucleus samples (p=0.9)",
    ),
    (
        "prr_neg_mean_token_logprobs",
        "topp1.0",
        "Negative mean of the token probabilities of the predicted transcription",
    ),
    (
        "prr_neg_sum_token_logprobs",
        "topp1.0",
        "Negative sum of the token probabilities of the predicted transcription",
    ),
]

# (dataset key used in values_map, short column header shown in the table).
dataset_columns = [
    ("librispeechclean", "LTC"),
    ("librispeechother", "LTO"),
    ("tedlium", "TED"),
]

# Build one record per method: the description followed by one value per dataset,
# rounded to 4 decimals. A missing (metric, dataset, topp) entry raises KeyError.
table = [
    {
        "Method": desc,
        **{
            name: np.round(values_map[(metric, dataset, topp)], 4)
            for dataset, name in dataset_columns
        },
    }
    for metric, topp, desc in order
]
df = pd.DataFrame(table)
df

Unnamed: 0,Method,LTC,LTO,TED
0,"Joint-sequence, length-normalized entropy from importance weighting beam search with beam of 20",0.7041,0.7276,0.6786
1,"Joint-sequence, length normalized entropy from 100 ancestral samples",0.6403,0.6438,0.5377
2,"Joint-sequence, length normalized entropy from 100 nucleus samples (p=0.9)",0.6984,0.7218,0.6481
3,Average word error rate between hypothesis and 100 ancestral samples,0.5824,0.6676,0.7168
4,Average word error rate between hypothesis and 100 nucleus samples (p=0.9),0.7026,0.7491,0.7177
5,Negative mean of the token probabilities of the predicted transcription,0.7015,0.726,0.6701
6,Negative sum of the token probabilities of the predicted transcription,0.2964,0.3459,0.169
