In [2]:
import numpy as np
import pandas as pd
from os.path import join
import scipy.stats

In [3]:
def mean_confidence_interval(data, boldface=False, decimal=2, confidence=0.95):
    r"""Format the mean of ``data`` and its confidence half-width as LaTeX math.

    NaN entries are dropped first. The half-width is the standard error of
    the mean (``scipy.stats.sem``) multiplied by the two-sided Student-t
    quantile for ``confidence`` with ``n - 1`` degrees of freedom.

    Args:
        data: One-dimensional numeric values (list, NumPy array or pandas
            Series).
        boldface: If true, wrap the expression in ``\mathbf{...}``.
        decimal: Non-negative number of digits after the decimal point.
            With ``decimal=1`` and a mean that prints below ``10.0`` the
            expression is prefixed with ``\:\:`` (presumably to align
            one-digit means with two-digit ones in a table column).
        confidence: Confidence level of the interval, e.g. ``0.95``.

    Returns:
        A string such as ``$1.23 \pm 0.04$``.

    Raises:
        ValueError: If ``decimal`` is not a non-negative integer.
    """
    # Validate up front so an unsupported precision fails with a clear message
    # instead of an UnboundLocalError at the return statement.
    try:
        digits = int(decimal)
    except (TypeError, ValueError, OverflowError):
        digits = -1
    if digits != decimal or digits < 0:
        raise ValueError(f"decimal must be a non-negative integer, got {decimal!r}")

    # Converting first lets plain Python lists be NaN-filtered as well.
    values = np.asarray(data, dtype=float)
    values = values[~np.isnan(values)]
    n = len(values)
    mean, se = np.mean(values), scipy.stats.sem(values)
    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)

    # Raw f-strings keep the LaTeX backslashes literal without relying on
    # invalid escape sequences such as "\m" or "\p".
    body = rf'{mean:.{digits}f} \pm {h:.{digits}f}'
    if boldface:
        body = rf'\mathbf{{{body}}}'
    pad = r'\:\:' if digits == 1 and float(f"{mean:.1f}") < 10.0 else ''
    return f'${pad}{body}$'

def print_mean(data, boldface=False, decimal=2):
    r"""Format the mean of ``data`` as a LaTeX math string.

    NaN entries are dropped before averaging.

    Args:
        data: One-dimensional numeric values (list, NumPy array or pandas
            Series).
        boldface: If true, wrap the number in ``\mathbf{...}``.
        decimal: Either the string ``"percent"`` (the mean is multiplied by
            100 and printed as a rounded integer) or a non-negative number
            of digits after the decimal point.

    Returns:
        A string such as ``$1.50$`` or ``$\mathbf{15}$``.

    Raises:
        ValueError: If ``decimal`` is neither ``"percent"`` nor a
            non-negative integer.
    """
    # Converting first lets plain Python lists be NaN-filtered as well.
    values = np.asarray(data, dtype=float)
    mean = np.mean(values[~np.isnan(values)])

    if decimal == "percent":
        text = f'{int(np.round(mean*100)):d}'
    else:
        # Reject unsupported precisions explicitly instead of failing later
        # with an UnboundLocalError.
        try:
            digits = int(decimal)
        except (TypeError, ValueError, OverflowError):
            digits = -1
        if digits != decimal or digits < 0:
            raise ValueError(
                f'decimal must be "percent" or a non-negative integer, got {decimal!r}'
            )
        # np.round is applied before formatting, as before, so the printed
        # digits stay identical to earlier output.
        text = f'{np.round(mean, decimals=digits):.{digits}f}'

    # A raw f-string keeps the LaTeX backslash literal.
    if boldface:
        text = rf'\mathbf{{{text}}}'
    return f'${text}$'

def print_mean_std(data, boldface=False, decimal=2):
    r"""Format ``mean \pm std`` of ``data`` as a LaTeX math string.

    NaN entries are dropped first. The spread is ``np.std`` with its default
    settings, i.e. the population standard deviation.

    Args:
        data: One-dimensional numeric values (list, NumPy array or pandas
            Series).
        boldface: If true, wrap the expression in ``\mathbf{...}``.
        decimal: Either the string ``"percent"`` (both numbers are
            multiplied by 100 and printed as rounded integers) or a
            non-negative number of digits after the decimal point.

    Returns:
        A string such as ``$1.50 \pm 0.50$``.

    Raises:
        ValueError: If ``decimal`` is neither ``"percent"`` nor a
            non-negative integer.
    """
    # Converting first lets plain Python lists be NaN-filtered as well.
    values = np.asarray(data, dtype=float)
    values = values[~np.isnan(values)]
    mean = np.mean(values)
    std = np.std(values)

    if decimal == "percent":
        mean_text = f'{int(np.round(mean*100)):d}'
        std_text = f'{int(np.round(std*100)):d}'
    else:
        # Reject unsupported precisions explicitly instead of failing later
        # with an UnboundLocalError.
        try:
            digits = int(decimal)
        except (TypeError, ValueError, OverflowError):
            digits = -1
        if digits != decimal or digits < 0:
            raise ValueError(
                f'decimal must be "percent" or a non-negative integer, got {decimal!r}'
            )
        # np.round is applied before formatting, as before, so the printed
        # digits stay identical to earlier output.
        mean_text = f'{np.round(mean, decimals=digits):.{digits}f}'
        std_text = f'{np.round(std, decimals=digits):.{digits}f}'

    # Raw f-strings keep the LaTeX backslashes literal.
    body = rf'{mean_text} \pm {std_text}'
    if boldface:
        body = rf'\mathbf{{{body}}}'
    return f'${body}$'

def find_best_model(methods, metric, decimal=2):
    """Return the names of the methods that score best on ``metric``.

    Each method's column mean and standard deviation (``.mean()`` and
    ``.std()`` of ``method["df"][metric]``) are rounded to ``decimal``
    places before comparison, so methods that differ only beyond that
    precision count as tied. Among equal means the smaller standard
    deviation wins; methods equal on both are all returned.

    The two word-error-rate metrics are treated as lower-is-better; every
    other metric is treated as higher-is-better.

    Args:
        methods: Iterable of dicts with a ``"name"`` entry and a ``"df"``
            entry holding a DataFrame that contains the column ``metric``.
        metric: Column name to rank by.
        decimal: Number of decimals used when rounding mean and std.

    Returns:
        List of method names, in input order; empty if ``methods`` is empty.
    """
    # Both WER columns are error rates, so smaller values are better.
    lower_is_better = metric in ("WER_QuartzNet15x5Base-En", "WER_ESPNET")
    # Start from the worst possible value so the first method always wins,
    # including when every mean is negative (e.g. SI-SDR in dB).
    best_mean = np.inf if lower_is_better else -np.inf
    best_std = np.inf
    best_model = []
    for method in methods:
        column = method["df"][metric]
        mean = np.round(column.mean(), decimals=decimal)
        std = np.round(column.std(), decimals=decimal)
        improved = mean < best_mean if lower_is_better else mean > best_mean
        if improved or (mean == best_mean and std < best_std):
            best_mean, best_std = mean, std
            best_model = [method["name"]]
        elif mean == best_mean and std == best_std:
            best_model.append(method["name"])
    return best_model

def get_metrics(method, methods, metrics):
    """Build the formatted ``mean \\pm std`` table cells for one method.

    For every entry of ``metrics`` the method's column is formatted with
    ``print_mean_std``; the cell is set in boldface when the method's name
    is among those returned by ``find_best_model`` for that metric. The two
    WER metrics are formatted with ``decimal="percent"``, all others with
    two decimals.

    Args:
        method: Dict with ``"name"`` and ``"df"`` entries for the row.
        methods: All method dicts, used to decide which one is best.
        metrics: Column names to format, in output order.

    Returns:
        List of LaTeX strings, one per metric.
    """
    percent_metrics = ("WER_QuartzNet15x5Base-En", "WER_ESPNET")

    def format_cell(metric):
        column = method["df"][metric]
        is_best = method["name"] in find_best_model(methods, metric)
        precision = "percent" if metric in percent_metrics else 2
        return print_mean_std(column, boldface=is_best, decimal=precision)

    return [format_cell(metric) for metric in metrics]

In [6]:
# Directory that holds one "<run>_metrics.csv" file per evaluated system.
METRICS_DIR = "/data3/jrichter/logs/sgmse_logs/gerrit"


def load_method(name, file_stem):
    """Read ``<METRICS_DIR>/<file_stem>_metrics.csv`` and pair it with a label.

    Args:
        name: Display name used for the method's table row.
        file_stem: File name without the ``_metrics.csv`` suffix.

    Returns:
        Dict with ``"name"`` (the label) and ``"df"`` (the loaded DataFrame).
    """
    return {
        "name": name,
        "df": pd.read_csv(f"{METRICS_DIR}/{file_stem}_metrics.csv"),
    }


# One entry per system; the individual names are kept so other cells can
# still refer to a single method directly.
noisy = load_method("Noisy", "noisy")

ovrl_300k = load_method("OVRL 300k", "ovrl_300k")
rnd_300k = load_method("RND 300k", "rnd_300k")

ovrl_250k = load_method("OVRL 250k", "ovrl_250k")
rnd_250k = load_method("RND 250k", "rnd_250k")

ovrl_200k = load_method("OVRL 200k", "ovrl_200k")
rnd_200k = load_method("RND 200k", "rnd_200k")

ovrl_150k = load_method("OVRL 150k", "ovrl_150k")
rnd_150k = load_method("RND 150k", "rnd_150k")

ovrl_100k = load_method("OVRL 100k", "ovrl_100k")
rnd_100k = load_method("RND 100k", "rnd_100k")

ovrl_50k = load_method("OVRL 50k", "ovrl_50k")
rnd_50k = load_method("RND 50k", "rnd_50k")

# Row order of the results table.
methods = [
    noisy,
    ovrl_300k, rnd_300k,
    ovrl_250k, rnd_250k,
    ovrl_200k, rnd_200k,
    ovrl_150k, rnd_150k,
    ovrl_100k, rnd_100k,
    ovrl_50k, rnd_50k,
]


In [7]:
# Header cells of the LaTeX table; the leading empty label heads the
# method-name column.
column_labels = [
    "",
    "\\textbf{POLQA}",
    "\\textbf{PESQ}",
    "\\textbf{SI-SDR} [dB]",
    "\\textbf{ESTOI}",
    "\\textbf{DNSMOS OVRL}",
    "\\textbf{DNSMOS SIG}",
    "\\textbf{DNSMOS BAK}",
    "\\textbf{DNSMOS}",
]
# DataFrame columns to report, in the same order as the labels above
# (after the name column).
metrics = [
    "POLQA_WB",
    "PESQ_WB",
    "SI-SDR",
    "ESTOI",
    "DNSMOS_OVRL",
    "DNSMOS_SIG",
    "DNSMOS_BAK",
    "DNSMOS_P808",
]

# One table row per method: its name followed by the formatted metric cells.
table_rows = [
    [method["name"]] + get_metrics(method, methods, metrics)
    for method in methods
]
df_all = pd.DataFrame(table_rows, columns=column_labels)

# Render without the index column and with booktabs-style rules.
styler = df_all.style.hide(axis="index")
latex = styler.to_latex(column_format="@{}l|cccccccc@{}", hrules=True)
print(latex)

\begin{tabular}{@{}l|cccccccc@{}}
\toprule
 & \textbf{POLQA} & \textbf{PESQ} & \textbf{SI-SDR} [dB] & \textbf{ESTOI} & \textbf{DNSMOS OVRL} & \textbf{DNSMOS SIG} & \textbf{DNSMOS BAK} & \textbf{DNSMOS} \\
\midrule
Noisy & $3.11 \pm 0.79$ & $1.97 \pm 0.75$ & $8.44 \pm 5.61$ & $0.79 \pm 0.15$ & $2.69 \pm 0.53$ & $3.33 \pm 0.54$ & $3.12 \pm 0.75$ & $3.09 \pm 0.39$ \\
OVRL 300k & $3.43 \pm 0.73$ & $2.20 \pm 0.72$ & $9.33 \pm 5.71$ & $0.83 \pm 0.12$ & $2.93 \pm 0.34$ & $3.44 \pm 0.27$ & $3.56 \pm 0.50$ & $3.30 \pm 0.34$ \\
RND 300k & $3.41 \pm 0.80$ & $2.23 \pm 0.78$ & $9.28 \pm 5.96$ & $0.82 \pm 0.14$ & $2.82 \pm 0.49$ & $3.34 \pm 0.46$ & $3.41 \pm 0.72$ & $3.26 \pm 0.39$ \\
OVRL 250k & $3.44 \pm 0.72$ & $2.20 \pm 0.73$ & $\mathbf{9.41 \pm 5.86}$ & $0.83 \pm 0.13$ & $2.93 \pm 0.35$ & $3.43 \pm 0.28$ & $3.57 \pm 0.52$ & $3.30 \pm 0.34$ \\
RND 250k & $3.39 \pm 0.80$ & $2.21 \pm 0.79$ & $9.11 \pm 5.99$ & $0.82 \pm 0.14$ & $2.81 \pm 0.50$ & $3.34 \pm 0.47$ & $3.38 \pm 0.74$ & $3.24 \pm 0.40$ \