In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# re-imported before each cell executes, so edits to local .py files take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
from tqdm.auto import tqdm

# Register tqdm's progress-bar variants of the pandas apply-style methods
# (e.g. `progress_apply`) on pandas objects.
tqdm.pandas()

In [3]:
# Build `evaluation[corpus][api]` -> {metric name: mean over the whole corpus},
# rounded to three decimals. One metrics TSV is read per (API, corpus) pair.
apis = ['wit', 'azure', 'gcloud']
datasets = ['mozilla', 'voxforge']
evaluation = {}

for corpus in datasets:
    per_api = {}
    for api in apis:
        metrics_path = f'transcribed_datasets/transcribe_{api}/{corpus}_metrics.tsv'
        df = pd.read_csv(metrics_path, sep='\t')
        # WER is scaled by 100 (the plots below label it as a percentage);
        # the remaining metrics are left on their stored scale.
        df['wer'] = df['wer'] * 100
        column_means = df[['wer', 'bleu', 'meteor', 'w2vcbow', 'w2vskip']].mean()
        per_api[api] = column_means.round(decimals=3).to_dict()
    evaluation[corpus] = per_api

In [6]:
# Emit the VoxForge summary as a LaTeX table. The nested dict is keyed
# api -> metric, so the frame is transposed to get one row per API and one
# column per metric.
voxforge_table = pd.DataFrame(evaluation['voxforge']).T
print(voxforge_table.to_latex())

\begin{tabular}{lrrrrr}
\toprule
{} &     wer &   bleu &  meteor &  w2vcbow &  w2vskip \\
\midrule
wit    &  11.437 &  0.856 &   0.873 &    0.919 &    0.920 \\
azure  &   7.250 &  0.900 &   0.906 &    0.946 &    0.947 \\
gcloud &  10.491 &  0.862 &   0.874 &    0.925 &    0.925 \\
\bottomrule
\end{tabular}



In [None]:
# Per-gender mean of every metric on the Mozilla corpus, for each API.
# Result shape: apis[api][metric][gender] -> mean value.
#
# NOTE: `apis` is rebound here from the list of API names defined in the
# earlier evaluation cell to a dict; the plotting cell below indexes this dict,
# so the name is kept. Re-running the earlier cell afterwards will clobber it.
corpus = 'mozilla'
metric_columns = ['wer', 'bleu', 'meteor', 'w2vcbow', 'w2vskip']

# The corpus metadata does not depend on the API, so it is loaded and cleaned
# once instead of on every loop iteration.
df_orig = pd.read_csv(f'./{corpus}/sentences_duration.tsv', sep='\t')
# Relabel the 'other' gender bucket; every other value (including NaN) is
# left untouched.
df_orig['gender'] = df_orig['gender'].replace({'other': 'not informed'})

apis = dict()
for api in ['wit', 'azure', 'gcloud']:
    df_transcribe = pd.read_csv(f'./transcribed_datasets/transcribe_{api}/{corpus}_metrics.tsv', sep='\t')
    df_transcribe['wer'] *= 100  # same x100 scaling as the corpus-level table

    # Select the metric columns *before* aggregating: the merged frame also
    # carries string columns (e.g. the file path keys), and pandas >= 2.0
    # raises TypeError when asked for the mean of non-numeric columns.
    # groupby's default dropna=True excludes rows whose gender is NaN.
    df = (
        df_transcribe
        .merge(df_orig, left_on='file', right_on='filepath')
        .groupby('gender')[metric_columns]
        .mean()
    )
    apis[api] = df.to_dict()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Grouped bar chart of one metric: one group of bars per API on the x axis,
# one bar per gender inside each group. Reads the nested dict
# apis[api][metric][gender] built by the per-gender aggregation cell.
plt.style.use('grayscale')
metric = 'wer'

legend_loc = 'lower right'
save_fig = f'{metric}_by_gender.pdf'
show = True
title = 'A'
ylabel = 'word error rate % (lower is better)'

# x-axis groups: `labels` are the display names for `api_keys`, same order.
api_keys = ['wit', 'azure', 'gcloud']
labels = ['Wit.ai', 'Azure Speech Services', 'Google Cloud Speech-to-Text']

# Bar series: `bar_names` are the legend names for `gender_keys`, same order.
gender_keys = ['female', 'male', 'not informed']
bar_names = ['Female', 'Male', 'Not Informed']

# Per-API lists, each ordered like `gender_keys`.
wit, azure, gcloud = (
    [round(apis[api][metric][g], 2) for g in gender_keys] for api in api_keys
)

# ax.bar() draws ONE legend series across ALL x ticks per call, so every row
# of `values` must hold a single gender across the APIs. Passing the per-API
# lists directly would label Wit.ai's three gender values as "Female" and
# spread them over the Wit/Azure/Google ticks, hence the transpose.
values = [list(per_gender) for per_gender in zip(wit, azure, gcloud)]

n = len(values)   # number of bars in each group
w = .3            # width of a single bar
ls = 30 * w       # font size of the value labels above the bars
x = np.arange(0, len(labels))

fig, ax = plt.subplots()
for i, (b, value) in enumerate(zip(bar_names, values)):
    # Shift each series so that the n bars of a group are centred on its tick.
    position = x + (w*(1-n)/2) + i*w
    bar = ax.bar(position, value, width=w, label=f'{b}')
    ax.bar_label(bar, padding=1, size=ls)

# Opaque white legend box with a black border so it stays readable over bars.
legend = ax.legend(loc=legend_loc, frameon=True)
frame = legend.get_frame()
frame.set_facecolor('white')
frame.set_edgecolor('black')

if title:
    ax.set_title(title)

ax.set_xticks(x)
ax.set_xticklabels(labels, rotation=10)
ax.set_ylabel(ylabel)

fig.tight_layout()
if save_fig:
    fig.savefig(save_fig)
if show:
    plt.show()