In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from datasets import load_metric

In [3]:
wer_metric = load_metric("wer")

In [4]:
test_df = pd.read_csv("../results/raw/intron-open-test-all_models.csv")
dev_df = pd.read_csv("../results/raw/intron-open-dev-all_models.csv")

test_norm_df = pd.read_csv("../results/normalized/intron-open-test-all_models.csv")
dev_norm_df = pd.read_csv("../results/normalized//intron-open-dev-all_models.csv")

In [5]:
test_finetune_df = pd.read_csv("../results/raw/intron-open-test-all_models_finetuned.csv")
dev_finetune_df = pd.read_csv("../results/raw/intron-open-dev-all_models_finetuned.csv")

test_norm_finetune_df = pd.read_csv("../results/normalized/intron-open-test-all_models_finetuned.csv")
dev_norm_finetune_df = pd.read_csv("../results/normalized/intron-open-dev-all_models_finetuned.csv")

In [6]:
df = pd.concat([dev_df, test_df])
norm_df = pd.concat([dev_norm_df, test_norm_df])

print(df.shape)
df.tail(2)

(167133, 15)


Unnamed: 0,idx,domain,gender,duration,age_group,accent,user_ids,audio_paths,origin,country,reference,prediction,wer,name,split
103947,475636,general,Female,7.237,26-40,tswana,ed4d0a0d0446ad1ff0da31efac2b7578,/AfriSpeech-100/test/1144d8ac-acd3-4a16-ba7d-6...,african,ZA,International Day for Mangrove: Warri to host ...,International day for mangrove colon worried t...,0.777778,AWS [Medical] (Primary Care),test
103948,712100,general,Female,16.147,19-25,ibibio,5856f88ae52888c338fe3ac15d673849,/AfriSpeech-100/test/76521f8a-6968-4ba7-ac3f-e...,nigerian,NG,He said this on Wednesday night at the inaugur...,He said this on wednesday night at the inaugur...,0.555556,AWS [Medical] (Primary Care),test


In [7]:
finetune_df = pd.concat([dev_finetune_df, test_finetune_df])
finetune_norm_df = pd.concat([dev_norm_finetune_df, test_norm_finetune_df])

- compute per-accent statistics
- compute per-gender statistics
- compute per-domain statistics
- compute per age-group statistics

## Table 4

In [8]:
# Sanity check

x = norm_df[(norm_df.split == "test") & (norm_df.name == "openai/whisper-large") & (norm_df.domain == "clinical")]
print(wer_metric.compute(predictions=x.prediction, references=x.reference))

x = norm_df[(df.split == "test") & (norm_df.name == "openai/whisper-large") & (norm_df.domain == "general")]
print(wer_metric.compute(predictions=x.prediction, references=x.reference))

x = norm_df[(norm_df.split == "test") & (norm_df.name == "openai/whisper-large")]
print(wer_metric.compute(predictions=x.prediction, references=x.reference))

0.37495623131413797
0.24034761334386706
0.30579053505486736


In [9]:
def domain_wer(df):
    df.domain = df.domain.str.capitalize()
    df.split = df.split.str.capitalize()
    
    # compute the domain WER
    domain_df = df.groupby(["name", "split", "domain"]).apply(lambda x: wer_metric.compute(predictions=x.prediction, references=x.reference))
    _domain_df = domain_df.reset_index().set_index("name").pivot(columns=["split", "domain"])
    _domain_df = _domain_df.droplevel(0, axis=1)
    
    # compute WER across the samples
    domain_df_ = df.groupby(["name", "split"]).apply(lambda x: wer_metric.compute(predictions=x.prediction, references=x.reference)).reset_index()
    domain_df_["domain"] = "Both"
    domain_df_ = domain_df_.set_index("name").pivot(columns=["split", "domain"])
    domain_df_ = domain_df_.droplevel(0, axis=1)
    
    # merge
    domain_df__ = _domain_df.merge(domain_df_, on="name").sort_values(("Test", "Both"))
    
    domain_df__ = domain_df__[[("Dev", "General"), ("Dev", "Clinical"), ("Dev", "Both"), ("Test", "General"), ("Test", "Clinical"), ("Test", "Both")]]
    domain_df__ = domain_df__.round(3)
    domain_df__.columns.names = ["", ""]
    
    return domain_df__


In [10]:
domain_df = domain_wer(df)
domain_df

Unnamed: 0_level_0,Dev,Dev,Dev,Test,Test,Test
Unnamed: 0_level_1,General,Clinical,Both,General,Clinical,Both
name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
openai/whisper-large,0.308,0.372,0.34,0.314,0.471,0.39
openai/whisper-medium,0.332,0.403,0.368,0.366,0.504,0.433
openai/whisper-medium-en,0.334,0.396,0.365,0.378,0.504,0.439
Azure,0.397,0.441,0.419,0.426,0.528,0.476
openai/whisper-small,0.393,0.463,0.428,0.415,0.552,0.481
openai/whisper-small-en,0.384,0.46,0.422,0.417,0.565,0.489
AWS,0.449,0.547,0.498,0.467,0.646,0.554
AWS [Medical] (Primary Care),0.507,0.537,0.522,0.554,0.642,0.597
GCP [Medical],0.591,0.537,0.564,0.605,0.612,0.609
GCP,0.563,0.616,0.59,0.594,0.68,0.636


In [11]:
print(domain_df.to_latex())

\begin{tabular}{lrrrrrr}
\toprule
{} & \multicolumn{3}{l}{Dev} & \multicolumn{3}{l}{Test} \\
{} & General & Clinical &   Both & General & Clinical &   Both \\
name                                        &         &          &        &         &          &        \\
\midrule
openai/whisper-large                        &   0.308 &    0.372 &  0.340 &   0.314 &    0.471 &  0.390 \\
openai/whisper-medium                       &   0.332 &    0.403 &  0.368 &   0.366 &    0.504 &  0.433 \\
openai/whisper-medium-en                    &   0.334 &    0.396 &  0.365 &   0.378 &    0.504 &  0.439 \\
Azure                                       &   0.397 &    0.441 &  0.419 &   0.426 &    0.528 &  0.476 \\
openai/whisper-small                        &   0.393 &    0.463 &  0.428 &   0.415 &    0.552 &  0.481 \\
openai/whisper-small-en                     &   0.384 &    0.460 &  0.422 &   0.417 &    0.565 &  0.489 \\
AWS                                         &   0.449 &    0.547 &  0.498 &   0.467

In [12]:
domain_norm_df = domain_wer(norm_df)
domain_norm_df

Unnamed: 0_level_0,Dev,Dev,Dev,Test,Test,Test
Unnamed: 0_level_1,General,Clinical,Both,General,Clinical,Both
name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
openai/whisper-large,0.235,0.287,0.261,0.24,0.375,0.306
openai/whisper-medium,0.246,0.3,0.273,0.276,0.392,0.332
openai/whisper-medium-en,0.267,0.315,0.291,0.304,0.414,0.358
openai/whisper-small,0.313,0.372,0.343,0.33,0.455,0.391
Azure,0.309,0.359,0.334,0.34,0.444,0.391
openai/whisper-small-en,0.319,0.384,0.352,0.35,0.482,0.414
AWS,0.332,0.437,0.385,0.354,0.536,0.442
AWS [Medical] (Primary Care),0.385,0.416,0.4,0.439,0.52,0.478
GCP [Medical],0.55,0.475,0.512,0.567,0.537,0.552
facebook/wav2vec2-large-xlsr-53-english,0.498,0.561,0.53,0.506,0.65,0.576


In [13]:
print(domain_norm_df.to_latex())

\begin{tabular}{lrrrrrr}
\toprule
{} & \multicolumn{3}{l}{Dev} & \multicolumn{3}{l}{Test} \\
{} & General & Clinical &   Both & General & Clinical &   Both \\
name                                        &         &          &        &         &          &        \\
\midrule
openai/whisper-large                        &   0.235 &    0.287 &  0.261 &   0.240 &    0.375 &  0.306 \\
openai/whisper-medium                       &   0.246 &    0.300 &  0.273 &   0.276 &    0.392 &  0.332 \\
openai/whisper-medium-en                    &   0.267 &    0.315 &  0.291 &   0.304 &    0.414 &  0.358 \\
openai/whisper-small                        &   0.313 &    0.372 &  0.343 &   0.330 &    0.455 &  0.391 \\
Azure                                       &   0.309 &    0.359 &  0.334 &   0.340 &    0.444 &  0.391 \\
openai/whisper-small-en                     &   0.319 &    0.384 &  0.352 &   0.350 &    0.482 &  0.414 \\
AWS                                         &   0.332 &    0.437 &  0.385 &   0.354

In [14]:
domain_finetune_df = domain_wer(finetune_df)
domain_finetune_df

Unnamed: 0_level_0,Dev,Dev,Dev,Test,Test,Test
Unnamed: 0_level_1,General,Clinical,Both,General,Clinical,Both
name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
openai/whisper-medium-all,0.326,0.344,0.335,0.309,0.377,0.342
facebook/wav2vec2-large-xlsr-53-english-all,0.386,0.401,0.394,0.374,0.428,0.4
openai/whisper-medium-clinical,0.507,0.293,0.399,0.482,0.315,0.401
facebook/wav2vec2-large-xlsr-53-english-general,0.354,0.506,0.431,0.338,0.548,0.44
openai/whisper-medium-general,0.321,0.551,0.437,0.306,0.599,0.449
facebook/wav2vec2-large-xlsr-53-english-clinical,0.497,0.406,0.451,0.487,0.429,0.459


In [15]:
print(domain_finetune_df.to_latex())

\begin{tabular}{lrrrrrr}
\toprule
{} & \multicolumn{3}{l}{Dev} & \multicolumn{3}{l}{Test} \\
{} & General & Clinical &   Both & General & Clinical &   Both \\
name                                             &         &          &        &         &          &        \\
\midrule
openai/whisper-medium-all                        &   0.326 &    0.344 &  0.335 &   0.309 &    0.377 &  0.342 \\
facebook/wav2vec2-large-xlsr-53-english-all      &   0.386 &    0.401 &  0.394 &   0.374 &    0.428 &  0.400 \\
openai/whisper-medium-clinical                   &   0.507 &    0.293 &  0.399 &   0.482 &    0.315 &  0.401 \\
facebook/wav2vec2-large-xlsr-53-english-general  &   0.354 &    0.506 &  0.431 &   0.338 &    0.548 &  0.440 \\
openai/whisper-medium-general                    &   0.321 &    0.551 &  0.437 &   0.306 &    0.599 &  0.449 \\
facebook/wav2vec2-large-xlsr-53-english-clinical &   0.497 &    0.406 &  0.451 &   0.487 &    0.429 &  0.459 \\
\bottomrule
\end{tabular}



In [16]:
domain_finetune_norm_df = domain_wer(finetune_norm_df)
domain_finetune_norm_df

Unnamed: 0_level_0,Dev,Dev,Dev,Test,Test,Test
Unnamed: 0_level_1,General,Clinical,Both,General,Clinical,Both
name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
openai/whisper-medium-all,0.213,0.241,0.227,0.192,0.242,0.216
facebook/wav2vec2-large-xlsr-53-english-all,0.295,0.308,0.302,0.279,0.308,0.293
facebook/wav2vec2-large-xlsr-53-english-general,0.254,0.437,0.347,0.236,0.468,0.349
openai/whisper-medium-general,0.205,0.486,0.347,0.186,0.525,0.351
facebook/wav2vec2-large-xlsr-53-english-clinical,0.437,0.312,0.374,0.424,0.308,0.368
openai/whisper-medium-clinical,0.49,0.264,0.376,0.464,0.266,0.368


In [17]:
print(domain_finetune_norm_df.to_latex())

\begin{tabular}{lrrrrrr}
\toprule
{} & \multicolumn{3}{l}{Dev} & \multicolumn{3}{l}{Test} \\
{} & General & Clinical &   Both & General & Clinical &   Both \\
name                                             &         &          &        &         &          &        \\
\midrule
openai/whisper-medium-all                        &   0.213 &    0.241 &  0.227 &   0.192 &    0.242 &  0.216 \\
facebook/wav2vec2-large-xlsr-53-english-all      &   0.295 &    0.308 &  0.302 &   0.279 &    0.308 &  0.293 \\
facebook/wav2vec2-large-xlsr-53-english-general  &   0.254 &    0.437 &  0.347 &   0.236 &    0.468 &  0.349 \\
openai/whisper-medium-general                    &   0.205 &    0.486 &  0.347 &   0.186 &    0.525 &  0.351 \\
facebook/wav2vec2-large-xlsr-53-english-clinical &   0.437 &    0.312 &  0.374 &   0.424 &    0.308 &  0.368 \\
openai/whisper-medium-clinical                   &   0.490 &    0.264 &  0.376 &   0.464 &    0.266 &  0.368 \\
\bottomrule
\end{tabular}



## Table 5

In [18]:
devset = pd.read_csv("../data/intron-dev-public-3231-clean.csv")
devset.head(2)

Unnamed: 0,idx,user_ids,accent,age_group,country,transcript,nchars,audio_ids,audio_paths,duration,origin,domain,split,gender
0,155349,659e36c14baaf7fa7bb197f951251f4b,setswana,26-40,BW,We should be asking ourselves whether we want ...,110,86ad3ef21e469217f28a749c990c81fd,/AfriSpeech-100/dev/92d2b94e-3e31-40be-b479-50...,8.400998,african,general,dev,Male
1,60812,1fd5f717cede9a867bf37d03c7d2166b,siswati,26-40,ZA,Other sagittal planes parallel to this off cen...,88,5409734bcc4a0f053e73f405d0f135da,/AfriSpeech-100/dev/127bdba1-8bc8-44a4-9c37-8e...,5.236984,african,clinical,dev,Female


In [19]:
def accent_wer(df, devset_df, origin, topk, open_source_list, commercial_list, ours_list, ):
    
    top_accent_df = devset_df[devset_df["origin"] == origin].groupby(["accent", "country"])["idx"].apply(lambda x: len(x)).sort_values(ascending=False)
    top_accent_df = top_accent_df[:topk]

    df = df.set_index(["accent", "country"])
    df = df[df.index.isin(top_accent_df.index)].reset_index("country")
    
    country_samples = df[df["origin"] == origin].groupby(["accent", "country"])["idx"].apply(lambda x: len(x)).sort_values(ascending=False)
    country_samples = country_samples.to_frame().rename(columns = {"idx": "Samples"})
    
    display(country_samples)
    print(df.shape)

    accent_df = df.groupby(["name", "accent", "country"]).apply(lambda x: wer_metric.compute(predictions=x.prediction, references=x.reference))
    
    accent_df_ = accent_df.reset_index(["name"], name="WER").pivot(columns="name")
    accent_df_ = accent_df_.droplevel(0, axis=1)
        

    
    accent_combined = country_samples.merge(accent_df_[open_source_list], on=["accent", "country"])
    accent_combined = accent_combined.merge(accent_df_[commercial_list], on=["accent", "country"])
    accent_combined = accent_combined.merge(accent_df_[ours_list], on=["accent", "country"])

    accent_combined = accent_combined.reset_index("country")
    accent_combined.index = accent_combined.index.str.capitalize()
    
    return accent_combined.round(3)
    

In [38]:
dev_df["origin"].value_counts()

african     37708
nigerian    25476
Name: origin, dtype: int64

In [53]:
m = "facebook/wav2vec2-large-xlsr-53-english"
# m = "openai/whisper-medium"
# m = "Azure"
a = "luo"
o = "african"
n=20

top_ = devset[devset["origin"] == o].groupby(["accent", "country"])["idx"].apply(lambda x: len(x)).sort_values(ascending=False)[:n]

sanity_check_df = test_norm_df.set_index(["accent", "country"])
sanity_check_df = sanity_check_df[sanity_check_df.index.isin(top_.index)].reset_index()

sanity_check_df = sanity_check_df[(sanity_check_df["name"] == m) & (sanity_check_df["accent"] == a) ]
r = wer_metric.compute(predictions=sanity_check_df.prediction, references=sanity_check_df.reference)
round(r, 3)


0.411

In [21]:
sanity_check_df[sanity_check_df["origin"] == "nigerian"].groupby(["accent", "country"])["idx"].apply(lambda x: len(x)).sort_values(ascending=False)[:10]


accent  country
yoruba  NG         575
Name: idx, dtype: int64

In [22]:
sanity_check_df.prediction.shape

(575,)

In [23]:
open_source = ["facebook/wav2vec2-large-xlsr-53-english",
               "openai/whisper-medium"]
commercial = ["Azure", "GCP", "AWS"]
ours = ["facebook/wav2vec2-large-xlsr-53-english-all", 
        "openai/whisper-medium-all"]

In [43]:
combined_df = pd.concat([test_df, test_finetune_df])
accent_df = accent_wer(combined_df, devset, "nigerian", 10, open_source, commercial, ours)
accent_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Samples
accent,country,Unnamed: 2_level_1
yoruba,NG,14375
igbo,NG,7975
hausa,NG,4200
ijaw,NG,1925
english,NG,1650
idoma,NG,1325
ikwere,NG,1000
izon,NG,450
urhobo,NG,375
nupe,NG,275


(33550, 14)


Unnamed: 0_level_0,country,Samples,facebook/wav2vec2-large-xlsr-53-english,openai/whisper-medium,Azure,GCP,AWS,facebook/wav2vec2-large-xlsr-53-english-all,openai/whisper-medium-all
accent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Yoruba,NG,14375,0.644,0.43,0.461,0.634,0.529,0.393,0.336
Igbo,NG,7975,0.636,0.443,0.479,0.624,0.551,0.374,0.317
Hausa,NG,4200,0.691,0.446,0.536,0.682,0.615,0.423,0.362
Ijaw,NG,1925,0.681,0.436,0.443,0.74,0.54,0.44,0.391
English,NG,1650,0.619,0.378,0.413,0.606,0.533,0.432,0.352
Idoma,NG,1325,0.696,0.485,0.512,0.695,0.624,0.427,0.383
Ikwere,NG,1000,0.591,0.316,0.396,0.558,0.402,0.326,0.232
Izon,NG,450,0.662,0.497,0.538,0.71,0.631,0.438,0.414
Urhobo,NG,375,0.633,0.434,0.457,0.775,0.513,0.446,0.352
Nupe,NG,275,0.424,0.326,0.372,0.448,0.384,0.302,0.279


In [44]:
combined_df = pd.concat([test_df, test_finetune_df])
accent_df = accent_wer(combined_df, devset, "african", 20, open_source, commercial, ours)
accent_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Samples
accent,country,Unnamed: 2_level_1
swahili,KE,11525
zulu,ZA,3750
isizulu,ZA,2200
setswana,BW,1600
twi,GH,1350
afrikaans,ZA,1225
luganda,UG,800
setswana,ZA,800
akan (fante),GH,725
kikuyu,KE,600


(27500, 14)


Unnamed: 0_level_0,country,Samples,facebook/wav2vec2-large-xlsr-53-english,openai/whisper-medium,Azure,GCP,AWS,facebook/wav2vec2-large-xlsr-53-english-all,openai/whisper-medium-all
accent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Swahili,KE,11525,0.527,0.304,0.389,0.503,0.445,0.336,0.286
Zulu,ZA,3750,0.547,0.308,0.4,0.55,0.437,0.401,0.336
Isizulu,ZA,2200,0.516,0.297,0.335,0.463,0.436,0.332,0.276
Setswana,BW,1600,0.498,0.295,0.343,0.437,0.428,0.327,0.29
Twi,GH,1350,0.598,0.341,0.466,0.572,0.546,0.336,0.277
Afrikaans,ZA,1225,0.453,0.217,0.289,0.518,0.314,0.36,0.31
Luganda,UG,800,0.602,0.434,0.489,0.657,0.458,0.482,0.396
Setswana,ZA,800,0.594,0.277,0.366,0.681,0.483,0.473,0.416
Akan (fante),GH,725,0.786,0.572,0.55,0.826,0.725,0.381,0.322
Kikuyu,KE,600,0.504,0.297,0.387,0.437,0.448,0.303,0.216


In [45]:
combined_df = pd.concat([test_norm_df, test_norm_finetune_df])

nigerian_test_norm_accent_df = accent_wer(combined_df, devset, "nigerian", 10, open_source, commercial, ours)
african_test_norm_accent_df = accent_wer(combined_df, devset, "african", 20, open_source, commercial, ours)

combined_norm_accent_df = pd.concat([nigerian_test_norm_accent_df, african_test_norm_accent_df])
combined_norm_accent_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Samples
accent,country,Unnamed: 2_level_1
yoruba,NG,14375
igbo,NG,7975
hausa,NG,4200
ijaw,NG,1925
english,NG,1650
idoma,NG,1325
ikwere,NG,1000
izon,NG,450
urhobo,NG,375
nupe,NG,275


(33550, 14)


Unnamed: 0_level_0,Unnamed: 1_level_0,Samples
accent,country,Unnamed: 2_level_1
swahili,KE,11525
zulu,ZA,3750
isizulu,ZA,2200
setswana,BW,1600
twi,GH,1350
afrikaans,ZA,1225
luganda,UG,800
setswana,ZA,800
akan (fante),GH,725
kikuyu,KE,600


(27500, 14)


Unnamed: 0_level_0,country,Samples,facebook/wav2vec2-large-xlsr-53-english,openai/whisper-medium,Azure,GCP,AWS,facebook/wav2vec2-large-xlsr-53-english-all,openai/whisper-medium-all
accent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Yoruba,NG,14375,0.575,0.325,0.359,0.578,0.418,0.292,0.216
Igbo,NG,7975,0.566,0.341,0.389,0.568,0.44,0.274,0.199
Hausa,NG,4200,0.627,0.358,0.457,0.633,0.488,0.32,0.243
Ijaw,NG,1925,0.608,0.364,0.372,0.671,0.446,0.321,0.238
English,NG,1650,0.546,0.317,0.354,0.551,0.467,0.305,0.193
Idoma,NG,1325,0.607,0.384,0.424,0.639,0.543,0.294,0.243
Ikwere,NG,1000,0.519,0.249,0.301,0.504,0.323,0.238,0.135
Izon,NG,450,0.607,0.431,0.483,0.662,0.524,0.355,0.276
Urhobo,NG,375,0.551,0.378,0.423,0.678,0.423,0.345,0.21
Nupe,NG,275,0.335,0.243,0.283,0.341,0.283,0.185,0.127


In [27]:
columns_ = ["accent","family", "country", "Samples", 
            "facebook/wav2vec2-large-xlsr-53-english",
            "nvidia/stt-en-conformer-transducer-large",
            "openai/whisper-medium"]+commercial+["facebook/wav2vec2-large-xlsr-53-english-all",
            "nvidia/stt-en-conformer-transducer-large-all",
            "openai/whisper-medium-all"]

In [28]:
langauge_family_df = pd.read_csv("../data/afrispeech_language_family.csv")
combined_norm_accent_df_ = combined_norm_accent_df.merge(langauge_family_df, on="accent")
combined_norm_accent_df_["nvidia/stt-en-conformer-transducer-large"] = "-"
combined_norm_accent_df_["nvidia/stt-en-conformer-transducer-large-all"] = "-"
combined_norm_accent_df_ = combined_norm_accent_df_[columns_]
combined_norm_accent_df_ = combined_norm_accent_df_.set_index("accent").sort_values(["family", "Samples"], ascending=False)
combined_norm_accent_df_


Unnamed: 0_level_0,family,country,Samples,facebook/wav2vec2-large-xlsr-53-english,nvidia/stt-en-conformer-transducer-large,openai/whisper-medium,Azure,GCP,AWS,facebook/wav2vec2-large-xlsr-53-english-all,nvidia/stt-en-conformer-transducer-large-all,openai/whisper-medium-all
accent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Luo,Nilo-Saharan,UG,300,0.411,-,0.234,0.229,0.343,0.343,0.309,-,0.234
Yoruba,Niger-Congo,NG,14375,0.575,-,0.325,0.359,0.578,0.418,0.292,-,0.216
Swahili,Niger-Congo,KE,11525,0.455,-,0.196,0.313,0.446,0.311,0.248,-,0.184
Igbo,Niger-Congo,NG,7975,0.566,-,0.341,0.389,0.568,0.44,0.274,-,0.199
Zulu,Niger-Congo,ZA,3750,0.477,-,0.226,0.335,0.486,0.349,0.32,-,0.24
Isizulu,Niger-Congo,ZA,2200,0.457,-,0.182,0.254,0.406,0.292,0.265,-,0.206
Ijaw,Niger-Congo,NG,1925,0.608,-,0.364,0.372,0.671,0.446,0.321,-,0.238
Twi,Niger-Congo,GH,1350,0.504,-,0.184,0.382,0.51,0.361,0.236,-,0.177
Idoma,Niger-Congo,NG,1325,0.607,-,0.384,0.424,0.639,0.543,0.294,-,0.243
Ikwere,Niger-Congo,NG,1000,0.519,-,0.249,0.301,0.504,0.323,0.238,-,0.135


In [34]:
combined_norm_accent_df_ = combined_norm_accent_df_.drop(columns="family")
print(combined_norm_accent_df_.to_latex())

\begin{tabular}{llrrlrrrrrlr}
\toprule
{} & country &  Samples &  facebook/wav2vec2-large-xlsr-53-english & nvidia/stt-en-conformer-transducer-large &  openai/whisper-medium &  Azure &    GCP &    AWS &  facebook/wav2vec2-large-xlsr-53-english-all & nvidia/stt-en-conformer-transducer-large-all &  openai/whisper-medium-all \\
accent       &         &          &                                          &                                          &                        &        &        &        &                                              &                                              &                            \\
\midrule
Luo          &      UG &      300 &                                    0.411 &                                        - &                  0.234 &  0.229 &  0.343 &  0.343 &                                        0.309 &                                            - &                      0.234 \\
Yoruba       &      NG &    14375 &                                 