# Leaderboard

In [1]:
import editdistance,argparse
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score

def get_asr_wer(gold_df,submission_df):
    """Compute the corpus-level word error rate of a submission, in percent.

    The word-level edit distance between every hypothesis and its reference
    is summed over all rows and divided by the total number of reference
    words, so the result is a micro average over the whole set.

    Args:
        gold_df: DataFrame whose ``pred_text`` column holds the reference
            transcripts.
        submission_df: DataFrame whose ``pred_text`` column holds the
            hypothesis transcripts; rows are paired with ``gold_df`` by
            position.

    Returns:
        ``100.0 * total_edit_distance / total_reference_words`` as a float.

    Raises:
        AssertionError: if the two frames do not have the same number of rows.
        ZeroDivisionError: if the references contain no words at all.
    """
    assert len(gold_df)==len(submission_df), "tsv length mismatch"

    def split_words(text):
        # A missing transcript shows up as NaN (a float), which has no
        # .split(); treat it as an empty word sequence on either side.
        return [] if pd.isna(text) else text.split()

    total_errors = 0
    total_ref_words = 0
    for ref_text, hyp_text in zip(gold_df.pred_text.to_list(), submission_df.pred_text.to_list()):
        ref_words = split_words(ref_text)
        hyp_words = split_words(hyp_text)
        total_errors += editdistance.eval(hyp_words, ref_words)
        total_ref_words += len(ref_words)

    if total_ref_words == 0:
        # Same exception type the bare division would raise, with context.
        raise ZeroDivisionError("cannot compute WER: the reference transcripts contain no words")
    return total_errors * 100.0 / total_ref_words

def get_sentiment_f1(gold_df,submission_df):
    """Return the macro-averaged sentiment F1 of a submission, in percent.

    The ``pred_sentiment`` column of ``gold_df`` supplies the reference
    labels and the same column of ``submission_df`` supplies the predicted
    labels; both are handed to ``f1_score`` as plain lists.
    """
    reference_labels = gold_df.pred_sentiment.to_list()
    predicted_labels = submission_df.pred_sentiment.to_list()
    score = f1_score(y_true=reference_labels, y_pred=predicted_labels, average="macro")
    return score * 100


# Per-submission metrics; each list ends up index-aligned with ``submissions``.
wers = []
wers_voxpopuli =[]
wers_voxceleb = []
sentiment_macro_f1s = []
ner_f1s = []
slue_scores = []
submissions = ["e2e_w2v2-l-ll60k-lm","pipeline_w2v2-l-ll60k-lm-deberta-l"]

# Only the NER scoring needs the toolkit; import and patch it once rather
# than on every loop iteration.
from slue_toolkit.eval import eval_utils
from slue_toolkit.eval.eval_w2v_ner import make_distinct
eval_utils.get_stats = eval_utils.get_ner_stats #quickpatch


def _parse_ner_labels(raw_labels):
    """Convert serialized ``pred_ner`` cells into one label list per utterance.

    Each cell is evaluated as a Python literal; a non-empty result has its
    items turned into tuples and passed through ``make_distinct``, an empty
    one becomes ``[]``.
    """
    parsed = []
    for raw in raw_labels:
        # NOTE(security): eval() executes whatever expression the TSV cell
        # contains. Only run this on trusted files; ast.literal_eval would be
        # the safe replacement if the cells are plain list literals.
        entities = eval(raw)
        if entities:
            parsed.append(make_distinct([tuple(entity) for entity in entities]))
        else:
            parsed.append([])
    return parsed


# The gold file is the same for every submission, so read it once.
gold = pd.read_csv("submissions/gold.tsv",sep="\t")
voxpopuli_mask = gold.set_name=="slue-voxpopuli"
voxceleb_mask = gold.set_name=="slue-voxceleb"

for submission_name in submissions:

    submission = pd.read_csv(f"submissions/{submission_name}.tsv",sep="\t")

    #sort submission based on utterance id
    # One pass builds id -> row labels, so each gold id is a dict lookup.
    rows_by_id = {}
    for row_label, utterance_id in zip(submission.index, submission.id):
        rows_by_id.setdefault(utterance_id, []).append(row_label)

    new_order = []
    for utterance_id in gold.id:
        matches = rows_by_id.get(utterance_id, [])
        assert matches, f"missing id = {utterance_id}"
        assert len(matches)==1, f"too many entities for id = {utterance_id}"
        new_order.append(matches[0])
    # Reset the index so the submission shares gold's 0..n-1 labels: boolean
    # masks built from ``gold`` are aligned by label, not by position, and
    # would otherwise select the wrong rows of a reordered submission.
    submission = submission.loc[new_order].reset_index(drop=True)

    #overall wer (micro)
    wer = get_asr_wer(gold,submission)
    wers.append(wer)
    #voxpopuli wer
    wer_voxpopuli = get_asr_wer(gold[voxpopuli_mask],submission[voxpopuli_mask])
    wers_voxpopuli.append(wer_voxpopuli)
    #voxceleb wer
    wer_voxceleb = get_asr_wer(gold[voxceleb_mask],submission[voxceleb_mask])
    wers_voxceleb.append(wer_voxceleb)

    # sentiment analysis f1 score
    sentiment_macro_f1 = get_sentiment_f1(gold[voxceleb_mask],submission[voxceleb_mask])
    sentiment_macro_f1s.append(sentiment_macro_f1)

    # ner micro f1 score
    gold_distinct_label_lst = _parse_ner_labels(gold[voxpopuli_mask].pred_ner.to_list())
    submission_distinct_label_lst = _parse_ner_labels(submission[voxpopuli_mask].pred_ner.to_list())

    ner_result = eval_utils.get_ner_scores(gold_distinct_label_lst,submission_distinct_label_lst)
    ner_f1 = ner_result['overall_micro']['fscore']*100
    ner_f1s.append(ner_f1)

    # SLUE score: mean of (100 - average WER over both sets), sentiment F1 and NER F1.
    slue_scores.append(((100 - (wer_voxpopuli+wer_voxceleb)/2) +sentiment_macro_f1 + ner_f1)/3)



# Not referenced by the rest of this cell; kept as defined.
slice_ = ['score_neg', 'score_neu', 'score_pos']

# CSS rules handed to the pandas Styler below.
cell_hover = dict(  # for row hover use <tr> instead of <td>
    selector='td:hover',
    props=[('background-color', '#ffffb3')],
)
index_names = dict(
    selector='.index_name',
    props='font-style: italic; color: darkgrey; font-weight:normal;',
)
headers = dict(
    selector='th:not(.index_name)',
    props='background-color: #000066; color: white;',
)

# One column per metric; insertion order is the column order of the table.
data = {
    'submission': submissions,
    'SLUE-score': slue_scores,
    'WER(slue-voxpopuli)': wers_voxpopuli,
    'WER(slue-voxceleb)': wers_voxceleb,
    'NER F1': ner_f1s,
    'Sentiment F1': sentiment_macro_f1s,
}

df = pd.DataFrame(data=data)
# Left as the final expression so the notebook renders the styled table.
(
    df.style
    .set_table_attributes('style="font-size: 14px"')
    .set_table_styles([cell_hover, index_names, headers], overwrite=False)
    .format(precision=1)
)


Unnamed: 0,submission,SLUE-score,WER(slue-voxpopuli),WER(slue-voxceleb),NER F1,Sentiment F1
0,e2e_w2v2-l-ll60k-lm,68.17,9.26,10.88,64.76,49.81
1,pipeline_w2v2-l-ll60k-lm-deberta-l,75.84,9.26,10.88,71.83,65.77
