In [None]:
import json
from pathlib import Path

import pandas as pd

from asr.comparison import MultipleTextsAlignment

In [None]:
# def get_longest_insertion(al: MultipleTextsAlignment) -> str:
#     """Get the text of the insertion with max words (callers take its character length).
#     TODO: fix, need to search in .is_replace ops also
#     """
#     insertions = [m for m in al.matches if m.is_insert]
#     if len(insertions) == 0:
#         return ''
#     max_insertion = max(insertions, key=lambda m: m.len2)
#     inserted_words = al.text2.get_words()[max_insertion.start2:max_insertion.end2]
#     return al.text2.text[inserted_words[0].start:inserted_words[-1].stop]

def display_results(results: pd.DataFrame):
    """Show a WER comparison table as a styled notebook output.

    The 'whisper' and 'podlodka' columns of both 'bond005_wer' and 'my_wer'
    are rendered with one decimal place; the 'diff' columns additionally
    carry an explicit sign. Every cell and header gets a grey border.

    Args:
        results: frame with two-level (metric, system) column labels.
    """
    # Number format per second-level column label, shared by both metrics.
    format_by_system = {
        'whisper': '{:.1f}',
        'podlodka': '{:.1f}',
        'diff': '{:+.1f}',
    }
    column_formats = {
        (metric, system): number_format
        for metric in ('bond005_wer', 'my_wer')
        for system, number_format in format_by_system.items()
    }

    border_rule = {"selector": "td, th", "props": [("border", "1px solid grey !important")]}

    styled = results.style.format(column_formats)
    styled = styled.set_table_styles([border_rule])
    display(styled)

# Root directory of the evaluation reports. Reports for the clean audio sit
# directly in it; noise-augmented variants sit under `augmented/<snr>/`.
base_dir = Path('../long_audio_ru')

# Display names of the recordings; report files are numbered from 1 in this order.
names = ['1_Зализняк_филология', '2_Гарвард_философия', '3_Саватеев_математика', '4_Жириновский_политика', '5_Ланьков_история']

# 'none' selects the reports for the original (non-augmented) audio.
snr_levels = ['none', '01db', '02db', '03db', '04db', '05db']


def _load_report(path: Path) -> dict:
    """Read one JSON report from `path`.

    The encoding is passed explicitly so the transcripts are decoded the same
    way on every platform instead of depending on the locale default.
    """
    with path.open(encoding='utf-8') as f:
        return json.load(f)


# One record per (audio, snr) pair; turned into a DataFrame in a single step below.
records = []

for i, audio_name in enumerate(names, start=1):
    for snr in snr_levels:

        # reading reports
        report_dir = base_dir if snr == 'none' else base_dir / 'augmented' / snr
        podlodka_preds_json = _load_report(report_dir / f'report_for_vad_pipeline_{i}.json')
        whisper_preds_json = _load_report(report_dir / f'report_for_vad_pipeline_{i}_multi.json')

        # true transcription (both reports must refer to the same reference text)
        truth = whisper_preds_json['true']
        assert podlodka_preds_json['true'] == truth, (
            f'reference transcriptions differ for audio {i}, snr {snr}'
        )

        # alignments
        al_whisper = MultipleTextsAlignment.from_strings(truth, whisper_preds_json['pred'])
        al_podlodka = MultipleTextsAlignment.from_strings(truth, podlodka_preds_json['pred'])

        # results
        # NOTE(review): the reported 'WER' is a string whose last character is
        # dropped before parsing (presumably a trailing unit sign) — confirm
        # against the report format.
        records.append({
            'audio': audio_name,
            'snr': snr,
            ('bond005_wer', 'whisper'): 100 * float(whisper_preds_json['WER'][:-1]),
            ('bond005_wer', 'podlodka'): 100 * float(podlodka_preds_json['WER'][:-1]),
            ('my_wer', 'whisper'): 100 * al_whisper.wer()['wer'],
            ('my_wer', 'podlodka'): 100 * al_podlodka.wer()['wer'],
            # ('longest_insertion_len', 'whisper'): len(get_longest_insertion(al_whisper)),
            # ('longest_insertion_len', 'podlodka'): len(get_longest_insertion(al_podlodka)),
        })

# `set_index` with two keys already yields an ('audio', 'snr') MultiIndex, so
# only the tuple-labelled columns need converting to a MultiIndex.
results = pd.DataFrame(records).set_index(['audio', 'snr'])
results.columns = pd.MultiIndex.from_tuples(results.columns)

# Place a 'diff' column (podlodka minus whisper) right after each metric's
# 'podlodka' column.
for metric in ('bond005_wer', 'my_wer'):
    results.insert(
        loc=results.columns.get_loc((metric, 'podlodka')) + 1,
        column=(metric, 'diff'),
        value=results[(metric, 'podlodka')] - results[(metric, 'whisper')],
    )

display_results(results)

Unnamed: 0_level_0,Unnamed: 1_level_0,bond005_wer,bond005_wer,bond005_wer,my_wer,my_wer,my_wer
Unnamed: 0_level_1,Unnamed: 1_level_1,whisper,podlodka,diff,whisper,podlodka,diff
audio,snr,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
1_Зализняк_филология,none,9.7,12.5,2.9,9.8,12.5,2.7
1_Зализняк_филология,01db,50.7,36.4,-14.2,41.8,34.3,-7.5
1_Зализняк_филология,02db,9.1,11.7,2.7,9.3,11.5,2.2
1_Зализняк_филология,03db,9.1,20.1,11.0,9.3,13.1,3.8
1_Зализняк_филология,04db,9.5,12.2,2.7,9.7,12.2,2.5
1_Зализняк_филология,05db,9.3,11.9,2.6,9.5,11.9,2.4
2_Гарвард_философия,none,2.0,2.7,0.7,2.0,2.7,0.7
2_Гарвард_философия,01db,2.4,3.1,0.7,2.4,3.1,0.7
2_Гарвард_философия,02db,3.4,4.4,1.0,3.4,4.4,1.0
2_Гарвард_философия,03db,2.2,3.7,1.5,2.2,3.7,1.5


In [5]:
# Average every metric over the SNR variants of each recording.
mean_wer_by_audio = results.groupby('audio').mean()
display_results(mean_wer_by_audio)

Unnamed: 0_level_0,bond005_wer,bond005_wer,bond005_wer,my_wer,my_wer,my_wer
Unnamed: 0_level_1,whisper,podlodka,diff,whisper,podlodka,diff
audio,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1_Зализняк_филология,16.2,17.5,1.3,14.9,15.9,1.0
2_Гарвард_философия,2.6,3.5,0.9,2.5,3.5,0.9
3_Саватеев_математика,32.9,33.8,1.0,32.1,33.5,1.4
4_Жириновский_политика,16.9,13.8,-3.1,14.7,13.8,-0.9
5_Ланьков_история,14.6,17.6,3.0,14.6,16.4,1.9
