In [None]:
import json
from pathlib import Path

import pandas as pd
import pysrt
from IPython.display import clear_output

from asr.comparison import MultipleTextsAlignment

In [2]:
input_dir =  Path('/home/oleg/pisets_test_results')

transcriptions = {}

for audio_path in Path('/home/oleg/pisets_test_set/').glob('*.wav'):
    
    transcriptions[audio_path.stem] = {}

    if (srt_path := audio_path.with_suffix('.srt')).is_file():
        truth = ' '.join([sub.text for sub in pysrt.open(srt_path)])
    else:
        with open(audio_path.with_suffix('.txt')) as f:
            truth = f.read()
    transcriptions[audio_path.stem]['truth'] = truth

    with open(input_dir / f'{audio_path.stem}_only_whisper_pipeline.txt') as f:
        transcriptions[audio_path.stem]['only_whisper_pipeline'] = f.read()

    with open(input_dir / f'{audio_path.stem}_only_whisper_longform.txt') as f:
        transcriptions[audio_path.stem]['only_whisper_longform'] = f.read()

    with open(input_dir / f'{audio_path.stem}_only_whisper_longform_conditioned.txt') as f:
        transcriptions[audio_path.stem]['only_whisper_longform_conditioned'] = f.read()

    with open(input_dir / f'{audio_path.stem}_lm_whisperV3_stretch_3_to_4.json') as f:
        outputs = json.load(f)
        transcriptions[audio_path.stem]['w2v2_golos_lm'] = ' '.join([x['transcription_from_segmenter'] for x in outputs])
        transcriptions[audio_path.stem]['whisperV3'] = ' '.join([x['transcription'] for x in outputs])
        transcriptions[audio_path.stem]['whisperV3_stretch'] = ' '.join([x['transcription_stretched'] for x in outputs])

    with open(input_dir / f'{audio_path.stem}_nolm_whisperV3.json') as f:
        outputs = json.load(f)
        transcriptions[audio_path.stem]['w2v2_golos_nolm'] = ' '.join([x['transcription_from_segmenter'] for x in outputs])
        # transcriptions[audio_path.stem]['whisperV3_from_golos_nolm'] = ' '.join([x['transcription'] for x in outputs])

    with open(input_dir / f'{audio_path.stem}_lm_whisperV3.json') as f:
        outputs = json.load(f)
        transcriptions[audio_path.stem]['w2v2_golos_nolm'] = ' '.join([x['transcription_from_segmenter'] for x in outputs])
        # transcriptions[audio_path.stem]['whisperV3_from_golos_nolm'] = ' '.join([x['transcription'] for x in outputs])

    with open(input_dir / f'{audio_path.stem}_lm_whisperV3_new.json') as f:
        outputs = json.load(f)
        transcriptions[audio_path.stem]['whisperV3_ru'] = ' '.join([x['transcription'] for x in outputs])

    with open(input_dir / f'{audio_path.stem}_lm_whisperV3_1_20.json') as f:
        outputs = json.load(f)
        transcriptions[audio_path.stem]['whisperV3_1-20_ru'] = ' '.join([x['transcription'] for x in outputs])

    with open(input_dir / f'{audio_path.stem}_lm_whisperV3_1_30.json') as f:
        outputs = json.load(f)
        transcriptions[audio_path.stem]['whisperV3_1-30_ru'] = ' '.join([x['transcription'] for x in outputs])

    with open(input_dir / f'{audio_path.stem}_lm_whisperV3_long_segments.json') as f:
        outputs = json.load(f)
        transcriptions[audio_path.stem]['whisperV3_long_segments'] = ' '.join([x['transcription'] for x in outputs])

    with open(input_dir / f'{audio_path.stem}_lm_whisperV2.json') as f:
        outputs = json.load(f)
        transcriptions[audio_path.stem]['w2v2_golos_lm'] = ' '.join([x['transcription_from_segmenter'] for x in outputs])
        transcriptions[audio_path.stem]['whisperV2'] = ' '.join([x['transcription'] for x in outputs])

    with open(input_dir / f'{audio_path.stem}_lm_whisperV3_long_segments_new.json') as f:
        outputs = json.load(f)
        transcriptions[audio_path.stem]['whisperV3_long_segments_ru'] = ' '.join([x['transcription'] for x in outputs])

In [3]:
wers = []

for audio_name, t in transcriptions.items():
    truth = t['truth']
    for mode_name in set(t.keys()) - {'truth'}:
        pred = t[mode_name]

        alignment = MultipleTextsAlignment.from_strings(truth, pred)
        wers.append({'audio': audio_name, 'model': mode_name, 'wer': alignment.wer()['wer']}) # max_insertions=np.inf

        clear_output()

        df = pd.DataFrame(wers).pivot(index='audio', columns='model', values='wer')
        display(df)

model,only_whisper_longform,only_whisper_longform_conditioned,only_whisper_pipeline,w2v2_golos_lm,w2v2_golos_nolm,whisperV2,whisperV3,whisperV3_1-20_ru,whisperV3_1-30_ru,whisperV3_long_segments,whisperV3_long_segments_ru,whisperV3_ru,whisperV3_stretch
audio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
galore,0.166367,0.346029,0.155228,0.275961,0.275961,0.176069,0.160259,0.132231,0.128279,0.131153,0.131153,0.151994,0.173194
harvard,0.010929,0.057377,0.045537,0.149818,0.149818,0.070128,0.035974,0.015938,0.014572,0.012295,0.012295,0.034153,0.064208
lankov,0.103079,0.144933,0.147354,0.316845,0.316845,0.16119,0.133864,0.087513,0.087513,0.114147,0.114147,0.129367,0.167416
savvateev,0.174276,0.192931,0.19244,0.605302,0.605302,0.319097,0.279332,0.216986,0.227295,0.175749,0.175749,0.270496,0.432499
tuberculosis,0.169576,0.2101,0.199501,0.279302,0.279302,0.250312,0.153678,0.131234,0.157107,0.159601,0.159601,0.148379,0.177993
zaliznyak,0.158086,0.31327,0.131617,0.245105,0.245105,0.182379,0.168238,0.116751,0.107324,0.127991,0.126904,0.157723,0.207759
zhirinovsky,0.043371,0.077241,0.115655,0.25444,0.25444,0.13796,0.094589,0.060306,0.068154,0.065262,0.065262,0.085502,0.13672


In [4]:
df[['whisperV3_1-20_ru', 'whisperV3_1-30_ru', 'only_whisper_longform', 'only_whisper_pipeline']] \
    .rename(columns={'whisperV3_1-20_ru': 'pisets 1-20', 'whisperV3_1-30_ru': 'pisets 1-30'})

model,pisets 1-20,pisets 1-30,only_whisper_longform,only_whisper_pipeline
audio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
galore,0.132231,0.128279,0.166367,0.155228
harvard,0.015938,0.014572,0.010929,0.045537
lankov,0.087513,0.087513,0.103079,0.147354
savvateev,0.216986,0.227295,0.174276,0.19244
tuberculosis,0.131234,0.157107,0.169576,0.199501
zaliznyak,0.116751,0.107324,0.158086,0.131617
zhirinovsky,0.060306,0.068154,0.043371,0.115655
