Unfortunately, the Paraformer packages are not compatible with ESPnet. Create a separate conda environment and install the dependencies with pip
```
conda create -n funasr python=3.10  # installing jiwer fails on 3.11 (levenshtein dependency)
conda activate funasr
pip install jiwer==2.5  # 2.6 onwards conflicts with funasr's g2p click dependency
pip install funasr-onnx
```
before running this notebook.

In [1]:
from pathlib import Path
import os
from funasr_onnx import Paraformer
import jiwer
import re
from ast import literal_eval


# IPython magic: capture the notebook's current working directory.
PWD = %pwd
PWD = Path(PWD)
# Root for generated artifacts; created up front if it does not exist yet.
outputs_dir = PWD / 'outputs'
os.makedirs(outputs_dir, exist_ok=True)
# Directories that are globbed for *.wav files by the ASR cells below.
jets_dir = outputs_dir / 'tts_train_jets_raw_phn_tacotron_g2p_en_no_space/aishell3'
nopitch_dir = outputs_dir / 'tts_train_jets_raw_phn_tacotron_g2p_en_no_space/aishell3_nopitch'
# Directory handed to Paraformer(); the ASR result text files are also written here.
model_dir = (PWD / "../../paraformer-large/").resolve()
# NOTE: unlike model_dir, this is resolved against the process working directory
# rather than PWD; the two agree as long as the working directory has not changed.
data_dir = Path('../../datasets/data_aishell3/').resolve()

In [None]:
# Load the Paraformer ASR model from model_dir, processing one file per batch.
# NOTE(review): device_id=-1 presumably selects CPU inference and quantize=True
# the quantized ONNX weights - confirm against the funasr_onnx documentation.
model = Paraformer(model_dir, device_id=-1, batch_size=1, quantize=True)

In [None]:
# Smoke test: run ASR on a single wav file from jets_dir; as the last expression
# of the cell, the raw model output is shown by the notebook.
model([jets_dir / 'SSB06930002.wav'])

In [None]:
def run_asr(wav_paths, asr_result_path, asr_model=None):
    """Transcribe each wav file and write one result line per file.

    Every output line has the form ``<wav file name> <wordlist>``, where
    ``<wordlist>`` is the string form of ``result['preds'][1]`` for that file
    (presumably the list of recognized tokens - confirm against the
    funasr_onnx output format).

    Args:
        wav_paths: Iterable of wav locations (``Path`` objects or strings).
        asr_result_path: Text file that is created or overwritten.
        asr_model: Callable that takes a one-element list holding a wav path
            and returns a sequence whose first item has a ``'preds'`` entry.
            Defaults to the module-level ``model``.
    """
    if asr_model is None:
        asr_model = model
    # NOTE(review): the file is written with the platform default encoding;
    # eval_wer / eval_cer read it back the same way, so the two must agree.
    with open(asr_result_path, 'w') as f:
        for wav_path in wav_paths:
            result = asr_model([wav_path])[0]
            wordlist = result['preds'][1]
            # Path(...) lets plain string paths work as well as Path objects.
            f.write(f'{Path(wav_path).name} {wordlist}\n')

In [None]:
# Run ASR over the dataset's own test recordings (the "gt" audio): every wav
# exactly one directory level below <data_dir>/test/wav.
gt_dir = data_dir / 'test/wav'
gt_paths = sorted(gt_dir.glob('*/*.wav'))  # sorted for a deterministic order
# The result file is stored inside model_dir, not under outputs_dir.
gt_asr_path = model_dir / 'gt_result.txt'
run_asr(gt_paths, gt_asr_path)

In [None]:
# Run ASR over every wav directly inside jets_dir, in sorted order.
jets_paths = sorted(jets_dir.glob('*.wav'))
# The result file is stored inside model_dir, not under outputs_dir.
jets_asr_path = model_dir / 'jets_result.txt'
run_asr(jets_paths, jets_asr_path)

In [None]:
# Run ASR over every wav directly inside nopitch_dir, in sorted order.
nopitch_paths = sorted(nopitch_dir.glob('*.wav'))
# The result file is stored inside model_dir, not under outputs_dir.
nopitch_asr_path = model_dir / 'nopitch_result.txt'
run_asr(nopitch_paths, nopitch_asr_path)

In [2]:
# Transcript file of the dataset's test split; read by get_transcripts().
transcript_file = data_dir / 'test/content.txt'

In [3]:
def get_transcripts(path=None):
    """Load the reference transcripts, keyed by wav file name.

    Each non-blank line must hold a wav file name, whitespace, and the
    transcript text. Spaces, lowercase ASCII letters and digits are removed
    from the transcript (presumably the pinyin annotations interleaved with
    the Chinese characters - confirm against the dataset format).

    Args:
        path: Transcript file to read. Defaults to the module-level
            ``transcript_file``.

    Returns:
        dict mapping wav file name to the cleaned transcript string.

    Raises:
        ValueError: If a non-blank line has a file name but no transcript.
    """
    if path is None:
        path = transcript_file
    transcripts = {}
    # The file contains Chinese text, so decode it explicitly instead of
    # relying on the platform default (assumes the file is UTF-8 - confirm).
    with open(path, encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank (e.g. trailing) line carries no entry; skip it.
                continue
            wav_file, transcript = line.split(maxsplit=1)
            transcripts[wav_file] = re.sub(r'[ a-z0-9]', '', transcript)
    return transcripts

# Parse the transcript file once; the resulting dict is passed to the
# eval_wer / eval_cer calls in the cells below.
transcripts = get_transcripts()

In [None]:
def eval_wer(transcripts, asr_result_path, wer_path):
    """Write a per-utterance word-error-rate report as CSV.

    Each line of ``asr_result_path`` is ``<wav file name> <list literal>``.
    The reference is looked up in ``transcripts`` by file name; both sides are
    joined with single spaces before being handed to ``jiwer.wer``, so every
    element of the transcript and every ASR token counts as one word.

    Args:
        transcripts: Mapping from wav file name to reference transcript.
        asr_result_path: ASR result file to read.
        wer_path: CSV file to (over)write, with the columns
            ``wav_file,gt_len,wer,eng_words``.
    """
    with open(wer_path, 'w') as report:
        report.write('wav_file,gt_len,wer,eng_words\n')
        with open(asr_result_path) as hypotheses:
            for raw_line in hypotheses:
                name, payload = raw_line.strip().split(maxsplit=1)
                tokens = literal_eval(payload)
                # Number of ASR tokens made up solely of ASCII characters.
                ascii_count = sum(1 for token in tokens if token.isascii())
                reference = transcripts[name]
                reference_len = len(reference)
                score = jiwer.wer(
                    truth=' '.join(reference),
                    hypothesis=' '.join(tokens),
                )
                report.write(
                    f'{name},{reference_len},{score},{ascii_count}\n'
                )

In [None]:
# Score the ASR results in jets_asr_path; the report goes to outputs/jets_wer.csv.
jets_wer_path = outputs_dir / 'jets_wer.csv'
eval_wer(transcripts=transcripts, asr_result_path=jets_asr_path, wer_path=jets_wer_path)

In [50]:
# Score the ASR results in nopitch_asr_path; the report goes to outputs/nopitch_wer.csv.
nopitch_wer_path = outputs_dir / 'nopitch_wer.csv'
eval_wer(transcripts=transcripts, asr_result_path=nopitch_asr_path, wer_path=nopitch_wer_path)

In [4]:
def eval_cer(transcripts, asr_result_path, cer_path):
    """Write a per-utterance character-error-rate report as CSV.

    Reference transcript and ASR output are both passed through the project
    helper ``hans_to_pinyin`` and concatenated without separators before
    ``jiwer.cer`` compares them; only the ASR side is lowercased.
    ``gt_len`` is the length of the concatenated reference string.

    Args:
        transcripts: Mapping from wav file name to reference transcript.
        asr_result_path: ASR result file to read; each line is
            ``<wav file name> <list literal>``.
        cer_path: CSV file to (over)write, with the columns
            ``wav_file,gt_len,cer,eng_words``.
    """
    # Imported lazily, so the project module is only needed when CER is run.
    from prosody.en_to_zh import hans_to_pinyin

    with open(cer_path, 'w') as report:
        report.write('wav_file,gt_len,cer,eng_words\n')
        with open(asr_result_path) as hypotheses:
            for raw_line in hypotheses:
                name, payload = raw_line.strip().split(maxsplit=1)
                tokens = literal_eval(payload)
                # Number of ASR tokens made up solely of ASCII characters.
                ascii_count = sum(1 for token in tokens if token.isascii())
                reference = transcripts[name]
                reference_pinyin = ''.join(hans_to_pinyin(reference))
                reference_len = len(reference_pinyin)
                hypothesis_pinyin = ''.join(hans_to_pinyin(tokens)).lower()
                score = jiwer.cer(
                    truth=reference_pinyin,
                    hypothesis=hypothesis_pinyin,
                )
                report.write(
                    f'{name},{reference_len},{score},{ascii_count}\n'
                )

In [6]:
# Same path as assigned in the ASR cell; presumably repeated here so this cell
# can run without re-running the ASR cell first.
jets_asr_path = model_dir / 'jets_result.txt'
# Score the ASR results in jets_asr_path; the report goes to outputs/jets_cer.csv.
jets_cer_path = outputs_dir / 'jets_cer.csv'
eval_cer(transcripts=transcripts, asr_result_path=jets_asr_path, cer_path=jets_cer_path)