Disfluencyの方は，脱落誤りが実質的にはあまり問題のない誤りなので，区別してカウントする

In [3]:
import tqdm
import pandas as pd

def _count_tag_errors(ref, hyp, key):
    """Count tag hits and misses over one aligned REF/HYP token sequence.

    A token "carries" the tag when ``key`` occurs anywhere in its upper-cased
    text. Tokens are compared position by position; extra tokens on the longer
    side are ignored (``zip`` semantics).

    Args:
        ref: Reference tokens of one utterance.
        hyp: Hypothesis tokens of the same utterance, aligned with ``ref``.
        key: Tag letter to look for (``'F'`` or ``'D'`` in this file).

    Returns:
        Dict with the counts ``TP``, ``FN``, ``FP`` and ``DL``. ``DL`` is the
        subset of ``FN`` whose hypothesis token contains ``'*'``, i.e. the
        tagged word was simply deleted rather than recognised as something
        else.
    """
    tp = fn = fp = dl = 0
    for r, h in zip(ref, hyp):
        h = h.upper()
        in_ref = key in r.upper()
        in_hyp = key in h
        if in_ref and in_hyp:
            tp += 1
        elif in_ref:
            fn += 1
            if '*' in h:  # plain deletion error
                dl += 1
        elif in_hyp:
            fp += 1
    return {'TP': tp, 'FN': fn, 'FP': fp, 'DL': dl}


def _summarize_counts(counts):
    """Turn per-utterance counts into a DataFrame with totals and P/R/F1.

    Args:
        counts: Mapping of utterance id to a ``TP``/``FN``/``FP``/``DL`` dict.

    Returns:
        DataFrame with one row per utterance plus an ``ALL`` row holding the
        column sums, and the extra columns ``Precision``, ``Recall``, ``F1``.
        The ratios of the ``ALL`` row are computed from the summed counts.
    """
    rows = dict(counts)
    # Add the totals before building the frame so that an input without any
    # scored utterance still produces the count columns (and an all-zero row).
    rows['ALL'] = {
        col: sum(row[col] for row in counts.values())
        for col in ('TP', 'FN', 'FP', 'DL')
    }
    df = pd.DataFrame(rows).T

    # 0/0 yields NaN; report such undefined ratios as 0.
    df['Precision'] = (df['TP'] / (df['TP'] + df['FP'])).fillna(0)
    df['Recall'] = (df['TP'] / (df['TP'] + df['FN'])).fillna(0)
    df['F1'] = (
        2 * df['Precision'] * df['Recall'] / (df['Precision'] + df['Recall'])
    ).fillna(0)
    return df


def make_result_df(result_file):
    """Score 'F' and 'D' tagged tokens from an alignment result file.

    The file is scanned for lines starting with ``id``, ``REF:`` and ``HYP:``;
    every other line is ignored. The utterance id is the second
    whitespace-separated field of the ``id`` line with its first and last
    character stripped, upper-cased. An utterance is scored as soon as its id,
    REF and HYP are all known, so the final utterance is counted even when
    nothing follows its ``HYP:`` line.

    NOTE(review): 'F' and 'D' are presumably filler and disfluency tags -
    confirm against the corpus annotation scheme.

    Args:
        result_file: Path to the result text file (read as UTF-8).

    Returns:
        Dict mapping ``'F'`` and ``'D'`` to a DataFrame indexed by utterance id
        with a trailing ``ALL`` row, and columns ``TP``, ``FN``, ``FP``,
        ``DL``, ``Precision``, ``Recall``, ``F1``.
    """
    try:
        from tqdm import tqdm as progress
    except ImportError:
        # The progress bar is cosmetic; parsing must not depend on it.
        progress = iter

    with open(result_file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    results = {'F': {}, 'D': {}}
    utt_id = ref = hyp = None
    for line in progress(lines):
        if line.startswith("id"):
            utt_id = line.split()[1][1:-1].upper()
            ref = hyp = None
            continue
        if line.startswith("REF:"):
            ref = line.split()[1:]
        elif line.startswith("HYP:"):
            hyp = line.split()[1:]
        else:
            continue

        if utt_id is None or ref is None or hyp is None:
            continue
        for key, per_utt in results.items():
            per_utt[utt_id] = _count_tag_errors(ref, hyp, key)
        # Require a fresh id line before anything else is scored.
        utt_id = ref = hyp = None

    return {key: _summarize_counts(counts) for key, counts in results.items()}

In [4]:
import os

# Experiment directory (below exp/) to score, and the prefix of the CSV files
# written to the current working directory.
exp_name = "asr_train_asr_cbs_transducer_081616_hop128/cbs_20epoch_with_lm"
out_name = "002_cbs081616hop128_transducer_with_lm"

# For every evaluation set: score its result.txt, print the totals of each
# tag type and save the full per-utterance table as CSV.
for eval_name in ("eval1", "eval2", "eval3"):
    eval_dir = os.path.join("exp", exp_name, eval_name, "score_wer")
    result_file = os.path.join(eval_dir, "result.txt")
    result_dfs = make_result_df(result_file)

    for key in result_dfs:
        df = result_dfs[key]
        out_csv_name = f"{out_name}_{eval_name}_{key}.csv"
        totals = df.loc['ALL'].to_dict()

        print(f"Eval: {eval_name}, Type: {key}")
        print(totals)
        df.to_csv(out_csv_name)
        print(f"Saved to {out_csv_name}")

100%|██████████| 8501/8501 [00:00<00:00, 137719.21it/s]


Eval: eval1, Type: F
{'TP': 2929.0, 'FN': 297.0, 'FP': 166.0, 'DL': 231.0, 'Precision': 0.9463651050080776, 'Recall': 0.9079355238685679, 'F1': 0.9267520961873121}
Saved to 002_cbs081616hop128_transducer_with_lm_eval1_F.csv
Eval: eval1, Type: D
{'TP': 216.0, 'FN': 368.0, 'FP': 47.0, 'DL': 232.0, 'Precision': 0.8212927756653993, 'Recall': 0.3698630136986301, 'F1': 0.5100354191263282}
Saved to 002_cbs081616hop128_transducer_with_lm_eval1_D.csv


100%|██████████| 8579/8579 [00:00<00:00, 221101.45it/s]


Eval: eval2, Type: F
{'TP': 2284.0, 'FN': 198.0, 'FP': 128.0, 'DL': 152.0, 'Precision': 0.9469320066334992, 'Recall': 0.9202256244963739, 'F1': 0.93338782182264}
Saved to 002_cbs081616hop128_transducer_with_lm_eval2_F.csv
Eval: eval2, Type: D
{'TP': 263.0, 'FN': 290.0, 'FP': 85.0, 'DL': 145.0, 'Precision': 0.7557471264367817, 'Recall': 0.4755877034358047, 'F1': 0.5837957824639289}
Saved to 002_cbs081616hop128_transducer_with_lm_eval2_D.csv


100%|██████████| 8723/8723 [00:00<00:00, 333806.98it/s]


Eval: eval3, Type: F
{'TP': 1577.0, 'FN': 123.0, 'FP': 199.0, 'DL': 95.0, 'Precision': 0.8879504504504504, 'Recall': 0.9276470588235294, 'F1': 0.9073647871116226}
Saved to 002_cbs081616hop128_transducer_with_lm_eval3_F.csv
Eval: eval3, Type: D
{'TP': 93.0, 'FN': 174.0, 'FP': 31.0, 'DL': 84.0, 'Precision': 0.75, 'Recall': 0.34831460674157305, 'F1': 0.4757033248081842}
Saved to 002_cbs081616hop128_transducer_with_lm_eval3_D.csv
