In [11]:
import numpy as np

def calculate_wer(reference, hypothesis):
    """Return the word error rate (WER) of ``hypothesis`` against ``reference``.

    Both strings are split on whitespace and aligned word-by-word with an
    edit-distance table. The table is then walked backwards to count
    substitutions, insertions and deletions.

    Args:
        reference: The ground-truth text.
        hypothesis: The text being scored.

    Returns:
        ``(substitutions + insertions + deletions) / number of reference
        words`` as a float, or ``float('inf')`` when the reference contains
        no words.
    """
    ref_words = reference.split()
    hyp_words = hypothesis.split()
    ref_len, hyp_len = len(ref_words), len(hyp_words)

    # table[r, h] holds the edit distance between the first r reference
    # words and the first h hypothesis words.
    table = np.zeros((ref_len + 1, hyp_len + 1), dtype=int)
    table[:, 0] = np.arange(ref_len + 1)  # r deletions to reach an empty hypothesis
    table[0, :] = np.arange(hyp_len + 1)  # h insertions from an empty reference

    for r, ref_word in enumerate(ref_words, start=1):
        for h, hyp_word in enumerate(hyp_words, start=1):
            if ref_word == hyp_word:
                diagonal = table[r - 1, h - 1]      # words match, no extra cost
            else:
                diagonal = table[r - 1, h - 1] + 1  # substitution
            via_deletion = table[r - 1, h] + 1
            via_insertion = table[r, h - 1] + 1
            table[r, h] = min(via_deletion, via_insertion, diagonal)

    # Walk back from the bottom-right corner, classifying each step.
    tally = {"substitutions": 0, "insertions": 0, "deletions": 0}
    r, h = ref_len, hyp_len
    while r > 0 or h > 0:
        inside = r > 0 and h > 0
        if inside and ref_words[r - 1] == hyp_words[h - 1]:
            # Matching words: move diagonally without counting an error.
            r -= 1
            h -= 1
            continue
        if inside and table[r, h] == table[r - 1, h - 1] + 1:
            tally["substitutions"] += 1
            r -= 1
            h -= 1
            continue
        if h > 0 and table[r, h] == table[r, h - 1] + 1:
            tally["insertions"] += 1
            h -= 1
            continue
        # Nothing else applies, so this step consumed a reference word.
        tally["deletions"] += 1
        r -= 1

    if ref_len == 0:
        return float('inf')
    return sum(tally.values()) / ref_len

# Example: score a hypothesis that differs from the reference in three words.
reference_text = (
    "CNN's calculators help you make informed decisions "
    "about your personal finances at every stage of life"
)
hypothesis_text = (
    "CNN's calculator help you make informed decision "
    "about your personal finance at every stage of life"
)

# Compute the error rate and print it as a percentage with two decimals.
wer_result = calculate_wer(reference_text, hypothesis_text)
print(f"字錯誤率 (WER): {wer_result:.2%}")


字錯誤率 (WER): 18.75%
