BLEU

机器翻译评价指标：BLEU(Bilingual Evaluation Understudy)

$$BLEU = BP \cdot \exp\left(\sum_{i=1}^{N} w_i \log p_i\right)$$

* $p_i$：i-gram精度
* $w_i$：i-gram精度的权重，下面的实现取均匀权重$1/N$
* BP：Brevity Penalty，简短惩罚
    * $l_c$翻译结果长度，$l_r$参考答案长度
    * 1 如果$l_c > l_r$
    * $exp(1 - \frac{l_r}{l_c})$ 如果$l_c \le l_r$

In [1]:

import collections
import numpy as np


def split_sentence(sentence, n_gram):
    """Return every contiguous n-gram of a token sequence.

    Args:
        sentence: Sequence of string tokens.
        n_gram: Number of consecutive tokens in each n-gram.

    Returns:
        A list of lower-cased strings, one per sliding window over
        ``sentence``, each made of ``n_gram`` tokens joined by a single
        space. The list is empty when ``sentence`` has fewer than
        ``n_gram`` tokens.
    """
    window_count = len(sentence) - n_gram + 1
    return [
        ' '.join(sentence[start:start + n_gram]).lower()
        for start in range(window_count)
    ]


def bleu(prediction: str, ground_truth: str, n_gram: int):
    """Calculate the BiLingual Evaluation Understudy (BLEU) score.

    The score is the brevity penalty multiplied by the geometric mean of
    the modified (clipped) i-gram precisions for i = 1..``n_gram``, using
    uniform weights ``1 / n_gram``. N-grams are produced by
    ``split_sentence``, which lower-cases them, so matching is
    case-insensitive.

    Args:
        prediction: Candidate translation, tokens separated by single spaces.
        ground_truth: Reference translation, tokens separated by single
            spaces.
        n_gram: Highest n-gram order to include; must be at least 1.

    Returns:
        The BLEU score. ``0.0`` is returned when any order has no matching
        n-gram, including the case where the prediction has fewer than
        ``n_gram`` tokens.

    Raises:
        ValueError: If ``n_gram`` is smaller than 1.
    """
    if n_gram < 1:
        raise ValueError('n_gram must be at least 1')

    prediction = prediction.split(' ')
    ground_truth = ground_truth.split(' ')

    # Length of the candidate translation.
    lc = len(prediction)
    # Length of the reference translation.
    lr = len(ground_truth)

    # Modified i-gram precisions.
    precisions = []
    for i in range(1, n_gram + 1):
        ground_truth_counts = collections.Counter(split_sentence(ground_truth, i))
        prediction_counts = collections.Counter(split_sentence(prediction, i))

        total = sum(prediction_counts.values())
        if total == 0:
            # The prediction is too short to contain any i-gram.
            precisions.append(0.0)
            continue

        # Counter intersection keeps the minimum count per n-gram, so a
        # repeated n-gram is credited at most as often as it occurs in the
        # reference (clipping).
        matched = sum((prediction_counts & ground_truth_counts).values())
        precisions.append(matched / total)

    print(precisions)

    # A zero precision drives the geometric mean to zero; return it directly
    # instead of evaluating log(0).
    if min(precisions) == 0:
        return 0.0

    result = np.exp((1 / n_gram) * np.sum([np.log(p) for p in precisions]))
    # Brevity penalty: only applied when the candidate is not longer than
    # the reference.
    result *= np.exp(1 - lr / lc) if lc <= lr else 1

    return result

In [2]:
# Example 1: the prediction reuses the reference's words in a different
# phrase order and omits "for".
ground_truth = 'Israeli officials are responsible for airport security'
prediction = 'Airport security Israeli officials are responsible'

In [3]:
# Score example 1 using n-gram orders 1 through 4.
bleu(prediction, ground_truth, 4)

[1.0, 0.8, 0.5, 0.3333333333333333]


0.5115078115793242

In [4]:
# Example 2: Chinese sentences written with a space between every character,
# so each character becomes one token when split on spaces. The prediction
# (12 tokens) is shorter than the reference (19 tokens).
prediction = '我 在 清 华 搞 自 然 语 言 处 理 。'
ground_truth = '我 是 清 华 大 学 自 然 语 言 处 理 实 验 室 的 同 学 。'

In [5]:
# Score example 2 using n-gram orders 1 through 4.
bleu(prediction, ground_truth, 4)

[0.8333333333333334, 0.5454545454545454, 0.4, 0.3333333333333333]


0.2768793496359699