이 자료는 2021년 12월 14일에 마지막으로 테스트되었습니다.

이 자료는 위키독스 '딥 러닝을 이용한 자연어 처리 입문'의 BLEU 구현하기 튜토리얼입니다.  
링크 : https://wikidocs.net/31695

In [1]:
import numpy as np
from collections import Counter
from nltk import ngrams

In [3]:
# count n-grams in tokens
def simple_count(tokens, n):
  """Count how often each n-gram occurs in a token sequence.

  Args:
    tokens: Sequence of tokens (e.g. the result of ``str.split``).
    n: Size of the n-grams to count (1 = unigram, 2 = bigram, ...).

  Returns:
    A ``Counter`` mapping each n-gram (as produced by ``nltk.ngrams``)
    to its number of occurrences.
  """
  n_gram_counts = Counter()
  n_gram_counts.update(ngrams(tokens, n))
  return n_gram_counts

In [4]:
# Demo: count the unigrams of a sample candidate sentence.
candidate = "It is a guide to action which ensures that the military always obeys the commands of the party."
tokens = candidate.split()  # whitespace tokenization
# The backslash is escaped explicitly: a bare '\ ' is an invalid escape
# sequence (DeprecationWarning since Python 3.6, SyntaxWarning since 3.12).
# The printed text is unchanged.
print('tokens \\ :', tokens)

result = simple_count(tokens, 1)  # n = 1 is a unigram

print('unigram count :', result)

tokens \ : ['It', 'is', 'a', 'guide', 'to', 'action', 'which', 'ensures', 'that', 'the', 'military', 'always', 'obeys', 'the', 'commands', 'of', 'the', 'party.']
unigram count : Counter({('the',): 3, ('It',): 1, ('is',): 1, ('a',): 1, ('guide',): 1, ('to',): 1, ('action',): 1, ('which',): 1, ('ensures',): 1, ('that',): 1, ('military',): 1, ('always',): 1, ('obeys',): 1, ('commands',): 1, ('of',): 1, ('party.',): 1})


In [5]:
# Demo: a degenerate candidate that repeats a single word seven times.
candidate = 'the the the the the the the'
tokens = candidate.split()  # whitespace tokenization
result = simple_count(tokens, n=1)  # unigram counts
print('unigram count :', result)

unigram count : Counter({('the',): 7})


In [31]:
def count_clip(candidate, reference_list, n):
  """Clip each candidate n-gram count by its maximum count in any reference.

  Args:
    candidate: Tokenized candidate sentence.
    reference_list: List of tokenized reference sentences.
    n: Size of the n-grams.

  Returns:
    A dict with one entry per distinct candidate n-gram, whose value is
    ``min(count in candidate, max count over the references)``. N-grams that
    occur in no reference are kept with a value of 0.
  """
  candidate_counts = simple_count(candidate, n)

  # Counter union (|) keeps, per n-gram, the larger of the two counts, so
  # folding every reference in yields the per-n-gram maximum over references.
  max_ref_counts = Counter()
  for ref in reference_list:
    max_ref_counts |= simple_count(ref, n)

  # count_clip = min(count, max_ref_count); a Counter reports 0 for n-grams
  # that never appear in any reference.
  clipped = {}
  for n_gram, count in candidate_counts.items():
    clipped[n_gram] = min(count, max_ref_counts[n_gram])
  return clipped

In [32]:
# Demo: clipped unigram counts for a candidate that over-generates "the".
candidate = 'the the the the the the the'
references = [
    'the cat is on the mat',
    'there is a cat on the mat'
]
# Candidate and references are whitespace-tokenized before counting.
result = count_clip(candidate.split(), [ref.split() for ref in references], n=1)
print('Calibrated unigram count :', result)

n-gram count in ca_cnt statement : Counter({('the',): 7})
n-gram count in Ref statement : Counter({('the',): 2, ('cat',): 1, ('is',): 1, ('on',): 1, ('mat',): 1})
('the',)
('cat',)
('is',)
('on',)
('mat',)
n-gram count in Ref statement : Counter({('there',): 1, ('is',): 1, ('a',): 1, ('cat',): 1, ('on',): 1, ('the',): 1, ('mat',): 1})
('there',)
('is',)
res :  {('the',): 2, ('cat',): 1, ('is',): 1, ('on',): 1, ('mat',): 1, ('there',): 1}
('a',)
('cat',)
res :  {('the',): 2, ('cat',): 1, ('is',): 1, ('on',): 1, ('mat',): 1, ('there',): 1, ('a',): 1}
('on',)
res :  {('the',): 2, ('cat',): 1, ('is',): 1, ('on',): 1, ('mat',): 1, ('there',): 1, ('a',): 1}
('the',)
res :  {('the',): 2, ('cat',): 1, ('is',): 1, ('on',): 1, ('mat',): 1, ('there',): 1, ('a',): 1}
('mat',)
res :  {('the',): 2, ('cat',): 1, ('is',): 1, ('on',): 1, ('mat',): 1, ('there',): 1, ('a',): 1}
n_gram  :  ('mat',)
ca_cnt.get(n_gram, 0)  :  0
max_ref_cnt_dict.get(n_gram, 0)  :  1
Calibrated unigram count : {('the',): 2}


In [8]:
list(map(str.split, references))  # whitespace-tokenize every reference

[['the', 'cat', 'is', 'on', 'the', 'mat'],
 ['there', 'is', 'a', 'cat', 'on', 'the', 'mat']]

In [9]:
def modified_precision(candidate, reference_list, n):
  """Compute the modified (clipped) n-gram precision of a candidate.

  Args:
    candidate: Tokenized candidate sentence.
    reference_list: List of tokenized reference sentences.
    n: Size of the n-grams.

  Returns:
    Sum of the clipped n-gram counts divided by the total number of
    candidate n-grams. When the candidate has no n-grams the divisor is
    replaced by 1, so the result is 0.0.
  """
  # Numerator: total of the clipped counts.
  clipped_total = sum(count_clip(candidate, reference_list, n).values())

  # Denominator: total of the plain candidate counts.
  candidate_total = sum(simple_count(candidate, n).values())

  # `or 1` keeps the division defined when the candidate has no n-grams.
  return clipped_total / (candidate_total or 1)

In [10]:
# Demo: modified unigram precision of the current candidate/references.
result = modified_precision(candidate.split(), [ref.split() for ref in references], n=1)
print('Calibrated unigram precision :', result)

Calibrated unigram precision : 0.2857142857142857


In [15]:
# Function that returns the length of Ref closest to the length of Ca
def closest_ref_length(candidate, reference_list):
  """Return the length of the reference closest in length to the candidate.

  Args:
    candidate: Tokenized candidate sentence.
    reference_list: List of tokenized reference sentences.

  Returns:
    The length of the reference whose length differs least from the
    candidate's. When two references are equally close, the shorter length
    is returned.

  Raises:
    ValueError: If ``reference_list`` is empty.
  """
  target_len = len(candidate)
  # Tuples compare element-wise: smallest length gap first, then the
  # shorter reference as the tie-breaker.
  _, best_len = min(
      (abs(len(ref) - target_len), len(ref)) for ref in reference_list
  )
  return best_len

In [12]:
def brevity_penalty(candidate, reference_list):
  """Compute the brevity penalty (BP) for a candidate.

  Args:
    candidate: Tokenized candidate sentence.
    reference_list: List of tokenized reference sentences.

  Returns:
    1 when the candidate is longer than the closest reference length,
    0 when the candidate is empty, and ``exp(1 - r / c)`` otherwise, where
    ``c`` is the candidate length and ``r`` the closest reference length.
  """
  c = len(candidate)
  r = closest_ref_length(candidate, reference_list)

  # Candidates longer than the closest reference are not penalized.
  if c > r:
    return 1

  # An empty candidate gets BP = 0, which makes BLEU = 0.0.
  if c == 0:
    return 0

  return np.exp(1 - r / c)

In [16]:
def bleu_score(candidate, reference_list, weights=(0.25, 0.25, 0.25, 0.25)):
  """Compute the sentence-level BLEU score of a candidate.

  BLEU = BP * exp(sum_n w_n * log p_n), where p_n is the modified n-gram
  precision and BP the brevity penalty.

  Args:
    candidate: Tokenized candidate sentence.
    reference_list: List of tokenized reference sentences.
    weights: Weight w_n of each n-gram order, starting at n = 1. The number
      of weights determines the highest n-gram order used.

  Returns:
    The BLEU score. It is 0.0 when any n-gram order with a non-zero weight
    has a modified precision of 0, because the weighted geometric mean of
    the precisions is then 0.
  """
  bp = brevity_penalty(candidate, reference_list)  # brevity penalty, BP

  # p1, p2, p3, ..., pn
  p_n = [modified_precision(candidate, reference_list, n=n)
         for n, _ in enumerate(weights, start=1)]

  # log(0) is undefined; a zero precision must drive the whole score to 0
  # instead of being skipped (which would treat it as a perfect precision).
  if any(w_i != 0 and p_i == 0 for w_i, p_i in zip(weights, p_n)):
    return 0.0

  # Orders with a zero weight contribute nothing and are left out so that a
  # zero precision there cannot produce 0 * -inf = nan.
  score = np.sum([w_i * np.log(p_i) for w_i, p_i in zip(weights, p_n) if w_i != 0])
  return bp * np.exp(score)

In [18]:
import nltk.translate.bleu_score as bleu

# Demo: compare this implementation with NLTK's sentence_bleu on one example.
candidate = 'It is a guide to action which ensures that the military always obeys the commands of the party'
references = [
    'It is a guide to action that ensures that the military will forever heed Party commands',
    'It is the guiding principle which guarantees the military forces always being under the command of the Party',
    'It is the practical guide for the army always to heed the directions of the party'
]

# Both scorers take whitespace-tokenized sentences. Note the argument order:
# bleu_score takes the candidate first, NLTK takes the references first.
print('BLEU in practice code :', bleu_score(candidate.split(), [ref.split() for ref in references]))
print('BLEU in package NLTK :', bleu.sentence_bleu([ref.split() for ref in references], candidate.split()))

BLEU in practice code : 0.5045666840058485
BLEU in package NLTK : 0.5045666840058485
