# Function definitions for BLEU

In [64]:
import sys
import unittest
from math import exp, log, fsum
from functools import reduce
from operator import mul
import numpy as np


def word_count(str):
    """Count how many times each item occurs in a sequence.

    Args:
        str: Iterable of hashable, mutually orderable items (word tokens or
            n-gram tuples elsewhere in this file). The name shadows the
            builtin ``str`` but is kept because it is part of the signature.

    Returns:
        A plain ``dict`` mapping every distinct item to its number of
        occurrences. Keys are inserted in ascending sorted order.
    """
    tally = {}
    # Walking the items in sorted order fixes the key order of the result.
    for token in sorted(str):
        tally[token] = tally.get(token, 0) + 1
    return tally


def mod_ngram_precision_count(cand_sent, ref_list, sum_count=True):
    """Return the clipped n-gram counts of a candidate against its references.

    Every distinct item of the candidate is counted, and that count is capped
    at the largest number of times the same item occurs in any single
    reference (0 when no reference contains it).

    Args:
        cand_sent: Sequence of candidate items (tokens or n-gram tuples).
        ref_list: List of reference sequences made of the same kind of items.
        sum_count: When equal to ``True`` the clipped counts are summed into a
            single number; otherwise the per-item dict is returned.

    Returns:
        The total clipped count, or a dict mapping each candidate item to its
        clipped count.
    """
    cand_tally = word_count(cand_sent)
    ref_tallies = [word_count(ref) for ref in ref_list]

    clipped = {}
    for gram, cand_freq in cand_tally.items():
        # Highest frequency of this item in any one reference.
        ceiling = max((tally.get(gram, 0) for tally in ref_tallies), default=0)
        clipped[gram] = min(cand_freq, ceiling)

    # Equality (not truthiness) is kept deliberately to match existing callers.
    if sum_count == True:
        return sum(clipped.values())
    return clipped


def ngram(text, gram_count):
    """List every contiguous window of ``gram_count`` items in ``text``.

    Args:
        text: A sliceable sequence (list of tokens, or a string, in which case
            the windows are made of characters).
        gram_count: Window size.

    Returns:
        A list of tuples, one per start position, in order of appearance. The
        list is empty when ``text`` is shorter than ``gram_count``.
    """
    windows = []
    last_start = len(text) - gram_count
    for start in range(last_start + 1):
        windows.append(tuple(text[start:start + gram_count]))
    return windows


# sentence bleu
def bleu_by_sent(cand_sent, ref_list):
    """Compute sentence-level BLEU with uniform weights over 1- to 4-grams.

    The score is the brevity penalty multiplied by the geometric mean of the
    four clipped n-gram precisions. A precision of zero is replaced by
    ``sys.float_info.min`` before taking the logarithm so the result stays
    finite.

    Args:
        cand_sent: Candidate sentence as a sequence of tokens.
        ref_list: Non-empty list of reference sentences, each a sequence of
            tokens.

    Returns:
        The BLEU score as a float. An empty candidate scores ``0.0``.

    Raises:
        ValueError: If ``ref_list`` is empty (no reference length to compare).
    """
    cand_len = len(cand_sent)
    if cand_len == 0:
        # Nothing can match and the brevity penalty would divide by zero.
        return 0.0

    # Reference length closest to the candidate length (first one wins ties).
    closest_ref_len = min(
        (len(ref) for ref in ref_list),
        key=lambda ref_len: abs(ref_len - cand_len),
    )
    bp = min(1, exp(1 - closest_ref_len / cand_len))

    log_precisions = []
    for order in range(1, 5):
        cand_grams = ngram(cand_sent, order)
        if cand_grams:
            matched = mod_ngram_precision_count(
                cand_grams, [ngram(ref, order) for ref in ref_list]
            )
            precision = matched / len(cand_grams)
        else:
            # Candidate shorter than this n-gram order: there is nothing to
            # match, so treat it as zero precision instead of dividing by 0.
            precision = 0
        floored = precision if precision != 0 else sys.float_info.min
        log_precisions.append(0.25 * log(floored))

    return bp * exp(fsum(log_precisions))


# corpus bleu
def bleu_by_corpus(ls_cand, ls_ref):
    """Compute corpus-level BLEU with uniform weights over 1- to 4-grams.

    Clipped n-gram matches and candidate n-gram totals are summed over the
    whole corpus before the precisions are formed, and the brevity penalty is
    computed from the summed candidate lengths and the summed closest
    reference lengths.

    Args:
        ls_cand: List of candidate sentences, each a sliceable sequence.
        ls_ref: List, parallel to ``ls_cand``, whose i-th entry is the
            non-empty list of references for the i-th candidate.

    Returns:
        A tuple ``(score, p_n)`` where ``score`` is the BLEU value and ``p_n``
        is the list of the four corpus-level n-gram precisions as floats.
        When the candidates contain no items at all, ``score`` is ``0.0``.

    Raises:
        IndexError: If ``ls_ref`` has fewer entries than ``ls_cand``.
        ValueError: If any candidate's reference list is empty.
    """
    max_order = 4
    matched = [0] * max_order
    total = [0] * max_order
    cand_len_sum = 0
    ref_len_sum = 0

    for idx, cand_sent in enumerate(ls_cand):
        ref_list = ls_ref[idx]
        cand_len = len(cand_sent)

        cand_len_sum += cand_len
        # Reference length closest to the candidate length (first wins ties).
        ref_len_sum += min(
            (len(ref) for ref in ref_list),
            key=lambda ref_len: abs(ref_len - cand_len),
        )

        for slot in range(max_order):
            order = slot + 1
            cand_grams = ngram(cand_sent, order)
            matched[slot] += mod_ngram_precision_count(
                cand_grams, [ngram(ref, order) for ref in ref_list]
            )
            total[slot] += len(cand_grams)

    # An order with no candidate n-grams anywhere counts as zero precision
    # rather than producing a 0/0 division.
    p_n = [hits / count if count else 0.0 for hits, count in zip(matched, total)]

    if cand_len_sum == 0:
        # Empty corpus or only empty candidates: brevity penalty is undefined.
        return (0.0, p_n)

    bp = min(1, exp(1 - ref_len_sum / cand_len_sum))
    precision = [
        0.25 * log(prec_n) if prec_n != 0 else 0.25 * log(sys.float_info.min)
        for prec_n in p_n
    ]

    return (bp * exp(fsum(precision)), p_n)

# Test of BLEU

#### sentence BLEU

In [66]:
from nltk.translate.bleu_score import sentence_bleu, corpus_bleu

# Toy example: one candidate sentence scored against two references.
cand = "the cat the cat on the mat"
# Alternative candidate that stresses count clipping:
#cand = "the the the the the the the the"
ref_1 = "the cat is on the mat"
ref_2 = "there is a cat on the mat"

# Both implementations are given whitespace-separated token lists.
cand = cand.split()
ref_1 = ref_1.split()
ref_2 = ref_2.split()

# Each label now sits on the implementation it names: NLTK's sentence_bleu
# first, then the from-scratch bleu_by_sent.
print("nltk's :", sentence_bleu(references=[ref_1, ref_2], hypothesis=cand, weights=(0.25,0.25,0.25,0.25)))
print('scratch:', bleu_by_sent(cand, [ref_1, ref_2]))

nltk's : 0.4671379777282001
scratch: 0.4671379777282001


#### corpus BLEU

In [6]:
# Three candidate translations, each paired below with one reference.
# The sentences are left as raw strings (never split into words), so the
# n-grams that bleu_by_corpus builds by slicing them are runs of characters.
cand = [
    'Small business owners have wasted no time in applying for COVID-19 compensation on the first day of the money becoming available to them.',
    'Minister Lee said as the test run of the application system was conducted without overload errors, it was kept open.',
    'Out of the 62 trillion won worth supplementary budget that passed the National Assembly on Sunday, 23 trillion won will be doled out to small businesses hard hit by the protracted pandemic.',
]

# One single-element reference list per candidate, in the same order.
ref = [
    ["Small business owners didn't waste time applying for COVID-19 compensation on the first day the funds were available."],
    ['Minister Lee said the application system remained open because the commissioning went without overload errors.'],
    ['Of the 62 trillion won in the supplementary budget passed by the National Assembly on the 1st, 23 trillion won will be invested in small businesses that have been hit directly by the prolonged Corona 19.'],
]

uniform_weights = (0.25, 0.25, 0.25, 0.25)

print("nltk's : ", corpus_bleu(ref, cand, weights=uniform_weights))
print('scratch: ', bleu_by_corpus(cand, ref))

nltk's :  0.7145484803707527
scratch:  (0.7145484803707527, [0.8597285067873304, 0.7425968109339408, 0.6697247706422018, 0.6096997690531177])


# BLEU of FR-EN and ZH-KO models

#### Preparing reference and output sentences

In [74]:
def _read_utf8(path):
    """Return the full contents of the text file at ``path`` decoded as UTF-8."""
    with open(path, 'r', encoding='utf-8') as handle:
        return handle.read()


# Model outputs.
# NOTE(review): the variable is named zh_ko while the file is ko_zh_* -
# confirm which direction the name is meant to convey.
zh_ko = _read_utf8('../data/labdata/ko_zh_test_output.txt')
fr_en = _read_utf8('../data/labdata/fr_en_test_output.txt')

# Test-set texts.
fr = _read_utf8('../data/labdata/fr_test.txt')
ko = _read_utf8('../data/labdata/ko_test.txt')
en = _read_utf8('../data/labdata/en_test.txt')

In [75]:
# Break each file's contents into one entry per line.
pred_zh_ko, pred_fr_en = (text.split('\n') for text in (zh_ko, fr_en))
ref_en, ref_ko, ref_fr = (text.split('\n') for text in (en, ko, fr))

# Report how many lines each prediction list and its reference list contain.
# NOTE(review): the first label says "Chinese", but the lists it reports on
# are pred_zh_ko and ref_ko (read from ko_test.txt) - confirm the wording.
line_counts = (
    ('length of Chinese sents', pred_zh_ko, ref_ko),
    ('length of English sents', pred_fr_en, ref_en),
)
for position, (title, predicted, reference) in enumerate(line_counts):
    if position > 0:
        # Separator between the two groups.
        print('\n')
    print(title)
    print('predicted: ', len(predicted))
    print('reference: ', len(reference))

length of Chinese sents
predicted:  20000
reference:  20000


length of English sents
predicted:  20000
reference:  20000


In [59]:
# Build the English evaluation pairs: one single-reference list per line and
# the matching model output, both with the '<UNK>' placeholder removed.
corr_en = []
pred_en = []

# Iterate over the English references themselves; the loop bound previously
# came from the Korean list, which only worked while both had equal length.
for idx, ref_line in enumerate(ref_en):
    # NOTE(review): the second replace targets a run of three spaces - confirm
    # that this is the gap actually left behind once '<UNK>' is removed.
    corr = ref_line.replace('<UNK>', '').replace('   ', ' ')
    pred = pred_fr_en[idx].replace('<UNK>', '').replace('   ', ' ')

    corr_en.append([corr])  # wrapped in a list: one reference per sentence
    pred_en.append(pred)

In [60]:
# Build the Korean evaluation pairs: one single-reference list per line and
# the matching model output, both with the '<UNK>' placeholder removed.
corr_ko, pred_ko = [], []

for idx, ref_line in enumerate(ref_ko):
    hyp_line = pred_zh_ko[idx]

    # Drop the placeholder, then collapse any run of three spaces to one.
    cleaned_ref = ref_line.replace('<UNK>', '').replace('   ', ' ')
    cleaned_hyp = hyp_line.replace('<UNK>', '').replace('   ', ' ')

    corr_ko.append([cleaned_ref])  # wrapped in a list: one reference each
    pred_ko.append(cleaned_hyp)

#### Results of BLEU

In [67]:
# Score the French-English outputs with NLTK and with the from-scratch
# implementation, printing each result as soon as it is available.
# NOTE(review): the two recorded scores for this cell agree only to the fourth
# decimal place; presumably the implementations treat candidates shorter than
# the n-gram order differently - confirm.
print('French-English result')
nltk_fr_en = corpus_bleu(corr_en, pred_en, weights=(0.25, 0.25, 0.25, 0.25))
print("nltk's : ", nltk_fr_en)
scratch_fr_en = bleu_by_corpus(pred_en, corr_en)
print('scratch: ', scratch_fr_en)

French-English result
nltk's :  0.47989226890972836
scratch:  (0.4798990451982229, [0.7517812218147842, 0.5567918179838579, 0.4020321140592466, 0.31517697251653387])


In [68]:
# Score the Chinese-Korean outputs with NLTK and with the from-scratch
# implementation, printing each result as soon as it is available.
print('Chinese-Korean result')
nltk_zh_ko = corpus_bleu(corr_ko, pred_ko, weights=(0.25, 0.25, 0.25, 0.25))
print("nltk's : ", nltk_zh_ko)
scratch_zh_ko = bleu_by_corpus(pred_ko, corr_ko)
print('scratch: ', scratch_zh_ko)

Chinese-Korean result
nltk's :  0.25218944149054745
scratch:  (0.25218944149054745, [0.7129282397266276, 0.4927147061446535, 0.3200227004449287, 0.19989457134930178])
