In [1]:
# pythonで推論を行う

In [2]:
import numpy as np

def n_gram_generator(sentence,n= 2,n_gram= False):
    '''
    N-Gram generator with parameters sentence
    n is for number of n_grams
    The n_gram parameter removes repeating n_grams 
    '''
    sentence = sentence.lower() # converting to lower case
    sent_arr = np.array(sentence.split()) # split to string arrays
    length = len(sent_arr)

    word_list = []
    for i in range(length+1):
        if i < n:
            continue
        word_range = list(range(i-n,i))
        s_list = sent_arr[word_range]
        string = ' '.join(s_list) # converting list to strings
        word_list.append(string) # append to word_list
        if n_gram:
            word_list = list(set(word_list))
    return word_list

In [3]:
from collections import Counter
import math

# https://stackoverflow.com/questions/56968434/bleu-score-in-python-from-scratch
def bleu_score(original,machine_translated):
    '''
    Bleu score function given a orginal and a machine translated sentences
    '''
    mt_length = len(machine_translated.split())
    o_length = len(original.split())

    # Brevity Penalty 
    if mt_length>o_length:
        BP=1
    else:
        penality=1-(mt_length/o_length)
        BP=np.exp(penality)

    # Clipped precision
    clipped_precision_score = []
    for i in range(1, 5):
        original_n_gram = Counter(n_gram_generator(original,i))
        machine_n_gram = Counter(n_gram_generator(machine_translated,i))

        c = sum(machine_n_gram.values())
        for j in machine_n_gram:
            if j in original_n_gram:
                if machine_n_gram[j] > original_n_gram[j]:
                    machine_n_gram[j] = original_n_gram[j]
            else:
                machine_n_gram[j] = 0

        #print (sum(machine_n_gram.values()), c)
        clipped_precision_score.append(sum(machine_n_gram.values())/c)

    #print (clipped_precision_score)

    weights =[0.25]*4

    s = (w_i * math.log(p_i) for w_i, p_i in zip(weights, clipped_precision_score))
    s = BP * math.exp(math.fsum(s))
    return s

In [4]:
original = "It is a guide to action which ensures that the military alwasy obeys the command of the party"
machine_translated = "It is the guiding principle which guarantees the military forces alwasy being under the command of the party"

print (bleu_score(original, machine_translated))

0.27098211583470044


In [5]:
#https://github.com/bettkipkemoi/Bleu-Score/blob/main/Bleu.ipynb

from math import sqrt, log, exp
from collections import Counter

In [6]:
#Getting the n-grams from the given text
def get_ngrams(text, order):
    """
    Given a string `text` and an integer `order`, returns a Counter object containing
    the frequency counts of all ngrams of size `order` in the string.
    """
    ngrams = Counter()

    words = text.split()
    for i in range(len(words)- order+1):
      ngram = " ". join(words[i: i + order])
      ngrams[ngram] += 1

    return ngrams

In [7]:
def calculate_bleu(hypothesis, references):
    
    bleu=0
    p1=0
    p2=0
    p3=0
    p4=0
    bp=1
    

    # 1. Find the closest reference to the hypothesis
    closest_size=100000
    closest_ref=[]

    for ref in references:
      ref_size = len(ref)
      if abs(len(hypothesis) - ref_size) < closest_size:
        closest_size = abs(len(hypothesis) - ref_size)
        closest_ref = ref
        pass

    # 2. Calculating pn
    pns=[]
    for order in range(1,5):
      # calculate intersection and union of n-grams
      # hint: use the get_ngrams function you implemented
      # calculate pn for each order
        hyp_ngrams = get_ngrams(hypothesis, order)
        hyp_count = Counter(hyp_ngrams)
        closest_ref_ngrams = get_ngrams(closest_ref, order)
        closest_ref_count = Counter(closest_ref_ngrams)
        intersection_count = dict(hyp_count & closest_ref_count)
        intersection_size = sum(intersection_count.values())
        hyp_size = max(len(hyp_ngrams), 1)
        p_n = intersection_size / hyp_size
        pns.append(p_n)
        pass

    # 3. Calculating the brevity penalty
    bp=1
    c=len(hypothesis)
    r=min(abs(len(ref) - c) for ref in references)
    if c > r:
      bp = 1.0
    else:
      bp = exp(1 - r / c)

    # 4. Calculating the BLEU score
    weights = [0.25] * 4
    bleu=bp * exp(sum(w * log(p_n) for w, p_n in zip(weights, pns)))    
    
    # Assigning values to p1, p2, p3, p4!
    p1, p2, p3, p4 = pns

    
    # Do not change the variable name
    return bleu, p1, p2, p3, p4, bp


In [8]:
hypothesis="Abandon all hope , ye who enter here"
references=["All hope abandon , ye who enter here", "All hope abandon , ye who enter in !", "Leave every hope, ye that enter", "Leave all hope , ye that enter"]

In [9]:
bleu, p1, p2, p3, p4, bp=calculate_bleu(hypothesis, references)
print("BLEU: %.3f" % bleu) # sanity check: 0.5 < BLEU < 1

BLEU: 0.541


In [10]:
original = "It is a guide to action which ensures that the military alwasy obeys the command of the party"
machine_translated = "It is the guiding principle which guarantees the military forces alwasy being under the command of the party"

In [11]:
bleu, p1, p2, p3, p4, bp=calculate_bleu(original, [machine_translated])
print("BLEU: %.3f" % bleu) # sanity check: 0.5 < BLEU < 1

BLEU: 0.279


# 推論
Greedyサーチで推論する。

In [1]:
!python generate.py --test_file='../dataset/test.jsonl' --model_file='../training/model/checkpoint_best.pt' --outfile="outfile.txt" --device="cpu" --max_len=100  --search='greedy'

# 評価
BlueScoreでGreedyサーチで推論した翻訳結果と，実際の翻訳との比較を行い，評価する。

In [None]:
# https://thepythoncode.com/article/bleu-score-in-python

from nltk.translate.bleu_score import sentence_bleu, corpus_bleu