In [67]:
import math
import string
import functools 

from IPython.display import display, Math, Latex, Markdown

In [53]:
# Toy reference / candidate sentence pairs used by the worked examples below.
ref_1 = 'The cat sat on the mat.'
cand_1 = 'The cat is on the mat.'
ref_2 = 'There is a cat on the mat.'
cand_2 = 'The the the the the the the the.'

punctuation_list = string.punctuation


def preprocess(x):
    """Normalise a sentence for n-gram extraction.

    Args:
        x: The sentence as a ``str``.

    Returns:
        ``x`` lower-cased, with every character listed in
        ``string.punctuation`` removed. Whitespace is left untouched.
    """
    # A translation table whose third argument lists characters to delete.
    strip_punctuation = str.maketrans('', '', string.punctuation)
    return x.lower().translate(strip_punctuation)

def extract(sentence):
    """Break a sentence into its 1-, 2-, 3- and 4-grams.

    The sentence is normalised with ``preprocess`` and split on whitespace.
    Every n-gram with n > 1 is the space-joined run of ``n`` consecutive
    tokens, listed in sentence order.

    Args:
        sentence: The sentence to tokenise.

    Returns:
        A 4-tuple ``(uni_gram, bi_gram, tri_gram, quad_gram)`` of lists. A
        list is empty when the sentence has fewer tokens than its n-gram size.
    """
    tokens = preprocess(sentence).split()

    def windows(size):
        # Slide a window of `size` tokens across the sentence, left to right.
        last_start = len(tokens) - size
        return [' '.join(tokens[start:start + size])
                for start in range(last_start + 1)]

    return tokens, windows(2), windows(3), windows(4)

## N-gram Evaluation

### Example 

Reference

`{{ref_1}}` 

$\xrightarrow[\text{}]{\text{ Preprocessing }}$ `{{preprocess(ref_1)}}` 
     
$\xrightarrow[\text{}]{\text{Extract 1-gram}} $ `{{extract(ref_1)[0]}}`
     
$\xrightarrow[\text{}]{\text{Extract 2-gram}} $ `{{extract(ref_1)[1]}}` 

$\xrightarrow[\text{}]{\text{Extract 3-gram}} $ `{{extract(ref_1)[2]}}` 

Candidate

`{{cand_1}}`

$\xrightarrow[\text{}]{\text{ Preprocessing }}$ `{{preprocess(cand_1)}}` 
     
$\xrightarrow[\text{}]{\text{Extract 1-gram}} $ `{{extract(cand_1)[0]}}` 
     
$\xrightarrow[\text{}]{\text{Extract 2-gram}} $ `{{extract(cand_1)[1]}}` 

$\xrightarrow[\text{}]{\text{Extract 3-gram}} $ `{{extract(cand_1)[2]}}` 

## Considering Recall %

### Modified Precision - Clipping

### Example 

Candidate

`{{cand_2}}` 

$\xrightarrow[\text{}]{\text{ Preprocessing }}$ `{{preprocess(cand_2)}}` 
     
$\xrightarrow[\text{}]{\text{Extract 1-gram}} $ `{{extract(cand_2)[0]}}`
     
$\xrightarrow[\text{}]{\text{Extract 2-gram}} $ `{{extract(cand_2)[1]}}` 

$\xrightarrow[\text{}]{\text{Extract 3-gram}} $ `{{extract(cand_2)[2][:2] + ['...']}}` 

## [BLEU - Bilingual Evaluation Understudy](https://www.aclweb.org/anthology/P02-1040.pdf)

### Formula
$
\begin{align}
    \quad
        BLEU = BP \cdot \exp\left(\sum_{n=1}^{N} w_n \log P_n\right) \cr
\end{align}
$
    
$
\begin{align}
    \quad
        BP \quad\,\ = \begin{cases} 
                        1         &, \ c > r   \cr
                        \exp\left(1-\frac{r}{c}\right) &, \ c \leq r   \cr
                      \end{cases} 
\end{align}
$

In [70]:
def BLEU_n(candidate, reference):
    """Score ``candidate`` against a single ``reference`` sentence with BLEU.

    Both sentences are expanded by ``extract`` into their n-gram lists. The
    score is ``BP * exp(sum_n W_n * log(P_n))`` with uniform weights
    ``W_n = 1 / (number of n-gram orders returned by extract)``.

    Args:
        candidate: Candidate (hypothesis) sentence as a string.
        reference: Reference sentence as a string.

    Returns:
        The BLEU score, or ``0.0`` when the candidate contains no tokens.
    """
    cand_ngrams = extract(candidate)
    ref_ngrams = extract(reference)

    # An empty candidate has nothing to score, and its zero length would make
    # the brevity penalty divide by zero.
    if not cand_ngrams[0]:
        return 0.0

    # Take the brevity penalty from the unigram lists *before* the precision
    # loop runs: P_n may consume entries of the reference list it is handed,
    # which would shrink the reference length that BP sees.
    penalty = BP(cand_ngrams[0], ref_ngrams[0])

    W_n = 1. / len(cand_ngrams)
    log_precision = 0
    for cand, ref in zip(cand_ngrams, ref_ngrams):
        log_precision += W_n * math.log(P_n(cand, ref))
    return math.exp(log_precision) * penalty

def P_n(cand, ref):
    """Clipped (modified) n-gram precision of ``cand`` against ``ref``.

    A candidate n-gram counts as a match at most as many times as it occurs
    in the reference, so repeating a common n-gram cannot inflate the score.

    Args:
        cand: List of candidate n-grams.
        ref: List of reference n-grams. It is left unmodified.

    Returns:
        ``matches / len(cand)``, or ``1`` when there is no match at all
        (which includes an empty ``cand``).

    NOTE(review): returning 1 for zero matches presumably keeps a later
    ``math.log`` of the result defined, but it makes "no overlap" look the
    same as a perfect match -- confirm this is intended.
    """
    # Clipping has to "use up" reference n-grams as they are matched; do that
    # on a private copy so the caller's list keeps its contents and length.
    unmatched = list(ref)
    count = 0
    for c in cand:
        if c in unmatched:
            count += 1
            unmatched.remove(c)
    return 1 if count == 0 else count / len(cand)
    
def BP(candidate, reference):
    """Brevity penalty for a candidate against a reference.

    Only the lengths of the two arguments are used: ``c = len(candidate)``
    and ``r = len(reference)``.

    Args:
        candidate: Sized collection of candidate tokens.
        reference: Sized collection of reference tokens.

    Returns:
        ``1`` when the candidate is longer than the reference, ``0.0`` when
        the candidate is empty, and ``exp(1 - r / c)`` otherwise.
    """
    c, r = len(candidate), len(reference)
    if c > r:
        return 1
    if c == 0:
        # exp(1 - r / c) would divide by zero here; for r > 0 the penalty
        # tends to 0 as c shrinks, so an empty candidate is scored as 0.
        return 0.0
    return math.exp(1 - r / c)

0.5946035575013605