In [86]:
# imports
import numpy as np

In [87]:
# Task 0. Unigram BLEU score
def uni_bleu(references, sentence):
    """
    Calculates the unigram BLEU score for a sentence.

    Args:
        references: list of reference translations
            - Each reference translataion is a list of the words in the
              translation
        sentence: list containing the model proposed sentence

    Returns:
        the unigram BLEU score

    My Notes:
        The final output is the brevity penalty multiplied by the number of
        words from our machine translated sentence show up in our references
        divided by the number of words in the machine translated sentence

        Example:
            machine_translation: "there is a cat here"
            ref1: "the cat is on the mat"
            ref2: "there is a cat on the mat"

        brevity penalty if candidate is shorter than any reference, else 1:
            e^(1-r/c)
            r: length of reference sentence that is closest to length of
                machine translated sentence
            c: length of machine translated sentence

        In our candidate: [there, is, a cat] are all in our references = 4
        Candidate is 5 long

        We finally end up with (4/5) * (e^(1-(6/5)))
    """
    # Calculate the first number, P = m/w_t
    # m = number of words from sentence in references
    # w_t = number of words in translated sentence
    w_t = len(sentence)
    m = 0
    corpus = []

    for reference in references:
        for word in sentence:
            if word in reference and word not in corpus:
                corpus.append(word)

    m = len(corpus)
    P = m / w_t

    ref_len = min(len(reference) for reference in references)

    BP = min(1, np.exp(1-(ref_len/w_t)))

    return P * BP

In [88]:
# 0-main
references = [["the", "cat", "is", "on", "the", "mat"], ["there", "is", "a", "cat", "on", "the", "mat"]]
sentence = ["there", "is", "a", "cat", "here"]

print(uni_bleu(references, sentence))

0.6549846024623855


In [114]:
# Task 1. N-gram BLEU score
def ngram_bleu(references, sentence, n):
    """
    Calculates the n-gram BLEU score for a sentence

    Args:
        references: list of reference translations
            - Each reference translataion is a list of the words in the
              translation
        sentence: list containing the model proposed sentence
        n: size of the n-gram to use for evaluation

    Returns:
        the unigram BLEU score

    Notes:
        Similar to unigram, except need to make a dictionaries of tuples to
        find matches.
    """
    # Calculate n-gram counts in the sentence (Create corpus of tuples)
    corpus = {}
    for i in range(len(sentence) - n + 1):
        ngram = tuple(sentence[i:i + n])
        corpus[ngram] = corpus.get(ngram, 0) + 1
    print("Corpus:", corpus)
    w_t = len(corpus)

    # Calculate maximum n-gram counts in the references
    max_counts = {}
    for reference in references:
        ref_counts = {}
        for i in range(len(reference) - n + 1):
            ngram = tuple(reference[i:i + n])
            ref_counts[ngram] = ref_counts.get(ngram, 0) + 1
        for ngram, count in ref_counts.items():
            max_counts[ngram] = max(max_counts.get(ngram, 0), count)
    print("Max Counts:", max_counts)
    print("Length Max Counts:", len(max_counts))

    # Calculate clipped n-gram counts
    clipped_counts = {}
    for ngram, count in corpus.items():
        clipped_counts[ngram] = min(count, max_counts.get(ngram, 0))
    print("Clipped counts: ", clipped_counts)
    print("Clipped Conts Length:", len(clipped_counts))

    m = sum(clipped_counts.values())

    print("m:", m)

    P = m / w_t
    print("P:",P)

    # Calculate brevity penalty
    ref_len = min(len(reference) for reference in references)
    c = len(sentence)
    BP = min(1, np.exp(1 - ref_len / c))

    from nltk.translate.bleu_score import sentence_bleu
    library_n_gram = (sentence_bleu(references, sentence, weights=(0, 1, 0, 0)))
    print("Library n-gram calculation: ", library_n_gram)

    return P * BP

In [115]:
# 1-main
references = [["the", "cat", "is", "on", "the", "mat"], ["there", "is", "a", "cat", "on", "the", "mat"]]
sentence = ["there", "is", "a", "cat", "here"]

print(ngram_bleu(references, sentence, 2))

Corpus: {('there', 'is'): 1, ('is', 'a'): 1, ('a', 'cat'): 1, ('cat', 'here'): 1}
Max Counts: {('the', 'cat'): 1, ('cat', 'is'): 1, ('is', 'on'): 1, ('on', 'the'): 1, ('the', 'mat'): 1, ('there', 'is'): 1, ('is', 'a'): 1, ('a', 'cat'): 1, ('cat', 'on'): 1}
Length Max Counts: 9
Clipped counts:  {('there', 'is'): 1, ('is', 'a'): 1, ('a', 'cat'): 1, ('cat', 'here'): 0}
Clipped Conts Length: 4
m: 3
P: 0.75
Library n-gram calculation:  0.6140480648084865
0.6140480648084865
