#Imports


In [None]:
!pip install sacrebleu
!pip install sentencepiece
!pip install transformers
!pip install sacremoses

Collecting sacrebleu
  Downloading sacrebleu-2.3.1-py3-none-any.whl (118 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/118.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━[0m [32m112.6/118.9 kB[0m [31m3.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m118.9/118.9 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting portalocker (from sacrebleu)
  Downloading portalocker-2.8.2-py3-none-any.whl (17 kB)
Collecting colorama (from sacrebleu)
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: portalocker, colorama, sacrebleu
Successfully installed colorama-0.4.6 portalocker-2.8.2 sacrebleu-2.3.1
Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m

In [None]:
import collections
import math
import nltk
nltk.download('punkt')
from sacrebleu.tokenizers import tokenizer_char, tokenizer_spm, tokenizer_none
import sacrebleu
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
from transformers import MarianTokenizer, MarianMTModel

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


# Define BLEU metric


In [None]:

def calculate_bleu(reference_list, candidate, weights=(0.25, 0.25, 0.25, 0.25)):
    # Calculate n-gram precision for n=1, 2, 3, and 4
    precisions = []
    if type(candidate) == str:
        return candidate in reference_list

    for n in range(1, len(weights)+1):
        #Count the number of candidate n-grams that appear in the references
        candidate_ngrams = [tuple(candidate[i:i + n]) for i in range(len(candidate) - n + 1)]
        candidate_ngram_counts = collections.Counter(candidate_ngrams)

        #Calculate the maximum n-gram count in the reference sentences
        reference_ngram_counts = collections.Counter()

        for reference in reference_list:
            reference_ngrams = [tuple(reference[i:i + n]) for i in range(len(reference) - n + 1)]
            ngram_counts = collections.Counter(reference_ngrams)

            for key, value in ngram_counts.items():
                reference_ngram_counts[key] = max(reference_ngram_counts.get(key, 0), value)

        for candidate_ngram in candidate_ngram_counts.keys():
            if candidate_ngram in reference_ngram_counts.keys():
                if candidate_ngram_counts[candidate_ngram] > reference_ngram_counts[candidate_ngram]:
                    candidate_ngram_counts[candidate_ngram] = reference_ngram_counts[candidate_ngram]
            else:
                candidate_ngram_counts[candidate_ngram] = 0

        denominator = 0.0000000001 if len(candidate_ngrams) ==0 else len(candidate_ngrams)
        precision = sum(candidate_ngram_counts.values()) / denominator
        precisions.append(precision)

    #Calculate brevity penalty
    reference_lengths = [len(reference) for reference in reference_list]
    candidate_length = len(candidate)
    closest_reference_length = min(reference_lengths, key=lambda x: abs(x - candidate_length))
    brevity_penalty = min(1, math.exp(1 - closest_reference_length / candidate_length))

    geometric_mean = math.exp(sum(weights[index] * math.log(precisions[index]) for index in range(len(precisions)) if precisions[index]>0))
    bleu = brevity_penalty * geometric_mean

    return bleu

# retrieve translations


In [None]:
with open("lab3/translations_bilingual.txt", 'r', encoding="utf-8") as file:
    bilingual_translations = file.readlines()
with open("lab3/translations_multilingual.txt", 'r', encoding="utf-8") as file:
    multilingual_translations = file.readlines()
with open("lab3/newsdiscusstest2015-fren-src.fr.sgm", 'r', encoding="utf-8") as file:
    french_sentences = file.readlines()
with open("lab3/newsdiscusstest2015-fren-ref.en.sgm", 'r', encoding="utf-8") as file:
    english_sentences = file.readlines()

##tokenize translations and gold references

In [None]:
bilingual_translations = [nltk.word_tokenize(sentence.strip().lower()) for sentence in bilingual_translations]
multilingual_translations = [nltk.word_tokenize(sentence.strip().lower()) for sentence in multilingual_translations]
french_sentences = [nltk.word_tokenize(sentence.strip().lower()) for sentence in french_sentences]
english_sentences = [nltk.word_tokenize(sentence.strip().lower()) for sentence in english_sentences]

# Calculate BLEU scores

In [None]:
bilingual_bleu = [calculate_bleu([gold], pred) for gold, pred in zip(english_sentences, bilingual_translations)]
multilingual_bleu = [calculate_bleu([gold], pred) for gold, pred in zip(english_sentences, multilingual_translations)]
bilingual_bleu = sum(bilingual_bleu) / len(bilingual_bleu)
multilingual_bleu = sum(multilingual_bleu) / len(multilingual_bleu)
print(f"bilingual_bleu: {str(bilingual_bleu)}, multilingual_bleu: {str(multilingual_bleu)}")

bilingual_bleu: 0.48434392672277704, multilingual_bleu: 0.46088272183263895


##Find all permutations for all sentences in the corpus

In [None]:
def get_all_permutations(candidate, reference):

    #Calculate the maximum n-gram count in the reference sentences
    reference_ngram_counts = collections.Counter()
    reference_ngrams = [tuple(reference[i:i + 2]) for i in range(len(reference) - 2 + 1)]
    ngram_counts = collections.Counter(reference_ngrams)
    for key, value in ngram_counts.items():
        reference_ngram_counts[key] = max(reference_ngram_counts.get(key, 0), value)

    #Count the number of candidate n-grams that appear in the references
    candidate_ngrams = [tuple(candidate[i:i + 2]) for i in range(len(candidate) - 2 + 1)]
    candidate_ngram_counts = collections.Counter(candidate_ngrams)
    for candidate_ngram in candidate_ngram_counts.keys():
        if candidate_ngram in reference_ngram_counts.keys():
            if candidate_ngram_counts[candidate_ngram] > reference_ngram_counts[candidate_ngram]:
                candidate_ngram_counts[candidate_ngram] = reference_ngram_counts[candidate_ngram]
        else:
            candidate_ngram_counts[candidate_ngram] = 0
    unmatching_bigrams = sum(elt+1 for elt in candidate_ngram_counts.values() if elt==0)
    return math.factorial(unmatching_bigrams + 1)

#basic exemple
reference = ["le", "chat", "mange", "le", "mulot","rouge"]
candidate = ["le", "chat", "voit", "le", "mulot", "marron"]

print(f"There are {get_all_permutations(reference, candidate)} permutations possibles of \"{' '.join(candidate)}\"")

#all corpus
permutations_count = 0
for pred, gold in zip(bilingual_translations, english_sentences):
    permutations_count += get_all_permutations(pred, gold)
print("All corpus mean permutations per candidate", permutations_count/len(bilingual_translations))
print("All corpus total permutations ",permutations_count)

There are 24 permutations possibles of "le chat voit le mulot marron"
All corpus mean permutations per candidate 9.01334351785624e+136
All corpus total permutations  135200152767843610495301134112762141535165996608194809310176793339697717688179906744929245388468309137864148102967187311809322666711665144418


# Compute SacreBLEU on our corpora


In [None]:
with open("lab3/translations_bilingual.txt", "r", encoding="utf-8") as file:
  bilingual_translations = file.readlines()
with open("lab3/translations_multilingual.txt", "r", encoding="utf-8") as file:
  multilingual_translations = file.readlines()
with open("lab3/newsdiscusstest2015-fren-ref.en.sgm", "r", encoding="utf-8") as file:
  english_sentences = file.readlines()

bilingual_sacrebleu_score = sacrebleu.corpus_bleu(bilingual_translations, [[elt] for elt in english_sentences], tokenize="13a").score
multilingual_sacrebleu_score = sacrebleu.corpus_bleu(multilingual_translations, [[elt] for elt in english_sentences], tokenize="13a").score
print(f"bilingual SacreBLEU:{bilingual_sacrebleu_score}, multilingual SacreBLEU: {multilingual_sacrebleu_score}")

i=0
# subword units
tokenizer = tokenizer_spm.Flores101Tokenizer()
bilingual_bleu_score = 0
for pred_bi, gold in zip(bilingual_translations, english_sentences):
    bilingual_bleu_score += calculate_bleu([tokenizer(gold).split()], tokenizer(pred_bi).split())
bilingual_sacrebleu_score = sacrebleu.corpus_bleu(bilingual_translations, [[elt] for elt in english_sentences], tokenize="flores101").score
print(f"SUBWORD UNITS -> BLEU: {bilingual_bleu_score/len(bilingual_translations)}, sacreBLEU {bilingual_sacrebleu_score}")


# none
tokenizer = sacrebleu.tokenizers.tokenizer_none.NoneTokenizer()
bilingual_bleu_score = 0
for pred_bi, gold in zip(bilingual_translations, english_sentences):
    bilingual_bleu_score += calculate_bleu([gold], pred_bi)

bilingual_sacrebleu_score = sacrebleu.raw_corpus_bleu(bilingual_translations, [[elt] for elt in english_sentences]).score
print(f"NONE -> BLEU: {bilingual_bleu_score/len(bilingual_translations)}, SacreBLEU: {bilingual_sacrebleu_score}")


# char
tokenizer = tokenizer_char.TokenizerChar()
bilingual_bleu_score = 0
bilingual_sacrebleu_score = 0
for pred_bi, gold in zip(bilingual_translations, english_sentences):
    bilingual_bleu_score += calculate_bleu([[*gold]], [*pred_bi])
bilingual_sacrebleu_score = sacrebleu.corpus_bleu(bilingual_translations, [[elt] for elt in english_sentences], tokenize="char").score

print(f"CHAR -> BLEU: {bilingual_bleu_score/len(bilingual_translations)}, SacreBLEU: {bilingual_sacrebleu_score}")



bilingual SacreBLEU:13.006392202018965, multilingual SacreBLEU: 13.380161378318961




SUBWORD UNITS -> BLEU: 0.472146790197805, sacreBLEU 12.70331870386537
NONE -> BLEU: 0.051333333333333335, SacreBLEU: 6.948413844794133
CHAR -> BLEU: 0.6689664270316409, SacreBLEU: 94.49905230826586
