In [7]:
import nltk
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import meteor_score
from nltk.tokenize import word_tokenize

nltk.download('punkt', download_dir='data')
nltk.download('wordnet', download_dir='data')
nltk.data.path.append('data')

[nltk_data] Downloading package punkt to data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to data...


In [8]:
# Tokenize and make lowercase
def preprocess_sentence(sentence):
    return word_tokenize(sentence.lower())

In [9]:
import json
# This is just a test file for doing the similarity tests
test_data = json.load(open('data/test.json'))

english = preprocess_sentence(test_data['english'])
spanish_translation = preprocess_sentence(test_data['spanish'])
gpt_english = preprocess_sentence(test_data['gpt_english'])

print('Spanish:', spanish_translation)
print('English:', english)
print('GPT English:', gpt_english)

Spanish: ['registro', 'geneal√≥gico', 'de', 'jesucristo', ',', 'hijo', 'de', 'david', 'y', 'de', 'abraham']
English: ['this', 'is', 'the', 'genealogy', 'of', 'jesus', 'the', 'messiah', 'the', 'son', 'of', 'david', ',', 'the', 'son', 'of', 'abraham']
GPT English: ['genealogical', 'record', 'of', 'jesus', 'christ', ',', 'son', 'of', 'david', 'and', 'of', 'abraham', '.']


In [10]:
def calculate_similarity(reference, candidate):
    # Calculate BLEU score
    bleu_score = sentence_bleu([reference], candidate)

    # Calculate METEOR score
    meteor_score_value = meteor_score([reference], candidate)

    return bleu_score, meteor_score_value

In [11]:
# Check similarity between gpt english translation and actual english version
bleu_score, meteor_score_value = calculate_similarity(english, gpt_english)

print('BLEU:', bleu_score)
print('METEOR:', meteor_score_value)

BLEU: 3.3179146511781414e-78
METEOR: 0.4990963855421686
