In [15]:
import nltk
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import meteor_score
from nltk.tokenize import word_tokenize

nltk.download('punkt', download_dir='data')
nltk.download('wordnet', download_dir='data')
nltk.data.path.append('data')

[nltk_data] Downloading package punkt to data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to data...
[nltk_data]   Package wordnet is already up-to-date!


In [16]:
# Tokenize and make lowercase
def preprocess_sentence(sentence):
    return word_tokenize(sentence.lower())

In [18]:
def calculate_similarity(reference, candidate):
    bleu_score = sentence_bleu([reference], candidate)
    meteor_score_value = meteor_score([reference], candidate)

    return bleu_score, meteor_score_value

In [17]:
import json
# This is just a test file for doing the similarity tests
test_data = json.load(open('data/test.json'))

english = preprocess_sentence(test_data['english'])
spanish_translation = preprocess_sentence(test_data['spanish'])
gpt_english = preprocess_sentence(test_data['gpt_english'])

print('Spanish:', spanish_translation)
print('English:', english)
print('GPT English:', gpt_english)

Spanish: ['registro', 'genealógico', 'de', 'jesucristo', ',', 'hijo', 'de', 'david', 'y', 'de', 'abraham']
English: ['this', 'is', 'the', 'genealogy', 'of', 'jesus', 'the', 'messiah', 'the', 'son', 'of', 'david', ',', 'the', 'son', 'of', 'abraham']
GPT English: ['genealogical', 'record', 'of', 'jesus', 'christ', ',', 'son', 'of', 'david', 'and', 'of', 'abraham', '.']


In [24]:
# Sanity check
bleu_score, meteor_score_value = calculate_similarity(english, english)
print("--- Sanity check ---")
print("BLEU score: {} METEOR score: {}".format(bleu_score, meteor_score_value))

# Check similarity between gpt english translation and actual english version
bleu_score, meteor_score_value = calculate_similarity(english, gpt_english)
print("--- GPT English ---")
print("BLEU score: {} METEOR score: {}".format(bleu_score, meteor_score_value))

# Checking with google translation
google_english = preprocess_sentence(test_data['google_english'])
bleu_score, meteor_score_value = calculate_similarity(english, google_english)
print("--- Google English ---")
print("BLEU score: {} METEOR score: {}".format(bleu_score, meteor_score_value))

--- Sanity check ---
BLEU score: 1.0 METEOR score: 0.999898229187869
--- GPT English ---
BLEU score: 3.3179146511781414e-78 METEOR score: 0.4990963855421686
--- Google English ---
BLEU score: 2.7014332045139275e-78 METEOR score: 0.35606744956338454
