In [1]:
# Translation validation using a normalized Levenshtein similarity score

import numpy as np

# Reference (English, Turkish) pairs treated as the correct translations.
# The padding spaces inside the strings are part of the data; validate()
# strips them before lookup and scoring.
test_data = [
    (
        "Who's the boss? ",
        " Patron kim?",
    ),
    (
        "Who's this guy? ",
        " Bu adam kim?",
    ),
    (
        "Why do you lie? ",
        " Neden yalan söylüyorsun?",
    ),
    (
        "Why is it dark? ",
        " Neden karanlık?",
    ),
]

# Candidate (English, Turkish) translations to be scored. They cover an exact
# match, near misses, and a sentence that has no reference translation.
translated_pairs = [
    # Exact match
    ("Who's the boss?", "Patron kim?"),
    # Close match (punctuation difference)
    ("Who's this guy?", "Bu adam kim."),
    # Semantically close, but not exact
    ("Why do you lie?", "Neden yalan söylersin?"),
    # Somewhat semantically related, but different structure
    ("Why is it dark?", "Karanlık neden var?"),
    # Not in test data
    ("This is not in test data", "Test verisinde bu yok"),
]

def levenshtein_distance(s1, s2):
    """Return a normalized Levenshtein similarity between two strings.

    Despite the name, the result is a similarity score, not a raw edit
    distance: ``1 - edit_distance / max(len(s1), len(s2))``.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        A float between 0.0 and 1.0. Equal inputs (including two empty
        strings) score 1.0; if exactly one input is empty the score is 0.0.
    """
    if s1 == s2:
        return 1.0
    if len(s1) == 0 or len(s2) == 0:
        return 0.0

    # Two-row dynamic programme: `previous` holds the edit distances between
    # the prefix of s1 processed so far and every prefix of s2.
    previous = list(range(len(s2) + 1))

    for i, ch1 in enumerate(s1, start=1):
        # Turning the first i characters of s1 into "" takes i deletions.
        current = [i]
        for j, ch2 in enumerate(s2):
            cost = 0 if ch1 == ch2 else 1
            current.append(
                min(
                    current[j] + 1,       # insertion
                    previous[j + 1] + 1,  # deletion
                    previous[j] + cost,   # substitution (or match)
                )
            )
        previous = current

    # Normalize by the longer length so the score stays within [0, 1].
    return 1.0 - previous[-1] / max(len(s1), len(s2))

def validate(translated_pairs, test_data):
    """Score candidate translations against reference translations.

    Args:
        translated_pairs: Iterable of ``(english, turkish)`` candidate pairs.
        test_data: Iterable of ``(english, turkish)`` reference pairs.

    Returns:
        A list of ``(english, turkish, score)`` tuples, one per candidate and
        in input order. ``english`` is the stripped source sentence and
        ``turkish`` is the candidate exactly as given. ``score`` is the value
        returned by ``levenshtein_distance`` for the candidate and the
        stripped reference translation, or 0 when the English sentence has no
        reference entry.
    """
    # Strip both sides of the reference data so incidental padding does not
    # break the lookup or skew the score.
    reference = {eng.strip(): tur.strip() for eng, tur in test_data}

    results = []
    for eng, tur in translated_pairs:
        eng = eng.strip()
        expected = reference.get(eng)
        if expected is None:
            # No reference translation for this sentence.
            results.append((eng, tur, 0))
        else:
            results.append((eng, tur, levenshtein_distance(tur, expected)))

    return results
    
# Score every candidate translation against the reference data.
results = validate(translated_pairs, test_data)

# Print one line per candidate pair, with the score formatted to two decimals.
for eng, tur, score in results:
    print(f"EN: {eng} | TR: {tur} | Score: {score:.2f}")

StatementMeta(, , , SessionStarting, )