In [1]:
import os
import codecs
import sacrebleu

import numpy as np
import pandas as pd



In [2]:
# Directory that holds the pre-processed evaluation files.
proc_dir = './data/processed/'

# Language codes of the evaluation sets that are scored below.
languages = [
    'af', 'nr', 'zu', 'xh', 'nso',
    'st', 'tn', 'ss', 've', 'ts',
]

# Identifiers of the human translators; they also form the file-name prefixes.
translators = [
    'translator1',
    'translator2',
    'translator3',
    'translator4',
]

# Sanity check: number of languages being evaluated.
len(languages)

10

In [3]:
def load_all_translations(lang, translator_names=None, data_dir=None):
    """Load all autshumato evaluation translations into a dictionary.

    Params
    ------

    lang (str):
        The ISO code language to load.

    translator_names (iterable of str, optional):
        Translators whose files should be read. When omitted, the module-level
        ``translators`` list is used.

    data_dir (str, optional):
        Directory containing the ``<translator>.<lang>.txt`` files. When omitted,
        the module-level ``proc_dir`` is used.

    Returns
    -------

    out (dict):
        A dictionary containing all translated lines from the Autshumato evaluation set
        for the given language. The key corresponds to a translator, and the value is a list
        containing the translation, one stripped string per line of the file.

    Raises
    ------

    OSError:
        If one of the expected translation files cannot be opened.
    """
    # Resolve the defaults at call time so later changes to the globals are honoured.
    if translator_names is None:
        translator_names = translators
    if data_dir is None:
        data_dir = proc_dir

    out = {}
    for translator in translator_names:
        fp = os.path.join(data_dir, f"{translator}.{lang}.txt")
        # Use the built-in open() rather than codecs.open(): the latter's readlines()
        # splits on every Unicode line boundary (U+2028, NEL, form feed, ...), which can
        # break one segment into several and misalign the translators' line lists.
        # Text-mode open() only treats \n, \r and \r\n as line endings.
        with open(fp, 'r', encoding='utf-8') as f:
            # strip the translation of any escape chars or whitespace
            out[translator] = [line.strip() for line in f]
    return out
    

In [4]:
def calculate_bleu(translator, trans_dict):
    """Calculate bleu score using a human translator as the hypothesis, compared to
    other human translators for a reference.

    Params
    ------
    translator (str):
        The translator to use as a hypothesis. Must be a key of ``trans_dict``.

    trans_dict (dict):
        Dictionary containing all autshumato translations. The key is a translator and
        the value is that translator's list of translated lines. Every key other than
        ``translator`` is used as a reference.

    Returns
    -------
    score (float):
        The bleu score for the given translator.

    Raises
    ------
    KeyError:
        If ``translator`` is not a key of ``trans_dict``.
    ValueError:
        If ``trans_dict`` holds no translator other than ``translator``.
    """
    # hypothesis from translator we are checking (named to avoid shadowing the `sys` module)
    hypothesis = trans_dict[translator]

    # References come from the dictionary that was actually passed in, not from a
    # module-level list, and keep the dictionary's order so the call is deterministic.
    refs = [lines for name, lines in trans_dict.items() if name != translator]
    if not refs:
        raise ValueError(
            f"no reference translations available besides {translator!r}"
        )

    return sacrebleu.corpus_bleu(hypothesis, refs).score

In [6]:
# Now calculate the BLEU score for each language and translator and place it into a dataframe.

# One {translator: score} record per language, collected in `languages` order.
score_rows = []

for lang in languages:
    print(lang + ':')
    lang_dict = load_all_translations(lang)
    lang_scores = {}
    for translator in translators:
        score = calculate_bleu(translator, lang_dict)
        lang_scores[translator] = score

        print(f'\t{translator}')
        print('\t' + '-'*len(translator))
        print(f'\tScore: {score}\n')
    score_rows.append(lang_scores)
    print('----------------------')

# Build the frame once (rows: languages, columns: translators) instead of enlarging
# an empty DataFrame one cell at a time with .loc.
bleu = pd.DataFrame(score_rows, index=languages, columns=translators)

af:
	translator1
	-----------
	Score: 76.76986445142002

	translator2
	-----------
	Score: 90.14196348687456

	translator3
	-----------
	Score: 81.49480654149953

	translator4
	-----------
	Score: 65.14309950878871

----------------------
nr:
	translator1
	-----------
	Score: 22.113736385159584

	translator2
	-----------
	Score: 19.165151808467147

	translator3
	-----------
	Score: 21.871423632239786

	translator4
	-----------
	Score: 22.35134527313277

----------------------
zu:
	translator1
	-----------
	Score: 22.96955748977736

	translator2
	-----------
	Score: 24.008443038541625

	translator3
	-----------
	Score: 22.24369679199224

	translator4
	-----------
	Score: 18.558022495221945

----------------------
xh:
	translator1
	-----------
	Score: 16.735573782049986

	translator2
	-----------
	Score: 18.122392502174005

	translator3
	-----------
	Score: 23.088689076859463

	translator4
	-----------
	Score: 22.146621328265475

----------------------
nso:
	translator1
	-----------
	Sco

In [11]:
# Show the full score table: one row per language, one column per translator.
bleu

Unnamed: 0,translator1,translator2,translator3,translator4
af,76.769864,90.141963,81.494807,65.1431
nr,22.113736,19.165152,21.871424,22.351345
zu,22.969557,24.008443,22.243697,18.558022
xh,16.735574,18.122393,23.088689,22.146621
nso,50.57938,51.759067,45.53595,48.261841
st,46.395787,48.254967,46.312425,43.973316
tn,40.419208,38.367032,32.816774,34.003414
ss,18.637086,19.29396,21.430363,20.405209
ve,55.002524,52.343902,54.549917,52.643063
ts,44.122824,41.469905,30.725252,44.982081


In [12]:
# Mean BLEU per language; axis=1 averages across the translator columns of each row.
bleu.mean(axis=1)

af     78.387433
nr     21.375414
zu     21.944930
xh     20.023319
nso    49.034059
st     46.234124
tn     36.401607
ss     19.941655
ve     53.634851
ts     40.325015
dtype: float64

In [13]:
# Spread of BLEU across translators for each language (pandas default: sample std, ddof=1).
bleu.std(axis=1)

af     10.421990
nr      1.486478
zu      2.371241
xh      3.072876
nso     2.747531
st      1.753789
tn      3.587310
ss      1.231874
ve      1.336422
ts      6.572109
dtype: float64

In [14]:
# Save the score table to 'result.csv' (path relative to the working directory);
# with to_csv's defaults the language index is written as the first column.
bleu.to_csv('result.csv')