In [1]:
import os
import itertools
from collections import defaultdict
import re
import math

In [2]:
import pandas as pd
from functools import reduce
import operator

In [3]:
# Adding module to sys path
import sys
sys.path.append("/home/cfourrie/documents/software/public/CopperMT/")
# RNN imports
import pipeline
import torch, numpy as np
from fairseq import checkpoint_utils, data, options, tasks
from pipeline.neural_translation.multilingual_rnns.multilingual_rnn import MultilingualRNNModel

In [12]:
copper_dir = "/home/cfourrie/documents/software/public/CopperMT/"
raw_data_path = "inputs/raw_data/" 
split_data_path = "inputs/split_data/"

In [73]:
folders = ["abrahammonpa", "allenbai", "backstromnorthernpakistan", "castrosui", "davletshinaztecan", 
           "felekesemitic", "hantganbangime", "hattorijaponic", "listsamplesize", "mannburmish"]
splits = ["0.10", "0.20", "0.30", "0.40", "0.50"]
models = ["baseline", "BiNMT", "MNMT", "SMT"]
train_name = "training"
test_in_name = "test"
test_out_name = "solutions"
save_as = "results"

In [6]:
conversion_needed = False
preprocessing_needed = False


In [7]:
models_dir = "/home/cfourrie/documents/software/public/CopperMT/workspace"


## Convert data to usual format for our software

In [8]:
# If you do not want to share embeddings, removed shared_ from the folder path
if conversion_needed:
    for folder in folders:
        for split in splits:
            try: os.makedirs(os.path.join(split_data_path, f"shared_{folder}", split))
            except FileExistsError: pass
            # Training
            cur_data = pd.read_csv(os.path.join(raw_data_path, folder, f"training-{split}.tsv"), sep="\t")
            cur_languages = [c for c in cur_data.columns if c != 'COGID']
            if folder == "castrosui":
                cur_languages = [re.split('(?=[A-Z])', l)[1] for l in cur_languages]
                cur_data.columns = ['COGID'] + cur_languages
            for l1, l2 in itertools.combinations(cur_languages, 2):
                for name in [f"{l1}-{l2}", f"{l2}-{l1}"]:
                    with open(os.path.join(split_data_path, f"shared_{folder}", split, f"train.{name}.{l1}"), "w+") as f1, \
                         open(os.path.join(split_data_path, f"shared_{folder}", split, f"train.{name}.{l2}"), "w+") as f2:
                        for ix, row in cur_data[[l1, l2]].dropna().iterrows():
                            f1.write(row[l1] + "\n")
                            f2.write(row[l2] + "\n")
            for l in cur_languages:
                with open(os.path.join(split_data_path, f"shared_{folder}", split, f"train.{l}-{l}.{l}"), "w+") as f:
                    for ix, row in cur_data[[l]].dropna().iterrows():
                        f.write(row[l] + "\n")

In [9]:
# Creation of test and validation sets 
# If you do not want to share embeddings, removed shared_ from the folder path
if conversion_needed:
    for folder in folders:
        for split in splits:
            try: os.makedirs(os.path.join(split_data_path, f"shared_{folder}", split))
            except FileExistsError: pass
            # Training
            cur_data = pd.read_csv(os.path.join(raw_data_path, folder, f"test-{split}.tsv"), sep="\t")
            cur_languages = [c for c in cur_data.columns if c != 'COGID']
            if folder == "castrosui":
                cur_languages = [re.split('(?=[A-Z])', l)[1] for l in cur_languages]
                cur_data.columns = ['COGID'] + cur_languages

            for l1, l2 in itertools.combinations(cur_languages, 2):
                with open(os.path.join(split_data_path, f"shared_{folder}", split, f"test.{l1}-{l2}.{l1}"), "w+") as f1_test, \
                     open(os.path.join(split_data_path, f"shared_{folder}", split, f"test.{l2}-{l1}.{l2}"), "w+") as f2_test, \
                     open(os.path.join(split_data_path, f"shared_{folder}", split, f"valid.{l1}-{l2}.{l1}"), "w+") as f1_val, \
                     open(os.path.join(split_data_path, f"shared_{folder}", split, f"valid.{l1}-{l2}.{l2}"), "w+") as f2_val, \
                     open(os.path.join(split_data_path, f"shared_{folder}", split, f"valid.{l2}-{l1}.{l1}"), "w+") as f1_val_r, \
                     open(os.path.join(split_data_path, f"shared_{folder}", split, f"valid.{l2}-{l1}.{l2}"), "w+") as f2_val_r:
                        for ix, row in cur_data[[l1, l2]].dropna().iterrows():
                            if row[l1] != "?" and row[l2] != "?":                                
                                f1_val.write(row[l1] + "\n")                            
                                f1_val_r.write(row[l1] + "\n")                            
                                f2_val.write(row[l2] + "\n")                            
                                f2_val_r.write(row[l2] + "\n")
                            if row[l1] == "?" and row[l2] != "?":
                                f2_test.write(row[l2] + "\n")
                            if row[l2] == "?" and row[l1] != "?":
                                f1_test.write(row[l1] + "\n")

            for l in cur_languages:
                with open(os.path.join(split_data_path, f"shared_{folder}", split, f"valid.{l}-{l}.{l}"), "w+") as f:
                    for ix, row in cur_data[[l]].dropna().iterrows():
                        if row[l] != "?":
                            f.write(row[l] + "\n")

## Generating configuration files contents

In [10]:
if preprocessing_needed:
    folder = folders[3]
    cur_data = pd.read_csv(os.path.join(raw_data_path, folder, f"cognates.tsv"), sep="\t")
    cur_languages = [c for c in cur_data.columns if c != 'COGID']
    if folder == "castrosui":
        cur_languages = [re.split('(?=[A-Z])', l)[1] for l in cur_languages]

    print('PROJ_DIR="/home/cfourrie/documents/software/CopperMT"')
    print('MOSES_DIR="${PROJ_DIR}/submodules"')
    print()
    print('WK_DIR="${PROJ_DIR}/workspace/' + folder + '"')
    print('INPUTS_DIR="${PROJ_DIR}/inputs"')
    print()
    print(f'DATA_NAME="{folder}"')

    print(f'langs_bi="{",".join("-".join(l) for l in itertools.product(cur_languages, cur_languages))}"')
    print(f'langs="{",".join(cur_languages)}"')
    print(f'langs_shared="{"-".join(cur_languages)}"')


Step: 1h30 - In the pipeline folder, execute:

`
bash data_preprocess.sh parameters_abrahammonpa.cfg
bash data_preprocess.sh parameters_allenbai.cfg
bash data_preprocess.sh parameters_backstromnorthernpakistan.cfg
bash data_preprocess.sh parameters_castrosui.cfg
bash data_preprocess.sh parameters_davletshinaztecan.cfg
bash data_preprocess.sh parameters_felekesemitic.cfg
bash data_preprocess.sh parameters_hantganbangime.cfg
bash data_preprocess.sh parameters_hattorijaponic.cfg
bash data_preprocess.sh parameters_listsamplesize.cfg
bash data_preprocess.sh parameters_mannburmish.cfg
`

## Training models

In the pipeline folder, train bilingual neural models with:

`
bash main_nmt_bilingual_full.sh parameters_abrahammonpa.cfg
bash main_nmt_bilingual_full.sh parameters_allenbai.cfg
bash main_nmt_bilingual_full.sh parameters_backstromnorthernpakistan.cfg
bash main_nmt_bilingual_full.sh parameters_castrosui.cfg
bash main_nmt_bilingual_full.sh parameters_davletshinaztecan.cfg
bash main_nmt_bilingual_full.sh parameters_felekesemitic.cfg
bash main_nmt_bilingual_full.sh parameters_hantganbangime.cfg
bash main_nmt_bilingual_full.sh parameters_hattorijaponic.cfg
bash main_nmt_bilingual_full.sh parameters_listsamplesize.cfg
bash main_nmt_bilingual_full.sh parameters_mannburmish.cfg
`

In the pipeline folder, train multilingual neural models with:

`
bash main_nmt_multilingual_full.sh parameters_abrahammonpa.cfg
bash main_nmt_multilingual_full.sh parameters_allenbai.cfg
bash main_nmt_multilingual_full.sh parameters_backstromnorthernpakistan.cfg
bash main_nmt_multilingual_full.sh parameters_castrosui.cfg
bash main_nmt_multilingual_full.sh parameters_davletshinaztecan.cfg
bash main_nmt_multilingual_full.sh parameters_felekesemitic.cfg
bash main_nmt_multilingual_full.sh parameters_hantganbangime.cfg
bash main_nmt_multilingual_full.sh parameters_hattorijaponic.cfg
bash main_nmt_multilingual_full.sh parameters_listsamplesize.cfg
bash main_nmt_multilingual_full.sh parameters_mannburmish.cfg
`


In the pipeline folder, train bilingual statistical models with:

`
bash main_smt_full.sh parameters_abrahammonpa.cfg
bash main_smt_full.sh parameters_allenbai.cfg
bash main_smt_full.sh parameters_backstromnorthernpakistan.cfg
bash main_smt_full.sh parameters_castrosui.cfg
bash main_smt_full.sh parameters_davletshinaztecan.cfg
bash main_smt_full.sh parameters_felekesemitic.cfg
bash main_smt_full.sh parameters_hantganbangime.cfg
bash main_smt_full.sh parameters_hattorijaponic.cfg
bash main_smt_full.sh parameters_listsamplesize.cfg
bash main_smt_full.sh parameters_mannburmish.cfg
`

# Choosing best answer

## Utils

In [57]:
def sublist_with_unk(ls_with_unk, ls):
    ls = "".join(ls)
    ls_with_unk = "".join(ls_with_unk)
    
    for item in ls_with_unk.split("<unk>"):
        if item not in ls:
            return False

    return True

In [58]:
def get_neural_bleu_predictions(path, l_in, l_out, n_best):
    # Storage
    source = []
    target = []
    prediction = []
    confidence = []
    cur_prediction = []
    cur_confidence = []
    indices = []
    with open(
            f'{path}/bleu/bleu_checkpoint_best_{l_in}-{l_out}.{l_out}', 'r') as file:
        for i, line in enumerate(file):
            line = line.split("\t")
            # Actual source
            if "S-" in line[0]:
                word = line[1].strip(' ').split()
                source.append(word)
                indices.append(line[0])
                # We reinitialize the cur_prediction list
                if len(cur_prediction) > 0:
                    prediction.append(cur_prediction)
                    confidence.append(cur_confidence)
                    cur_prediction = []
                    cur_confidence = []
            # Actual target
            if "T-" in line[0]:
                word = line[1].strip(' ').split()
                target.append(word)
            # Hypothesis
            if "H-" in line[0] and len(cur_prediction) < n_best:
                word = line[2].strip(' ').split()
                cur_prediction.append(word)
                cur_confidence.append(math.exp(float(line[1])))
        prediction.append(cur_prediction)
        confidence.append(cur_confidence)
        try:
            prediction = [[bor[n] for bor in prediction] for n in range(n_best)]
        except IndexError as e:
            raise e

        #prediction = [[bor[n] for bor in prediction] for n in range(n_best)]

    return source, target, prediction, confidence, indices


def get_statistical_bleu_predictions(path_data, path, l_in, l_out, n_best, cur_n_best):
    target = []
    try:
        with open(f'{path_data}/test.{l_in}-{l_out}.{l_out}', 'r') as file:
            for i, line in enumerate(file):
                target.append(line.split())
    except FileNotFoundError:
        pass

    source = []
    with open(f'{path_data}/test.{l_in}-{l_out}.{l_in}', 'r') as file:
        for i, line in enumerate(file):
            source.append(line.split())

    prediction = []
    confidence = []
    cur_ix = -1
    cur_prediction = []
    cur_confidence = []
    indices = []
    with open(f'{path}/{l_in}-{l_out}/out/'
              f'test.{l_in}-{l_out}_nbest_{str(n_best)}.{l_out}', 'r') as file:
        for i, line in enumerate(file):
            line = line.split("|||")
            ix = int(line[0])
            word = line[1].strip(' ').split()

            if cur_ix != ix:
                if cur_ix != -1:
                    indices.append(cur_ix)
                    while len(cur_prediction) < cur_n_best:
                        cur_prediction.append(cur_prediction[-1])
                        cur_confidence.append(cur_confidence[-1])
                    prediction.append(cur_prediction)
                    confidence.append(cur_confidence)
                cur_prediction = [word]
                cur_confidence = [math.exp(float(line[-1]))]
                cur_ix = ix
            else:
                cur_prediction.append(word)
                cur_confidence.append(math.exp(float(line[-1])))
        # Management of last prediction
        indices.append(cur_ix)
        while len(cur_prediction) < cur_n_best:
            cur_prediction.append(cur_prediction[-1])
            cur_confidence.append(cur_confidence[-1])
        prediction.append(cur_prediction)
        confidence.append(cur_confidence)

    prediction = [[bor[n] for bor in prediction] for n in range(cur_n_best)]

    return source, target, prediction, confidence, indices

## Logic

In [59]:
def get_results_from_file(langs, model, cur_data):
    cur_results_grouped = {lang: defaultdict(dict) for lang in langs}
    cur_results_by_lang = {lang: defaultdict(dict) for lang in langs}
        # Read results
    for lang_out in langs:
        for lang_in in langs:
            if lang_in == lang_out: continue
            if model == "shared_bilingual":
                all_sources, _, all_predictions, all_confidences, all_indices = get_neural_bleu_predictions(
                    f"{models_dir}/{cur_data}/{model}/{lang_in}-{lang_out}/{split}", lang_in, lang_out, 10)
            elif model == "shared_multilingual":
                all_sources, _, all_predictions, all_confidences, all_indices = get_neural_bleu_predictions(
                    f"{models_dir}/{cur_data}/{model}/{split}", lang_in, lang_out, 10)
            else:
                all_sources, _, all_predictions, all_confidences, all_indices = get_statistical_bleu_predictions(
                    f"{split_data_path}/shared_{cur_data}/{split}",
                    f"{models_dir}/{cur_data}/{model}/{split}", lang_in, lang_out, 10, 10)
             
            for ix, (source, index) in enumerate(zip(all_sources, all_indices)):
                predictions = [" ".join(all_predictions[n_best][ix]) for n_best in range(10)]
                confidences = all_confidences[ix]
                cur_results_grouped[lang_out][index].update(
                    {f"{lang_in}_source": " ".join(source),
                     lang_in: sorted([(p, c) for p, c in zip(predictions, confidences)])}
                ) 
                cur_results_by_lang[lang_out][lang_in].update(
                     {" ".join(source): sorted([(p, c) for p, c in zip(predictions, confidences)])}
                ) 
                
                
    return cur_results_grouped, cur_results_by_lang

In [77]:
def get_best_prediction(row_results):
    predictions_scores = defaultdict(int)
    predictions_counts = defaultdict(int)
    for lang_res in row_results.values():
        for pred, score in lang_res:
            predictions_scores[pred] += score
            predictions_counts[pred] += 1
    # prediction scores is better for SMT models! (considerably)
    best_prediction = [k for k, v in predictions_scores.items() if v == max(predictions_scores.values())]
    if best_prediction:
        best_prediction = best_prediction[0]
    else:
        best_prediction = ""

    return best_prediction

In [78]:
def reordering(raw_data_path, cur_data, model, split, results_by_lang):
    test_df = pd.read_csv(os.path.join(raw_data_path, cur_data, f"test-{split}.tsv"), sep="\t")
    final_results = defaultdict(list)
    for ix, (_, row) in enumerate(test_df.iterrows()):
        row_dict = dict(row)
        final_results["COGID"].append(row_dict.pop("COGID"))
        lang_out = [k for k, v in row_dict.items() if v == "?"][0]
        row_results = {}
        # Compute all predictions
        for lang, val in row_dict.items():
            if lang == lang_out: continue
                
            final_results[lang].append("")                
            if not isinstance(val, str): continue # nan because was empty
            try:
                row_results[lang] = results_by_lang[model][lang_out][lang][val]
            except KeyError: # some chars are only present in test, and encoded as unk
                # We extract possible keys 
                keys_with_unk = [v for v in results_by_lang[model][lang_out][lang].keys() 
                                 if "<unk>" in v and len(v.split(" ")) == len(val.split(" "))]
                possible_keys = []
                for key in keys_with_unk:
                    if sublist_with_unk(key.split(" "), val.split(" ")):
                        possible_keys.append(key)

                if len(possible_keys) > 1:
                    raise Exception("Problem! Several plausible keys!", val, possible_keys)
                elif len(possible_keys) == 0:
                    raise Exception("Problem! No plausible key!", val, lang, lang_out, results_by_lang[model][lang_out])
                else:
                    row_results[lang] = results_by_lang[model][lang_out][lang][possible_keys[0]]
                    
            # We filter on length ratio
            row_results[lang] = [v for v in row_results[lang] if 0.3 < len(v)/len(val) < 3]


        # Rank best prediction, then Save
        final_results[lang_out].append(get_best_prediction(row_results))

    return final_results

In [80]:
loc_models = ['shared_bilingual', 'shared_multilingual', 'shared_statistical']

In [81]:
for cur_data in folders:
    for split in splits:
        df = pd.read_csv(os.path.join(raw_data_path, cur_data, f"cognates.tsv"), sep="\t")
        langs = [c for c in df.columns if c != 'COGID']

        results_grouped = {model: {lang: defaultdict(dict) for lang in langs} for model in models}
        results_by_lang = {model: {lang: defaultdict(dict) for lang in langs} for model in models}

        for model in loc_models:
            try:
                # Read results
                cur_results_grouped, cur_results_by_lang = get_results_from_file(langs, model, cur_data)
                results_grouped[model] = cur_results_grouped
                results_by_lang[model] = cur_results_by_lang

                # Reorder according to initial file
                final_results = reordering(raw_data_path, cur_data, model, split, results_by_lang)

                # Store best prediction
                if True:
                    with open(os.path.join(raw_data_path, cur_data, f"results-{model}-{split}.tsv"), "w+") as f:
                        f.write("COGID\t" + "\t".join(langs) + "\n")
                        for ix in range(len(final_results["COGID"])):
                            try:
                                f.write("\t".join([final_results[label][ix] if final_results[label][ix] else "" for label in ["COGID"] + langs]) + "\n")
                            except IndexError as e:
                                raise e
                print("OK", cur_data, split, model)
            except Exception as e:
                print("ERROR", cur_data, split, model, e)

OK abrahammonpa 0.10 shared_bilingual
OK abrahammonpa 0.10 shared_multilingual
OK abrahammonpa 0.10 shared_statistical
OK abrahammonpa 0.20 shared_bilingual
OK abrahammonpa 0.20 shared_multilingual
OK abrahammonpa 0.20 shared_statistical
OK abrahammonpa 0.30 shared_bilingual
OK abrahammonpa 0.30 shared_multilingual
OK abrahammonpa 0.30 shared_statistical
OK abrahammonpa 0.40 shared_bilingual
OK abrahammonpa 0.40 shared_multilingual
OK abrahammonpa 0.40 shared_statistical
OK abrahammonpa 0.50 shared_bilingual
OK abrahammonpa 0.50 shared_multilingual
OK abrahammonpa 0.50 shared_statistical
ERROR allenbai 0.10 shared_bilingual list index out of range
ERROR allenbai 0.10 shared_multilingual list index out of range
ERROR allenbai 0.10 shared_statistical list index out of range
OK allenbai 0.20 shared_bilingual
OK allenbai 0.20 shared_multilingual
OK allenbai 0.20 shared_statistical
OK allenbai 0.30 shared_bilingual
OK allenbai 0.30 shared_multilingual
OK allenbai 0.30 shared_statistical
OK 

ERROR hantganbangime 0.40 shared_statistical [Errno 2] No such file or directory: '/home/cfourrie/documents/software/public/CopperMT/workspace/hantganbangime/shared_statistical/0.40/Toro_Tegu-Bunoge/out/test.Toro_Tegu-Bunoge_nbest_10.Bunoge'
ERROR hantganbangime 0.50 shared_bilingual list index out of range
ERROR hantganbangime 0.50 shared_multilingual list index out of range
ERROR hantganbangime 0.50 shared_statistical [Errno 2] No such file or directory: '/home/cfourrie/documents/software/public/CopperMT/workspace/hantganbangime/shared_statistical/0.50/Bunoge-Bankan_Tey/out/test.Bunoge-Bankan_Tey_nbest_10.Bankan_Tey'
OK hattorijaponic 0.10 shared_bilingual
OK hattorijaponic 0.10 shared_multilingual
OK hattorijaponic 0.10 shared_statistical
OK hattorijaponic 0.20 shared_bilingual
OK hattorijaponic 0.20 shared_multilingual
OK hattorijaponic 0.20 shared_statistical
OK hattorijaponic 0.30 shared_bilingual
OK hattorijaponic 0.30 shared_multilingual
OK hattorijaponic 0.30 shared_statistica

```bash
source ~/Desktop/SIGTYP2022/venv/bin/activate
# Baseline
#model="backstromnorthernpakistan"
for model in "abrahammonpa" "allenbai" "backstromnorthernpakistan" "castrosui" "davletshinaztecan" "felekesemitic" "hantganbangime" "hattorijaponic" "listsamplesize" "mannburmish"; do
    echo ")0.10 - baseline" >> analysis_${model}.txt;
    st2022 --compare --prediction-file=${model}/result-0.10.tsv --solution-file=${model}/solutions-0.10.tsv >> analysis_${model}.txt;
    echo ")0.10 - BiNMT" >> analysis_${model}.txt;
    st2022 --compare --prediction-file=${model}/results-shared_bilingual-0.10.tsv --solution-file=${model}/solutions-0.10.tsv  >> analysis_${model}.txt;
    echo ")0.10 - MNMT" >> analysis_${model}.txt;
    st2022 --compare --prediction-file=${model}/results-shared_multilingual-0.10.tsv --solution-file=${model}/solutions-0.10.tsv  >> analysis_${model}.txt;
    echo ")0.10 - SMT" >> analysis_${model}.txt;
    st2022 --compare --prediction-file=${model}/results-shared_statistical-0.10.tsv --solution-file=${model}/solutions-0.10.tsv  >> analysis_${model}.txt;
    
    echo ")0.50 - baseline" >> analysis_${model}.txt;
    st2022 --compare --prediction-file=${model}/result-0.50.tsv --solution-file=${model}/solutions-0.50.tsv  >> analysis_${model}.txt;
    echo ")0.50 - BiNMT" >> analysis_${model}.txt;
    st2022 --compare --prediction-file=${model}/results-shared_bilingual-0.50.tsv --solution-file=${model}/solutions-0.50.tsv >> analysis_${model}.txt;
    echo ")0.50 - MNMT" >> analysis_${model}.txt;
    st2022 --compare --prediction-file=${model}/results-shared_multilingual-0.50.tsv --solution-file=${model}/solutions-0.50.tsv  >> analysis_${model}.txt;
    echo ")0.50 - SMT" >> analysis_${model}.txt;
    st2022 --compare --prediction-file=${model}/results-shared_statistical-0.50.tsv --solution-file=${model}/solutions-0.50.tsv >> analysis_${model}.txt;
done
```

## Problems to manage
- sur allenbai encoding en 0.10 pour bilingue et multilingue
- castrosui languages changes for saves, need to change them when called here too
- davletshinaztecan 0.20 bilingual not launched for all language pairs?

In [75]:
results_dict = {language_family: {split: {
    model: {} for model in models
} for split in splits} for language_family in folders}

for language_family in folders:
    cur_model = "baseline"
    cur_split = "0.10"
    with open(os.path.join(copper_dir, raw_data_path, f"analysis_{language_family}.txt"), "r") as f: 
        for line in f:
            if "Language" in line: continue
            if "--" in line: continue
            if line[0] == ")":
                cur_split, cur_model = line[1:].replace("\n", "").split(" - ");
                continue
            lang, ed, norm_ed, f5 = " ".join(line.split()).split()
            results_dict[language_family][cur_split][cur_model].update(
                {lang: {"ED": ed, "Normalized ED": norm_ed, "B2 F5": f5}}
            )

KeyError: 'O.10'

In [72]:
models

['shared_bilingual', 'shared_multilingual', 'shared_statistical']

In [76]:
for k, v in results_dict.items():
    for split in ["0.10", "0.50"]:
        for model in models:
            try:
                print(k, split, v[split][model]["TOTAL"])
            except KeyError:
                print(k, model, split)

abrahammonpa 0.10 {'ED': '0.459', 'Normalized ED': '0.088', 'B2 F5': '0.884'}
abrahammonpa 0.10 {'ED': '1.150', 'Normalized ED': '0.223', 'B2 F5': '0.712'}
abrahammonpa 0.10 {'ED': '1.041', 'Normalized ED': '0.194', 'B2 F5': '0.725'}
abrahammonpa 0.10 {'ED': '0.372', 'Normalized ED': '0.069', 'B2 F5': '0.900'}
abrahammonpa 0.50 {'ED': '1.486', 'Normalized ED': '0.281', 'B2 F5': '0.687'}
abrahammonpa 0.50 {'ED': '3.483', 'Normalized ED': '0.657', 'B2 F5': '0.285'}
abrahammonpa 0.50 {'ED': '3.694', 'Normalized ED': '0.675', 'B2 F5': '0.249'}
abrahammonpa 0.50 {'ED': '0.961', 'Normalized ED': '0.168', 'B2 F5': '0.724'}
allenbai baseline 0.10
allenbai BiNMT 0.10
allenbai MNMT 0.10
allenbai SMT 0.10
allenbai baseline 0.50
allenbai BiNMT 0.50
allenbai MNMT 0.50
allenbai SMT 0.50
backstromnorthernpakistan baseline 0.10
backstromnorthernpakistan BiNMT 0.10
backstromnorthernpakistan MNMT 0.10
backstromnorthernpakistan SMT 0.10
backstromnorthernpakistan baseline 0.50
backstromnorthernpakistan Bi

In [42]:
for k, v in results_dict.items():
    for split in ["0.10", "0.50"]:
        for model in models:
            try:
                print(k, split, v[split][model]["TOTAL"])
            except KeyError:
                print(k, model, split)

abrahammonpa 0.10 {'ED': '0.459', 'Normalized ED': '0.088', 'B2 F5': '0.884'}
abrahammonpa 0.10 {'ED': '1.150', 'Normalized ED': '0.223', 'B2 F5': '0.712'}
abrahammonpa 0.10 {'ED': '1.041', 'Normalized ED': '0.194', 'B2 F5': '0.725'}
abrahammonpa 0.10 {'ED': '0.372', 'Normalized ED': '0.069', 'B2 F5': '0.900'}
abrahammonpa 0.50 {'ED': '1.486', 'Normalized ED': '0.281', 'B2 F5': '0.687'}
abrahammonpa 0.50 {'ED': '3.483', 'Normalized ED': '0.657', 'B2 F5': '0.285'}
abrahammonpa 0.50 {'ED': '3.694', 'Normalized ED': '0.675', 'B2 F5': '0.249'}
abrahammonpa 0.50 {'ED': '0.961', 'Normalized ED': '0.168', 'B2 F5': '0.724'}
allenbai 0.10 {'ED': '0.882', 'Normalized ED': '0.288', 'B2 F5': '0.743'}
allenbai BiNMT 0.10
allenbai MNMT 0.10
allenbai SMT 0.10
allenbai 0.50 {'ED': '1.192', 'Normalized ED': '0.379', 'B2 F5': '0.637'}
allenbai 0.50 {'ED': '1.273', 'Normalized ED': '0.406', 'B2 F5': '0.499'}
allenbai 0.50 {'ED': '1.206', 'Normalized ED': '0.386', 'B2 F5': '0.519'}
allenbai 0.50 {'ED': '0