In [1]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
import gensim
import gensim.downloader as api
from gensim.models.fasttext import load_facebook_model
import random
from difflib import SequenceMatcher
from scipy import stats
import sacrebleu
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import os
import spacy
import re


# nltk.download()

German word2vec model Facebook https://fasttext.cc/docs/en/crawl-vectors.html (cc.de.300.bin)

In [2]:
# Code taken from https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#Python
def levenshtein(s1, s2):
    if len(s1) < len(s2):
        return levenshtein(s2, s1)

    # len(s1) >= len(s2)
    if len(s2) == 0:
        return len(s1)

    previous_row = range(len(s2) + 1)
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1 # j+1 instead of j since previous_row and current_row are one character longer
            deletions = current_row[j] + 1       # than s2
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row
    
    return previous_row[-1]

In [3]:
# https://docs.python.org/3/library/difflib.html
    
def changes_spread(original_tokenized, changed_tokenized, opcodes):
    start_change = -1
    end_change = -1
    for opcode in opcodes:
        if opcode[0] != 'equal':
            start_change = opcode[1]
            break
    for opcode in reversed(opcodes):
        if opcode[0] != 'equal':
            end_change = opcode[2]
            break
    return max(0, end_change-start_change)/len(changed_tokenized)

    

In [4]:
def highlight_in_capital(sentence_tokenized, highlight_positions):
    """
    Params:
        sentence_tokenized: tokenzied sentence
        highlight_positions: list of 2-sized tuples: [(p1, p2), (p3,p4), ...]
            where we want to highlight sentence[p1:p2], sentence[p3:p4]
    """
    highlighted_sentence = []
    
    last = 0  # index of the last position added to the new sentence
    for (start, stop) in highlight_positions:
        highlighted_sentence.extend(
            sentence_tokenized[last:start] + \
            [w.upper() for w in sentence_tokenized[start:stop]]
        )
        last = stop
    if last < len(sentence_tokenized):
        highlighted_sentence.extend(
            sentence_tokenized[last:]
        )
    return ' '.join(highlighted_sentence)

In [5]:
def two_chunk_changed(original_tokenized, changed_tokenized, opcodes, 
                      chunk_max_length=1, spacy_model=None, w2v_model=None):
    # Return the original and changed sentences with the chunk highlighted in capital
    # Return whether this sentence has only two chunk changes within the max length. 
    # And return the distance between the two changed chunks
    
    is_two_chunk_changed = False
    chunk_distance = pd.NA
    is_same_subtree = pd.NA
    changes_similarity = pd.NA
    
    
    
    changes_types = [o[0] for o in opcodes]
    
    # If not exactly two changes, return
    if not (all(changes_type == 'replace' or changes_type == 'equal' for changes_type in changes_types) and \
        changes_types.count('replace') == 2):
        return is_two_chunk_changed, chunk_distance, is_same_subtree, changes_similarity
    
    # Find the positions of the two changed chunks
    i_replace = [i for i, change in enumerate(changes_types) if change == "replace"]
    
    # If two changed chunks not have length less than chunk_max_length, return
    if not (opcodes[i_replace[0]][2] - opcodes[i_replace[0]][1] <= chunk_max_length and \
            opcodes[i_replace[1]][2] - opcodes[i_replace[1]][1] <= chunk_max_length):
        return is_two_chunk_changed, chunk_distance, is_same_subtree, changes_similarity
    
    # At this point, this should be a valid two_chunk within length change
    is_two_chunk_changed = True
    
    # Check if there is indeed an equal chunks in between of the two changed chunk
    # Calculate the distance between two chunks = the equal chunk in between
    i_equal_in_between = (i_replace[1] + i_replace[0]) // 2
    assert opcodes[i_equal_in_between][0] == 'equal'
    chunk_distance = opcodes[i_equal_in_between][2] - opcodes[i_equal_in_between][1]


    if spacy_model is not None: 
        # In the two_chunk_changed case when chunk_max_length=1, i.e., only two words are changed 
        # comparing to the original translation
        # Check if the two changed words are in the same sub tree of the dependency tree
        if (opcodes[i_replace[0]][4] - opcodes[i_replace[0]][3] == 1 and \
            opcodes[i_replace[1]][4] - opcodes[i_replace[1]][3] == 1):
            # Find the ancestors and children of the two changed words
            doc = spacy_model(' '.join(changed_tokenized))
            token1, token2 = None, None
            family1, family2 = None, None
            for token in doc:
                if token.text == changed_tokenized[opcodes[i_replace[0]][3]]:
                    token1 = token.text
                    family1 = list(token.ancestors) + list(token.children)
                    family1 = [t.text for t in family1]
                elif token.text == changed_tokenized[opcodes[i_replace[1]][3]]:
                    token2 = token.text
                    family2 = list(token.ancestors) + list(token.children)
                    family2 = [t.text for t in family2]

            if token1 is None or token2 is None:
                is_same_subtree = pd.NA
            else:
                if token1 in family2 or token2 in family1:
                    is_same_subtree = True
                else:
                    is_same_subtree = False


    # Calculate the senmatic similarity of the two changed words (cosine similarity in [-1, 1])
    if w2v_model is not None:
        # Can only calculate when only two single tokens are changed
        if (opcodes[i_replace[0]][4] - opcodes[i_replace[0]][3] == 1 and \
            opcodes[i_replace[1]][4] - opcodes[i_replace[1]][3] == 1 and \
            opcodes[i_replace[0]][2] - opcodes[i_replace[0]][1] == 1 and \
            opcodes[i_replace[1]][2] - opcodes[i_replace[1]][1] == 1):

            original_word_1 = original_tokenized[opcodes[i_replace[0]][1]]
            changed_word_1 = changed_tokenized[opcodes[i_replace[0]][3]]

            original_word_2 = original_tokenized[opcodes[i_replace[1]][1]]
            changed_word_2 = changed_tokenized[opcodes[i_replace[1]][3]]

            if original_word_1 in w2v_model.index_to_key and original_word_2 in w2v_model.index_to_key and \
                changed_word_1 in w2v_model.index_to_key and changed_word_2 in w2v_model.index_to_key:
                changes_similarity = [{'original_word': original_word_1, 
                                       'changed_word': changed_word_1, 
                                       'semantic_similarity': w2v_model.similarity(original_word_1, changed_word_1)},
                                      {'original_word': original_word_2,
                                       'changed_word': changed_word_2,
                                       'semantic_similarity': w2v_model.similarity(original_word_2, changed_word_2)}]


    return is_two_chunk_changed, chunk_distance, is_same_subtree, changes_similarity
    
    
def highlight_changes(original_tokenized, changed_tokenized, opcodes):
    """
    Params:
        original_tokenized: tokenized original sentence
        changed_tokenized: tokenized changed sentence
        opcodes: changes to get from `original_tokenized` to `changed_tokenized`
    Returns:
        original_sentence and changed_sentence with the changes highlighted in capital
    """
    
    highlighted_original_sentence_positions = []
    highlighted_changed_sentence_positions = []
    
    for opcode in opcodes:
        tag, i1, i2, j1, j2 = opcode[0], opcode[1], opcode[2], opcode[3], opcode[4]
        
        if tag != 'equal':
            highlighted_original_sentence_positions.append((i1, i2))
            highlighted_changed_sentence_positions.append((j1, j2))
            
    original_sentence_highlighted = highlight_in_capital(
        sentence_tokenized=original_tokenized, 
        highlight_positions=highlighted_original_sentence_positions
    )
    
    changed_sentence_highlighted = highlight_in_capital(
        sentence_tokenized=changed_tokenized, 
        highlight_positions=highlighted_changed_sentence_positions
    )
    
    return original_sentence_highlighted, changed_sentence_highlighted
    
    
def calculate_change(original, changed):
    # Return the original and changed sentences with the changes highlighted in capital
    
    original_tokenized = nltk.word_tokenize(original)
    changed_tokenized = nltk.word_tokenize(changed)
    
    opcodes = SequenceMatcher(None, original_tokenized, changed_tokenized).get_opcodes()
    
    # Convert the opcodes (displayed by word index) to changes in words
    changes = []
    for opcode in opcodes:
        tag, i1, i2, j1, j2 = opcode[0], opcode[1], opcode[2], opcode[3], opcode[4]
        if tag != 'equal':
            changes.append((tag, ' '.join(original_tokenized[i1:i2]), ' '.join(changed_tokenized[j1:j2])))
    
    return original_tokenized, changed_tokenized, opcodes, changes


In [6]:
def load_alignment(path_prefix):
    alignment_file_path = f"{path_prefix}_word_alignment.txt"
    if not os.path.isfile(alignment_file_path):
        raise RuntimeError("Alignment file not exist.")
        
    else:
        with open(alignment_file_path) as f:
            lines = [line.rstrip() for line in f]
            
        translation_alignment = []
        for line in lines:
            word_pairs = line.split()
            word_pairs = [word_pair.split('<sep>') for word_pair in word_pairs]
            translation_alignment.append(dict(word_pairs))
        return translation_alignment

In [7]:
def add_reason_of_change(alignment, changes, perturbed_src_word):
    if type(changes) != list:
        return pd.NA
    elif perturbed_src_word not in alignment.keys():
        changes[0]['change_type'] = None
        changes[1]['change_type'] = None
    elif alignment[perturbed_src_word] == changes[0]['changed_word'] and alignment[perturbed_src_word] == changes[1]['changed_word']:
        # Both changes are due to perturbation --> weird --> pass
        changes[0]['change_type'] = None
        changes[1]['change_type'] = None
    elif alignment[perturbed_src_word] != changes[0]['changed_word'] and alignment[perturbed_src_word] != changes[1]['changed_word']:
        # Both changes NOT due to perturbation --> weird --> pass
        changes[0]['change_type'] = None
        changes[1]['change_type'] = None
    elif alignment[perturbed_src_word] == changes[0]['changed_word']:
        changes[0]['change_type'] = "perturbed"
        changes[1]['change_type'] = "not_perturbed"
    elif alignment[perturbed_src_word] == changes[1]['changed_word']:
        changes[0]['change_type'] = "not_perturbed"
        changes[1]['change_type'] = "perturbed"
        
    return changes
        
        

In [8]:
def pos_tag_not_perturbed_change(changes, spacy_model):
    if type(changes) != list:
        return pd.NA
    elif changes[0]['change_type'] == "not_perturbed":
        doc = spacy_model(changes[0]['changed_word'])
        return [t.pos_ for t in doc][0]
    elif changes[1]['change_type'] == "not_perturbed":
        doc = spacy_model(changes[1]['changed_word'])
        return [t.pos_ for t in doc][0]
    return pd.NA

In [9]:
def read_output_df(dataset, perturb_type, beam, replacement_strategy, analyse_feature=True, 
                   ignore_case=False, no_of_replacements=1, chunk_max_length=1, spacy_model=None, 
                   w2v_model=None, use_alignment=False, winoMT=False, ref_available=False):
    if winoMT:
        path_prefix = "output/winoMT_asmetric/wmt19_winoMT_perturbed"
        output_df = pd.read_csv('output/winoMT_asmetric/wmt19_winoMT_perturbed_format.csv', index_col=0)  
    else:
        if no_of_replacements == 1:
            path_prefix = f"output/{dataset}/{replacement_strategy}/beam{beam}_perturb{perturb_type}/seed0/translations"
        else:
            path_prefix = f"output/{dataset}/{replacement_strategy}/beam{beam}_perturb{perturb_type}/seed0/translations_5replacements"

        output_df = pd.read_csv(f"{path_prefix}.csv", index_col=0)

        # Join to get the translation of the original sentences as well
        original_trans_path_prefix = \
            f"output/{dataset}/{replacement_strategy}/beam{beam}_perturbNone/seed0/translations"
        output_df = output_df.join(pd.read_csv(
            f"{original_trans_path_prefix}.csv", index_col=0
        )['OriginalSRC-Trans'])
        
    
    # Convert columns with sentences to str type
    cols = ['SRC', 'REF', 'SRC_perturbed', 'SRC_perturbed-Trans', 'OriginalSRC-Trans']
    if not ref_available:
        cols.remove('REF')
    output_df[cols] = output_df[cols].astype(str)
    
    if ignore_case:
        output_df[cols] = output_df[cols].applymap(lambda x: x.lower())
    
    # Reorder the columns
    if winoMT:
        cols = ['SRC', 'REF', 'original_word', 'perturbed_word', 'SRC_perturbed', 'OriginalSRC-Trans', 'SRC_perturbed-Trans', 'Bias_sample']
    elif no_of_replacements == 1:
        cols = ['SRC', 'REF', 'original_word', 'perturbed_word', 'SRC_perturbed', 'OriginalSRC-Trans', 'SRC_perturbed-Trans']
    else:
        cols = ['SRC_index', 'SRC', 'REF', 'original_word', 'perturbed_word', 'SRC_perturbed', 'OriginalSRC-Trans', 'SRC_perturbed-Trans']
    if not ref_available:
        cols.remove('REF')
    output_df = output_df[cols]
    
    if analyse_feature:
        print(f"Original df shape: {output_df.shape}")
        output_df = output_df.dropna()
        print(f"After dropping none-perturbed sentences: {output_df.dropna().shape}")
        
        
        # Calculate the changes, i.e., how to get from the original trans sentence 
        # to the changed trans sentence
        output_df['tokenized_OriginalSRC-Trans'], output_df['tokenized_SRC_perturbed-Trans'], output_df['opcodes'], output_df['changes'] \
            = zip(*output_df.apply(
                lambda x: calculate_change(x['OriginalSRC-Trans'], 
                                           x['SRC_perturbed-Trans']), axis=1
            ))
        
        
        # Highlight the changes in the trans sentences
        output_df["OriginalSRC-Trans"], output_df['SRC_perturbed-Trans'] \
            = zip(*output_df.apply(
                lambda x: highlight_changes(
                    x['tokenized_OriginalSRC-Trans'], 
                    x['tokenized_SRC_perturbed-Trans'], 
                    x['opcodes']), axis=1
            ))
        
        
        
        
        if replacement_strategy == 'word2vec_similarity':
            # SRC difference is the number of occurances of the word we perturb
            output_df["SRC-edit_distance"] = output_df.apply(lambda x: x['tokenized_OriginalSRC-Trans'].count(x['original_word']), axis=1)
        else:
            output_df["SRC-edit_distance"] = 1
        output_df['Trans-edit_distance'] =  output_df.apply(
            lambda x: levenshtein(x['tokenized_OriginalSRC-Trans'], x['tokenized_SRC_perturbed-Trans']), axis=1)
        output_df["#TransChanges-#SrcChanges"] = output_df['Trans-edit_distance'] - output_df['SRC-edit_distance']
        
        output_df["#TransChanges-#SrcChanges/SentenceLength"] = (output_df['Trans-edit_distance'] - output_df['SRC-edit_distance']) / output_df['SRC'].apply(lambda x: len(nltk.word_tokenize(x)))
        
        output_df["ChangesSpread/SentenceLength"] = output_df.apply(
            lambda x: changes_spread(x['tokenized_OriginalSRC-Trans'], 
                                     x['tokenized_SRC_perturbed-Trans'], 
                                     x['opcodes']), axis=1)
        
        
        
        # See if only two chunks within given max size are changed, 
        # and do some analysis on this special case
        output_df['TwoChunksChanged'], output_df['ChunkDistance'], \
        output_df["is_same_subtree"], output_df['changes_similarity'] \
            = zip(*output_df.apply(
                lambda x: two_chunk_changed(x['tokenized_OriginalSRC-Trans'],
                                            x['tokenized_SRC_perturbed-Trans'],
                                            x['opcodes'],
                                            chunk_max_length=chunk_max_length,
                                            spacy_model=spacy_model,
                                            w2v_model=w2v_model), axis=1
            ))

        
        
        if use_alignment:
            if not winoMT:
                original_alignment = load_alignment(original_trans_path_prefix)
                output_df['original_trans_alignment'] = [alignment for alignment in original_alignment for _ in range(no_of_replacements)]
            output_df['perturbed_trans_alignment'] = load_alignment(path_prefix)
            
            # In the case where two changes occurs and the two similarities is calculated, 
            # find out which change is due to the perturbation
            output_df['changes_similarity'] = output_df.apply(
                lambda x: add_reason_of_change(
                    alignment=x['perturbed_trans_alignment'],
                    changes=x['changes_similarity'],
                    perturbed_src_word=x['perturbed_word']
                ),
                axis=1
            )
            
            if spacy_model is not None:
                # Add POS tagging of the not-perturbed change
                output_df['not_perturbed_TGT_change_type'] = output_df['changes_similarity'].apply(
                    lambda x: pos_tag_not_perturbed_change(x, spacy_model))
            
        
        # Analyse on group of changes on the same sentence
        if no_of_replacements > 1:
            additional_col_1 = output_df.groupby(by="SRC_index", axis=0)[['Trans-edit_distance', '#TransChanges-#SrcChanges']].std()
            additional_col_2 = output_df.groupby(by="SRC_index", axis=0)[['TwoChunksChanged']].sum()
            
            output_df = output_df.join(additional_col_1, rsuffix='--SD')
            output_df = output_df.join(additional_col_2, rsuffix='--total')
        
    return output_df

    


In [11]:
perturb_type = 'content'
dataset = f'masked_{perturb_type}_covost2_for_en2de'  # 'MuST-SHE-en2fr' 'IWSLT15-en2vi' 'wmt19-newstest2019-en2de'
beam = 5
replacement_strategy = 'masking_language_model'
no_of_replacements = 5
ignore_case = False  # Only Europarls needs ignore case
chunk_max_length=1
spacy_model = spacy.load("de_core_news_sm")
# Loading these models in is time consuming
de_model = load_facebook_model("data/cc.de.300.bin").wv
# vi_model = load_facebook_model("data/cc.vi.300.bin").wv
winoMT = False

# # This overwrite the above params
# winoMT = True
# perturb_type = 'pronoun'
# no_of_replacements = 1

output = read_output_df(dataset=dataset, perturb_type=perturb_type, beam=beam, 
                        replacement_strategy=replacement_strategy, ignore_case=ignore_case,
                        no_of_replacements=no_of_replacements, chunk_max_length=chunk_max_length,
                        spacy_model=spacy_model, w2v_model=de_model, use_alignment=True, 
                        winoMT=winoMT, analyse_feature=True)

# print('BLEU score: ')
# sacrebleu.corpus_bleu(output['OriginalSRC-Trans'].tolist(), [output['REF'].tolist()]).score

Original df shape: (281250, 7)
After dropping none-perturbed sentences: (281250, 7)


# Comments

- On `wmt19-newstest2019-en2de, chunk_max_length=2`
    - 902: change to 1 SRC word leads to fixed changes of an irrelevant word
    - In many cases, the form of the verb (e.g., current or past tense) are changed --> harmful in the sense that it hurt performance score?
    - Word not being translated 
    - Spoken/written style
    - Time
    
    
- On `IWSLT15-en2vi, adjective`
    - 1003: change of 1 words consistently leads to change in subject
    
    - 1003, 145, 990 noun: same
    - 236 noun: same, funny but not sure if it is wrong
    - 308 verb same 
    
--> Quantify the verb form change by stemming/lemmatization
    
Chúng, họ, gã, cô ấy, cô ta, anh ta, hắn

Changes in the word "you"


In [None]:
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 9999999)
# output[output['#TransChanges-#SrcChanges'] > 10].head(5)
# output[output["ChangesSpread/SentenceLength"] > 0.85].head(20)



# Two chunks changed that consistently changed over the different replacement of a word


# output[(output["TwoChunksChanged"] == True) & (output["TwoChunksChanged--total"] == 5)].sort_values(by='ChunkDistance', axis=0, ascending=False).head(1)
# output[(output["TwoChunksChanged"] == True)].sort_values(by='ChunkDistance', axis=0, ascending=False).head(100)

# Two words changed that are not in the same subtree
# output[(output["TwoChunksChanged"] == True) & (output["is_same_subtree"] == False) & (output["TwoChunksChanged--total"] == 5)]




# IWSLT15-en2vi, noun
# output.loc[[1003, 145, 990, 236]]







Sort the samples by the least similarity in changed words

In [None]:
# Filter out the 2-word-changed cases and similarity can be calculated
def get_not_perturbed_change_similarity(changes):
    for change in changes:
        if change['change_type'] == 'not_perturbed':
            return change['semantic_similarity']
    return pd.NA

analyse_df = output[
    (output["TwoChunksChanged"] == True) & output['changes_similarity'].notna() & output['not_perturbed_TGT_change_type'].isin(['NOUN', 'VERB', 'ADJ', 'PRON'])
]
analyse_df['similarity_not_perturbed'] = analyse_df['changes_similarity'].apply(
    lambda x: get_not_perturbed_change_similarity(x)
)
analyse_df.sort_values(by='similarity_not_perturbed')[['SRC', 
                                                f'original_word', 
                                                f'perturbed_word',
                                                'OriginalSRC-Trans',
                                                f'SRC_perturbed-Trans',
                                                'ChunkDistance',
                                                'changes_similarity',
                                                'similarity_not_perturbed',
                                                'not_perturbed_TGT_change_type',
#                                                 'Bias_sample'
                                                      ]].head(50)


### Calculate metrics for detecting the bias samples

High precision --> higher chance that the returned samples are bias --> save human time

High recall --> more bias samples are retreat --> can detect more type of bias

We focus on precision then (save human cost)

In [None]:
from sklearn.metrics import classification_report

print(' -------------------- Most-changes filter -------------------- ')
q = 20  # Take the q% sentences with the highest changes
no_changes_thresthold = np.percentile(output['#TransChanges-#SrcChanges'], 100-q)
bias_prediction = output['#TransChanges-#SrcChanges'] > no_changes_thresthold
results = classification_report(
    y_true=output['Bias_sample'], y_pred=bias_prediction, 
)
print(results)

print(' -------------------- Most-spreaded_changes filter -------------------- ')
q = 20  # Take the q% sentences with the highest spread
spread_thresthold = np.percentile(output['ChangesSpread/SentenceLength'], 100-q)
bias_prediction = output['ChangesSpread/SentenceLength'] > spread_thresthold
results = classification_report(
    y_true=output['Bias_sample'], y_pred=bias_prediction, 
)
print(results)

print(' -------------------- Two-changes filter -------------------- ')
bias_prediction = output["TwoChunksChanged"]
results = classification_report(
    y_true=output['Bias_sample'], y_pred=bias_prediction, 
)
print(results)


print(' -------------------- Two-faraway-changes filter -------------------- ')
q = 20  # Take the q% sentences with the furthest distance between 2 changes 
distance_thresthold = np.nanpercentile(output['ChunkDistance'], 100-q)
bias_prediction = output["TwoChunksChanged"] & (output['ChunkDistance'] > distance_thresthold)
results = classification_report(
    y_true=output['Bias_sample'], y_pred=bias_prediction, 
)
print(results)

print(' -------------------- Two-changes-different-subtree filter -------------------- ')
bias_prediction = output["TwoChunksChanged"] & (output["is_same_subtree"] == False)
results = classification_report(
    y_true=output['Bias_sample'], y_pred=bias_prediction, 
)
print(results)


print(' -------------------- Two-change-dissimilar filter -------------------- ')
q = 90  # Take the q% sentences with the lowest similarity of the not-perturbed change
output = output.join(analyse_df['similarity_not_perturbed'])
similiarity_threshold = np.nanpercentile(output['similarity_not_perturbed'], q)

bias_prediction = output["TwoChunksChanged"] & (output['similarity_not_perturbed'] < similiarity_threshold)
results = classification_report(
    y_true=output['Bias_sample'], y_pred=bias_prediction, 
)
print(results)

# Analyse on same original_word accross sentences

In [None]:
output[[
    'SRC_index', 'SRC', 'original_word', 'perturbed_word', 'SRC_perturbed',
    'OriginalSRC-Trans', 'SRC_perturbed-Trans', '#TransChanges-#SrcChanges',
    '#TransChanges-#SrcChanges/SentenceLength',
    'ChangesSpread/SentenceLength', 'TwoChunksChanged', 'ChunkDistance',
    'is_same_subtree', 'changes_similarity', 'perturbed_trans_alignment',
    'not_perturbed_TGT_change_type', 'Trans-edit_distance--SD',
    '#TransChanges-#SrcChanges--SD', 'TwoChunksChanged--total'
]].groupby('original_word').mean().head()




### Most changes filter:

In [None]:
groupped_by_word = output.groupby('original_word').mean()

q = 10  # Take the q% groups with the highest changes
no_changes_thresthold = np.percentile(groupped_by_word['#TransChanges-#SrcChanges'], 100-q)
bias_prediction = groupped_by_word['#TransChanges-#SrcChanges'] > no_changes_thresthold

bias_word_predicted = groupped_by_word[bias_prediction].index.values

output[
    output['original_word'].isin(bias_word_predicted) & \
    (output['#TransChanges-#SrcChanges'] > no_changes_thresthold)
].head(2)





### Most-spreaded_changes filter

In [None]:
groupped_by_word = output.groupby('original_word').mean()

q = 10  # Take the q% sentences with the highest spread
spread_thresthold = np.percentile(groupped_by_word['ChangesSpread/SentenceLength'], 100-q)
bias_prediction = groupped_by_word['ChangesSpread/SentenceLength'] > spread_thresthold

bias_word_predicted = groupped_by_word[bias_prediction].index.values

output[
    output['original_word'].isin(bias_word_predicted) & \
    (output['ChangesSpread/SentenceLength'] > spread_thresthold)
].head(2)


### Two-faraway-changes filter

ACTUALLY two-changes is not a bias filter. It's just an auxilary filter to avoid paraphrasing cases. Using this we will miss out on the cases where the model has both paraphrasing and 

Here we consider in each group: the number of sentences that has 2 changes

In [None]:
two_change_only_groupped_by_word = output[output["TwoChunksChanged"]].groupby('original_word').mean()


q = 20  # Take the q% sentences with the furthest distance between 2 changes 
distance_thresthold = np.percentile(two_change_only_groupped_by_word['ChunkDistance'], 100-q)
bias_prediction = two_change_only_groupped_by_word['ChunkDistance'] > distance_thresthold


bias_word_predicted = two_change_only_groupped_by_word[bias_prediction].index.values

output[
    output["TwoChunksChanged"] & \
    output['original_word'].isin(bias_word_predicted) & \
    (output['ChunkDistance'] > distance_thresthold)
].head(2)



### Two-changes-different-subtree filter

In [None]:
tmp = output[output["TwoChunksChanged"] & output['is_same_subtree'].notna()]
tmp['not_same_subtree'] = 1 - tmp['is_same_subtree'].astype(int)
two_change_only_groupped_by_word = tmp.groupby('original_word').sum()

q = 20  # Take the q% groups with the highest number of different subtree changes
count_thresthold = np.percentile(two_change_only_groupped_by_word['not_same_subtree'], 100-q)
bias_prediction = two_change_only_groupped_by_word['ChunkDistance'] > count_thresthold


bias_word_predicted = two_change_only_groupped_by_word[bias_prediction].index.values

output[
    output["TwoChunksChanged"] & \
    output['original_word'].isin(bias_word_predicted) & \
    (output['is_same_subtree'] == 0)
].head(2)



### Two-change-dissimilar filter

In [None]:
output = output.join(analyse_df['similarity_not_perturbed'])
two_change_only_groupped_by_word = output[output["TwoChunksChanged"]].groupby('original_word').mean()


q = 20  # Take the q% sentences with the lowest similarity of the not-perturbed change
similiarity_threshold = np.nanpercentile(two_change_only_groupped_by_word['similarity_not_perturbed'], q)
bias_prediction = two_change_only_groupped_by_word['similarity_not_perturbed'] < similiarity_threshold


bias_word_predicted = two_change_only_groupped_by_word[bias_prediction].index.values

output[
    output["TwoChunksChanged"] & \
    output['original_word'].isin(bias_word_predicted) & \
    (output['similarity_not_perturbed'] < similiarity_threshold)
].head(2)






In [None]:
output.columns

## Find patterns

when a word A is replaced with B, then the change C happens

In [12]:
output.head()

Unnamed: 0,SRC_index,SRC,original_word,perturbed_word,SRC_perturbed,OriginalSRC-Trans,SRC_perturbed-Trans,tokenized_OriginalSRC-Trans,tokenized_SRC_perturbed-Trans,opcodes,...,TwoChunksChanged,ChunkDistance,is_same_subtree,changes_similarity,original_trans_alignment,perturbed_trans_alignment,not_perturbed_TGT_change_type,Trans-edit_distance--SD,#TransChanges-#SrcChanges--SD,TwoChunksChanged--total
0,0,“Don’t accept.”,accept,have,“Don’t have.”,< UNK > AKZEPTIERE nicht . < UNK >,HABEN SIE nicht .,"[<, unk, >, Akzeptiere, nicht, ., <, unk, >]","[Haben, sie, nicht, .]","[(replace, 0, 4, 0, 2), (equal, 4, 6, 2, 4), (...",...,False,,,,"{'“': '>', 'Don': 'unk', 't': 'unk', 'accept':...","{'Don': 'Haben', 'have': 'nicht', '.': '.'}",,0.547723,0.547723,0
0,0,“Don’t accept.”,accept,make,“Don’t make.”,< UNK > AKZEPTIERE NICHT . < UNK >,MACHT NICHTS .,"[<, unk, >, Akzeptiere, nicht, ., <, unk, >]","[Macht, nichts, .]","[(replace, 0, 5, 0, 2), (equal, 5, 6, 2, 3), (...",...,False,,,,"{'“': '>', 'Don': 'unk', 't': 'unk', 'accept':...","{'Don': 'Macht', 'make': 'nichts', '.': '.'}",,0.547723,0.547723,0
0,0,“Don’t accept.”,accept,know,“Don’t know.”,< UNK > AKZEPTIERE nicht . < UNK >,ICH WEISS ES nicht .,"[<, unk, >, Akzeptiere, nicht, ., <, unk, >]","[Ich, weiß, es, nicht, .]","[(replace, 0, 4, 0, 3), (equal, 4, 6, 3, 5), (...",...,False,,,,"{'“': '>', 'Don': 'unk', 't': 'unk', 'accept':...","{'know': 'weiß', '.': '.'}",,0.547723,0.547723,0
0,0,“Don’t accept.”,accept,see,“Don’t see.”,< UNK > AKZEPTIERE nicht . < UNK >,MAN KANN ES nicht SEHEN .,"[<, unk, >, Akzeptiere, nicht, ., <, unk, >]","[Man, kann, es, nicht, sehen, .]","[(replace, 0, 4, 0, 3), (equal, 4, 5, 3, 4), (...",...,False,,,,"{'“': '>', 'Don': 'unk', 't': 'unk', 'accept':...","{'t': 'nicht', 'see': 'sehen', '.': '.'}",,0.547723,0.547723,0
0,0,“Don’t accept.”,accept,do,“Don’t do.”,< UNK > AKZEPTIERE nicht . < UNK >,TUT ES ABER nicht .,"[<, unk, >, Akzeptiere, nicht, ., <, unk, >]","[Tut, es, aber, nicht, .]","[(replace, 0, 4, 0, 3), (equal, 4, 6, 3, 5), (...",...,False,,,,"{'“': '>', 'Don': 'unk', 't': 'unk', 'accept':...","{'Don': 'Tut', 'do': 'nicht', '.': '.'}",,0.547723,0.547723,0


In [13]:
output[['SRC_index', 'SRC', 'original_word', 'perturbed_word', 'SRC_perturbed',
       'OriginalSRC-Trans', 'SRC_perturbed-Trans', 'changes']].head()

Unnamed: 0,SRC_index,SRC,original_word,perturbed_word,SRC_perturbed,OriginalSRC-Trans,SRC_perturbed-Trans,changes
0,0,“Don’t accept.”,accept,have,“Don’t have.”,< UNK > AKZEPTIERE nicht . < UNK >,HABEN SIE nicht .,"[(replace, < unk > Akzeptiere, Haben sie), (de..."
0,0,“Don’t accept.”,accept,make,“Don’t make.”,< UNK > AKZEPTIERE NICHT . < UNK >,MACHT NICHTS .,"[(replace, < unk > Akzeptiere nicht, Macht nic..."
0,0,“Don’t accept.”,accept,know,“Don’t know.”,< UNK > AKZEPTIERE nicht . < UNK >,ICH WEISS ES nicht .,"[(replace, < unk > Akzeptiere, Ich weiß es), (..."
0,0,“Don’t accept.”,accept,see,“Don’t see.”,< UNK > AKZEPTIERE nicht . < UNK >,MAN KANN ES nicht SEHEN .,"[(replace, < unk > Akzeptiere, Man kann es), (..."
0,0,“Don’t accept.”,accept,do,“Don’t do.”,< UNK > AKZEPTIERE nicht . < UNK >,TUT ES ABER nicht .,"[(replace, < unk > Akzeptiere, Tut es aber), (..."


In [38]:
import string

def lower_remove_non_alphabet(input_str):
    translation = input_str.maketrans(dict.fromkeys(string.punctuation, ' '))
    return input_str.translate(translation).lower()

In [103]:
def is_due_to_perturbation(change, original_word, perturbed_word, 
                           perturbed_trans_alignment_dict, original_trans_alignment_dict):
    """
    A change in translation is directly due to perturbation if the (aligned) translation of perturbed_word
    is in changed_part AND the (aligned) translation of original_word is in original_part
    
    Params:
        change: tuple of (change_type, original_trans_part, changed_trans_part)
        original_word: original word in the SRC that was perturbed
        perturbed_word: the replacement of the original word
        perturbed_trans_alignment_dict: {src_word1:trans_word1, src_word2:trans_word2,...} of the perturbed trans
        original_trans_alignment_dict: {src_word1:trans_word1, src_word2:trans_word2,...} of the original trans
    Return: bool, pd.NA in failed alignment case
    """
    # Turn everything to lowercase, and remove any non-alphabet characters
    change_type, original_trans_part, changed_trans_part = \
        change[0], lower_remove_non_alphabet(change[1]), lower_remove_non_alphabet(change[2])
    perturbed_trans_alignment_dict = dict(
        (lower_remove_non_alphabet(k).replace(' ', ''), lower_remove_non_alphabet(v).replace(' ', '')) for k,v in perturbed_trans_alignment_dict.items()
    )
    original_trans_alignment_dict = dict(
        (lower_remove_non_alphabet(k).replace(' ', ''), lower_remove_non_alphabet(v).replace(' ', '')) for k,v in original_trans_alignment_dict.items()
    )
    original_word = lower_remove_non_alphabet(original_word)
    perturbed_word = lower_remove_non_alphabet(perturbed_word)
    

    perturbed_word_appears_in_new_trans = pd.NA
    if perturbed_word in perturbed_trans_alignment_dict.keys():
        perturbed_word_trans = perturbed_trans_alignment_dict[perturbed_word]
        if perturbed_word_trans in changed_trans_part.split():
            perturbed_word_appears_in_new_trans = True
        else:
            perturbed_word_appears_in_new_trans = False
            
    # Missed-translation, or name-specific case
    if perturbed_word in changed_trans_part.split():
        perturbed_word_appears_in_new_trans = True
            

    original_word_appears_in_old_trans = pd.NA
    if original_word in original_trans_alignment_dict.keys():
        original_word_trans = original_trans_alignment_dict[original_word]
        if original_word_trans in original_trans_part.split():
            original_word_appears_in_old_trans = True
        else:
            original_word_appears_in_old_trans = False
        
        if perturbed_word in perturbed_trans_alignment_dict.keys():
            if original_word == 'fort' and perturbed_word == 'île' and change == ('replace', 'Fort-de-France', 'Île-de-France'):
                print('-------------------------')
                print(change)
                print('-' + original_word_trans + '-')
                print('-' + perturbed_word_trans + '-')
                print(original_word_appears_in_old_trans)
                print(perturbed_word_appears_in_new_trans)
                
    # Missed-translation, or name-specific case
    if original_word in original_trans_part.split():
        original_word_appears_in_old_trans = True
            
    # If perturbed_word_appears_in_new_trans or original_word_appears_in_old_trans is true, then 
    # is_due_to_perturbation is true
    if (not pd.isnull(perturbed_word_appears_in_new_trans)) and \
        (not pd.isnull(original_word_appears_in_old_trans)):
        return (perturbed_word_appears_in_new_trans or original_word_appears_in_old_trans)
    elif (pd.isnull(perturbed_word_appears_in_new_trans)) and \
        (not pd.isnull(original_word_appears_in_old_trans)):
        if original_word_appears_in_old_trans:
            return True
        else:
            return pd.NA
    elif (not pd.isnull(perturbed_word_appears_in_new_trans)) and \
        (pd.isnull(original_word_appears_in_old_trans)):
        if perturbed_word_appears_in_new_trans:
            return True
        else:
            return pd.NA
    else:
        return pd.NA
    
    
def filter_changes(group_df):
    changes = []
    
    for index, row in group_df.iterrows():
        for change in row['changes']:
            # Filter out the changes caused by perturbation
            is_due_to_perturbation_out = is_due_to_perturbation(
                                            change, 
                                            row['original_word'], 
                                            row['perturbed_word'], 
                                            row['perturbed_trans_alignment'],
                                            row['original_trans_alignment']
                                        )
            if pd.isnull(is_due_to_perturbation_out) or is_due_to_perturbation_out:
                continue
                
            # Filter out the weird <unk>
            if change == ('delete', '< unk >', '') or change == ('insert', '', '< unk >'):
                continue
                
            # Filter out the changes that are not content-related
            all_pos_tags = [t.pos_ for t in spacy_model(change[1])] + [t.pos_ for t in spacy_model(change[2])]
            content_related_tags = 'NOUN', 'VERB', 'ADJ', 'PRON'
            overlap = not set(all_pos_tags).isdisjoint(content_related_tags)
            if not overlap:
                continue
                
            changes.append(change)
            
            
    return changes



In [104]:
from collections import Counter


def find_max_freq_change(group_df):
    """
    Params: 
        group_df: the group of results that has the same original_word and perturbed_word
    """
    assert group_df['original_word'].value_counts().shape[0] == 1  # Because this function is for a single group
    assert group_df['perturbed_word'].value_counts().shape[0] == 1  # Because this function is for a single group
    
    # Filter out the changes that are not directly due to perturbation
    all_changes = filter_changes(group_df)
    
    freq_changes = Counter(all_changes)
    
    if len(freq_changes.most_common()) == 0:
        return 0
    return freq_changes.most_common(1)[0][1]

change_freq = output.groupby(
    ['original_word', 'perturbed_word'], as_index=False
).apply(find_max_freq_change).rename(columns={None: 'max_change_freq'}).sort_values(
    by='max_change_freq', ascending=False)
    

change_freq = change_freq[change_freq['perturbed_word'].apply(lambda x: x.isalpha())]

change_freq.head(10)

Unnamed: 0,original_word,perturbed_word,max_change_freq
624,california,you,21
3913,ranked,The,16
3910,ranked,Book,15
3984,reign,school,15
1675,excuse,Believe,14
3980,reign,Academy,14
3981,reign,School,14
3982,reign,University,14
2073,grade,sons,12
2072,grade,imperial,11


In [105]:
groups = output.groupby(['original_word', 'perturbed_word'])
groups_as_list = [(original_perturb, group) for original_perturb, group in groups]
re_ordered_groupes = [groups_as_list[i] for i in change_freq.index.values]

for original_perturb, group in re_ordered_groupes:
    print("----------------------")
    print(f"original SRC word: {original_perturb[0]}")
    print(f"perturbed SRC word: {original_perturb[1]}")
    all_changes = filter_changes(group)
    freq_changes = Counter(all_changes)
    print(freq_changes.most_common(2))

----------------------
original SRC word: california
perturbed SRC word: you
[(('replace', 'USA', 'Vereinigte Staaten'), 21), (('replace', 'vom Zensus', 'von der Volkszählung'), 1)]
----------------------
original SRC word: ranked
perturbed SRC word: The
[(('replace', 'Grades', 'Klasse'), 16), (('insert', '', 'Grad'), 16)]
----------------------
original SRC word: ranked
perturbed SRC word: Book
[(('insert', '', 'Grad'), 15), (('replace', 'Jiajing-Herrschaft', 'Jiajing Herrschaft'), 8)]
----------------------
original SRC word: reign
perturbed SRC word: school
[(('replace', 'Grades', 'Klasse'), 15), (('replace', 'Rang', 'Platz'), 14)]
----------------------
original SRC word: excuse
perturbed SRC word: Believe
[(('insert', '', 'mir'), 14), (('insert', '', 'uns'), 4)]
----------------------
original SRC word: reign
perturbed SRC word: Academy
[(('replace', 'Rang', 'Platz'), 14), (('replace', 'Klasse', 'Grades'), 3)]
----------------------
original SRC word: reign
perturbed SRC word: Sch

[(('replace', 'E-Mail-Adresse', 'E-Mail-Nummer'), 6), (('replace', 'St.-Blaise-Straße', 'St-Blaise-Straße'), 1)]
----------------------
original SRC word: act
perturbed SRC word: move
[(('insert', '', 'sich'), 6), (('insert', '', 'uns'), 4)]
----------------------
original SRC word: amendment
perturbed SRC word: floor
[(('replace', 'unterstützen', 'stützen'), 6), (('replace', 'unterstützen', 'ergreifen'), 5)]
----------------------
original SRC word: excuse
perturbed SRC word: forgive
[(('insert', '', 'mir'), 6), (('insert', '', 'uns'), 2)]
----------------------
original SRC word: clos
perturbed SRC word: Rue
[(('delete', 'Straße', ''), 6), (('replace', 'Straße', 'Street'), 2)]
----------------------
original SRC word: sure
perturbed SRC word: glad
[(('delete', 'mir', ''), 6), (('insert', '', 'es'), 2)]
----------------------
original SRC word: ranked
perturbed SRC word: Chapter
[(('insert', '', 'Grad'), 6), (('replace', 'belegte', 'war'), 5)]
----------------------
original SRC word:

[(('insert', '', 'sich'), 4), (('replace', 'jeder Kreuzung', 'jedem Übergang'), 1)]
----------------------
original SRC word: wanli
perturbed SRC word: study
[(('replace', 'dritten', 'dritte'), 4), (('insert', '', 'Sie'), 3)]
----------------------
original SRC word: eighteen
perturbed SRC word: The
[(('delete', 'Straße', ''), 4), (('insert', '', 'in-acht'), 1)]
----------------------
original SRC word: wanli
perturbed SRC word: school
[(('replace', 'dritten', 'dritte'), 4), (('insert', '', 'Sie'), 3)]
----------------------
original SRC word: nowadays
perturbed SRC word: France
[(('delete', 'es', ''), 4), (('delete', 'Politiker', ''), 1)]
----------------------
original SRC word: fight
perturbed SRC word: defend
[(('insert', '', 'sich'), 4), (('insert', '', 'die Bewältigung einer'), 1)]
----------------------
original SRC word: threw
perturbed SRC word: lowered
[(('insert', '', 'fallen'), 4), (('replace', 'Tüten', 'Taschen herunter'), 2)]
----------------------
original SRC word: ate


[(('delete', 'Straße', ''), 3), (('replace', 'Straße', 'Street'), 3)]
----------------------
original SRC word: spent
perturbed SRC word: used
[(('insert', '', 'verbracht'), 3), (('delete', 'nicht geschlafen und', ''), 1)]
----------------------
original SRC word: prefecture
perturbed SRC word: Province
[(('replace', 'Bezirk', 'Kreis'), 3), (('replace', 'der Stadt', 'Stadtebene'), 1)]
----------------------
original SRC word: move
perturbed SRC word: come
[(('delete', 'sich', ''), 3), (('replace', 'Zeit', ', um'), 1)]
----------------------
original SRC word: told
perturbed SRC word: wants
[(('insert', '', 'wissen'), 3), (('replace', 'davon', 'darüber Bescheid wissen'), 1)]
----------------------
original SRC word: hurry
perturbed SRC word: get
[(('replace', 'sich', 'auf'), 3), (('replace', 'Kreuzaugen', 'schielen'), 1)]
----------------------
original SRC word: turn
perturbed SRC word: get
[(('replace', 'Jetzt', 'Es'), 3), (('replace', 'Nun', 'Es'), 3)]
----------------------
original

[(('replace', 'er', 'sie'), 3), (('replace', 'Steuerung', 'Leitung'), 1)]
----------------------
original SRC word: hour
perturbed SRC word: day
[(('replace', 'einer', 'einem'), 3), (('replace', 'weinte', 'habe'), 1)]
----------------------
original SRC word: breton
perturbed SRC word: English
[(('replace', 'Herr', 'Mister'), 3), (('replace', 'angeboten', 'vorgeschlagen'), 1)]
----------------------
original SRC word: change
perturbed SRC word: mean
[(('delete', 'sich', ''), 3), (('replace', 'in dieser Angelegenheit', 'dazu'), 1)]
----------------------
original SRC word: vote
perturbed SRC word: speak
[(('insert', '', 'sich'), 3), (('insert', '', 'mich'), 3)]
----------------------
original SRC word: effort
perturbed SRC word: step
[(('delete', 'sich', ''), 3), (('insert', '', 'Nutzung von'), 1)]
----------------------
original SRC word: sat
perturbed SRC word: looked
[(('delete', 'sich', ''), 3), (('replace', 'einer dunklen', 'eine dunkle'), 1)]
----------------------
original SRC wo

[(('replace', 'wünschte', 'hoffe'), 3), (('delete', 'es', ''), 2)]
----------------------
original SRC word: amendment
perturbed SRC word: other
[(('delete', 'Herr', ''), 3), (('insert', '', 'sich'), 2)]
----------------------
original SRC word: deal
perturbed SRC word: work
[(('delete', 'sich', ''), 3), (('delete', 'es', ''), 2)]
----------------------
original SRC word: play
perturbed SRC word: turn
[(('insert', '', 'sich'), 3), (('insert', '', 'mich'), 3)]
----------------------
original SRC word: express
perturbed SRC word: extend
[(('replace', 'spreche', 'möchte'), 3), (('replace', 'Diese Änderung', 'Dieser Änderungsantrag'), 1)]
----------------------
original SRC word: afraid
perturbed SRC word: scared
[(('replace', 'fürchte', 'habe Angst'), 3), (('replace', 'bevorzugte', 'zog es vor ,'), 1)]
----------------------
original SRC word: careful
perturbed SRC word: sure
[(('insert', '', 'sich'), 3), (('insert', '', 'Sie'), 1)]
----------------------
original SRC word: responsibility

[(('replace', 'universalen', 'universellen'), 2), (('replace', 'viertel', 'Viertel'), 1)]
----------------------
original SRC word: thank
perturbed SRC word: Oh
[(('replace', 'Ihre', 'deine'), 2), (('replace', 'Sie', 'dich'), 1)]
----------------------
original SRC word: science
perturbed SRC word: non
[(('replace', 'sie', 'es'), 2), (('replace', 'd.h.', 'also'), 1)]
----------------------
original SRC word: deal
perturbed SRC word: cope
[(('delete', 'es', ''), 2), (('delete', 'sich', ''), 2)]
----------------------
original SRC word: card
perturbed SRC word: account
[(('replace', 'sie', 'es'), 2), (('replace', 'einer', 'einem'), 1)]
----------------------
original SRC word: summer
perturbed SRC word: Piano
[(('delete', 'es', ''), 2), (('replace', 'werde', 'richte'), 1)]
----------------------
original SRC word: church
perturbed SRC word: the
[(('replace', 'universalen', 'universellen'), 2), (('replace', 'lang andauernden', 'langwieriger'), 1)]
----------------------
original SRC word:

[(('replace', 'dafür', 'für die Tatsache'), 2), (('replace', 'dick ist', 'dicken Hebel unter Wasser zu setzen'), 1)]
----------------------
original SRC word: issue
perturbed SRC word: question
[(('replace', 'dieses', 'diese'), 2), (('replace', 'Menschen', 'Belgrader haben'), 1)]
----------------------
original SRC word: friends
perturbed SRC word: sisters
[(('delete', 'mich', ''), 2), (('replace', 'schweigende', 'schweigsame'), 1)]
----------------------
original SRC word: front
perturbed SRC word: behalf
[(('replace', 'dem Banker', 'des Bankers'), 2), (('replace', 'stand', 'war'), 1)]
----------------------
original SRC word: teach
perturbed SRC word: taught
[(('replace', 'füttest', 'fütterst'), 2), (('replace', 'Neulinge', 'Novizen'), 1)]
----------------------
original SRC word: center
perturbed SRC word: middle
[(('insert', '', 'der Aufmerksamkeit'), 2), (('replace', 'zur Verfügung zu stellen', 'bereitzustellen'), 1)]
----------------------
original SRC word: subject
perturbed SRC

[(('replace', 'gebürtig', 'stammt'), 2), (('replace', 'Nördlichen Wei-Dynastie', 'nördlichen Wei-Provinz'), 2)]
----------------------
original SRC word: taiwan
perturbed SRC word: Yunnan
[(('delete', 'District', ''), 2), (('replace', 'Leitung', 'richterlichen Aufsicht'), 1)]
----------------------
original SRC word: characters
perturbed SRC word: men
[(('replace', 'geschaffen haben', 'schufen'), 2), (('replace', 'Mr.', 'Herr'), 2)]
----------------------
original SRC word: characters
perturbed SRC word: people
[(('replace', 'schufen', 'geschaffen haben'), 2), (('replace', 'Mr.', 'Herr'), 2)]
----------------------
original SRC word: characters
perturbed SRC word: script
[(('replace', 'geschaffen haben', 'schufen'), 2), (('replace', 'sind nachfolgend aufgelistet', 'ist unten aufgeführt'), 1)]
----------------------
original SRC word: characters
perturbed SRC word: things
[(('replace', 'schufen', 'geschaffen haben'), 2), (('replace', 'Mr.', 'Herr'), 2)]
----------------------
original S

[(('replace', 'übernehmen', 'erfüllen'), 2), (('replace', 'Infrastruktur', 'Infrastrukturen'), 1)]
----------------------
original SRC word: hey
perturbed SRC word: No
[(('replace', 'repariert', 'fixiert'), 2), (('replace', 'deine', 'Ihre'), 1)]
----------------------
original SRC word: points
perturbed SRC word: percent
[(('replace', 'das Stadium', 'die Stufe'), 2), (('replace', 'werde', 'antworte'), 2)]
----------------------
original SRC word: pont
perturbed SRC word: Val
[(('replace', 'Street', 'Straße'), 2), (('replace', 'an der Hausnummer', 'auf Platz'), 1)]
----------------------
original SRC word: sir
perturbed SRC word: please
[(('replace', 'Knecht', 'Diener'), 2), (('replace', 'behalten Sie', 'passt auf'), 1)]
----------------------
original SRC word: desire
perturbed SRC word: ability
[(('replace', 'nach', 'auf die Insel'), 2), (('replace', 'werden', 'gewähren'), 1)]
----------------------
original SRC word: philippe
perturbed SRC word: He
[(('replace', 'unterstützt', 'zu un

[(('replace', 'Gestalt', 'Figur'), 2), (('replace', 'steht', 'gibt es'), 1)]
----------------------
original SRC word: cross
perturbed SRC word: multi
[(('replace', 'Gestalt', 'Figur'), 2), (('replace', 'Wir sollten', 'Lassen Sie uns'), 1)]
----------------------
original SRC word: cross
perturbed SRC word: name
[(('replace', 'Gestalt', 'Figur'), 2), (('replace', 'Grenze', 'Linie'), 1)]
----------------------
original SRC word: defense
perturbed SRC word: death
[(('replace', 'einer', 'eines'), 2), (('replace', 'verstärken', 'bekräftigen'), 1)]
----------------------
original SRC word: position
perturbed SRC word: opinion
[(('replace', 'Arme', 'Waffen'), 2), (('replace', 'Außenministers', 'Staatssekretärs'), 1)]
----------------------
original SRC word: haven
perturbed SRC word: won
[(('replace', 'getan', 'tun'), 2), (('replace', 'gesagt', 'sagen'), 2)]
----------------------
original SRC word: defend
perturbed SRC word: pass
[(('replace', 'Nummer', 'Nr .'), 2), (('insert', '', 'nicht i

[(('replace', 'sich', 'auf'), 2), (('replace', 'haben', 'befinden'), 1)]
----------------------
original SRC word: human
perturbed SRC word: the
[(('delete', 'wiederherzustellen', ''), 2), (('replace', 'das Potenzial der Zivilgesellschaft', 'zur Wiederherstellung des zivilgesellschaftlichen Potenzials'), 1)]
----------------------
original SRC word: human
perturbed SRC word: national
[(('delete', 'wiederherzustellen', ''), 2), (('replace', 'das Potenzial der Zivilgesellschaft', 'zur Wiederherstellung des zivilgesellschaftlichen Potenzials'), 1)]
----------------------
original SRC word: palace
perturbed SRC word: Temple
[(('replace', 'Vitelleschi-Palastes', 'Vitelleschi-Tempels'), 2), (('replace', 'Ger-Palast', 'Ger-Tempel'), 1)]
----------------------
original SRC word: district
perturbed SRC word: department
[(('replace', 'in der Russischen', ', Russische'), 2), (('delete', 'gehört', ''), 2)]
----------------------
original SRC word: district
perturbed SRC word: commune
[(('replace',

[(('insert', '', 'ihn'), 2), (('delete', 'es wird', ''), 1)]
----------------------
original SRC word: past
perturbed SRC word: night
[(('delete', 'verändert', ''), 2), (('replace', 'Wie', 'Italien spielt wie'), 1)]
----------------------
original SRC word: honor
perturbed SRC word: be
[(('replace', 'Dienst', 'Dienstleistung'), 2), (('replace', 'Tagsüber muss er', 'Er braucht tagsüber'), 1)]
----------------------
original SRC word: situation
perturbed SRC word: world
[(('replace', 'ändern', 'verändern'), 2), (('replace', 'ändert', 'verändert'), 1)]
----------------------
original SRC word: dinner
perturbed SRC word: breakfast
[(('replace', 'aßen', 'frühstückten'), 2), (('replace', 'wir', 'Wir'), 1)]
----------------------
original SRC word: computer
perturbed SRC word: science
[(('replace', 'einem', 'einer'), 2), (('insert', '', 'weiterer'), 1)]
----------------------
original SRC word: computer
perturbed SRC word: software
[(('replace', 'einem', 'einer'), 2), (('replace', 'Regierungs

[(('replace', 'Drücken', 'Push'), 2), (('replace', 'befindet', 'sind'), 1)]
----------------------
original SRC word: arm
perturbed SRC word: neck
[(('delete', 'sich', ''), 2), (('replace', 'befindet', 'ist'), 1)]
----------------------
original SRC word: results
perturbed SRC word: it
[(('replace', 'erzielen', 'erreichen'), 2), (('replace', 'der', 'sie den'), 1)]
----------------------
original SRC word: rank
perturbed SRC word: position
[(('replace', 'Hofexamen', 'Hofprüfung'), 2), (('replace', 'erreichte', 'erreicht'), 1)]
----------------------
original SRC word: rank
perturbed SRC word: place
[(('replace', 'Platz', 'Jahr'), 2), (('replace', 'was', 'der'), 1)]
----------------------
original SRC word: watch
perturbed SRC word: turn
[(('delete', 'sich', ''), 2), (('delete', 'heute Abend', ''), 2)]
----------------------
original SRC word: watch
perturbed SRC word: see
[(('delete', 'heute Abend', ''), 2), (('replace', 'schaut', 'blickt'), 1)]
----------------------
original SRC word:

[(('replace', 'Das Publikum', 'Die Öffentlichkeit'), 2), (('replace', 'Vergessen-Taste', 'Vergissmeinnicht-Taste'), 1)]
----------------------
original SRC word: bar
perturbed SRC word: place
[(('replace', 'Das Publikum', 'Die Öffentlichkeit'), 2), (('replace', 'im Six', 'auf der Sechs'), 1)]
----------------------
original SRC word: bar
perturbed SRC word: restaurant
[(('replace', 'Das Publikum', 'Die Öffentlichkeit'), 2), (('replace', 'Vergessen-Taste', 'vergessen-Taste'), 1)]
----------------------
original SRC word: euros
perturbed SRC word: hundred
[(('replace', 'beträgt', 'liegt bei'), 2), (('replace', 'Der Tageslohn liegt bei', 'Die tägliche Auszahlung beträgt'), 1)]
----------------------
original SRC word: beauty
perturbed SRC word: way
[(('replace', 'verbergen', 'verstecken'), 2), (('insert', '', 'nachdenken'), 2)]
----------------------
original SRC word: leaves
perturbed SRC word: had
[(('insert', '', 'erreicht'), 2), (('replace', 'Pfad', 'Weg'), 1)]
----------------------


[(('replace', 'bösartige', 'bösartigen'), 2), (('replace', 'es', 'er'), 1)]
----------------------
original SRC word: accident
perturbed SRC word: explosion
[(('replace', 'der', 'einer'), 2), (('delete', 'einer', ''), 2)]
----------------------
original SRC word: accident
perturbed SRC word: idea
[(('delete', 'einer', ''), 2), (('replace', 'Dateien', 'Datendateien'), 1)]
----------------------
original SRC word: yesterday
perturbed SRC word: that
[(('insert', '', 'gingen'), 2), (('replace', 'zerbrochen', 'kaputt ging'), 1)]
----------------------
original SRC word: favor
perturbed SRC word: need
[(('replace', 'tun', 'erfüllen'), 2), (('replace', 'tue', 'mach'), 2)]
----------------------
original SRC word: relationship
perturbed SRC word: contact
[(('replace', 'diese', 'dieser'), 2), (('replace', 'in der Tat', 'tatsächlich'), 1)]
----------------------
original SRC word: favor
perturbed SRC word: charge
[(('replace', 'tun', 'machen'), 2), (('replace', 'tue', 'mach'), 2)]
--------------

[(('insert', '', 'Das Wort hat'), 2), (('delete', 'hat das Wort', ''), 2)]
----------------------
original SRC word: reporter
perturbed SRC word: President
[(('insert', '', 'Das Wort hat'), 2), (('delete', 'hat das Wort', ''), 2)]
----------------------
original SRC word: met
perturbed SRC word: saw
[(('delete', 'sich', ''), 2), (('replace', 'Ihrem', 'Ihr'), 1)]
----------------------
original SRC word: met
perturbed SRC word: worked
[(('delete', 'sich', ''), 2), (('replace', 'Ihrem', 'Ihr'), 1)]
----------------------
original SRC word: fall
perturbed SRC word: be
[(('insert', '', 'es'), 2), (('delete', 'sich', ''), 1)]
----------------------
original SRC word: aim
perturbed SRC word: goal
[(('replace', 'ist es', 'besteht darin'), 2), (('replace', 'Umweltverträglichkeitsprüfung', 'Umweltprüfung'), 1)]
----------------------
original SRC word: madam
perturbed SRC word: the
[(('replace', ', die Änderungsantrag', 'zur Unterstützung des Änderungsantrags'), 2), (('delete', 'unterstützt', '

[(('replace', 'sagen', 'erzählen'), 2), (('replace', 'sagen', 'erklären'), 2)]
----------------------
original SRC word: truth
perturbed SRC word: fact
[(('replace', 'sagen', 'erzählen'), 2), (('replace', 'ferne', 'entfernte'), 1)]
----------------------
original SRC word: trust
perturbed SRC word: understand
[(('replace', 'ihnen', 'sie'), 2), (('replace', 'meinem', 'meinen'), 1)]
----------------------
original SRC word: trust
perturbed SRC word: see
[(('replace', 'ihnen', 'sie'), 2), (('replace', 'meinem', 'meinen'), 1)]
----------------------
original SRC word: june
perturbed SRC word: July
[(('replace', 'wird', 'dauert'), 2), (('replace', 'regnete', 'hat'), 1)]
----------------------
original SRC word: ticket
perturbed SRC word: street
[(('replace', 'jedem Bahnhof', 'jeder Station'), 2), (('replace', 'es', 'sie'), 2)]
----------------------
original SRC word: time
perturbed SRC word: That
[(('replace', 'Es', 'Das'), 2), (('replace', 'es', 'dies'), 1)]
----------------------
origina

[(('insert', '', 'dass wir'), 2), (('delete', ', Vorschläge', ''), 1)]
----------------------
original SRC word: john
perturbed SRC word: you
[(('replace', 'Weiß', 'weiß'), 2), (('replace', 'hallo', 'Hallo'), 1)]
----------------------
original SRC word: thinks
perturbed SRC word: cares
[(('replace', 'zu wissen', 'weiß'), 2), (('insert', '', 'dass er'), 2)]
----------------------
original SRC word: thinks
perturbed SRC word: said
[(('insert', '', 'dass er'), 2), (('insert', '', 'er habe'), 2)]
----------------------
original SRC word: thinks
perturbed SRC word: says
[(('insert', '', 'er wisse'), 2), (('delete', 'zu wissen', ''), 2)]
----------------------
original SRC word: try
perturbed SRC word: learn
[(('delete', 'es', ''), 2), (('replace', 'Krämer', 'Krächer'), 1)]
----------------------
original SRC word: editorial
perturbed SRC word: publisher
[(('replace', 'Herr Präsident', 'Mister President'), 2), (('insert', '', ', Berichterstatter ,'), 2)]
----------------------
original SRC 

[(('insert', '', 'auf dem Tisch'), 2), (('delete', 'auf den Tisch', ''), 2)]
----------------------
original SRC word: read
perturbed SRC word: see
[(('replace', 'Sie haben', 'Man hat'), 1), (('insert', '', 'aussieht'), 1)]
----------------------
original SRC word: read
perturbed SRC word: were
[(('replace', 'hat es', 'gewonnen tut'), 1), (('replace', 'geschadet', 'weh'), 1)]
----------------------
original SRC word: read
perturbed SRC word: write
[(('replace', 'wird', 'weiß'), 1), (('replace', 'stand', 'war'), 1)]
----------------------
original SRC word: project
perturbed SRC word: work
[(('replace', 'wurde in zweieinhalb Jahren auf den Weg gebracht', 'eingeführt'), 1), (('replace', 'Immer', 'Kollaborieren Sie immer'), 1)]
----------------------
original SRC word: reading
perturbed SRC word: doing
[(('replace', 'bei der Vesperstunde', 'zur Vesperzeit'), 1), (('replace', 'Vergessen-Taste', 'vergessen-Taste'), 1)]
----------------------
original SRC word: reasons
perturbed SRC word: th

[(('delete', 'kleine Dörfer', ''), 1), (('delete', 'in Auftrag zu geben , die', ''), 1)]
----------------------
original SRC word: reasons
perturbed SRC word: concerned
[(('replace', 'dieser Art von Arrangement', 'einer derartigen Regelung'), 1), (('replace', 'negative', 'ablehnende'), 1)]
----------------------
original SRC word: refer
perturbed SRC word: belong
[(('replace', 'enthält', 'hat'), 1), (('replace', 'sich auf folgende', 'zu folgenden'), 1)]
----------------------
original SRC word: position
perturbed SRC word: mood
[(('replace', 'mitgeteilt', 'gesagt'), 1), (('delete', 'ihm ein', ''), 1)]
----------------------
original SRC word: receive
perturbed SRC word: hear
[(('replace', 'rechtfertigenden', 'rechtfertigende'), 1), (('replace', 'einen juristischen', 'ein Jurastudium'), 1)]
----------------------
original SRC word: receive
perturbed SRC word: read
[(('replace', 'rechtfertigenden', 'rechtfertigende'), 1), (('replace', 'einen juristischen', 'Jura'), 1)]
------------------

[(('replace', 'ausweiten', 'erweitern'), 1), (('insert', '', 'ich werde'), 1)]
----------------------
original SRC word: product
perturbed SRC word: creature
[(('replace', 'werden', 'konkurrieren'), 1), (('insert', '', 'es'), 1)]
----------------------
original SRC word: president
perturbed SRC word: law
[(('replace', ', Frau', 'Madam'), 1), (('replace', 'komme zum Schluss', 'bin am Ende'), 1)]
----------------------
original SRC word: pressure
perturbed SRC word: blood
[(('replace', 'eine Vorrichtung', 'ein Gerät'), 1), (('replace', '`` den Durchfluss', 'Durchflussrate'), 1)]
----------------------
original SRC word: rain
perturbed SRC word: and
[(('replace', 'folgte', ', gab es'), 1), (('replace', 'Es', '< unk > Das'), 1)]
----------------------
original SRC word: pressure
perturbed SRC word: it
[(('replace', 'lag', 'gab'), 1), (('replace', 'War', 'Hat sich'), 1)]
----------------------
original SRC word: race
perturbed SRC word: game
[(('replace', 'pünktlich', 'auf der Zeit'), 1), (

[(('replace', 'Frau', 'Madam'), 1), (('replace', 'ziehe', 'nehme'), 1)]
----------------------
original SRC word: power
perturbed SRC word: way
[(('replace', 'waren', ', mussten'), 1), (('replace', 'vertraut sind', 'kennen'), 1)]
----------------------
original SRC word: province
perturbed SRC word: city
[(('replace', 'unterstand der', ','), 1), (('replace', 'Im Osten', 'Östlich'), 1)]
----------------------
original SRC word: programs
perturbed SRC word: activities
[(('replace', 'verknüpft', 'verbunden'), 1), (('replace', 'zur Verfügung', 'zugänglich'), 1)]
----------------------
original SRC word: provide
perturbed SRC word: give
[(('replace', 'für erschwinglichere Produkte', 'an erschwinglicheren Produkten'), 1), (('delete', 'zur Verfügung', ''), 1)]
----------------------
original SRC word: program
perturbed SRC word: curriculum
[(('replace', 'durchgeführt werden', 'geführt'), 1), (('replace', 'nehmen an', 'stehen auf'), 1)]
----------------------
original SRC word: program
perturb

[(('replace', 'abbiegenden Richter', 'Abbieger'), 1), (('insert', '', 'sich'), 1)]
----------------------
original SRC word: production
perturbed SRC word: other
[(('replace', 'Lieferung', 'die Erbringung'), 1), (('replace', 'Überschuss', 'Exzess'), 1)]
----------------------
original SRC word: production
perturbed SRC word: Encyclopedia
[(('replace', 'Lieferung', 'der Bereitstellung'), 1), (('replace', 'Output', 'Ausgabe'), 1)]
----------------------
original SRC word: rain
perturbed SRC word: water
[(('replace', 'folgte', 'gab es'), 1), (('replace', 'möchte wissen', 'wüsste gern'), 1)]
----------------------
original SRC word: product
perturbed SRC word: the
[(('replace', 'dem', 'denen'), 1), (('replace', 'Suiten-Potenziale', 'Suite-Potenziale'), 1)]
----------------------
original SRC word: present
perturbed SRC word: provide
[(('insert', '', 'ihren Nutzern'), 1), (('replace', 'abbiegenden Richter', 'Abbiegespuren'), 1)]
----------------------
original SRC word: products
perturbed S

[(('replace', 'Dieses Werk', '`` Diese Arbeit'), 1), (('delete', 'Unendlichkeit', ''), 1)]
----------------------
original SRC word: nature
perturbed SRC word: me
[(('replace', 'Dieses Werk', '`` Diese Arbeit'), 1), (('replace', 'Unendlichkeit', 'unendlich'), 1)]
----------------------
original SRC word: nature
perturbed SRC word: nowhere
[(('replace', 'ging', 'stieg'), 1), (('replace', 'Unendlichkeit', 'unendlich'), 1)]
----------------------
original SRC word: need
perturbed SRC word: had
[(('replace', 'Baumjungen Informationen finden', 'Baumburschen informieren'), 1), (('insert', '', 'ohne Kundenzufriedenheit'), 1)]
----------------------
original SRC word: need
perturbed SRC word: have
[(('replace', 'Sehen Sie', 'Seht her'), 1), (('insert', '', ', das kann ich euch geben'), 1)]
----------------------
original SRC word: need
perturbed SRC word: try
[(('replace', 'rennen', 'zu laufen'), 1), (('replace', 'vor mir', 'unerbittlich zu beschreiten'), 1)]
----------------------
original SR

[(('replace', 'Chengmen-Kapitäns', 'Chengmen Kapitäns'), 1), (('replace', 'Das Königreich', 'Die Herrschaft'), 1)]
----------------------
original SRC word: north
perturbed SRC word: south
[(('replace', 'Chengmen-Kapitäns', 'Chengmen Hauptmanns'), 1), (('replace', 'beeinflusst', 'wirkt sich weiterhin auf'), 1)]
----------------------
original SRC word: north
perturbed SRC word: west
[(('replace', 'Chengmen-Kapitäns', 'Chengmen Kapitäns'), 1), (('replace', 'Das Königreich', 'Die Herrschaft'), 1)]
----------------------
original SRC word: note
perturbed SRC word: care
[(('delete', 'Ihnen', ''), 1), (('replace', 'es mir ermöglicht ,', 'ich'), 1)]
----------------------
original SRC word: note
perturbed SRC word: level
[(('delete', 'sich', ''), 1), (('replace', 'Ihnen', 'dir'), 1)]
----------------------
original SRC word: note
perturbed SRC word: message
[(('replace', 'schmelzen', 'zum Schmelzen'), 1), (('replace', 'dies', 'das'), 1)]
----------------------
original SRC word: note
perturb

[(('replace', 'außer', 'es neben'), 1), (('replace', 'gemacht', 'geschlossen'), 1)]
----------------------
original SRC word: noise
perturbed SRC word: blast
[(('replace', 'Herr', 'Sir'), 1), (('replace', 'kann', 'hält'), 1)]
----------------------
original SRC word: moreover
perturbed SRC word: however
[(('delete', 'es', ''), 1), (('replace', 'Personalisierung der grafischen', 'grafische'), 1)]
----------------------
original SRC word: moreover
perturbed SRC word: However
[(('delete', 'es', ''), 1), (('replace', 'Personalisierung der grafischen', 'grafische'), 1)]
----------------------
original SRC word: object
perturbed SRC word: subject
[(('replace', 'fiel', 'ging'), 1), (('replace', 'die Herkunft des', 'den Ursprung dieses'), 1)]
----------------------
original SRC word: meters
perturbed SRC word: inches
[(('insert', '', 'Säulen durchgeführt , die'), 1), (('replace', 'Meistens', 'Die meiste Zeit'), 1)]
----------------------
original SRC word: members
perturbed SRC word: leaders
[

[(('replace', 'Die', 'Ti ignorieren die'), 1), (('replace', 'gesamten', 'ganzen'), 1)]
----------------------
original SRC word: means
perturbed SRC word: meant
[(('replace', 'hat begonnen ,', 'wurde'), 1), (('replace', 'die Ära', 'das Zeitalter'), 1)]
----------------------
original SRC word: means
perturbed SRC word: was
[(('replace', 'Die', 'Ti ignorieren die'), 1), (('replace', 'gesamten', 'ganzen'), 1)]
----------------------
original SRC word: measure
perturbed SRC word: There
[(('insert', '', 'Es wird'), 1), (('replace', 'überrascht sein', 'überraschen'), 1)]
----------------------
original SRC word: measure
perturbed SRC word: add
[(('insert', '', 'Es wird'), 1), (('replace', 'überrascht sein', 'überraschen'), 1)]
----------------------
original SRC word: measure
perturbed SRC word: recorder
[(('insert', '', 'Es wird'), 1), (('replace', 'überrascht sein', 'überraschen'), 1)]
----------------------
original SRC word: measure
perturbed SRC word: system
[(('insert', '', 'Es wird')

[(('replace', 'sie leuchtete', 'es schien'), 1), (('replace', 'aufgestiegen', 'aufgegangen'), 1)]
----------------------
original SRC word: moon
perturbed SRC word: world
[(('replace', 'fahren', 'brechen'), 1), (('replace', 'leuchtete', 'strahlte'), 1)]
----------------------
original SRC word: mom
perturbed SRC word: father
[(('insert', '', 'frühen'), 1), (('replace', ', sie', '. Sie'), 1)]
----------------------
original SRC word: model
perturbed SRC word: share
[(('replace', 'erschien', 'schien'), 1), (('replace', 'glorreiche', 'der glorreichen'), 1)]
----------------------
original SRC word: mind
perturbed SRC word: runs
[(('insert', '', 'Was stimmt nicht :'), 1), (('replace', 'suchte', 'suchten'), 1)]
----------------------
original SRC word: model
perturbed SRC word: movement
[(('replace', 'glorreiche', 'ruhmreicher'), 1), (('replace', 'es', 'sie'), 1)]
----------------------
original SRC word: mind
perturbed SRC word: soul
[(('replace', 'eines', 'einer'), 1), (('replace', 'kamen

[(('replace', 'perfektes', 'perfektioniertes'), 1), (('replace', 'es', 'das'), 1)]
----------------------
original SRC word: perfect
perturbed SRC word: little
[(('replace', 'perfektes', 'perfektioniertes'), 1), (('replace', 'irgendetwas', 'etwas'), 1)]
----------------------
original SRC word: perfect
perturbed SRC word: right
[(('replace', 'tiefen', 'tief verwurzelten'), 1), (('insert', '', 'es'), 1)]
----------------------
original SRC word: performance
perturbed SRC word: use
[(('replace', 'Vorbereitung', 'Zubereitung'), 1), (('replace', 'Organisationsgruppen', 'Organisationen Gruppen'), 1)]
----------------------
original SRC word: parliament
perturbed SRC word: Commission
[(('replace', 'einem Gemeinschaftsgericht', 'eines Gemeinschaftsgerichts'), 1), (('replace', 'adliger Träger', 'Adel'), 1)]
----------------------
original SRC word: performance
perturbed SRC word: work
[(('replace', 'Es', 'Das'), 1), (('replace', 'Der zentrale', 'Den zentralen'), 1)]
----------------------
orig

[(('replace', 'lassen', 'bringen'), 1), (('replace', 'verlangten zum Zeitpunkt', 'hielten kurz inne und fragten im Moment'), 1)]
----------------------
original SRC word: pay
perturbed SRC word: give
[(('delete', 'an wen', ''), 1), (('replace', 'verlangten zum Zeitpunkt', 'hielten kurz inne und baten im Moment'), 1)]
----------------------
original SRC word: phone
perturbed SRC word: missed
[(('replace', 'anrufen', 'vermissen'), 1), (('replace', 'fing', 'erwischte'), 1)]
----------------------
original SRC word: pick
perturbed SRC word: bring
[(('replace', 'dir gehören', 'eure sind'), 1), (('replace', 'das Chips-Paket', 'die Chips-Packung'), 1)]
----------------------
original SRC word: pick
perturbed SRC word: had
[(('insert', '', 'spielen'), 1), (('replace', 'ab', 'gepackt'), 1)]
----------------------
original SRC word: plant
perturbed SRC word: fish
[(('replace', 'Gehen Sie hinauf zur', 'Steh auf zu den'), 1), (('replace', 'Triebe', 'Sprossen'), 1)]
----------------------
original 

[(('replace', 'Foata-Platz', 'Foata-Rang'), 1), (('replace', 'erwacht', 'erweckt wird'), 1)]
----------------------
original SRC word: place
perturbed SRC word: seat
[(('replace', 'Foata-Platz', 'Foata-Sitz'), 1), (('replace', 'hundert', 'einhundert'), 1)]
----------------------
original SRC word: places
perturbed SRC word: areas
[(('replace', 'Welt', 'Erde'), 1), (('replace', 'an pädagogischen', 'in Bildungs-'), 1)]
----------------------
original SRC word: places
perturbed SRC word: cities
[(('replace', 'der Klappstühle .', 'von Klappstühlen'), 1), (('delete', 'wiederkehrende', ''), 1)]
----------------------
original SRC word: places
perturbed SRC word: occasions
[(('replace', 'Reihen der Klappstühle', 'Stuhlreihen'), 1), (('replace', 'Erstmals', 'Zum ersten Mal'), 1)]
----------------------
original SRC word: places
perturbed SRC word: seats
[(('delete', 'wiederkehrende', ''), 1), (('replace', 'Man denke', 'Schauen Sie sich'), 1)]
----------------------
original SRC word: plan
pert

[(('replace', 'höre auf', 'werde aufhören'), 1), (('replace', 'draußen bei der Oma', 'außerhalb von Omas'), 1)]
----------------------
original SRC word: okay
perturbed SRC word: Well
[(('replace', 'höre auf', 'werde aufhören'), 1), (('replace', "los geht 's", 'fangen wir an'), 1)]
----------------------
original SRC word: okay
perturbed SRC word: Oh
[(('replace', 'bete', 'komme . Bete'), 1), (('replace', 'Sie müssen', 'Du musst'), 1)]
----------------------
original SRC word: objective
perturbed SRC word: job
[(('replace', 'die Streichung von', 'es ,'), 1), (('insert', '', 'des Quartals zu streichen'), 1)]
----------------------
original SRC word: objective
perturbed SRC word: purpose
[(('replace', 'Novelle', 'Änderung'), 1), (('insert', '', 'des Quartals'), 1)]
----------------------
original SRC word: october
perturbed SRC word: August
[(('replace', 'eine Zunahme', 'einen Anstieg'), 1), (('replace', 'Erstmals präsentiert', '< unk > Erstaufführung'), 1)]
----------------------
origin

[(('replace', '`` Nimm', '-Nimm'), 1), (('replace', 'Grigori', 'Grigoriy'), 1)]
----------------------
original SRC word: paper
perturbed SRC word: friends
[(('replace', 'durchsuchte', 'sortierte'), 1), (('replace', '`` Nimm', '-Nimm'), 1)]
----------------------
original SRC word: paper
perturbed SRC word: table
[(('replace', 'durchsuchte', 'sortierte'), 1), (('replace', 'Schublade', 'ihrer Schubladen von'), 1)]
----------------------
original SRC word: paper
perturbed SRC word: valid
[(('replace', '`` Nimm', '-Nehmen Sie'), 1), (('replace', 'reichte', 'händigte'), 1)]
----------------------
original SRC word: parents
perturbed SRC word: family
[(('replace', 'kamen', 'kam'), 1), (('replace', 'kümmerten', 'kümmerte'), 1)]
----------------------
original SRC word: parents
perturbed SRC word: father
[(('replace', 'kamen', 'kam'), 1), (('replace', 'kümmerten', 'kümmerte'), 1)]
----------------------
original SRC word: parents
perturbed SRC word: friends
[(('replace', 'Änderungen ausnahmsl

[(('delete', 'ertönte', ''), 1), (('replace', 'gestickt', 'aufgestickt'), 1)]
----------------------
original SRC word: today
perturbed SRC word: it
[(('delete', 'wieder gebraucht', ''), 1), (('replace', 'Ich', 'Was mich'), 1)]
----------------------
original SRC word: today
perturbed SRC word: here
[(('replace', 'der', 'den es'), 1), (('replace', 'existiert', 'gibt'), 1)]
----------------------
original SRC word: today
perturbed SRC word: Now
[(('replace', 'heruntergeladen werden', 'besorgen'), 1), (('replace', 'habe ich siegreiche', 'einen siegreichen'), 1)]
----------------------
original SRC word: today
perturbed SRC word: However
[(('insert', '', 'hier anwesenden'), 1), (('replace', 'der', 'den es'), 1)]
----------------------
original SRC word: timur
perturbed SRC word: she
[(('replace', 'ging', 'brachte'), 1), (('replace', 'der', 'die vor ihm'), 1)]
----------------------
original SRC word: timur
perturbed SRC word: me
[(('delete', 'ertönte', ''), 1), (('replace', 'ging', 'brach

[(('delete', 'er hat', ''), 1), (('replace', 'du bist', 'man sei'), 1)]
----------------------
original SRC word: thirteen
perturbed SRC word: Paris
[(('insert', '', 'das Wort'), 1), (('delete', 'das Wort', ''), 1)]
----------------------
original SRC word: thirteen
perturbed SRC word: three
[(('delete', 'Straße', ''), 1), (('insert', '', 'Straße'), 1)]
----------------------
original SRC word: thirteen
perturbed SRC word: two
[(('replace', 'eintausend neunhundertvierundvierzig', 'eintausendneunhundert vierundvierzig'), 1), (('delete', 'brannte der Wald', ''), 1)]
----------------------
original SRC word: thought
perturbed SRC word: decided
[(('insert', '', 'mich'), 1), (('replace', 'wiederfinden', 'finden'), 1)]
----------------------
original SRC word: thought
perturbed SRC word: think
[(('replace', 'wiederfinden', 'finden'), 1), (('replace', 'sagt', 'hat'), 1)]
----------------------
original SRC word: thoughts
perturbed SRC word: emotions
[(('delete', 'nicht denken', ''), 1), (('in

[(('replace', 'es', 'ihn'), 1), (('replace', 'Mozilla-Koordinationsteam', 'Mozilla-Koordinierungsteam'), 1)]
----------------------
original SRC word: towards
perturbed SRC word: into
[(('replace', 'er allmählich', 'es'), 1), (('replace', 'abgelegenen Beckens', 'abgelegene Becken'), 1)]
----------------------
original SRC word: traffic
perturbed SRC word: car
[(('replace', 'Warnblinkanlage', 'den Betriebswarnleuchten'), 1), (('replace', 'gefährdet', 'in Gefahr'), 1)]
----------------------
original SRC word: town
perturbed SRC word: village
[(('replace', 'Tor', 'Gate'), 1), (('replace', 'In der', 'Eine Stunde lang fiel im'), 1)]
----------------------
original SRC word: town
perturbed SRC word: community
[(('replace', 'In', 'Eine Stunde lang fiel in'), 1), (('replace', 'khuree-Stadt', 'khuree-Gemeinde'), 1)]
----------------------
original SRC word: town
perturbed SRC word: city
[(('replace', 'Tor', 'Gate'), 1), (('replace', 'Eine', 'Die Messe kam für eine'), 1)]
----------------------

[(('insert', '', 'Nr .'), 1), (('insert', '', 'Das Wort haben Sie ,'), 1)]
----------------------
original SRC word: theater
perturbed SRC word: building
[(('replace', 'machen', 'bewerkstelligen'), 1), (('replace', 'Engelsprozession', 'Prozession von Engeln'), 1)]
----------------------
original SRC word: system
perturbed SRC word: project
[(('insert', '', ', die'), 1), (('replace', 'soziale', 'Vorbereitung des sozialen'), 1)]
----------------------
original SRC word: table
perturbed SRC word: fireplace
[(('replace', 'liegt', 'gibt'), 1), (('replace', 'Auf', 'Es gab Toffee auf'), 1)]
----------------------
original SRC word: table
perturbed SRC word: door
[(('replace', 'liegt', 'gibt'), 1), (('replace', 'Glocke', 'Klingel'), 1)]
----------------------
original SRC word: table
perturbed SRC word: desk
[(('replace', 'Glocke', 'Klingel'), 1), (('replace', 'Setzen', 'Sitzen'), 1)]
----------------------
original SRC word: table
perturbed SRC word: bed
[(('replace', 'liegt', 'gibt'), 1), ((

[(('insert', '', 'die Inspirierten'), 1), (('replace', 'erlangten', 'bekamen'), 1)]
----------------------
original SRC word: table
perturbed SRC word: floor
[(('replace', 'liegt', 'gibt'), 1), (('replace', 'aus dem Glas', 'des Glases'), 1)]
----------------------
original SRC word: taiwan
perturbed SRC word: Beijing
[(('replace', 'Leitung', 'richterlichen Aufsicht'), 1), (('replace', 'Fernsehsystem', 'Fernsehen'), 1)]
----------------------
original SRC word: taiwan
perturbed SRC word: China
[(('replace', ', Bezirk', 'im Kreis'), 1), (('replace', 'liegt', 'befindet sich'), 1)]
----------------------
original SRC word: ten
perturbed SRC word: one
[(('replace', 'Aufstehen', 'Steh auf'), 1), (('delete', 'estate', ''), 1)]
----------------------
original SRC word: term
perturbed SRC word: time
[(('replace', 'bezeichnet', 'bezieht sich'), 1), (('replace', 'in Bezug auf Technologie bereits verwendet', 'genutzt'), 1)]
----------------------
original SRC word: term
perturbed SRC word: sentenc

[(('replace', 'Jungs', 'Kerle'), 1), (('replace', 'Dafür', 'Ich'), 1)]
----------------------
original SRC word: talk
perturbed SRC word: go
[(('replace', 'Gleich', 'In Kürze'), 1), (('replace', 'über den', 'uns mit dem'), 1)]
----------------------
original SRC word: taiwan
perturbed SRC word: Japan
[(('replace', 'PRC', 'VR China'), 1), (('replace', 'Klasse', 'Note'), 1)]
----------------------
original SRC word: taxes
perturbed SRC word: attention
[(('insert', '', 'etwas'), 1), (('insert', '', ', Herr Laurent'), 1)]
----------------------
original SRC word: tea
perturbed SRC word: coffee
[(('delete', 'im Wesentlichen', ''), 1), (('insert', '', 'im Wesentlichen'), 1)]
----------------------
original SRC word: tea
perturbed SRC word: it
[(('delete', 'im Wesentlichen', ''), 1), (('insert', '', 'im Wesentlichen'), 1)]
----------------------
original SRC word: tea
perturbed SRC word: water
[(('delete', 'im Wesentlichen', ''), 1), (('insert', '', 'im Wesentlichen'), 1)]
-------------------

[(('replace', 'teilnehmen', 'anschließen'), 1), (('replace', 'schickten', 'sandte'), 1)]
----------------------
original SRC word: women
perturbed SRC word: men
[(('replace', 'Klagen', 'Wehklagen'), 1), (('replace', 'Horror', 'Schrecken der Kavallerie'), 1)]
----------------------
original SRC word: women
perturbed SRC word: children
[(('replace', 'Klagen', 'Wehklagen'), 1), (('replace', 'Horror', 'Schrecken der Kavallerie'), 1)]
----------------------
original SRC word: women
perturbed SRC word: I
[(('insert', '', 'der Kavallerie'), 1), (('replace', 'stürmte', 'angriff'), 1)]
----------------------
original SRC word: windows
perturbed SRC word: machines
[(('replace', 'lehnen', ', die sich'), 1), (('insert', '', 'lehnen'), 1)]
----------------------
original SRC word: windows
perturbed SRC word: it
[(('replace', 'lehnen', ', die sich'), 1), (('insert', '', 'lehnen'), 1)]
----------------------
original SRC word: windows
perturbed SRC word: feet
[(('replace', 'lehnen', ', die sich'), 1)

[(('replace', 'Pingdong-General', 'Pingdong General'), 1), (('replace', 'Jangtse-Flusses', 'Jangtse'), 1)]
----------------------
original SRC word: year
perturbed SRC word: season
[(('insert', '', 'lunaren'), 1), (('replace', 'Pingdong-General', 'Pingdong General'), 1)]
----------------------
original SRC word: year
perturbed SRC word: night
[(('replace', 'ersten', '1 .'), 1), (('replace', 'Erstmals', 'Zum ersten Mal'), 1)]
----------------------
original SRC word: year
perturbed SRC word: month
[(('insert', '', 'Mondmonat'), 1), (('replace', 'Pingdong-General', 'Pingdong General'), 1)]
----------------------
original SRC word: year
perturbed SRC word: day
[(('replace', 'Pingdong-General', 'Pingdong General'), 1), (('replace', 'Jangtse-Flusses', 'Jangtse'), 1)]
----------------------
original SRC word: yellow
perturbed SRC word: white
[(('delete', 'impliziert also', ''), 1), (('replace', 'Sind', 'Wurden'), 1)]
----------------------
original SRC word: yes
perturbed SRC word: No
[(('re

[(('replace', 'die Bekämpfung betrügerischer', 'darin bestehen , betrügerische'), 1), (('insert', '', 'zu bekämpfen'), 1)]
----------------------
original SRC word: wouldn
perturbed SRC word: couldn
[(('replace', 'aufgegangen', 'aufgehen können'), 1), (('replace', 'die Bekämpfung betrügerischer', 'darin bestehen , betrügerische'), 1)]
----------------------
original SRC word: wouldn
perturbed SRC word: didn
[(('replace', 'versuchen', 'versucht'), 1), (('delete', 'gehen', ''), 1)]
----------------------
original SRC word: wouldn
perturbed SRC word: don
[(('delete', 'nicht in der Lage ,', ''), 1), (('replace', 'umzusetzen', 'nicht umsetzen'), 1)]
----------------------
original SRC word: week
perturbed SRC word: night
[(('delete', 'Bildkarte , die', ''), 1), (('replace', 'zu sehen war -', 'befindliche Bildkarte --'), 1)]
----------------------
original SRC word: week
perturbed SRC word: day
[(('delete', 'gekocht', ''), 1), (('replace', 'Die Bezahlung', 'Bezahlt'), 1)]
-------------------

[(('replace', 'viele', 'eine Menge'), 1), (('insert', '', 'der Künste'), 1)]
----------------------
original SRC word: union
perturbed SRC word: movement
[(('replace', 'werden', 'bringen'), 1), (('replace', 'Infrastruktur', 'Infrastrukturen'), 1)]
----------------------
original SRC word: value
perturbed SRC word: significance
[(('replace', 'vom selben', 'von demselben'), 1), (('replace', 'steigern', 'erhöhen'), 1)]
----------------------
original SRC word: union
perturbed SRC word: Parliament
[(('replace', 'Treffen', 'Sitzung'), 1), (('delete', ', die eine schlechtere Note haben als ich', ''), 1)]
----------------------
original SRC word: union
perturbed SRC word: Council
[(('replace', 'dem letzten Treffen', 'der vorangegangenen Tagung'), 1), (('replace', 'Infrastruktur implementiert', 'Infrastrukturen eingeführt'), 1)]
----------------------
original SRC word: union
perturbed SRC word: Commission
[(('replace', 'Treffen', 'Sitzung'), 1), (('delete', 'der Straße', ''), 1)]
------------

[(('replace', 'aufgrund ungünstiger', 'wegen widriger'), 1), (('delete', ', gehen sie spazieren', ''), 1)]
----------------------
original SRC word: weather
perturbed SRC word: water
[(('replace', 'Wenn', 'Sie gehen spazieren , wenn'), 1), (('delete', ', gehen sie spazieren', ''), 1)]
----------------------
original SRC word: web
perturbed SRC word: Internet
[(('replace', 'Benutzer', 'Nutzer'), 1), (('replace', 'die Teilnahme an', 'Mitmachen in'), 1)]
----------------------
original SRC word: web
perturbed SRC word: The
[(('replace', 'Sie', 'Es'), 1), (('replace', 'Hersteller', 'Entscheidungsträger'), 1)]
----------------------
original SRC word: web
perturbed SRC word: internet
[(('replace', 'Benutzer', 'Nutzer'), 1), (('replace', 'die Teilnahme an', 'Mitmachen in'), 1)]
----------------------
original SRC word: web
perturbed SRC word: world
[(('replace', 'Benutzer', 'Nutzer'), 1), (('replace', 'die Teilnahme an', 'Mitmachen in'), 1)]
----------------------
original SRC word: wedding


[(('insert', '', 'es'), 1), (('replace', 'zu verleihen', 'hinzuzufügen'), 1)]
----------------------
original SRC word: secret
perturbed SRC word: tail
[(('insert', '', 'es'), 1), (('replace', 'zu verleihen', 'hinzuzufügen'), 1)]
----------------------
original SRC word: secret
perturbed SRC word: safe
[(('insert', '', 'bei sich'), 1), (('replace', 'zu verleihen', 'hinzuzufügen'), 1)]
----------------------
original SRC word: seconds
perturbed SRC word: weeks
[(('replace', 'treten', 'werden'), 1), (('replace', 'waren', 'saßen'), 1)]
----------------------
original SRC word: seconds
perturbed SRC word: moments
[(('replace', 'Kontakt', 'Berührung'), 1), (('replace', 'treten', 'werden'), 1)]
----------------------
original SRC word: seconds
perturbed SRC word: minutes
[(('replace', 'Kontakt', 'Berührung'), 1), (('replace', 'treten', 'werden'), 1)]
----------------------
original SRC word: security
perturbed SRC word: stability
[(('replace', 'Erörterung', 'Prüfung'), 1), (('replace', 'gewä

[(('insert', '', 'beigebracht'), 1), (('replace', 'Haben', 'Genießen'), 1)]
----------------------
original SRC word: school
perturbed SRC word: work
[(('replace', 'Anzahl', 'Zahl'), 1), (('delete', 'man', ''), 1)]
----------------------
original SRC word: science
perturbed SRC word: God
[(('replace', 'sie', 'man'), 1), (('replace', 'sie', 'er'), 1)]
----------------------
original SRC word: science
perturbed SRC word: It
[(('insert', '', 'es'), 1), (('insert', '', 'tun'), 1)]
----------------------
original SRC word: sea
perturbed SRC word: The
[(('replace', 'pfiff', 'pfeifte'), 1), (('insert', '', 'ich'), 1)]
----------------------
original SRC word: sea
perturbed SRC word: horizon
[(('replace', 'pfiff', 'pfeifte'), 1), (('replace', 'Verursacht einen Rückgang', 'Eine Verringerung'), 1)]
----------------------
original SRC word: sea
perturbed SRC word: water
[(('replace', 'ging', 'stieg'), 1), (('replace', 'Steigung', 'Hang'), 1)]
----------------------
original SRC word: search
pertu

[(('replace', 'Kommentare', 'Kommentar'), 1), (('replace', 'konsequentesten', 'folgerichtigsten'), 1)]
----------------------
original SRC word: sentences
perturbed SRC word: books
[(('replace', 'Vergehen', 'Beleidigung'), 1), (('replace', 'Überprüfungen durchgeführt', 'Rezensionen gemacht'), 1)]
----------------------
original SRC word: sentence
perturbed SRC word: term
[(('replace', 'abgeschaltete', 'abgetrennte'), 1), (('replace', 'Helfer', 'Assistenten'), 1)]
----------------------
original SRC word: sentence
perturbed SRC word: As
[(('replace', 'erwähnte', 'wurde'), 1), (('insert', '', 'erwähnt'), 1)]
----------------------
original SRC word: sent
perturbed SRC word: taken
[(('delete', 'seltenen', ''), 1), (('replace', 'Teams zum', 'am'), 1)]
----------------------
original SRC word: sent
perturbed SRC word: ordered
[(('insert', '', 'es wurden'), 1), (('replace', 'Botschafter', 'Botschaftern'), 1)]
----------------------
original SRC word: sent
perturbed SRC word: gave
[(('replace

[(('insert', '', 'mehrjährige'), 1), (('delete', 'über mehrere Jahre', ''), 1)]
----------------------
original SRC word: restaurant
perturbed SRC word: stadium
[(('replace', 'in meinen Kalender ein', 'hinzu'), 1), (('replace', 'gehen', 'werden'), 1)]
----------------------
original SRC word: respect
perturbed SRC word: protect
[(('replace', 'Stipendiaten sind', 'Stipendiaten-Kinder'), 1), (('replace', 'kulturelle Veränderungen', 'den kulturellen Wandel'), 1)]
----------------------
original SRC word: restaurant
perturbed SRC word: room
[(('replace', 'es', 'er'), 1), (('insert', '', 'ungefähre'), 1)]
----------------------
original SRC word: restaurant
perturbed SRC word: house
[(('replace', 'dich', 'Sie'), 1), (('insert', '', 'ungefähre'), 1)]
----------------------
original SRC word: rest
perturbed SRC word: good
[(('insert', '', 'gut tun'), 1), (('delete', 'auf dem Arm', ''), 1)]
----------------------
original SRC word: rest
perturbed SRC word: be
[(('replace', 'Kette', 'Halskette'

[(('replace', 'Boden', 'Terrain'), 1), (('insert', '', 'gewinnen Sie'), 1)]
----------------------
original SRC word: round
perturbed SRC word: around
[(('replace', 'Boden', 'Terrain'), 1), (('replace', 'harrte', 'hielt'), 1)]
----------------------
original SRC word: room
perturbed SRC word: office
[(('replace', 'den Moment', 'die Gunst der Stunde'), 1), (('replace', 'aufgefordert', 'angerufen'), 1)]
----------------------
original SRC word: room
perturbed SRC word: house
[(('replace', 'wagt es', 'traut sich'), 1), (('replace', 'habe', 'knipste'), 1)]
----------------------
original SRC word: roof
perturbed SRC word: shape
[(('replace', 'leuchten', 'scheinen'), 1), (('replace', 'hätten Sie', 'hättest du'), 1)]
----------------------
original SRC word: role
perturbed SRC word: position
[(('replace', 'spielen', 'sein'), 1), (('replace', 'besteht darin', 'ist es'), 1)]
----------------------
original SRC word: roof
perturbed SRC word: face
[(('replace', 'leuchten', 'scheinen'), 1), (('re

[(('replace', 'besteht', 'gibt es'), 1), (('replace', 'dass es langweilig wird', 'sich zu langweilen'), 1)]
----------------------
original SRC word: risk
perturbed SRC word: were
[(('replace', 'besteht', 'gibt es'), 1), (('replace', 'Sie', 'dich'), 1)]
----------------------
original SRC word: river
perturbed SRC word: Lake
[(('replace', 'zogen', 'schleppten'), 1), (('replace', 'Hundertdreizehn', 'Einhundertdreizehn'), 1)]
----------------------
original SRC word: river
perturbed SRC word: Rock
[(('replace', 'langsamer Pferde zogen', 'von langsamen Pferden schleppten'), 1), (('replace', 'führt', 'trägt'), 1)]
----------------------
original SRC word: river
perturbed SRC word: The
[(('replace', 'er', 'es'), 1), (('replace', 'führt', 'trägt'), 1)]
----------------------
original SRC word: river
perturbed SRC word: stream
[(('replace', 'zogen', 'schleppten'), 1), (('replace', 'Linie', 'Strecke'), 1)]
----------------------
original SRC word: road
perturbed SRC word: died
[(('replace', 'u

[(('replace', 'Um sein', 'Sein'), 1), (('replace', 'Danach beginnt', 'Dann fängt'), 1)]
----------------------
original SRC word: south
perturbed SRC word: east
[(('replace', 'Meilen', 'Kilometer'), 1), (('delete', 'keine Chance', ''), 1)]
----------------------
original SRC word: spain
perturbed SRC word: France
[(('replace', 'Sie', 'Es'), 1), (('replace', 'spreche', 'möchte'), 1)]
----------------------
original SRC word: spain
perturbed SRC word: England
[(('replace', 'USA', 'Vereinigten Staaten'), 1), (('replace', 'Herr', 'Lord'), 1)]
----------------------
original SRC word: spain
perturbed SRC word: Canada
[(('replace', 'selbst', 'sich aus'), 1)]
----------------------
original SRC word: space
perturbed SRC word: room
[(('replace', 'zu vergrößern , in', 'für'), 1), (('insert', '', 'zu vergrößern'), 1)]
----------------------
original SRC word: space
perturbed SRC word: power
[(('replace', 'vergrößern', 'steigern'), 1), (('replace', 'den Anfängen', 'dem Beginn'), 1)]
-------------

[(('replace', 'abholten', 'abholen'), 1), (('insert', '', 'gingen'), 1)]
----------------------
original SRC word: story
perturbed SRC word: novel
[(('replace', 'abholten', 'abholen'), 1), (('insert', '', 'gingen'), 1)]
----------------------
original SRC word: story
perturbed SRC word: storey
[(('replace', 'abholten', 'abholen'), 1), (('insert', '', 'gingen'), 1)]
----------------------
original SRC word: story
perturbed SRC word: tale
[(('insert', '', 'geöffnet'), 1), (('replace', 'los , ich habe', 'geh ! Ich aß'), 1)]
----------------------
original SRC word: story
perturbed SRC word: work
[(('replace', 'Bekennend als', 'Bekenntnis zum'), 1), (('insert', '', 'geöffnet'), 1)]
----------------------
original SRC word: straight
perturbed SRC word: a
[(('replace', 'gemäßigte', 'Mäßigung'), 1), (('insert', '', 'biegen Sie'), 1)]
----------------------
original SRC word: straight
perturbed SRC word: back
[(('replace', 'Fahren', 'Gehen'), 1), (('replace', 'einem eiskalten', 'eisigem'), 1)]

[(('insert', '', 'hinunter'), 1), (('replace', 'hinunterging', 'ging'), 1)]
----------------------
original SRC word: sound
perturbed SRC word: kind
[(('replace', 'sich', 'einander'), 1), (('replace', 'ließ', 'machte'), 1)]
----------------------
original SRC word: sound
perturbed SRC word: electrical
[(('replace', 'deutete', 'zeigte'), 1), (('replace', 'kamen', 'näherten'), 1)]
----------------------
original SRC word: sound
perturbed SRC word: be
[(('replace', 'der Pfeife', "the pipe ''"), 1), (('replace', 'kamen', 'näherten'), 1)]
----------------------
original SRC word: sir
perturbed SRC word: though
[(('replace', 'sagte', 'hat'), 1), (('insert', '', 'gesagt'), 1)]
----------------------
original SRC word: sister
perturbed SRC word: wife
[(('replace', 'mit dem Fernsehsender', 'des Fernsehsenders'), 1), (('delete', '/ sie', ''), 1)]
----------------------
original SRC word: sister
perturbed SRC word: mother
[(('delete', '/ sie', ''), 1), (('replace', 'den Würfeln gegenüberzutreten'

[(('replace', ', was erneuerbar ist ,', 'Erneuerbare'), 1), (('delete', 'tun', ''), 1)]
----------------------
original SRC word: shut
perturbed SRC word: lock
[(('replace', 'jammerte', 'jammert'), 1), (('replace', 'den Mund', 'sich ein'), 1)]
----------------------
original SRC word: sick
perturbed SRC word: dead
[(('replace', 'fröstelt', 'bekomme Schauer'), 1), (('replace', 'Kerl', 'Typen'), 1)]
----------------------
original SRC word: sick
perturbed SRC word: killed
[(('delete', 'es', ''), 1)]
----------------------
original SRC word: sick
perturbed SRC word: lost
[(('replace', 'Kerl', 'Mann'), 1), (('replace', 'Haben', 'Sagen'), 1)]
----------------------
original SRC word: sick
perturbed SRC word: right
[(('replace', 'fröstelt', 'bekomme Schauer'), 1), (('insert', '', 'dass ich'), 1)]
----------------------
original SRC word: side
perturbed SRC word: feet
[(('replace', 'stehe', 'bin'), 1), (('insert', '', 'die Inder'), 1)]
----------------------
original SRC word: side
perturbed 

[(('replace', 'Das', 'zufällig verändert , das'), 1), (('replace', 'Es', 'Das'), 1)]
----------------------
original SRC word: soul
perturbed SRC word: man
[(('replace', 'Es', 'Das'), 1), (('replace', 'das', 'es'), 1)]
----------------------
original SRC word: software
perturbed SRC word: will
[(('insert', '', 'ermöglichen'), 1), (('replace', 'eingeführt', 'umgesetzt'), 1)]
----------------------
original SRC word: software
perturbed SRC word: is
[(('replace', 'Die Open-Source-Softwarephilosophie', 'Open Source'), 1), (('insert', '', 'Philosophie und'), 1)]
----------------------
original SRC word: sixty
perturbed SRC word: thirty
[(('insert', '', 'Gasse'), 1), (('delete', 'Gasse', ''), 1)]
----------------------
original SRC word: sky
perturbed SRC word: sun
[(('replace', 'Ruhe des blauen Meeres', 'blaue Seelenruhe'), 1), (('replace', 'einem', 'einer'), 1)]
----------------------
original SRC word: sleep
perturbed SRC word: stay
[(('delete', 'ganze', ''), 1), (('replace', 'aussaugen',

[(('replace', 'so', 'sagt'), 1), (('delete', 'sei geschäftlich überlastet und', ''), 1)]
----------------------
original SRC word: create
perturbed SRC word: have
[(('replace', 'bildeten', 'gründeten'), 1), (('delete', 'zu versuchen', ''), 1)]
----------------------
original SRC word: create
perturbed SRC word: write
[(('replace', 'heutigen', 'aktuellen'), 1), (('delete', 'zu versuchen', ''), 1)]
----------------------
original SRC word: court
perturbed SRC word: examination
[(('replace', 'so', 'sagt'), 1), (('delete', 'sei geschäftlich überlastet und', ''), 1)]
----------------------
original SRC word: crime
perturbed SRC word: what
[(('replace', 'dieses', 'jenes'), 1), (('replace', 'Behörde', 'Agentur'), 1)]
----------------------
original SRC word: crowd
perturbed SRC word: group
[(('replace', ', sie', ': Sie'), 1), (('replace', 'rauschte', 'eilte'), 1)]
----------------------
original SRC word: cross
perturbed SRC word: One
[(('replace', 'Wir sollten die persönliche Grenze', 'Lasse

[(('replace', 'gelassen', 'lassen'), 1), (('replace', 'Jahren', 'Jahre lang'), 1)]
----------------------
original SRC word: couple
perturbed SRC word: few
[(('delete', 'zum Kauf', ''), 1), (('replace', 'traf', 'trafen'), 1)]
----------------------
original SRC word: country
perturbed SRC word: world
[(('insert', '', ', die er'), 1), (('replace', 'außer für große nationale', 'mit Ausnahme großer nationaler'), 1)]
----------------------
original SRC word: country
perturbed SRC word: nation
[(('insert', '', ', die er'), 1), (('replace', 'sechstausend dreihundert', '6300'), 1)]
----------------------
original SRC word: country
perturbed SRC word: city
[(('insert', '', ', die er'), 1), (('replace', 'Diese Dinge müssen', 'Das muss sich'), 1)]
----------------------
original SRC word: crowd
perturbed SRC word: population
[(('replace', 'setzte', 'begann'), 1), (('replace', 'Das', 'Unterstützung erhielt das'), 1)]
----------------------
original SRC word: crowd
perturbed SRC word: world
[(('re

[(('replace', 'Mutti', 'Mutter'), 1), (('replace', 'hast du', 'haben Sie'), 1)]
----------------------
original SRC word: dad
perturbed SRC word: father
[(('delete', 'nie um Geschenke gebettelt', ''), 1), (('insert', '', ', nie um sie gebettelt'), 1)]
----------------------
original SRC word: date
perturbed SRC word: year
[(('replace', '< unk > Ersatzplan < unk >', 'Substitutionsplan'), 1), (('insert', '', 'weisen'), 1)]
----------------------
original SRC word: dad
perturbed SRC word: I
[(('insert', '', 'bei Geschenken'), 1), (('replace', 'Geschenke', 'sie'), 1)]
----------------------
original SRC word: cut
perturbed SRC word: rounded
[(('replace', 'sich', 'seinen Schnurrbart'), 1), (('delete', 'den Schnurrbart', ''), 1)]
----------------------
original SRC word: cut
perturbed SRC word: made
[(('replace', 'schließlich', 'endlich seinen Schnurrbart'), 1), (('delete', 'den Schnurrbart', ''), 1)]
----------------------
original SRC word: culture
perturbed SRC word: use
[(('replace', 'Me

[(('replace', 'Amtes', 'Büros'), 1), (('replace', 'machen Sie', 'machst du'), 1)]
----------------------
original SRC word: commission
perturbed SRC word: court
[(('replace', 'Gesetzesentwurfs', 'Gesetzentwurfs'), 1), (('replace', 'Frau', 'die'), 1)]
----------------------
original SRC word: commission
perturbed SRC word: committee
[(('replace', 'Gesetzesentwurfs', 'Gesetzentwurfs'), 1), (('replace', ', ich', '! Ich'), 1)]
----------------------
original SRC word: commission
perturbed SRC word: President
[(('replace', 'Gesetzesentwurfs', 'Gesetzentwurfs'), 1), (('replace', 'Frau', 'die'), 1)]
----------------------
original SRC word: commission
perturbed SRC word: Court
[(('replace', 'Gesetzesentwurfs', 'Gesetzentwurfs'), 1), (('replace', 'Frau', 'die'), 1)]
----------------------
original SRC word: comments
perturbed SRC word: words
[(('replace', 'einzige', 'Einzige'), 1), (('replace', 'empfohlen', 'vorgeschlagen'), 1)]
----------------------
original SRC word: colleagues
perturbed SR

[(('replace', 'beeindruckte', 'hat'), 1), (('replace', 'unabhängig', 'unabhängiger Position'), 1)]
----------------------
original SRC word: control
perturbed SRC word: hand
[(('replace', 'Nahrungsmitteln', 'Lebensmitteln'), 1), (('replace', 'Bewertung', 'Beurteilung'), 1)]
----------------------
original SRC word: contract
perturbed SRC word: plan
[(('replace', 'sich über einige', 'mit einigen'), 1), (('replace', 'bekam', 'bekommt'), 1)]
----------------------
original SRC word: contract
perturbed SRC word: basis
[(('insert', '', 'dass man in'), 1), (('replace', 'studiert', 'untersucht'), 1)]
----------------------
original SRC word: contract
perturbed SRC word: agreement
[(('replace', 'unterschrieben', 'unterzeichnet'), 1), (('replace', 'Mitarbeiter', 'Arbeitnehmer'), 1)]
----------------------
original SRC word: continue
perturbed SRC word: start
[(('insert', '', ', Änderungsantrag 259 zu unterstützen'), 1), (('replace', 'rechnet damit', 'erwartet'), 1)]
----------------------
origi

[(('replace', 'durch Delegieren', 'indem er delegiert'), 1), (('replace', 'Mozilla-Gemeinschaft', 'Mozilla-Gebiets'), 1)]
----------------------
original SRC word: community
perturbed SRC word: Commission
[(('replace', 'durch Delegieren', 'indem er delegiert'), 1), (('delete', 'sich', ''), 1)]
----------------------
original SRC word: computer
perturbed SRC word: life
[(('replace', 'einzurichten', 'zu gestalten'), 1), (('replace', 'Regierungsbehörden', 'staatliche Stellen'), 1)]
----------------------
original SRC word: computer
perturbed SRC word: the
[(('replace', 'noch ein Satz', 'eine weitere Phrase'), 1), (('replace', 'viel', 'sehr'), 1)]
----------------------
original SRC word: concern
perturbed SRC word: importance
[(('insert', '', 'erkannt'), 1), (('replace', 'Vor allem die', 'In erster Linie sind'), 1)]
----------------------
original SRC word: concern
perturbed SRC word: interest
[(('insert', '', 'ihn'), 1), (('replace', 'mit ihm sprach', 'ansprach'), 1)]
-------------------

[(('replace', 'Eins', 'Eine'), 1), (('replace', 'in der dieser', 'die diesen'), 1)]
----------------------
original SRC word: end
perturbed SRC word: way
[(('replace', 'Südhan-Dynastie', 'Süd-Han-Dynastie'), 1), (('replace', 'Irgendwann', 'Ab einem bestimmten Moment'), 1)]
----------------------
original SRC word: end
perturbed SRC word: time
[(('replace', 'ist egal', 'spielt keine Rolle'), 1), (('replace', 'sehr', 'viel'), 1)]
----------------------
original SRC word: end
perturbed SRC word: start
[(('delete', 'leicht fallen ,', ''), 1), (('replace', 'beweisen', 'zeigen'), 1)]
----------------------
original SRC word: end
perturbed SRC word: go
[(('replace', 'Dem', 'Wir'), 1), (('insert', '', 'Nehmen wir die höchsten Anklagen an .'), 1)]
----------------------
original SRC word: end
perturbed SRC word: beginning
[(('delete', 'leicht fallen ,', ''), 1), (('replace', 'ist', 'fängt'), 1)]
----------------------
original SRC word: eighty
perturbed SRC word: thirty
[(('replace', 'fünfhunde

[(('replace', 'Ich', 'Das'), 1), (('insert', '', 'ich'), 1)]
----------------------
original SRC word: everything
perturbed SRC word: me
[(('replace', 'geregelt', 'arrangiert'), 1), (('replace', 'Jeder kümmert', 'Alle kümmern'), 1)]
----------------------
original SRC word: everything
perturbed SRC word: it
[(('replace', 'geregelt', 'arrangiert'), 1), (('replace', 'peinliche', 'aus peinlichen'), 1)]
----------------------
original SRC word: everyone
perturbed SRC word: you
[(('delete', 'erforderlichen', ''), 1), (('insert', '', ', die nötig waren'), 1)]
----------------------
original SRC word: example
perturbed SRC word: instance
[(('replace', 'versteckte Lebensmittel', 'verstecktes Essen'), 1), (('replace', 'dienen', 'herangezogen werden'), 1)]
----------------------
original SRC word: everyone
perturbed SRC word: me
[(('delete', 'erforderlichen', ''), 1), (('insert', '', ', die erforderlich waren'), 1)]
----------------------
original SRC word: everyone
perturbed SRC word: he
[(('de

[(('replace', 'beträgt', 'liegt'), 1), (('replace', 'Der Tageslohn liegt bei', 'Die tägliche Auszahlung beträgt'), 1)]
----------------------
original SRC word: euros
perturbed SRC word: dollars
[(('replace', 'beträgt', 'liegt'), 1), (('replace', 'Der Tageslohn liegt bei', 'Die tägliche Auszahlung beträgt'), 1)]
----------------------
original SRC word: euros
perturbed SRC word: pounds
[(('replace', 'Der Tageslohn liegt bei', 'Die tägliche Auszahlung beträgt'), 1), (('replace', 'Zwei-Euro-Münzen', 'Zwei-Pfund-Münzen'), 1)]
----------------------
original SRC word: euros
perturbed SRC word: thousand
[(('replace', 'erhöhte sich', 'stieg'), 1), (('replace', 'dreiundsechzig', 'Punkt drei'), 1)]
----------------------
original SRC word: event
perturbed SRC word: competition
[(('replace', 'Diese', 'Sie haben diese'), 1), (('replace', 'Miete', 'Mieten'), 1)]
----------------------
original SRC word: event
perturbed SRC word: festival
[(('replace', 'Bestrafung ebenso', 'Strafe'), 1), (('replac

[(('replace', 'sind noch', 'bleibt'), 1), (('replace', 'fügte', 'schlug'), 1)]
----------------------
original SRC word: details
perturbed SRC word: things
[(('replace', 'mitteilen', 'geben'), 1), (('replace', 'möchte', 'will mich'), 1)]
----------------------
original SRC word: details
perturbed SRC word: terms
[(('replace', 'sind noch', 'bleiben'), 1), (('replace', 'möchte', 'will mich'), 1)]
----------------------
original SRC word: degree
perturbed SRC word: class
[(('replace', 'verliehen', 'vergeben'), 1), (('replace', 'erwarb', 'gewann'), 1)]
----------------------
original SRC word: denis
perturbed SRC word: Martin
[(('replace', 'sprechen', 'das Wort ergreifen'), 1)]
----------------------
original SRC word: denis
perturbed SRC word: He
[(('insert', '', 'Redner , der'), 1), (('replace', 'registrierten Redner', 'registriert ist'), 1)]
----------------------
original SRC word: delegation
perturbed SRC word: government
[(('replace', 'diesem Thema', 'dieser Angelegenheit'), 1), (('r

[(('replace', 'fünfhundertvierzig', 'fünfhundert vierzig'), 1), (('delete', 'es', ''), 1)]
----------------------
original SRC word: doors
perturbed SRC word: walls
[(('replace', 'schwenkten', 'schwangen'), 1), (('replace', 'stehen', 'bleiben'), 1)]
----------------------
original SRC word: doors
perturbed SRC word: of
[(('replace', 'Entlang', 'Im Flur'), 1), (('replace', 'Stock', 'Obergeschosses'), 1)]
----------------------
original SRC word: dress
perturbed SRC word: robe
[(('replace', 'es', 'sie'), 1), (('delete', ', das', ''), 1)]
----------------------
original SRC word: dress
perturbed SRC word: suit
[(('replace', 'es', 'ihn'), 1), (('replace', 'sitze', 'sitzen Sie'), 1)]
----------------------
original SRC word: disarmament
perturbed SRC word: war
[(('replace', 'Maßnahmen', 'Aktionen'), 1), (('replace', 'Bekenntnis zur', 'Engagement für die'), 1)]
----------------------
original SRC word: drunk
perturbed SRC word: drank
[(('replace', 'sagte', 'erzählte'), 1), (('replace', ', de

[(('insert', '', 'hegen'), 1), (('insert', '', 'sagen'), 1)]
----------------------
original SRC word: don
perturbed SRC word: wouldn
[(('insert', '', 'hegen'), 1), (('insert', '', 'sagen'), 1)]
----------------------
original SRC word: door
perturbed SRC word: bell
[(('replace', 'drehen Sie ihre Scharniere', 'drehte sich an den Scharnieren'), 1), (('replace', 'die Kante', 'den Rand'), 1)]
----------------------
original SRC word: door
perturbed SRC word: ground
[(('replace', 'ihre Scharniere', 'sich an den Scharnieren'), 1), (('replace', 'geöffnet', 'offen'), 1)]
----------------------
original SRC word: church
perturbed SRC word: school
[(('replace', 'geweiht', 'gewidmet'), 1), (('replace', 'gab es in Bezug auf', 'tobte ein heftiger und langwieriger Streit um'), 1)]
----------------------
original SRC word: christmas
perturbed SRC word: you
[(('replace', 'war wieder vereint', 'wurde wiedervereinigt'), 1), (('replace', 'beraten', 'haben'), 1)]
----------------------
original SRC word:

[(('delete', 'Der Kreis', ''), 1), (('insert', '', 'irgendwelche'), 1)]
----------------------
original SRC word: area
perturbed SRC word: city
[(('delete', 'Der Kreis', ''), 1), (('replace', 'Küstennähe', 'der Nähe der Küste'), 1)]
----------------------
original SRC word: april
perturbed SRC word: the
[(('replace', 'Erstaufführung im', 'Spielte zum ersten Mal am'), 1), (('replace', 'Sachanlagen', 'Sachgüter'), 1)]
----------------------
original SRC word: april
perturbed SRC word: May
[(('replace', 'Sachanlagen', 'Sachgüter'), 1), (('replace', 'Zweitausend und acht', 'Zweitausendundacht'), 1)]
----------------------
original SRC word: april
perturbed SRC word: March
[(('replace', 'Im', 'Es wird eine im'), 1), (('delete', 'wird es eine', ''), 1)]
----------------------
original SRC word: april
perturbed SRC word: June
[(('replace', 'Zweitausend und acht', 'Zweitausendundacht'), 1), (('replace', 'Im', 'Es wird eine im'), 1)]
----------------------
original SRC word: aren
perturbed SRC 

[(('insert', '', 'der Zeit'), 1), (('replace', 'werden', 'beginnen'), 1)]
----------------------
original SRC word: bit
perturbed SRC word: cup
[(('delete', 'sprechen', ''), 1), (('replace', 'Das', 'Es'), 1)]
----------------------
original SRC word: bis
perturbed SRC word: thousand
[(('replace', 'Road', 'Straße'), 1), (('replace', 'André Pal Straße', 'André-Pal-Straße'), 1)]
----------------------
original SRC word: bis
perturbed SRC word: hundred
[(('replace', 'Villette', 'Villette-Terrassen'), 1), (('insert', '', 'Straße'), 1)]
----------------------
original SRC word: birthday
perturbed SRC word: wedding
[(('replace', 'es', 'sie'), 1), (('replace', 'an', 'Teil'), 1)]
----------------------
original SRC word: birthday
perturbed SRC word: own
[(('insert', '', 'sich'), 1), (('insert', '', 'beruhigen'), 1)]
----------------------
original SRC word: birthday
perturbed SRC word: Christmas
[(('replace', 'willigte', 'erklärte'), 1), (('insert', '', 'sich'), 1)]
----------------------
origi

[(('replace', 'aufwendet', 'ausgibt'), 1), (('delete', 'werden , um Programme zu erstellen', ''), 1)]
----------------------
original SRC word: addition
perturbed SRC word: particular
[(('replace', 'sondern auch', 'war er'), 1), (('replace', 'über alle', 'von allen'), 1)]
----------------------
original SRC word: advantage
perturbed SRC word: time
[(('replace', 'sind', 'betreffen'), 1), (('insert', '', 'besteht'), 1)]
----------------------
original SRC word: addition
perturbed SRC word: general
[(('delete', 'fand ich', ''), 1), (('delete', 'neue kommerzielle Modelle', ''), 1)]
----------------------
original SRC word: addition
perturbed SRC word: fact
[(('replace', ', sondern auch mein Großvater', 'zu sein'), 1), (('replace', 'informiert', 'beschlagnahmen'), 1)]
----------------------
original SRC word: addition
perturbed SRC word: contrast
[(('replace', 'ins Leben gerufen', 'gestartet'), 1), (('replace', 'über alle', 'von allen'), 1)]
----------------------
original SRC word: actor
p

[(('delete', 'sich', ''), 1), (('replace', 'gegen die', 'das Gegenteil von'), 1)]
----------------------
original SRC word: act
perturbed SRC word: budget
[(('replace', 'Zweiter', 'Zweites'), 1), (('replace', 'Ämter', 'Positionen'), 1)]
----------------------
original SRC word: act
perturbed SRC word: work
[(('replace', 'Zweiter', 'Zweites'), 1), (('replace', 'Ämter', 'Positionen'), 1)]
----------------------
original SRC word: action
perturbed SRC word: it
[(('replace', 'das darauf abzielt', 'dessen Ziel es ist'), 1), (('insert', '', ', indem es zerstückelt wird'), 1)]
----------------------
original SRC word: action
perturbed SRC word: place
[(('replace', 'das darauf abzielt', 'dessen Ziel es ist'), 1), (('replace', 'Handelte es sich bei', 'Andererseits : Hatte'), 1)]
----------------------
original SRC word: actions
perturbed SRC word: activities
[(('replace', 'Diese', 'Um diese'), 1), (('replace', 'Mehrere', 'Einige'), 1)]
----------------------
original SRC word: actions
perturbed

[(('insert', '', 'Herr'), 1), (('replace', 'du kannst', 'Sie können'), 1)]
----------------------
original SRC word: alright
perturbed SRC word: Okay
[(('replace', "los geht 's", 'komm , lass uns'), 1), (('insert', '', 'gehen'), 1)]
----------------------
original SRC word: alright
perturbed SRC word: No
[(('insert', '', 'Herr'), 1), (('replace', 'du kannst', 'Sie können'), 1)]
----------------------
original SRC word: allows
perturbed SRC word: is
[(('replace', 'einzugehen', 'eingehen'), 1), (('replace', 'Es', 'Aber es'), 1)]
----------------------
original SRC word: allows
perturbed SRC word: enables
[(('replace', 'sie', 'es'), 1), (('delete', 'sich', ''), 1)]
----------------------
original SRC word: alley
perturbed SRC word: Park
[(('replace', 'haben', 'tragen'), 1), (('replace', 'Tempel Gottes des Landes , genannt < unk > Mingde Palace', "`` Mingde-Palast ''"), 1)]
----------------------
original SRC word: ancient
perturbed SRC word: old
[(('delete', 'Sijiaobu', ''), 1), (('delete

[(('replace', 'Führen Sie', 'Führe'), 1), (('insert', '', 'ich'), 1)]
----------------------
original SRC word: center
perturbed SRC word: centre
[(('replace', 'fungierten', 'dienten'), 1), (('replace', 'Fürs Erste', 'Vorerst'), 1)]
----------------------
original SRC word: census
perturbed SRC word: wilderness
[(('delete', 'kalifornischen', ''), 1), (('replace', 'Gemeinde durchzuführen', 'Gemeinschaft zu nehmen'), 1)]
----------------------
original SRC word: census
perturbed SRC word: territory
[(('replace', 'Gemeinde durchzuführen', 'Gemeinschaft einzunehmen'), 1), (('insert', '', 'Sprachen , die'), 1)]
----------------------
original SRC word: census
perturbed SRC word: recreation
[(('replace', 'Gemeinde durchzuführen', 'Gemeinschaft zu nehmen'), 1)]
----------------------
original SRC word: census
perturbed SRC word: federally
[(('replace', 'Gemeinde durchzuführen', 'Gemeinschaft zu nehmen'), 1), (('insert', '', 'Sprachen , die'), 1)]
----------------------
original SRC word: caus

[(('replace', 'steht', 'liegt'), 1), (('insert', '', 'ich'), 1)]
----------------------
original SRC word: century
perturbed SRC word: while
[(('replace', 'seit Anfang des', 'zu Beginn der'), 1), (('replace', 'ein', 'das erste'), 1)]
----------------------
original SRC word: chest
perturbed SRC word: will
[(('insert', '', 'seinen Amaranth-Testamentsbeschützer'), 1), (('insert', '', 'des Schmerzes ,'), 1)]
----------------------
original SRC word: child
perturbed SRC word: man
[(('replace', 'es', 'er'), 1), (('insert', '', 'verirrt'), 1)]
----------------------
original SRC word: chief
perturbed SRC word: principal
[(('replace', 'Ersten Offiziellen Zeremonienabteilung', 'First Official Ceremony Abteilung'), 1), (('replace', 'Saatgut', 'Samen'), 1)]
----------------------
original SRC word: chief
perturbed SRC word: officer
[(('replace', 'Ersten Offiziellen Zeremonienabteilung', 'First Official Ceremony Department'), 1), (('replace', 'FBI-Chef', 'FBI-Beamte'), 1)]
----------------------


[(('replace', 'Junge', 'junge'), 1), (('replace', 'Lehne', 'Rückseite'), 1)]
----------------------
original SRC word: chair
perturbed SRC word: seat
[(('replace', 'Junge', 'junge'), 1), (('replace', 'wuschen', 'gingen hin , um'), 1)]
----------------------
original SRC word: chair
perturbed SRC word: floor
[(('replace', 'Junge', 'junge'), 1), (('replace', 'Wasserbecken', 'Becken mit Wasser'), 1)]
----------------------
original SRC word: changes
perturbed SRC word: matters
[(('replace', 'bewirkt', 'an'), 1), (('replace', 'sich die', 'wichtig'), 1)]
----------------------
original SRC word: chapter
perturbed SRC word: Tracks
[(('replace', 'an gleicher Stelle', 'am selben Ort'), 1), (('replace', 'erstes', 'zuerst'), 1)]
----------------------
original SRC word: character
perturbed SRC word: body
[(('replace', 'Das', 'Es'), 1), (('replace', 'Verzögerung', 'Hindernis'), 1)]
----------------------
original SRC word: character
perturbed SRC word: children
[(('replace', 'des Darstellers', 'd

[(('replace', 'brauchen Sie', 'braucht man'), 1), (('replace', 'sich', 'ihn'), 1)]
----------------------
original SRC word: bread
perturbed SRC word: wine
[(('replace', 'unterwegs', 'auf dem Weg'), 1), (('replace', 'um Leib', 'Körper'), 1)]
----------------------
original SRC word: bread
perturbed SRC word: it
[(('replace', 'unterwegs', 'auf dem Weg'), 1), (('replace', 'Zartheit', 'Zärtlichkeit'), 1)]
----------------------
original SRC word: brave
perturbed SRC word: young
[(('replace', 'haben', 'bauten'), 1), (('replace', 'aufgebaut', 'auf'), 1)]
----------------------
original SRC word: bottom
perturbed SRC word: foot
[(('delete', 'es', ''), 1), (('replace', 'war', 'befand sich'), 1)]
----------------------
original SRC word: bottom
perturbed SRC word: edge
[(('insert', '', 'es gibt kein Unterwasserleben'), 1), (('replace', 'Sehen Sie', 'Siehst du'), 1)]
----------------------
original SRC word: bottom
perturbed SRC word: back
[(('insert', '', 'es gibt kein Unterwasserleben'), 1), 

[(('replace', 'des Mangels an', 'fehlender'), 1), (('insert', '', 'es'), 1)]
----------------------
original SRC word: build
perturbed SRC word: make
[(('replace', 'Fürs Erste', 'Vorerst'), 1), (('replace', 'Fence Center', 'Zaunzentrum'), 1)]
----------------------
original SRC word: build
perturbed SRC word: find
[(('insert', '', 'zu benutzen , um'), 1), (('replace', 'Sie', 'Es'), 1)]
----------------------
original SRC word: build
perturbed SRC word: create
[(('replace', 'des Mangels an', 'fehlender'), 1), (('replace', 'des Einsatzes', 'der Verwendung'), 1)]
----------------------
original SRC word: budget
perturbed SRC word: the
[(('replace', 'unternommen', 'zu senken'), 1), (('replace', 'vorlegen', 'präsentieren'), 1)]
----------------------
original SRC word: built
perturbed SRC word: made
[(('replace', '41 .', 'einundvierzigsten'), 1), (('delete', 'den Boden', ''), 1)]
----------------------
original SRC word: bus
perturbed SRC word: one
[(('replace', 'zum Bahnverkehr', 'zu den E

[(('delete', 'alle', ''), 1), (('replace', 'Arnaud-Allee', 'Arnaud Avenue'), 1)]
----------------------
original SRC word: brothers
perturbed SRC word: people
[(('delete', 'alle', ''), 1), (('replace', 'saßen', 'befanden sich'), 1)]
----------------------
original SRC word: brought
perturbed SRC word: brings
[(('replace', 'sehen kann', 'sehe'), 1), (('replace', 'selbst eingebrockt', 'auf sich'), 1)]
----------------------
original SRC word: brought
perturbed SRC word: do
[(('replace', 'Klarstellungen', 'Klarstellung'), 1), (('delete', ', selbst eingebrockt', ''), 1)]
----------------------
original SRC word: brought
perturbed SRC word: for
[(('delete', 'danken', ''), 1), (('delete', ', selbst eingebrockt', ''), 1)]
----------------------
original SRC word: brought
perturbed SRC word: led
[(('replace', 'selbst eingebrockt', 'auf sich genommen'), 1), (('replace', 'Fähigkeiten mit', 'Fertigkeiten aus'), 1)]
----------------------
original SRC word: brought
perturbed SRC word: took
[(('rep

[(('replace', 'der Erde', 'Tage'), 1), (('delete', 'mit Entenfluggeschwindigkeit', ''), 1)]
----------------------
original SRC word: house
perturbed SRC word: apartment
[(('replace', 'ein neues Sonnensegel', 'eine neue Markise'), 1), (('replace', 'rauszugehen', 'auszugehen'), 1)]
----------------------
original SRC word: house
perturbed SRC word: place
[(('replace', 'rauszugehen', 'auszugehen'), 1), (('replace', 'gehe', 'begebe mich'), 1)]
----------------------
original SRC word: house
perturbed SRC word: room
[(('replace', 'es sich bei der', 'die'), 1), (('replace', 'meine Route', 'mein Weg'), 1)]
----------------------
original SRC word: house
perturbed SRC word: village
[(('replace', 'es sich bei der', 'die'), 1), (('replace', 'rauszugehen', 'auszugehen'), 1)]
----------------------
original SRC word: house
perturbed SRC word: way
[(('replace', 'aufstehen', 'Steh auf'), 1), (('insert', '', 'besuche'), 1)]
----------------------
original SRC word: houses
perturbed SRC word: days
[(

[(('replace', 'eines Besseren belehrt', 'widerlegt'), 1), (('delete', 'diskutierte', ''), 1)]
----------------------
original SRC word: job
perturbed SRC word: and
[(('replace', 'angeht', 'betrifft'), 1), (('replace', 'Lass', 'Lassen Sie'), 1)]
----------------------
original SRC word: job
perturbed SRC word: homework
[(('replace', 'Was uns angeht , so', 'Wir beide'), 1), (('replace', 'du', 'man'), 1)]
----------------------
original SRC word: job
perturbed SRC word: task
[(('replace', 'angeht', 'betrifft'), 1), (('replace', 'Lass', 'Lassen Sie'), 1)]
----------------------
original SRC word: job
perturbed SRC word: work
[(('replace', 'angeht', 'betrifft'), 1), (('replace', 'machen', 'aufnehmen'), 1)]
----------------------
original SRC word: jobs
perturbed SRC word: employees
[(('replace', 'Holzarbeiter', 'Waldarbeiter'), 1), (('replace', 'erledigte', 'würde'), 1)]
----------------------
original SRC word: issue
perturbed SRC word: matter
[(('replace', 'Menschen', 'Belgrader haben'), 

[(('replace', 'noch keinen blauen', 'nichts von blauem'), 1), (('replace', 'nicht bekommen', 'dazu'), 1)]
----------------------
original SRC word: haven
perturbed SRC word: hadn
[(('replace', 'Fisch', 'Fische'), 1), (('replace', 'Sagen', 'Nehmen'), 1)]
----------------------
original SRC word: haven
perturbed SRC word: place
[(('replace', 'letzter', 'gestern'), 1), (('replace', 'nicht vergessen', 'in den Mittelpunkt'), 1)]
----------------------
original SRC word: head
perturbed SRC word: arms
[(('replace', 'stehen', 'still'), 1), (('delete', 'der wütende und bissige', ''), 1)]
----------------------
original SRC word: head
perturbed SRC word: face
[(('replace', 'stehen', 'still'), 1), (('delete', 'der wütende und bissige', ''), 1)]
----------------------
original SRC word: head
perturbed SRC word: hand
[(('replace', 'stehen', 'still'), 1), (('delete', 'der wütende und bissige', ''), 1)]
----------------------
original SRC word: head
perturbed SRC word: heart
[(('replace', 'stehen', '

[(('replace', 'lag', 'hatte Zoya'), 1), (('replace', 'Führung', 'Zügel'), 1)]
----------------------
original SRC word: hands
perturbed SRC word: legs
[(('replace', 'Abgestimmt', 'Die Abstimmung'), 1), (('replace', 'das Mädchen', 'sie'), 1)]
----------------------
original SRC word: happens
perturbed SRC word: comes
[(('delete', 'einige', ''), 1), (('replace', 'gehen', 'nehmen'), 1)]
----------------------
original SRC word: happens
perturbed SRC word: else
[(('replace', 'oft vor', 'häufig der Fall'), 1), (('insert', '', 'sich'), 1)]
----------------------
original SRC word: heart
perturbed SRC word: face
[(('delete', 'es', ''), 1), (('replace', 'passiert', 'passieren wird'), 1)]
----------------------
original SRC word: held
perturbed SRC word: had
[(('replace', 'sich', 'das'), 1), (('replace', 'fand', 'wurde'), 1)]
----------------------
original SRC word: hotel
perturbed SRC word: house
[(('replace', 'Leute', 'Menschen'), 1), (('replace', 'Swimmingpool', 'Pool'), 1)]
---------------

[(('replace', 'versteckte', 'versteckt hat'), 1), (('replace', 'gönnt', 'behandelt'), 1)]
----------------------
original SRC word: herself
perturbed SRC word: it
[(('replace', 'versteckte', 'versteckt hat'), 1), (('replace', 'fallen', 'gleiten'), 1)]
----------------------
original SRC word: history
perturbed SRC word: worth
[(('replace', 'studiert', 'wird'), 1), (('insert', '', 'untersucht'), 1)]
----------------------
original SRC word: herself
perturbed SRC word: me
[(('replace', 'versteckte', 'versteckt hat'), 1), (('replace', 'fallen', 'schlüpfen'), 1)]
----------------------
original SRC word: herself
perturbed SRC word: them
[(('replace', 'versteckte', 'versteckt hat'), 1), (('replace', 'gönnt', 'behandelt'), 1)]
----------------------
original SRC word: hey
perturbed SRC word: Oh
[(('replace', 'machen', 'werden'), 1), (('insert', '', 'machen'), 1)]
----------------------
original SRC word: hey
perturbed SRC word: Well
[(('replace', 'deine', 'Ihre'), 1), (('insert', '', 'Kinder

[(('replace', 'diese', 'dieser'), 1), (('replace', 'Du würdest', 'Ihr würdet'), 1)]
----------------------
original SRC word: line
perturbed SRC word: room
[(('replace', 'schließt', 'verbindet'), 1), (('replace', 'der Fili-Linie', 'des Fili-Raumes'), 1)]
----------------------
original SRC word: lie
perturbed SRC word: sit
[(('replace', 'diese liebe kleine', 'sich zu dieser lieben kleinen'), 1), (('replace', 'dich', 'jetzt'), 1)]
----------------------
original SRC word: lie
perturbed SRC word: sleep
[(('insert', '', 'dieser lieben kleinen Seele'), 1), (('delete', 'diese liebe kleine Seele', ''), 1)]
----------------------
original SRC word: life
perturbed SRC word: care
[(('replace', 'rüsten', 'wappnen'), 1), (('replace', 'bleibt immer noch', 'ist nach wie vor'), 1)]
----------------------
original SRC word: life
perturbed SRC word: it
[(('replace', 'lebensmüde', 'des Lebens überdrüssig'), 1), (('replace', 'Alles', 'Irgendetwas'), 1)]
----------------------
original SRC word: life
per

[(('replace', 'Terminkalender', 'Kalender'), 1), (('replace', 'berichteten', 'mitteilten'), 1)]
----------------------
original SRC word: marriage
perturbed SRC word: own
[(('replace', 'sahen sich', 'konfrontierten das Kind'), 1), (('delete', 'konfrontiert', ''), 1)]
----------------------
original SRC word: marriage
perturbed SRC word: this
[(('replace', 'sahen sich', 'konfrontierten das Kind'), 1), (('delete', 'konfrontiert', ''), 1)]
----------------------
original SRC word: martin
perturbed SRC word: Paul
[(('replace', 'Hausnummer', 'Nummer'), 1), (('replace', 'Basketballprofi bei den', 'ein professioneller Basketballspieler der'), 1)]
----------------------
original SRC word: master
perturbed SRC word: father
[(('delete', '< unk > Ah ! So ein Horror !', ''), 1), (('replace', 'Kreisweit', 'Im gesamten Bezirk'), 1)]
----------------------
original SRC word: master
perturbed SRC word: son
[(('delete', '< unk > Ah ! So ein Horror !', ''), 1), (('replace', 'Kreisweit', 'Im gesamten Bez

[(('replace', 'dich', 'euch'), 1), (('replace', 'gegangen', 'gestiegen'), 1)]
----------------------
original SRC word: kiss
perturbed SRC word: love
[(('delete', 'es', ''), 1), (('replace', 'alle', 'all die'), 1)]
----------------------
original SRC word: kind
perturbed SRC word: sort
[(('replace', 'Aromaknödel-Snack', 'Aroma Knödel Snack'), 1), (('replace', 'sein', 'umgehen'), 1)]
----------------------
original SRC word: kind
perturbed SRC word: type
[(('replace', 'Aromaknödel-Snack', 'Aroma Knödel Snack'), 1), (('replace', 'dieser Wandel', 'diese Veränderung'), 1)]
----------------------
original SRC word: king
perturbed SRC word: Emperor
[(('delete', ', er', ''), 1), (('replace', 'Mr.', 'Herr'), 1)]
----------------------
original SRC word: king
perturbed SRC word: Prince
[(('replace', 'König-Qin-Kampagne', 'Prinz-Qin-Kampagne'), 1), (('replace', 'gepriesene', 'gelobte'), 1)]
----------------------
original SRC word: king
perturbed SRC word: boy
[(('replace', 'Mr.', 'Herr'), 1), (

[(('replace', 'ihrem Taillenniveau', 'Taillenhöhe'), 1), (('replace', 'einer strengen', 'eine strenge'), 1)]
----------------------
original SRC word: kept
perturbed SRC word: were
[(('delete', ', das', ''), 1), (('delete', 'kam', ''), 1)]
----------------------
original SRC word: key
perturbed SRC word: breath
[(('replace', 'gefunden hatte', 'fand'), 1), (('replace', 'vergangenen', 'letzten'), 1)]
----------------------
original SRC word: key
perturbed SRC word: main
[(('replace', 'Zettel gefunden hatte', 'Schein fand'), 1), (('replace', 'er', 'sie'), 1)]
----------------------
original SRC word: key
perturbed SRC word: way
[(('insert', '', 'es'), 1), (('replace', 'Zettel gefunden hatte', 'Schein fand'), 1)]
----------------------
original SRC word: kid
perturbed SRC word: child
[(('replace', 'lässt sich', 'wird'), 1), (('replace', 'beeindrucken', "geschlagen ''"), 1)]
----------------------
original SRC word: kid
perturbed SRC word: girl
[(('insert', '', 'Sie'), 1), (('replace', 'get

[(('replace', 'gerichtet', 'adressiert'), 1), (('replace', 'einer schönen', 'eines netten'), 1)]
----------------------
original SRC word: lady
perturbed SRC word: girl
[(('delete', 'sich', ''), 1), (('replace', 'unterhalten', 'quatschen'), 1)]
----------------------
original SRC word: lady
perturbed SRC word: man
[(('replace', 'einer schönen', 'eines netten'), 1), (('replace', 'keine Peirrak-Dame', 'keinen Peirrak-Mann'), 1)]
----------------------
original SRC word: lady
perturbed SRC word: woman
[(('replace', 'Peirrak-Dame', 'Peirrak-Frau'), 1), (('replace', 'Liedern des Mainstream-Popstars', 'Songs von Popstar'), 1)]
----------------------
original SRC word: lake
perturbed SRC word: battle
[(('replace', 'einer', 'Indien , die eine'), 1), (('replace', 'Der Grund', 'Die Talsohle'), 1)]
----------------------
original SRC word: lake
perturbed SRC word: the
[(('replace', 'einer', 'Indien , das eine'), 1), (('replace', 'stammten', 'kamen'), 1)]
----------------------
original SRC word: 

[(('replace', 'Vorschriften', 'Verordnungen'), 1), (('replace', 'irgendetwas', 'etwas'), 1)]
----------------------
original SRC word: form
perturbed SRC word: of
[(('replace', 'Vorschriften', 'Verordnungen'), 1), (('replace', 'dazugehörigen', 'unterstützenden'), 1)]
----------------------
original SRC word: fort
perturbed SRC word: room
[(('delete', 'Weg', ''), 1), (('delete', 'der Straße', ''), 1)]
----------------------
original SRC word: foot
perturbed SRC word: hand
[(('replace', 'den anderen', 'die andere'), 1), (('delete', 'ihm', ''), 1)]
----------------------
original SRC word: foot
perturbed SRC word: feet
[(('replace', 'Klettert', 'Erklimmen Sie'), 1), (('replace', 'tippte', 'klopfte'), 1)]
----------------------
original SRC word: food
perturbed SRC word: them
[(('replace', 'bereitgestellt', 'geliefert'), 1), (('replace', 'Besessenheit', 'Obsession'), 1)]
----------------------
original SRC word: fire
perturbed SRC word: word
[(('replace', 'ausgebrochen', 'herumgesprochen')

[(('replace', 'sie', 'er durch'), 1), (('replace', 'durchquert hatten', 'gegangen war'), 1)]
----------------------
original SRC word: funny
perturbed SRC word: and
[(('insert', '', ', das'), 1), (('replace', 'alle halten', 'jeder hält'), 1)]
----------------------
original SRC word: fun
perturbed SRC word: it
[(('replace', 'zu bieten', 'bereitzustellen'), 1), (('insert', '', 'das war'), 1)]
----------------------
original SRC word: found
perturbed SRC word: am
[(('replace', 'sich', 'selbst'), 1), (('replace', 'Schauen', 'Sehen'), 1)]
----------------------
original SRC word: françois
perturbed SRC word: Pierre
[(('replace', 'zu zerstören', 'auszureißen'), 1), (('replace', 'Nr . 694', 'Nummer sechshundertvierundneunzig'), 1)]
----------------------
original SRC word: found
perturbed SRC word: find
[(('delete', 'sich', ''), 1), (('replace', 'an der Spitze der polnischen katholischen', 'die polnisch-katholische'), 1)]
----------------------
original SRC word: found
perturbed SRC word: ha

[(('replace', 'Die Torte', 'Der Kuchen'), 1), (('replace', 'Haben', 'Behalten'), 1)]
----------------------
original SRC word: fear
perturbed SRC word: It
[(('replace', 'den Bullen', 'der Polizei'), 1), (('replace', 'wecke', 'inspiriere'), 1)]
----------------------
original SRC word: families
perturbed SRC word: people
[(('replace', 'ihre wertvollste Sache', 'ihr wertvollstes Gut'), 1), (('replace', 'an Land gingen', 'landeten'), 1)]
----------------------
original SRC word: families
perturbed SRC word: all
[(('insert', '', 'der Nachkommen'), 1), (('replace', 'sind die', 'gehören zur'), 1)]
----------------------
original SRC word: fear
perturbed SRC word: violence
[(('replace', 'habe', 'mache'), 1), (('replace', 'verlor', 'hat'), 1)]
----------------------
original SRC word: eyes
perturbed SRC word: most
[(('replace', 'kamen', 'die'), 1), (('replace', 'Gedränge', 'Andrang'), 1)]
----------------------
original SRC word: expression
perturbed SRC word: term
[(('replace', 'bestimmt', 'f

[(('replace', 'entspricht', 'nahe kommt'), 1), (('replace', 'stellt', 'steht für'), 1)]
----------------------
original SRC word: figure
perturbed SRC word: official
[(('replace', 'entspricht', 'nahe steht'), 1), (('replace', 'stellt', 'repräsentiert'), 1)]
----------------------
original SRC word: figures
perturbed SRC word: rates
[(('replace', 'steht', 'gehört'), 1), (('replace', 'verschiedene', 'unterschiedliche'), 1)]
----------------------
original SRC word: fifteen
perturbed SRC word: one
[(('replace', 'Henri Mondor Straße', 'Henri-Mondor-Straße'), 1), (('replace', 'Tandenet-Weg', 'Tandenet-Pfad'), 1)]
----------------------
original SRC word: field
perturbed SRC word: world
[(('replace', 'stoßen', 'stehen'), 1), (('replace', 'an einigen Referenzzahlen', ', dass einige Fachkräfte fehlen'), 1)]
----------------------
original SRC word: features
perturbed SRC word: contains
[(('insert', '', 'zu erstellen'), 1), (('replace', 'verschwunden', 'verflogen'), 1)]
----------------------
o

[(('replace', 'Demnach', 'Ihren Angaben zufolge'), 1), (('replace', 'Weltraum', 'operativen Raum'), 1)]
----------------------
original SRC word: goal
perturbed SRC word: job
[(('replace', 'verhindern', 'entziehen'), 1), (('replace', 'erreichen', 'übernehmen'), 1)]
----------------------
original SRC word: girls
perturbed SRC word: children
[(('replace', 'Wir sehen uns', 'Bis'), 1), (('replace', 'jemand', 'ein vierter Taucher'), 1)]
----------------------
original SRC word: guard
perturbed SRC word: governor
[(('replace', 'Waffen', 'Gewehre'), 1), (('replace', 'bat', 'aufforderte'), 1)]
----------------------
original SRC word: grow
perturbed SRC word: get
[(('replace', 'begannen', 'fingen'), 1), (('replace', 'besteht die', 'gibt es eine'), 1)]
----------------------
original SRC word: group
perturbed SRC word: ones
[(('replace', 'Einige Leute', 'Manche Menschen'), 1), (('replace', 'umgab', 'umringte'), 1)]
----------------------
original SRC word: glass
perturbed SRC word: wood
[(('re

[]
----------------------
original SRC word: thirty
perturbed SRC word: twenty
[]
----------------------
original SRC word: seventy
perturbed SRC word: eighty
[]
----------------------
original SRC word: cannot
perturbed SRC word: will
[]
----------------------
original SRC word: cannot
perturbed SRC word: should
[]
----------------------
original SRC word: cannot
perturbed SRC word: must
[]
----------------------
original SRC word: cannot
perturbed SRC word: could
[]
----------------------
original SRC word: cannot
perturbed SRC word: can
[]
----------------------
original SRC word: forty
perturbed SRC word: twenty
[]
----------------------
original SRC word: seventy
perturbed SRC word: thirty
[]
----------------------
original SRC word: seventy
perturbed SRC word: forty
[]
----------------------
original SRC word: doesn
perturbed SRC word: don
[]


# Comments

--> starts to make sense, yet have not seen bias (even gender bias)

--> A bigger dataset for inference could help?

Some correlation is good, some correlation is bad. Is it a good idea to prevent these correlation??

In [None]:
# Statistical test to see if SRC_similarity is higher than Trans_similarity
print(output["Trans-edit_distance"].mean() - output["SRC-edit_distance"].mean())
stats.ttest_rel(output["SRC-edit_distance"], 
                output["Trans-edit_distance"], 
                alternative='less')

Tiny pvalue --> Indeed SRC-edit_distance is significantly lower than Trans-edit_distance


(Careful with this tho, bc with number of samples too large then statistical test does not make sense)

In [None]:
_ = plt.hist(output["#TransChanges-#SrcChanges"], bins=50)

In [None]:
print(output["ChangesSpread/SentenceLength"].describe())
output["ChangesSpread/SentenceLength"].plot.box()

Some changes seems to have the same meaning but different phrasing, e.g., noun index 24, 36, 47

Both for en-de and en-vi


Kind of bias: en-vi adjective sample 82

Should we cherry-pick examples? Or cherry-pick the replacement?


Or narrow down scope of perturbation? (e.g., on countries, jobs, gender, ...?)



Some cherry-picked examples anyway:

- He comes from England --> Ông ấy đến từ Anh
- He comes from Vietnam --> Hắn đến từ Việt Nam
- He comes from North Korea --> Hắn đến từ Bắc Triều Tiên



- He is european --> Hắn là người Châu Âu
- He is asian --> Anh ấy là người châu Á.



- He has black hair --> Hắn có tóc đen.
- He has blonde hair --> Anh ấy có tóc vàng


But if we limit this then it would hurt the model overal performance as well? 

*Jan: some kind of loss to minimize the number of changes, but not completely forbidden the changes*


# Translation quality vs #changes

In [None]:
from nltk.translate.gleu_score import sentence_gleu

output["OriginalTran_Quality"] = output.apply(
    lambda x: sentence_gleu([nltk.word_tokenize(x['REF'])], nltk.word_tokenize(x['OriginalSRC-Trans'])), axis=1)

In [None]:
output.plot.scatter(x='OriginalTran_Quality', y="#TransChanges-#SrcChanges/SentenceLength")

In [None]:
np.corrcoef(output['OriginalTran_Quality'], output["#TransChanges-#SrcChanges/SentenceLength"])

In [None]:
hist = plt.hist(output["OriginalTran_Quality"], bins='sturges')
bin_boundaries = hist[1]

In [None]:
# # Use bins with same number of samples instead of equal-sized bins

# results, bin_boundaries = pd.qcut(output["OriginalTran_Quality"], q=5, retbins=True)
# bin_boundaries


# Remove bins with too few samples
cut_point = 99999
for i, value in enumerate(hist[0]):
    if value < 5:
        cut_point = i
        break
        
bin_boundaries = bin_boundaries[:i]



In [None]:
bin_boundaries

X = output['OriginalTran_Quality']
Y = output["#TransChanges-#SrcChanges/SentenceLength"]

x_plot = [(bin_boundaries[i] + bin_boundaries[i+1])/2 for i in range(0, len(bin_boundaries)-1)]
y_plot = [stats.trim_mean(Y[(bin_boundaries[i] < X) & (X < bin_boundaries[i+1])], 0.1) for i in range(0, len(bin_boundaries)-1)]
plt.plot(x_plot, y_plot)
plt.xlabel('OriginalTrans_Quality')
plt.ylabel('Avg_changes')

Most of the time downward trend (not as clear for en-de with verb, adverb, pronoun; en-vi adverb, pronoun)

**Note**: the plot has outliers removed in both X and Y dimensions, by removing too small bins (X) and trimmed-mean (Y)

# #changes vs translation quality

In [None]:
hist = plt.hist(output["#TransChanges-#SrcChanges"], bins=20)
bin_boundaries = hist[1]
hist

In [None]:
# # Use bins with same number of samples instead of equal-sized bins
# results, bin_boundaries = pd.qcut(output["#TransChanges-#SrcChanges"], q=5, retbins=True)
# bin_boundaries


# Remove bins with too few samples
cut_point = 99999
for i, value in enumerate(hist[0]):
    if value < 10:
        cut_point = i
        break
        
bin_boundaries = bin_boundaries[:i]

In [None]:
bin_boundaries

X = output['#TransChanges-#SrcChanges']
Y = output["OriginalTran_Quality"]

x_plot = [(bin_boundaries[i] + bin_boundaries[i+1])/2 for i in range(0, len(bin_boundaries)-1)]

y_plot = [stats.trim_mean(Y[(bin_boundaries[i] <= X) & (X <= bin_boundaries[i+1])], 0.1) for i in range(0, len(bin_boundaries)-1)]
plt.plot(x_plot, y_plot)
plt.xlabel('Avg_changes')
plt.ylabel('OriginalTran_Quality')

# SentenceLength vs #changes

In [None]:
output['OriginalSRC-length'] = output.apply(
    lambda x: len(nltk.word_tokenize(x['SRC'])), axis=1
)

In [None]:
output.plot.scatter(x='OriginalSRC-length', y="#TransChanges-#SrcChanges")

In [None]:
np.corrcoef(output['OriginalSRC-length'], output["#TransChanges-#SrcChanges"])

In [None]:
hist = plt.hist(output["OriginalSRC-length"], bins=20)
bin_boundaries = hist[1]

In [None]:
# Remove bins with too few samples
cut_point = 99999
for i, value in enumerate(hist[0]):
    if value < 10:
        cut_point = i
        break
        
bin_boundaries = bin_boundaries[:i]

In [None]:
X = output['OriginalSRC-length']
Y = output["#TransChanges-#SrcChanges"]

x_plot = [(bin_boundaries[i] + bin_boundaries[i+1])/2 for i in range(0, len(bin_boundaries)-1)]
y_plot = [stats.trim_mean(Y[(bin_boundaries[i] < X) & (X < bin_boundaries[i+1])], 0.1) for i in range(0, len(bin_boundaries)-1)]
plt.plot(x_plot, y_plot)
plt.xlabel('OriginalSRC-length')
plt.ylabel('Avg_changes')

# Beam_size vs #changes

In [None]:
beam_dict = {}
beam_values = [1,2,3,4,5]
for beam in beam_values:
    beam_dict[beam] = read_output_df(dataset, perturb_type, beam, replacement_strategy)
    # Make sure the df all have the same index
    if beam > 1:
        assert beam_dict[beam].index.equals(beam_dict[beam].index)


In [None]:
plt.plot(beam_values,
              [stats.trim_mean(beam_dict[x]['#TransChanges-#SrcChanges'], 0.1) for x in beam_values])
plt.xlabel('beam')
plt.ylabel('mean_changes')

The mean might not saying anything

In [None]:
fig, ax = plt.subplots()
ax.boxplot([beam_dict[x]['#TransChanges-#SrcChanges'] for x in beam_values])
ax.set_xticklabels(beam_values)
ax.set_xlabel('beam')
ax.set_ylabel('#changes')

# Perturbed word type vs #changes

In [None]:
word_type_dict = {}
word_type_values = ["noun", "verb", "adjective", "adverb", "pronoun"]
for word_type in word_type_values:
    word_type_dict[word_type] = read_output_df(dataset, perturb_type=word_type, beam=beam, replacement_strategy=replacement_strategy)

    
print('--------------------------------')
print('word type    -   trimmed-mean #changes')

for word_type in word_type_values:
    print(f"{word_type} - {stats.trim_mean(word_type_dict[word_type]['#TransChanges-#SrcChanges'], 0.1)}")


In [None]:
fig, ax = plt.subplots()
ax.boxplot([word_type_dict[x]['#TransChanges-#SrcChanges'] for x in word_type_values])
ax.set_xticklabels(word_type_values)
ax.set_xlabel('word_type')
ax.set_ylabel('#changes')

# #Changes per sentence across word types

See if the chaos changes are sentence-specific. Excluding perturbing pronouns bc not many samples have pronoun

In [None]:
# Find sentences that has multiple word types perturbed
word_type_values = ["noun", "verb", "adjective", "adverb"]
index_intersection = word_type_dict[word_type_values[0]].index
for i in range(1, len(word_type_values)):
    index_intersection = \
        index_intersection.intersection(word_type_dict[word_type_values[i]].index)

len(index_intersection)

In [None]:
changes_per_word_type = pd.DataFrame()
for word_type in word_type_values:
    changes_per_word_type[word_type] = word_type_dict[word_type]["#TransChanges-#SrcChanges"].loc[index_intersection]
    
# Count the number of samples where the changes in trans always bigger than changes in SRC
changes_per_word_type[(changes_per_word_type['noun'] > 0) & (changes_per_word_type['verb'] > 0) & \
                      (changes_per_word_type['adjective'] > 0) & (changes_per_word_type['adverb'] > 0)]



Small portion of rows --> not sentence-specific

In [None]:
import spacy 
from spacy import displacy 
nlp = spacy.load("en_core_web_sm")
sentence = "He is from Vietnam"
doc = nlp(sentence)

print(f"{'Node (from)-->':<15} {'Relation':^10} {'-->Node (to)':>15}\n")

for token in doc:
    print("{:<15} {:^10} {:>15}".format(str(token.head.text), str(token.dep_), str(token.text)))

In [None]:
for token in doc:
    print("------------------------------------------------")
    print(f"Token: {token.text}")
    print(f"Ancestors: {list(token.ancestors)}")
    print(f"Children: {list(token.children)}")

In [None]:
import spacy 
from spacy import displacy 
nlp = spacy.load("de_core_news_sm")
sentence = "Er kommt aus Vietnam"
doc = nlp(sentence)

print(f"{'Node (from)-->':<15} {'Relation':^10} {'-->Node (to)':>15}\n")

for token in doc:
    print("{:<15} {:^10} {:>15}".format(str(token.head.text), str(token.dep_), str(token.text)))