In [1]:
import numpy as np
import pandas as pd

from utils import load_config

import matplotlib.pyplot as plt
import seaborn as sns

# Use seaborn's "darkgrid" look for any matplotlib figures drawn later.
sns.set(style='darkgrid')

# Project configuration loaded through the local `utils` helper.
# NOTE(review): `config` is not referenced by the cells visible here — confirm it is still needed.
config = load_config()

In [2]:
def df_to_latex(df, caption: str = None):
    """Render ``df`` as a LaTeX ``table`` environment and return the markup.

    The first table column holds the index labels (typeset in bold by the
    column spec) and every DataFrame column becomes a centred column.

    Args:
        df: DataFrame to render. Each cell is either a number, shown with
            three decimals, or a list/tuple/ndarray of numbers, shown as
            "mean +- population std" with three decimals each.
        caption: Optional caption text, inserted verbatim (underscores in it
            are not escaped). No caption command is emitted when ``None``.

    Returns:
        The LaTeX source as a single string without a trailing newline.
        Underscores in column names, index labels and cells are escaped.
    """

    def array_fixer(x):
        # A collection of samples is summarised as mean and std; a scalar is
        # printed directly.
        if isinstance(x, (list, tuple, np.ndarray)):
            return f'${np.mean(x):.3f} \\pm {np.std(x):.3f}$'
        return f'${x:.3f}$'

    # Doubled braces are literal braces for str.format; the three bare "{}"
    # slots receive the column spec, the table rows and the caption.
    template = """\
\\begin{{table}}[H]
\t\\resizebox{{\\textwidth}}{{!}}{{
\t\t\\begin{{tabular}}{{|>{{\\bfseries}}l|{}}}
\t\t\t\\hline
{}
\t\t\\end{{tabular}}
\t}}{}
\\end{{table}}\
"""

    columns = 'c|' * df.shape[1]

    # Header row: empty corner cell followed by the column names.
    # Labels go through str() so non-string column/index labels work too.
    body = ['& ' + ' & '.join(map(str, df.columns))]

    body += [
        str(label) + '&' + ' & '.join(map(array_fixer, df.iloc[i].values))
        for i, label in enumerate(df.index)
    ]

    # Indent every row and terminate it with a row break plus a rule.
    body = ['\t' * 3 + line + ' \\\\ \\hline' for line in body]

    # A bare underscore is a subscript operator in LaTeX text, so escape it.
    body = '\n'.join(body).replace('_', '\\_')

    if caption is not None:
        caption = f'\\caption{{{caption}}}'
    else:
        caption = ''

    return template.format(columns, body, caption)


# Output file for the clean-test-set tables. The captions written to it are
# Cyrillic, so pin UTF-8 instead of relying on the platform default encoding.
# The handle stays open across cells and is closed in the final cell.
tables_1 = open('tables_1.txt', 'w', encoding='utf-8')

In [3]:
# Each evaluated run: display name paired with the
# (only_english, adv_pretrained) flags passed to get_model_attacks.
_MODEL_VARIANTS = (
    ('xlm-r', (False, False)),
    ('m-bert', (False, False)),
    ('xlm-r en', (True, False)),
    ('m-bert en', (True, False)),
    ('xlm-r adv', (False, True)),
    ('m-bert adv', (False, True)),
    ('xlm-r en + adv', (True, True)),
    ('m-bert en + adv', (True, True)),
)

# Parallel lists, kept in the same order so they can be zipped together.
model_names = [variant_name for variant_name, _ in _MODEL_VARIANTS]
model_args = [variant_flags for _, variant_flags in _MODEL_VARIANTS]

In [4]:
def get_model_attacks(language, model_name, only_english: bool = False, adv_pretrained: bool = False):
    """Load one stored results table as a DataFrame.

    The CSV is read from ``results/<language>/`` and its file name is
    ``<model_name>_<only_english as 0/1>_<adv_pretrained as 0/1>.csv``.
    The first CSV column becomes the index.
    """
    file_name = f'{model_name}_{int(only_english)}_{int(adv_pretrained)}.csv'
    csv_path = f'results/{language}/{file_name}'
    return pd.read_csv(csv_path, index_col=0)

In [5]:
# Preview the stored 'test' table for the baseline m-bert run.
get_model_attacks(language='test', model_name='m-bert')

Unnamed: 0,intent_acc,slot_f1,sementic_frame_acc,loss
en,0.978808,0.947356,0.854305,0.353075
de,0.976159,0.945361,0.854305,0.323492
es,0.957616,0.885226,0.65298,0.606453
fr,0.968212,0.925663,0.803974,0.335643
ja,0.954967,0.935154,0.740397,0.419405
pt,0.968212,0.924304,0.807947,0.412778
zh,0.956291,0.944663,0.796026,0.453742


In [6]:
# Language whose results directory is read and which is skipped when
# collecting the "other languages" rows.
language = 'en'

# Metric column name -> human-readable row label for the generated tables.
# 'sementic_frame_acc' is spelled this way on purpose: it must match the
# column header in the results CSVs.
index_renamer = dict(
    intent_acc='Intent accuracy',
    slot_f1='Slot F1 score',
    sementic_frame_acc='Semantic accuracy',
    loss='Loss',
)

In [7]:
# Compare the models with each other on the plain test set (English):
# take the 'No attack' row of every run and group the runs by model family.

output = {'xlm-r': {}, 'm-bert': {}}

for model_name, model_arg in zip(model_names, model_args):
    # The family is the first word of the display name, e.g. 'xlm-r en' -> 'xlm-r'.
    family = model_name.split()[0]
    only_english, adv_pretrained = model_arg

    df = get_model_attacks(language, family, only_english, adv_pretrained)
    output[family][model_name] = df.loc['No attack'].to_dict()

# One DataFrame per family: metrics as rows, runs as columns.
for family in ('xlm-r', 'm-bert'):
    output[family] = pd.DataFrame.from_dict(output[family]).rename(index=index_renamer)

output

{'xlm-r':                       xlm-r  xlm-r en  xlm-r adv  xlm-r en + adv
 Intent accuracy    0.980132  0.901987   0.981457        0.928477
 Slot F1 score      0.943711  0.870407   0.946577        0.888136
 Semantic accuracy  0.826490  0.558940   0.833113        0.613245
 Loss               0.317247  0.729068   0.319632        0.621493,
 'm-bert':                      m-bert  m-bert en  m-bert adv  m-bert en + adv
 Intent accuracy    0.978808   0.952318    0.974834         0.958940
 Slot F1 score      0.947356   0.899340    0.950394         0.899547
 Semantic accuracy  0.854305   0.671523    0.860927         0.674172
 Loss               0.353075   0.584221    0.325914         0.567392}

In [8]:
# Display the XLM-R comparison table built in the previous cell.
output['xlm-r']

Unnamed: 0,xlm-r,xlm-r en,xlm-r adv,xlm-r en + adv
Intent accuracy,0.980132,0.901987,0.981457,0.928477
Slot F1 score,0.943711,0.870407,0.946577,0.888136
Semantic accuracy,0.82649,0.55894,0.833113,0.613245
Loss,0.317247,0.729068,0.319632,0.621493


In [9]:
# Append the XLM-R clean-test table (English) to the first tables file.
latex_table = df_to_latex(
    output['xlm-r'],
    'Сравнение моделей XLM-R между собой на тестовой выборке (английский язык)',
)
print(latex_table, file=tables_1, flush=True)

In [10]:
# Append the M-BERT clean-test table (English) to the first tables file.
latex_table = df_to_latex(
    output['m-bert'],
    'Сравнение моделей M-BERT между собой на тестовой выборке (английский язык)',
)
print(latex_table, file=tables_1, flush=True)

In [11]:
# Compare the models with each other on the plain test set, averaged over the
# remaining languages: collect each metric for every row except `language`.
# The values are kept as lists here; df_to_latex turns them into mean and std.

output = {'xlm-r': {}, 'm-bert': {}}

for model_name, model_arg in zip(model_names, model_args):
    family = model_name.split()[0]
    df = get_model_attacks('test', family, *model_arg)

    collected = {key: [] for key in df.columns}
    for idx, row in df.iterrows():
        if idx == language:
            continue
        for key in df.columns:
            collected[key].append(row[key])

    output[family][model_name] = collected

# One DataFrame per family: metrics as rows, runs as columns.
for family in ('xlm-r', 'm-bert'):
    output[family] = pd.DataFrame.from_dict(output[family]).rename(index=index_renamer)

output

{'xlm-r':                                                                xlm-r  \
 Intent accuracy    [0.976158940397351, 0.9668874172185432, 0.9708...   
 Slot F1 score      [0.9385574354407836, 0.9075166269293512, 0.924...   
 Semantic accuracy  [0.8264900662251655, 0.6966887417218544, 0.806...   
 Loss               [0.3457097543532176, 0.5021671323865456, 0.333...   
 
                                                             xlm-r en  \
 Intent accuracy    [0.8754966887417218, 0.8781456953642384, 0.879...   
 Slot F1 score      [0.6687760678843665, 0.751044776119403, 0.6120...   
 Semantic accuracy  [0.2503311258278146, 0.3470198675496688, 0.150...   
 Loss               [1.1868144895685346, 1.21023256772835, 1.25222...   
 
                                                            xlm-r adv  \
 Intent accuracy    [0.9735099337748344, 0.9642384105960264, 0.976...   
 Slot F1 score      [0.9401633259094284, 0.9055847733533684, 0.929...   
 Semantic accuracy  [0.831788079470198

In [12]:
# Display the XLM-R table built in the previous cell; each cell holds the list of collected values.
output['xlm-r']

Unnamed: 0,xlm-r,xlm-r en,xlm-r adv,xlm-r en + adv
Intent accuracy,"[0.976158940397351, 0.9668874172185432, 0.9708...","[0.8754966887417218, 0.8781456953642384, 0.879...","[0.9735099337748344, 0.9642384105960264, 0.976...","[0.8900662251655629, 0.9125827814569536, 0.871..."
Slot F1 score,"[0.9385574354407836, 0.9075166269293512, 0.924...","[0.6687760678843665, 0.751044776119403, 0.6120...","[0.9401633259094284, 0.9055847733533684, 0.929...","[0.7291851851851852, 0.7882149901380672, 0.622..."
Semantic accuracy,"[0.8264900662251655, 0.6966887417218544, 0.806...","[0.2503311258278146, 0.3470198675496688, 0.150...","[0.8317880794701987, 0.6927152317880795, 0.813...","[0.3973509933774834, 0.4039735099337748, 0.108..."
Loss,"[0.3457097543532176, 0.5021671323865456, 0.333...","[1.1868144895685346, 1.21023256772835, 1.25222...","[0.3537513591385887, 0.5311582064170293, 0.332...","[0.9317734229035284, 1.1258697358499232, 1.460..."


In [13]:
# Append the XLM-R clean-test table (all languages except English) to the first tables file.
latex_table = df_to_latex(
    output['xlm-r'],
    'Сравнение моделей XLM-R между собой на тестовой выборке (все языки кроме английского)',
)
print(latex_table, file=tables_1, flush=True)

In [14]:
# Append the M-BERT clean-test table (all languages except English) to the first tables file.
latex_table = df_to_latex(
    output['m-bert'],
    'Сравнение моделей M-BERT между собой на тестовой выборке (все языки кроме английского)',
)
print(latex_table, file=tables_1, flush=True)

In [15]:
# Output file for the attack tables. The captions written to it are Cyrillic,
# so pin UTF-8 instead of relying on the platform default encoding.
# The handle stays open across cells and is closed in the final cell.
tables_2 = open('tables_2.txt', 'w', encoding='utf-8')

In [16]:
# Compare the models with each other, averaged over the word-level attack:
# collect each metric from every row whose label contains 'Word level'.
# The values are kept as lists here; df_to_latex turns them into mean and std.

output = {'xlm-r': {}, 'm-bert': {}}

for model_name, model_arg in zip(model_names, model_args):
    family = model_name.split()[0]
    df = get_model_attacks(language, family, *model_arg)

    collected = {key: [] for key in df.columns}
    for idx, row in df.iterrows():
        if 'Word level' not in idx:
            continue
        for key in df.columns:
            collected[key].append(row[key])

    output[family][model_name] = collected

# One DataFrame per family: metrics as rows, runs as columns.
for family in ('xlm-r', 'm-bert'):
    output[family] = pd.DataFrame.from_dict(output[family]).rename(index=index_renamer)

output

{'xlm-r':                                                                xlm-r  \
 Intent accuracy    [0.9311258278145697, 0.8768211920529801, 0.849...   
 Slot F1 score      [0.7666752510945146, 0.5888128148817983, 0.602...   
 Semantic accuracy  [0.343046357615894, 0.1086092715231788, 0.0860...   
 Loss               [1.654655853541274, 3.0209231000197563, 3.6366...   
 
                                                             xlm-r en  \
 Intent accuracy    [0.8092715231788079, 0.7827814569536424, 0.773...   
 Slot F1 score      [0.6424162836506894, 0.4669049673363737, 0.499...   
 Semantic accuracy  [0.2013245033112582, 0.0569536423841059, 0.055...   
 Loss               [2.3026634461001345, 3.5072542830517417, 3.843...   
 
                                                            xlm-r adv  \
 Intent accuracy    [0.9350993377483444, 0.8847682119205298, 0.895...   
 Slot F1 score      [0.7684491299377619, 0.6085357846355877, 0.590...   
 Semantic accuracy  [0.347019867549668

In [17]:
# Append the XLM-R word-level attack table to the second tables file.
latex_table = df_to_latex(
    output['xlm-r'],
    'Сравнение моделей XLM-R после word-level атаки',
)
print(latex_table, file=tables_2, flush=True)

In [18]:
# Append the M-BERT word-level attack table to the second tables file.
latex_table = df_to_latex(
    output['m-bert'],
    'Сравнение моделей M-BERT после word-level атаки',
)
print(latex_table, file=tables_2, flush=True)

In [19]:
# Compare the models with each other, averaged over the alignments attack:
# collect each metric from every row whose label contains 'Align'.
# The values are kept as lists here; df_to_latex turns them into mean and std.

output = {'xlm-r': {}, 'm-bert': {}}

for model_name, model_arg in zip(model_names, model_args):
    family = model_name.split()[0]
    df = get_model_attacks(language, family, *model_arg)

    collected = {key: [] for key in df.columns}
    for idx, row in df.iterrows():
        if 'Align' not in idx:
            continue
        for key in df.columns:
            collected[key].append(row[key])

    output[family][model_name] = collected

# One DataFrame per family: metrics as rows, runs as columns.
for family in ('xlm-r', 'm-bert'):
    output[family] = pd.DataFrame.from_dict(output[family]).rename(index=index_renamer)

output

{'xlm-r':                                                                xlm-r  \
 Intent accuracy    [0.9536423841059604, 0.9456953642384106, 0.928...   
 Slot F1 score      [0.8018660101075548, 0.8291751223588396, 0.750...   
 Semantic accuracy  [0.5112582781456954, 0.5112582781456954, 0.336...   
 Loss               [1.3409015601108734, 1.1233092033132714, 1.745...   
 
                                                             xlm-r en  \
 Intent accuracy    [0.8079470198675497, 0.8357615894039735, 0.740...   
 Slot F1 score      [0.6265863350852455, 0.7040143128704014, 0.568...   
 Semantic accuracy  [0.1629139072847682, 0.2291390728476821, 0.099...   
 Loss               [2.049562258469431, 1.88231446617528, 2.750588...   
 
                                                            xlm-r adv  \
 Intent accuracy    [0.9589403973509932, 0.9576158940397352, 0.929...   
 Slot F1 score      [0.8086155443103672, 0.8473118279569892, 0.770...   
 Semantic accuracy  [0.537748344370860

In [20]:
# Append the XLM-R phrase-level (alignment) attack table to the second tables file.
latex_table = df_to_latex(
    output['xlm-r'],
    'Сравнение моделей XLM-R после phrase-level атаки',
)
print(latex_table, file=tables_2, flush=True)

In [21]:
# Append the M-BERT phrase-level (alignment) attack table to the second tables file.
latex_table = df_to_latex(
    output['m-bert'],
    'Сравнение моделей M-BERT после phrase-level атаки',
)
print(latex_table, file=tables_2, flush=True)

In [22]:
# Release both output files now that every table has been written.
for table_file in (tables_1, tables_2):
    table_file.close()