In [1]:
import numpy as np
import pandas as pd

from utils import load_config
from dataset import read_atis

import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's 'darkgrid' style to figures drawn from this notebook.
sns.set(style='darkgrid')

# Project configuration as returned by utils.load_config().
config = load_config()

# Directory prefix for the generated LaTeX tables; the writer cells append
# '<n>.tex' to it by plain string concatenation, so the trailing slash matters.
# NOTE(review): absolute, machine-specific path — the writer cells will fail on
# any machine where this directory does not exist.
data_path = '/home/lesha/diploma/pieces of paper/reports/main/tables/'

In [2]:
# Read the 'test' split for the language list ['en'] via dataset.read_atis.
# NOTE(review): `test` is not referenced by any other cell visible in this
# part of the notebook — confirm it is still needed.
test = read_atis('test', ['en'])

# NOTE(review): import placed outside the top import cell; neither class is
# used in the cells visible here.
from adversarial import AdversarialWordLevel, AdversarialAlignments

In [2]:
label_num = 0

def df_to_latex(df, caption: str = None):
    
    def array_fixer(x):
        if isinstance(x, list) or isinstance(x, np.ndarray):
            return f'${np.mean(x):.3f} \pm {np.std(x):.3f}$'
        else:
            return f'${x:.3f}$'
    
    result = """\
\\begin{{table}}[H]
\t\\resizebox{{\\textwidth}}{{!}}{{
\t\t\\begin{{tabular}}{{|>{{\\bfseries}}l|{}}}
\t\t\t\\hline
{}
\t\t\\end{{tabular}}
\t}}{}
\\end{{table}}\
"""

    columns = 'c|' * df.shape[1]
        
    body = ['& ' + ' & '.join(df.columns)]
    
    body += [
        df.index[i] +
        '&' + 
        ' & '.join(map(array_fixer, df.iloc[i].values)) 
        for i in range(len(df))
    ]
    
    for i in range(len(body)):
        body[i] = '\t' * 3 + body[i] + ' \\\\ \\hline'
        

    body = '\n'.join(body).replace('_', '\\_')
    
    if caption is not None:
        caption = f'\caption{{{caption}}}'
    else:
        caption = ''
        
    global label_num
        
    caption += f'\\label{{tab:table{label_num}}}'

    label_num += 1

    result = result.format(columns, body, caption)

    return result

In [3]:
# Display names of the evaluated model variants; they are used as the row
# labels of the result tables. The comparison cells pass only the first
# whitespace-separated token ('xlm-r' / 'm-bert') to get_model_attacks.
model_names = [
    'xlm-r',
    'm-bert',
    'xlm-r en',
    'm-bert en',
    'xlm-r adv',
    'm-bert adv',
    'xlm-r en + adv',
    'm-bert en + adv'
]

# Flags for the entry at the same position in model_names. Each tuple is
# unpacked positionally into get_model_attacks, i.e. it is
# (only_english, adv_pretrained).
model_args = [
    (False, False),
    (False, False),
    (True, False),
    (True, False),
    (False, True),
    (False, True),
    (True, True),
    (True, True),
]

In [4]:
def get_model_attacks(language, model_name, only_english: bool = False, adv_pretrained: bool = False):
    """Load the stored result table of one model configuration.

    The file is looked up relative to the working directory under
    ``results/<language>/``; the two flags are encoded in the file name as
    0/1 suffixes. The first CSV column becomes the index of the returned frame.
    """
    csv_path = f'results/{language}/{model_name}_{int(only_english)}_{int(adv_pretrained)}.csv'
    results = pd.read_csv(csv_path, index_col=0)
    return results

In [5]:
# Maps metric column names of the loaded result frames to the human-readable
# metric names used as keys of `output` and inside the table captions.
# NOTE(review): 'sementic_frame_acc' looks like a misspelling of "semantic",
# but the key is used to index the loaded results (df[key]), so it has to
# match the column name in the stored files — confirm before renaming.
index_renamer = {
    'intent_acc': 'Intent accuracy',
    'slot_f1': 'Slots F1 score',
    'sementic_frame_acc': 'Semantic accuracy',
}

In [6]:
# Comparison of the models on the plain test set (without defence).
# Builds one table per metric: rows are models, columns are the entries of the
# loaded result frame plus their mean under 'avg'.

metric_tables = {pretty_name: {} for pretty_name in index_renamer.values()}

for model_name, model_arg in zip(model_names[:4], model_args):
    scores = get_model_attacks('test', model_name.split()[0], *model_arg)

    for column, pretty_name in index_renamer.items():
        per_entry = scores[column].to_dict()
        per_entry['avg'] = np.mean(list(per_entry.values()))
        metric_tables[pretty_name][model_name] = per_entry

output = {
    pretty_name: pd.DataFrame.from_dict(per_model).rename(index=index_renamer).transpose()
    for pretty_name, per_model in metric_tables.items()
}

output

{'Intent accuracy':                  en        de        es        fr        ja        pt  \
 xlm-r      0.980132  0.976159  0.966887  0.970861  0.969536  0.966887   
 m-bert     0.978808  0.976159  0.957616  0.968212  0.954967  0.968212   
 xlm-r en   0.901987  0.875497  0.878146  0.879470  0.785430  0.774834   
 m-bert en  0.952318  0.819868  0.870199  0.875497  0.747020  0.838411   
 
                  zh       avg  
 xlm-r      0.964238  0.970672  
 m-bert     0.956291  0.965752  
 xlm-r en   0.847682  0.849007  
 m-bert en  0.815894  0.845601  ,
 'Slots F1 score':                  en        de        es        fr        ja        pt  \
 xlm-r      0.943711  0.938557  0.907517  0.924231  0.928918  0.923868   
 m-bert     0.947356  0.945361  0.885226  0.925663  0.935154  0.924304   
 xlm-r en   0.870407  0.668776  0.751045  0.612022  0.573139  0.672652   
 m-bert en  0.899340  0.557940  0.783145  0.534114  0.621519  0.518100   
 
                  zh       avg  
 xlm-r      0.941942

In [7]:
# Write one LaTeX table per metric into '1.tex'.
# The captions are Cyrillic, so the encoding is fixed to UTF-8 instead of
# relying on the platform's default locale encoding.
with open(data_path + '1.tex', 'w', encoding='utf-8') as f:
    for key, table in output.items():
        print(
            df_to_latex(
                table,
                f'Сравнение моделей между собой на тестовой выборке датасета MultiAtis++ по метрике \\textbf{{{key}}}. По колонкам языки тестовых подвыборок, по рядам тестируемые модели.'
            ),
            file=f,
        )

In [8]:
# Comparison of the models under the word-level attack.
# Only result rows whose index label contains 'Word' are kept; the text
# between '[' and ']' of each label becomes the column name.

metric_tables = {pretty_name: {} for pretty_name in index_renamer.values()}

for model_name, model_arg in zip(model_names[:4], model_args):
    scores = get_model_attacks('en', model_name.split()[0], *model_arg)
    word_rows = scores[scores.index.map(lambda label: 'Word' in label)]

    for column, pretty_name in index_renamer.items():
        per_attack = word_rows[column].to_dict()
        per_attack['[avg]'] = np.mean(list(per_attack.values()))
        metric_tables[pretty_name][model_name] = {
            label[label.find('[') + 1:label.find(']')]: score
            for label, score in per_attack.items()
        }

output = {
    pretty_name: pd.DataFrame.from_dict(per_model).rename(index=index_renamer).transpose()
    for pretty_name, per_model in metric_tables.items()
}

output

{'Intent accuracy':                  de        es        fr        ja        pt        zh  \
 xlm-r      0.931126  0.876821  0.849007  0.825166  0.900662  0.871523   
 m-bert     0.892715  0.891391  0.871523  0.819868  0.852980  0.851656   
 xlm-r en   0.809272  0.782781  0.773510  0.676821  0.553642  0.728477   
 m-bert en  0.810596  0.760265  0.793377  0.723179  0.760265  0.777483   
 
                 avg  
 xlm-r      0.875717  
 m-bert     0.863355  
 xlm-r en   0.720751  
 m-bert en  0.770861  ,
 'Slots F1 score':                  de        es        fr        ja        pt        zh  \
 xlm-r      0.766675  0.588813  0.602636  0.551503  0.597709  0.746898   
 m-bert     0.685462  0.516985  0.509909  0.427834  0.494069  0.684211   
 xlm-r en   0.642416  0.466905  0.499416  0.508277  0.542569  0.640907   
 m-bert en  0.539088  0.385202  0.419195  0.362173  0.390605  0.585037   
 
                 avg  
 xlm-r      0.642372  
 m-bert     0.553078  
 xlm-r en   0.550082  
 m-bert en 

In [9]:
# Write one LaTeX table per metric into '2.tex'.
# The captions are Cyrillic, so the encoding is fixed to UTF-8 instead of
# relying on the platform's default locale encoding.
with open(data_path + '2.tex', 'w', encoding='utf-8') as f:
    for key, table in output.items():
        print(
            df_to_latex(
                table,
                f'Сравнение моделей между собой после word-level атаки на тестовую выборку датасета MultiAtis++ по метрике \\textbf{{{key}}}. По колонкам встраиваемые языки, по рядам тестируемые модели.'
            ),
            file=f,
        )

In [10]:
# Comparison of the models under the alignments attack.
# Only result rows whose index label contains 'Align' are kept; the text
# between '[' and ']' of each label becomes the column name.

metric_tables = {pretty_name: {} for pretty_name in index_renamer.values()}

for model_name, model_arg in zip(model_names[:4], model_args):
    scores = get_model_attacks('en', model_name.split()[0], *model_arg)
    align_rows = scores[scores.index.map(lambda label: 'Align' in label)]

    for column, pretty_name in index_renamer.items():
        per_attack = align_rows[column].to_dict()
        per_attack['[avg]'] = np.mean(list(per_attack.values()))
        metric_tables[pretty_name][model_name] = {
            label[label.find('[') + 1:label.find(']')]: score
            for label, score in per_attack.items()
        }

output = {
    pretty_name: pd.DataFrame.from_dict(per_model).rename(index=index_renamer).transpose()
    for pretty_name, per_model in metric_tables.items()
}

output

{'Intent accuracy':                  de        es        fr        ja        pt        zh  \
 xlm-r      0.953642  0.945695  0.928477  0.952318  0.964238  0.949669   
 m-bert     0.948344  0.935099  0.939073  0.950993  0.940397  0.933775   
 xlm-r en   0.807947  0.835762  0.740397  0.749669  0.442384  0.784106   
 m-bert en  0.809272  0.833113  0.834437  0.805298  0.860927  0.829139   
 
                 avg  
 xlm-r      0.949007  
 m-bert     0.941280  
 xlm-r en   0.726711  
 m-bert en  0.828698  ,
 'Slots F1 score':                  de        es        fr        ja        pt       zh       avg
 xlm-r      0.801866  0.829175  0.750731  0.443746  0.813370  0.60896  0.707975
 m-bert     0.784309  0.803944  0.758233  0.449928  0.782991  0.61937  0.699796
 xlm-r en   0.626586  0.704014  0.568657  0.364769  0.679594  0.56056  0.584030
 m-bert en  0.539165  0.698585  0.531226  0.366416  0.530324  0.56319  0.538151,
 'Semantic accuracy':                  de        es        fr        ja   

In [11]:
# Write one LaTeX table per metric into '3.tex'.
# The captions are Cyrillic, so the encoding is fixed to UTF-8 instead of
# relying on the platform's default locale encoding.
with open(data_path + '3.tex', 'w', encoding='utf-8') as f:
    for key, table in output.items():
        print(
            df_to_latex(
                table,
                f'Сравнение моделей между собой после phrase-level атаки на тестовую выборку датасета MultiAtis++ по метрике \\textbf{{{key}}}. По колонкам встраиваемые языки, по рядам тестируемые модели.'
            ),
            file=f,
        )

In [12]:
# Comparison of the models on the plain test set (with defence).
# Builds one table per metric: rows are models, columns are the entries of the
# loaded result frame plus their mean under 'avg'.

metric_tables = {pretty_name: {} for pretty_name in index_renamer.values()}

for model_name, model_arg in zip(model_names[4:], model_args[4:]):
    scores = get_model_attacks('test', model_name.split()[0], *model_arg)

    for column, pretty_name in index_renamer.items():
        per_entry = scores[column].to_dict()
        per_entry['avg'] = np.mean(list(per_entry.values()))
        metric_tables[pretty_name][model_name] = per_entry

output = {
    pretty_name: pd.DataFrame.from_dict(per_model).rename(index=index_renamer).transpose()
    for pretty_name, per_model in metric_tables.items()
}

output

{'Intent accuracy':                        en        de        es        fr        ja        pt  \
 xlm-r adv        0.981457  0.973510  0.964238  0.976159  0.972185  0.966887   
 m-bert adv       0.974834  0.976159  0.964238  0.972185  0.960265  0.969536   
 xlm-r en + adv   0.928477  0.890066  0.912583  0.871523  0.789404  0.880795   
 m-bert en + adv  0.958940  0.847682  0.900662  0.892715  0.719205  0.900662   
 
                        zh       avg  
 xlm-r adv        0.966887  0.971618  
 m-bert adv       0.961589  0.968401  
 xlm-r en + adv   0.815894  0.869820  
 m-bert en + adv  0.758940  0.854115  ,
 'Slots F1 score':                        en        de        es        fr        ja        pt  \
 xlm-r adv        0.946577  0.940163  0.905585  0.929329  0.927940  0.928786   
 m-bert adv       0.950394  0.942157  0.900359  0.927521  0.935022  0.920078   
 xlm-r en + adv   0.888136  0.729185  0.788215  0.622503  0.447176  0.742906   
 m-bert en + adv  0.899547  0.565747  0.75919

In [13]:
# Write one LaTeX table per metric into '4.tex'.
# The captions are Cyrillic, so the encoding is fixed to UTF-8 instead of
# relying on the platform's default locale encoding.
with open(data_path + '4.tex', 'w', encoding='utf-8') as f:
    for key, table in output.items():
        print(
            df_to_latex(
                table,
                f'Сравнение моделей с защитой между собой на тестовой выборке датасета MultiAtis++ по метрике \\textbf{{{key}}}. По колонкам языки тестовых подвыборок, по рядам тестируемые модели.'
            ),
            file=f,
        )

In [14]:
# Comparison of the models under the word-level attack
# (model variants from the second half of model_names).
# Only result rows whose index label contains 'Word' are kept; the text
# between '[' and ']' of each label becomes the column name.

metric_tables = {pretty_name: {} for pretty_name in index_renamer.values()}

for model_name, model_arg in zip(model_names[4:], model_args[4:]):
    scores = get_model_attacks('en', model_name.split()[0], *model_arg)
    word_rows = scores[scores.index.map(lambda label: 'Word' in label)]

    for column, pretty_name in index_renamer.items():
        per_attack = word_rows[column].to_dict()
        per_attack['[avg]'] = np.mean(list(per_attack.values()))
        metric_tables[pretty_name][model_name] = {
            label[label.find('[') + 1:label.find(']')]: score
            for label, score in per_attack.items()
        }

output = {
    pretty_name: pd.DataFrame.from_dict(per_model).rename(index=index_renamer).transpose()
    for pretty_name, per_model in metric_tables.items()
}

output

{'Intent accuracy':                        de        es        fr        ja        pt        zh  \
 xlm-r adv        0.935099  0.884768  0.895364  0.838411  0.917881  0.856954   
 m-bert adv       0.923179  0.894040  0.890066  0.866225  0.899338  0.884768   
 xlm-r en + adv   0.842384  0.817219  0.811921  0.613245  0.810596  0.720530   
 m-bert en + adv  0.864901  0.827815  0.854305  0.760265  0.855629  0.749669   
 
                       avg  
 xlm-r adv        0.888079  
 m-bert adv       0.892936  
 xlm-r en + adv   0.769316  
 m-bert en + adv  0.818764  ,
 'Slots F1 score':                        de        es        fr        ja        pt        zh  \
 xlm-r adv        0.768449  0.608536  0.590965  0.519163  0.607991  0.743609   
 m-bert adv       0.703900  0.531871  0.534875  0.470119  0.562741  0.685425   
 xlm-r en + adv   0.648408  0.509328  0.508400  0.455320  0.541513  0.662532   
 m-bert en + adv  0.531093  0.404718  0.371309  0.440516  0.417090  0.563148   
 
             

In [15]:
# Write one LaTeX table per metric into '5.tex'.
# The captions are Cyrillic, so the encoding is fixed to UTF-8 instead of
# relying on the platform's default locale encoding.
with open(data_path + '5.tex', 'w', encoding='utf-8') as f:
    for key, table in output.items():
        print(
            df_to_latex(
                table,
                f'Сравнение моделей с защитой между собой после word-level атаки на тестовую выборку датасета MultiAtis++ по метрике \\textbf{{{key}}}. По колонкам встраиваемые языки, по рядам тестируемые модели.'
            ),
            file=f,
        )

In [16]:
# Comparison of the models under the alignments attack
# (model variants from the second half of model_names).
# Only result rows whose index label contains 'Align' are kept; the text
# between '[' and ']' of each label becomes the column name.

metric_tables = {pretty_name: {} for pretty_name in index_renamer.values()}

for model_name, model_arg in zip(model_names[4:], model_args[4:]):
    scores = get_model_attacks('en', model_name.split()[0], *model_arg)
    align_rows = scores[scores.index.map(lambda label: 'Align' in label)]

    for column, pretty_name in index_renamer.items():
        per_attack = align_rows[column].to_dict()
        per_attack['[avg]'] = np.mean(list(per_attack.values()))
        metric_tables[pretty_name][model_name] = {
            label[label.find('[') + 1:label.find(']')]: score
            for label, score in per_attack.items()
        }

output = {
    pretty_name: pd.DataFrame.from_dict(per_model).rename(index=index_renamer).transpose()
    for pretty_name, per_model in metric_tables.items()
}

output

{'Intent accuracy':                        de        es        fr        ja        pt        zh  \
 xlm-r adv        0.958940  0.957616  0.929801  0.960265  0.957616  0.947020   
 m-bert adv       0.956291  0.949669  0.944371  0.957616  0.954967  0.945695   
 xlm-r en + adv   0.870199  0.856954  0.810596  0.794702  0.852980  0.776159   
 m-bert en + adv  0.846358  0.890066  0.892715  0.765563  0.900662  0.784106   
 
                       avg  
 xlm-r adv        0.951876  
 m-bert adv       0.951435  
 xlm-r en + adv   0.826932  
 m-bert en + adv  0.846578  ,
 'Slots F1 score':                        de        es        fr        ja        pt        zh  \
 xlm-r adv        0.808616  0.847312  0.770889  0.431993  0.820305  0.617295   
 m-bert adv       0.807282  0.848049  0.790042  0.445657  0.822081  0.635233   
 xlm-r en + adv   0.682781  0.775646  0.647887  0.326389  0.721132  0.569833   
 m-bert en + adv  0.612938  0.753839  0.621278  0.323539  0.630590  0.523289   
 
             

In [17]:
# Write one LaTeX table per metric into '6.tex'.
# The captions are Cyrillic, so the encoding is fixed to UTF-8 instead of
# relying on the platform's default locale encoding.
with open(data_path + '6.tex', 'w', encoding='utf-8') as f:
    for key, table in output.items():
        print(
            df_to_latex(
                table,
                f'Сравнение моделей с защитой между собой после phrase-level атаки на тестовую выборку датасета MultiAtis++ по метрике \\textbf{{{key}}}. По колонкам встраиваемые языки, по рядам тестируемые модели.'
            ),
            file=f,
        )