In [1]:
import numpy as np
import pandas as pd

from utils import load_config

import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's 'darkgrid' style globally.
sns.set(style='darkgrid')

# Project configuration from utils.load_config.
# NOTE(review): `config` is not referenced by any cell visible in this notebook section - confirm it is still needed.
config = load_config()

In [2]:
def df_to_latex(df, caption: str = None):
    """Render a DataFrame as the source of a LaTeX ``table`` environment.

    The first tabular column holds the index labels (bold, left-aligned) and
    every DataFrame column becomes one centred column. A cell that is a list,
    tuple or NumPy array is rendered as ``$mean \\pm std$`` (``np.mean`` and
    ``np.std`` with their default arguments); any other cell is formatted as
    a number with three decimals. Underscores in the header and body are
    escaped for LaTeX; the caption is inserted verbatim.

    Args:
        df: DataFrame whose cells are numbers or sequences of numbers. Index
            and column labels may be of any type; they are passed through
            ``str``.
        caption: Optional caption text. When ``None`` no ``\\caption`` is
            emitted.

    Returns:
        The LaTeX source as one string, without a trailing newline.
    """

    def array_fixer(x):
        # Sequences are summarised as "mean +/- std"; scalars are printed as-is.
        if isinstance(x, (list, tuple, np.ndarray)):
            # Backslash is doubled: '\p' is an invalid escape in a normal string.
            return f'${np.mean(x):.3f} \\pm {np.std(x):.3f}$'
        return f'${x:.3f}$'

    # str.format template: doubled braces are literal LaTeX braces, the three
    # `{}` slots receive the column spec, the table body and the caption.
    # The markup uses [H], \resizebox and a >{...} column spec, which need the
    # float, graphicx and array packages in the LaTeX preamble.
    result = """\
\\begin{{table}}[H]
\t\\resizebox{{\\textwidth}}{{!}}{{
\t\t\\begin{{tabular}}{{|>{{\\bfseries}}l|{}}}
\t\t\t\\hline
{}
\t\t\\end{{tabular}}
\t}}{}
\\end{{table}}\
"""

    columns = 'c|' * df.shape[1]

    # str() keeps non-string labels (e.g. a default RangeIndex) from raising
    # TypeError during concatenation / join.
    body = ['& ' + ' & '.join(map(str, df.columns))]

    body += [
        str(df.index[i]) +
        '&' +
        ' & '.join(map(array_fixer, df.iloc[i].values))
        for i in range(len(df))
    ]

    # Indent each row and terminate it with a LaTeX row break plus a rule.
    body = ['\t' * 3 + line + ' \\\\ \\hline' for line in body]

    # Underscores would otherwise be read by LaTeX as subscript markers.
    body = '\n'.join(body).replace('_', '\\_')

    caption = '' if caption is None else f'\\caption{{{caption}}}'

    return result.format(columns, body, caption)

In [3]:
# Each entry pairs a display name with the (only_english, adv_pretrained)
# flags later unpacked into get_model_attacks; the pairs are unzipped into the
# two parallel lists the rest of the notebook iterates over.
model_names, model_args = map(list, zip(
    ('xlm-r', (False, False)),
    ('m-bert', (False, False)),
    ('xlm-r en', (True, False)),
    ('m-bert en', (True, False)),
    ('xlm-r adv', (False, True)),
    ('m-bert adv', (False, True)),
    ('xlm-r en + adv', (True, True)),
    ('m-bert en + adv', (True, True)),
))

In [4]:
def get_model_attacks(language, model_name, only_english: bool = False, adv_pretrained: bool = False):
    """Read the attack-results CSV for one model variant.

    The file is looked up relative to the current working directory at
    ``results/<language>/<model_name>_<only_english>_<adv_pretrained>.csv``,
    with both flags written as 0 or 1.

    Returns:
        A DataFrame whose index is the first CSV column.
    """
    english_flag = int(only_english)
    adv_flag = int(adv_pretrained)
    csv_path = f'results/{language}/{model_name}_{english_flag}_{adv_flag}.csv'
    return pd.read_csv(csv_path, index_col=0)

In [10]:
# Preview the raw results table for m-bert on 'en' with both flags left at their defaults (False).
get_model_attacks('en', 'm-bert')

Unnamed: 0,intent_acc,slot_f1,sementic_frame_acc,loss
No attack,0.978808,0.947356,0.854305,0.353075
Word level [de],0.909934,0.69403,0.243709,2.231606
Word level [es],0.876821,0.517572,0.082119,3.328885
Word level [fr],0.872848,0.510785,0.059603,3.951247
Word level [ja],0.817219,0.438087,0.088742,3.204326
Word level [pt],0.860927,0.496828,0.033113,3.891992
Word level [zh],0.855629,0.677183,0.211921,2.211372
Alignments [de],0.948344,0.785827,0.483444,1.437395
Alignments [es],0.937748,0.8041,0.445033,1.226217
Alignments [fr],0.940397,0.755213,0.349669,1.671828


In [5]:
# Language whose results directory is read by the comparison cells below.
language = 'en'

# Metric column name in the results CSV -> row label used in the tables.
# 'sementic_frame_acc' is spelled exactly as in the CSV header.
index_renamer = dict([
    ('intent_acc', 'Intent accuracy'),
    ('slot_f1', 'Slot F1 score'),
    ('sementic_frame_acc', 'Semantic accuracy'),
    ('loss', 'Loss'),
])

In [6]:
# Compare the models with each other on the plain test set (the 'No attack' row).

output = {'xlm-r': {}, 'm-bert': {}}

for model_name, model_arg in zip(model_names, model_args):
    # The first word of the display name is the model family, which is both
    # the file-name prefix and the key of the per-family table.
    family = model_name.split()[0]
    df = get_model_attacks(language, family, *model_arg)
    output[family][model_name] = df.loc['No attack'].to_dict()

# Turn each family's {variant: {metric: value}} mapping into a metrics x variants table.
for family in tuple(output):
    output[family] = pd.DataFrame.from_dict(output[family]).rename(index=index_renamer)

output

{'xlm-r':                       xlm-r  xlm-r en  xlm-r adv  xlm-r en + adv
 Intent accuracy    0.980132  0.901987   0.980132        0.962914
 Slot F1 score      0.943711  0.870407   0.947758        0.899080
 Semantic accuracy  0.826490  0.558940   0.842384        0.670199
 Loss               0.317247  0.729068   0.292712        0.574755,
 'm-bert':                      m-bert  m-bert en  m-bert adv  m-bert en + adv
 Intent accuracy    0.978808   0.952318    0.974834         0.948344
 Slot F1 score      0.947356   0.899340    0.951906         0.907951
 Semantic accuracy  0.854305   0.671523    0.846358         0.690066
 Loss               0.353075   0.584221    0.327805         0.577299}

In [7]:
# Display the XLM-R comparison table as a rendered DataFrame.
output['xlm-r']

Unnamed: 0,xlm-r,xlm-r en,xlm-r adv,xlm-r en + adv
Intent accuracy,0.980132,0.901987,0.980132,0.962914
Slot F1 score,0.943711,0.870407,0.947758,0.89908
Semantic accuracy,0.82649,0.55894,0.842384,0.670199
Loss,0.317247,0.729068,0.292712,0.574755


In [8]:
# Print the LaTeX source of the XLM-R table; the Russian caption reads
# "Table comparing the XLM-R models with each other on the test set".
print(df_to_latex(output['xlm-r'], 'Таблица сравнения моделей XLM-R между собой на тестовой выборке'))

\begin{table}[H]
	\resizebox{\textwidth}{!}{
		\begin{tabular}{|>{\bfseries}l|c|c|c|c|}
			\hline
			& xlm-r & xlm-r en & xlm-r adv & xlm-r en + adv \\ \hline
			Intent accuracy&$0.980$ & $0.902$ & $0.980$ & $0.963$ \\ \hline
			Slot F1 score&$0.944$ & $0.870$ & $0.948$ & $0.899$ \\ \hline
			Semantic accuracy&$0.826$ & $0.559$ & $0.842$ & $0.670$ \\ \hline
			Loss&$0.317$ & $0.729$ & $0.293$ & $0.575$ \\ \hline
		\end{tabular}
	}\caption{Таблица сравнения моделей XLM-R между собой на тестовой выборке}
\end{table}


In [9]:
# Print the LaTeX source of the M-BERT table; the Russian caption reads
# "Table comparing the M-BERT models with each other on the test set".
print(df_to_latex(output['m-bert'], 'Таблица сравнения моделей M-BERT между собой на тестовой выборке'))

\begin{table}[H]
	\resizebox{\textwidth}{!}{
		\begin{tabular}{|>{\bfseries}l|c|c|c|c|}
			\hline
			& m-bert & m-bert en & m-bert adv & m-bert en + adv \\ \hline
			Intent accuracy&$0.979$ & $0.952$ & $0.975$ & $0.948$ \\ \hline
			Slot F1 score&$0.947$ & $0.899$ & $0.952$ & $0.908$ \\ \hline
			Semantic accuracy&$0.854$ & $0.672$ & $0.846$ & $0.690$ \\ \hline
			Loss&$0.353$ & $0.584$ & $0.328$ & $0.577$ \\ \hline
		\end{tabular}
	}\caption{Таблица сравнения моделей M-BERT между собой на тестовой выборке}
\end{table}


In [11]:
# Compare the models with each other on the Word level attack: every metric
# keeps the list of its values over all 'Word level' rows, which df_to_latex
# renders as mean +/- std.

output = {'xlm-r': {}, 'm-bert': {}}

for model_name, model_arg in zip(model_names, model_args):
    family = model_name.split()[0]
    df = get_model_attacks(language, family, *model_arg)

    # One initially empty list per metric column.
    collected = {key: [] for key in df.columns}

    for idx, row in df.iterrows():
        if 'Word level' not in idx:
            continue  # row belongs to another attack or to 'No attack'
        for key in df.columns:
            collected[key].append(row[key])

    output[family][model_name] = collected

# Turn each family's {variant: {metric: [values]}} mapping into a metrics x variants table.
for family in tuple(output):
    output[family] = pd.DataFrame.from_dict(output[family]).rename(index=index_renamer)

output

{'xlm-r':                                                                xlm-r  \
 Intent accuracy    [0.933774834437086, 0.8900662251655629, 0.8516...   
 Slot F1 score      [0.7617957522574081, 0.5970343392299687, 0.604...   
 Semantic accuracy  [0.343046357615894, 0.1311258278145695, 0.0900...   
 Loss               [1.6738503978440635, 2.946076341679221, 3.6615...   
 
                                                             xlm-r en  \
 Intent accuracy    [0.8079470198675497, 0.7894039735099337, 0.774...   
 Slot F1 score      [0.6421704201237983, 0.4658109108862009, 0.497...   
 Semantic accuracy  [0.1920529801324503, 0.0543046357615894, 0.060...   
 Loss               [2.306877105173312, 3.455510059155916, 3.84072...   
 
                                                            xlm-r adv  \
 Intent accuracy    [0.9390728476821192, 0.91523178807947, 0.87549...   
 Slot F1 score      [0.7783894507745488, 0.6121043570388028, 0.602...   
 Semantic accuracy  [0.386754966887417

In [12]:
# Print the LaTeX source of the XLM-R table; the Russian caption reads
# "Table comparing the XLM-R models after the Word level attack".
print(df_to_latex(output['xlm-r'], 'Таблица сравнения моделей XLM-R после атаки Word level'))

\begin{table}[H]
	\resizebox{\textwidth}{!}{
		\begin{tabular}{|>{\bfseries}l|c|c|c|c|}
			\hline
			& xlm-r & xlm-r en & xlm-r adv & xlm-r en + adv \\ \hline
			Intent accuracy&$0.885 \pm 0.035$ & $0.727 \pm 0.081$ & $0.893 \pm 0.037$ & $0.851 \pm 0.035$ \\ \hline
			Slot F1 score&$0.642 \pm 0.080$ & $0.550 \pm 0.069$ & $0.651 \pm 0.078$ & $0.568 \pm 0.065$ \\ \hline
			Semantic accuracy&$0.179 \pm 0.097$ & $0.065 \pm 0.059$ & $0.191 \pm 0.105$ & $0.089 \pm 0.067$ \\ \hline
			Loss&$2.627 \pm 0.727$ & $3.232 \pm 0.809$ & $2.424 \pm 0.667$ & $2.624 \pm 0.612$ \\ \hline
		\end{tabular}
	}\caption{Таблица сравнения моделей XLM-R после атаки Word level}
\end{table}


In [14]:
# Print the LaTeX source of the M-BERT table; the Russian caption reads
# "Table comparing the M-BERT models after the Word level attack".
print(df_to_latex(output['m-bert'], 'Таблица сравнения моделей M-BERT после атаки Word level'))

\begin{table}[H]
	\resizebox{\textwidth}{!}{
		\begin{tabular}{|>{\bfseries}l|c|c|c|c|}
			\hline
			& m-bert & m-bert en & m-bert adv & m-bert en + adv \\ \hline
			Intent accuracy&$0.866 \pm 0.028$ & $0.771 \pm 0.032$ & $0.863 \pm 0.023$ & $0.781 \pm 0.046$ \\ \hline
			Slot F1 score&$0.556 \pm 0.095$ & $0.444 \pm 0.083$ & $0.585 \pm 0.086$ & $0.489 \pm 0.064$ \\ \hline
			Semantic accuracy&$0.120 \pm 0.079$ & $0.056 \pm 0.053$ & $0.145 \pm 0.088$ & $0.090 \pm 0.065$ \\ \hline
			Loss&$3.137 \pm 0.701$ & $3.335 \pm 0.662$ & $2.878 \pm 0.611$ & $3.019 \pm 0.512$ \\ \hline
		\end{tabular}
	}\caption{Таблица сравнения моделей M-BERT после атаки Word level}
\end{table}


In [15]:
# Compare the models with each other on the Alignments attack: every metric
# keeps the list of its values over all rows whose label contains 'Align',
# which df_to_latex renders as mean +/- std.

output = {'xlm-r': {}, 'm-bert': {}}

for model_name, model_arg in zip(model_names, model_args):
    family = model_name.split()[0]
    df = get_model_attacks(language, family, *model_arg)

    # One initially empty list per metric column.
    collected = {key: [] for key in df.columns}

    for idx, row in df.iterrows():
        if 'Align' not in idx:
            continue  # row belongs to another attack or to 'No attack'
        for key in df.columns:
            collected[key].append(row[key])

    output[family][model_name] = collected

# Turn each family's {variant: {metric: [values]}} mapping into a metrics x variants table.
for family in tuple(output):
    output[family] = pd.DataFrame.from_dict(output[family]).rename(index=index_renamer)

output

{'xlm-r':                                                                xlm-r  \
 Intent accuracy    [0.9483443708609272, 0.943046357615894, 0.9390...   
 Slot F1 score      [0.7985016791526738, 0.834794975112586, 0.7515...   
 Semantic accuracy  [0.5059602649006623, 0.5046357615894039, 0.348...   
 Loss               [1.362755383904043, 1.083700372472307, 1.69466...   
 
                                                             xlm-r en  \
 Intent accuracy    [0.8132450331125828, 0.83841059602649, 0.75894...   
 Slot F1 score      [0.62045281098957, 0.7008774852826836, 0.56533...   
 Semantic accuracy  [0.1642384105960264, 0.2317880794701986, 0.092...   
 Loss               [2.0526277043317496, 1.930259105720018, 2.7359...   
 
                                                            xlm-r adv  \
 Intent accuracy    [0.9536423841059604, 0.9562913907284768, 0.936...   
 Slot F1 score      [0.8142783238489395, 0.8607534573199809, 0.771...   
 Semantic accuracy  [0.541721854304635

In [17]:
# Print the LaTeX source of the XLM-R table; the Russian caption reads
# "Table comparing the XLM-R models after the Alignments attack".
print(df_to_latex(output['xlm-r'], 'Таблица сравнения моделей XLM-R после атаки Alignments'))

\begin{table}[H]
	\resizebox{\textwidth}{!}{
		\begin{tabular}{|>{\bfseries}l|c|c|c|c|}
			\hline
			& xlm-r & xlm-r en & xlm-r adv & xlm-r en + adv \\ \hline
			Intent accuracy&$0.947 \pm 0.006$ & $0.728 \pm 0.136$ & $0.954 \pm 0.009$ & $0.864 \pm 0.040$ \\ \hline
			Slot F1 score&$0.708 \pm 0.140$ & $0.581 \pm 0.109$ & $0.721 \pm 0.148$ & $0.641 \pm 0.129$ \\ \hline
			Semantic accuracy&$0.366 \pm 0.156$ & $0.105 \pm 0.074$ & $0.405 \pm 0.164$ & $0.228 \pm 0.138$ \\ \hline
			Loss&$2.026 \pm 1.152$ & $2.860 \pm 0.826$ & $1.992 \pm 1.248$ & $1.943 \pm 0.743$ \\ \hline
		\end{tabular}
	}\caption{Таблица сравнения моделей XLM-R после атаки Alignments}
\end{table}


In [18]:
# Print the LaTeX source of the M-BERT table; the Russian caption reads
# "Table comparing the M-BERT models after the Alignments attack".
print(df_to_latex(output['m-bert'], 'Таблица сравнения моделей M-BERT после атаки Alignments'))

\begin{table}[H]
	\resizebox{\textwidth}{!}{
		\begin{tabular}{|>{\bfseries}l|c|c|c|c|}
			\hline
			& m-bert & m-bert en & m-bert adv & m-bert en + adv \\ \hline
			Intent accuracy&$0.942 \pm 0.004$ & $0.828 \pm 0.020$ & $0.950 \pm 0.005$ & $0.818 \pm 0.035$ \\ \hline
			Slot F1 score&$0.700 \pm 0.127$ & $0.536 \pm 0.096$ & $0.728 \pm 0.137$ & $0.577 \pm 0.150$ \\ \hline
			Semantic accuracy&$0.348 \pm 0.127$ & $0.113 \pm 0.055$ & $0.406 \pm 0.158$ & $0.198 \pm 0.113$ \\ \hline
			Loss&$2.118 \pm 1.143$ & $2.474 \pm 0.591$ & $1.935 \pm 1.135$ & $2.252 \pm 0.825$ \\ \hline
		\end{tabular}
	}\caption{Таблица сравнения моделей M-BERT после атаки Alignments}
\end{table}
