In [1]:
import numpy as np
import pandas as pd

from dataset import read_atis
from utils import load_config

import matplotlib.pyplot as plt
import seaborn as sns


# Global plot styling: seaborn's whitegrid theme with enlarged fonts is applied
# first, then the matplotlib font family is overridden with Verdana.
sns.set(style='whitegrid', font_scale=1.5)
plt.rc('font', family='Verdana')


# Project configuration as returned by the project-local `load_config` helper.
config = load_config()

# Output directories: the generated `<N>.tex` table files are written under
# `data_path`; the rendered PDF figures and their LaTeX snippets under `graph_path`.
# NOTE(review): absolute, machine-specific paths - the notebook only runs
# unchanged on the original author's machine.
data_path = '/home/lesha/diploma/pieces of paper/reports/main/tables/'
graph_path = '/home/lesha/diploma/pieces of paper/reports/main/images/'

In [2]:
# Running counters used to number generated LaTeX labels. `label_num` is read
# and incremented by the table builders (`tab:table<N>`), `graph_num` by the
# plotting helpers (`fig:figure<N>` and the `<N>.pdf` file name).
# Re-running this cell resets both counters to zero.
label_num = 0
graph_num = 0


def array_fixer(x):
    """Format one table cell as an inline LaTeX math string.

    Parameters
    ----------
    x : list, tuple, numpy.ndarray or number
        A collection of repeated measurements, or a single scalar value.

    Returns
    -------
    str
        For a collection: the mean and the (population) standard deviation,
        both with three decimals, joined by LaTeX's plus-minus command and
        wrapped in ``$...$``. For a scalar: the value with three decimals
        wrapped in ``$...$``.
    """
    if isinstance(x, (list, tuple, np.ndarray)):
        # The backslash is doubled on purpose: a bare "\p" is an invalid escape
        # sequence in a non-raw string literal.
        return f'${np.mean(x):.3f} \\pm {np.std(x):.3f}$'
    return f'${x:.3f}$'


def df_to_latex(df, caption: str = None):
    """Render a DataFrame as a LaTeX ``table`` environment.

    The first table column holds the bold row labels taken from ``df.index``;
    the remaining columns are the frame's columns. Every cell is formatted with
    ``array_fixer`` and underscores in the table body are escaped for LaTeX.

    Side effect: reads and then increments the module-level ``label_num`` so
    each generated table receives a unique ``tab:table<N>`` label.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose cells are numbers or collections of numbers.
    caption : str, optional
        Caption text inserted verbatim (it is NOT escaped); omitted when None.

    Returns
    -------
    str
        The complete LaTeX source of the table.
    """
    global label_num

    template = """\
\\begin{{table}}[H]
\t\\resizebox{{\\textwidth}}{{!}}{{
\t\t\\begin{{tabular}}{{|>{{\\bfseries}}l|{}}}
\t\t\t\\hline
{}
\t\t\\end{{tabular}}
\t}}{}
\\end{{table}}\
"""

    # One centred, ruled column per DataFrame column.
    column_spec = 'c|' * df.shape[1]

    # str() keeps non-string column / index labels from breaking the joins.
    rows = ['& ' + ' & '.join(map(str, df.columns))]
    rows += [
        str(row_label) + '&' + ' & '.join(map(array_fixer, row.values))
        for row_label, row in df.iterrows()
    ]

    # Indent every row and terminate it with a LaTeX line break plus a rule.
    body = '\n'.join('\t' * 3 + row + ' \\\\ \\hline' for row in rows)
    body = body.replace('_', '\\_')

    # "\\caption": the backslash must be doubled, "\c" is an invalid escape.
    caption_tex = f'\\caption{{{caption}}}' if caption is not None else ''
    caption_tex += f'\\label{{tab:table{label_num}}}'

    label_num += 1

    return template.format(column_spec, body, caption_tex)


def attack_to_latex(original, adv, caption: str = None):
    """Render an original / adversarial utterance pair as a two-row LaTeX table.

    Both utterances are split on whitespace, one token per table cell; the
    shorter one is padded with blank cells so both rows have the same width.
    Underscores in the table body are escaped for LaTeX.

    Side effect: reads and then increments the module-level ``label_num`` so
    each generated table receives a unique ``tab:table<N>`` label.

    Parameters
    ----------
    original : str
        The source utterance (row labelled "Utterance en").
    adv : str
        The adversarial utterance (row labelled "Utterance adv").
    caption : str, optional
        Caption text inserted verbatim (it is NOT escaped); omitted when None.

    Returns
    -------
    str
        The complete LaTeX source of the table.
    """
    global label_num

    template = """\
\\begin{{table}}[H]
\t\\resizebox{{\\textwidth}}{{!}}{{
\t\t\\begin{{tabular}}{{|>{{\\bfseries}}l|{}|}}
\t\t\t\\hline
{}
\t\t\\end{{tabular}}
\t}}{}
\\end{{table}}\
"""

    # Tokenise each utterance exactly once.
    original_tokens = original.split()
    adv_tokens = adv.split()
    num_columns = max(len(original_tokens), len(adv_tokens))

    # Pad the shorter row with blank cells so both rows span every column.
    original_tokens = original_tokens + [' '] * (num_columns - len(original_tokens))
    adv_tokens = adv_tokens + [' '] * (num_columns - len(adv_tokens))

    column_spec = 'c' * num_columns

    rows = [
        'Utterance en &' + ' & '.join(original_tokens),
        'Utterance adv &' + ' & '.join(adv_tokens),
    ]

    # Indent every row and terminate it with a LaTeX line break plus a rule.
    body = '\n'.join('\t' * 3 + row + ' \\\\ \\hline' for row in rows)
    body = body.replace('_', '\\_')

    # "\\caption": the backslash must be doubled, "\c" is an invalid escape.
    caption_tex = f'\\caption{{{caption}}}' if caption is not None else ''
    caption_tex += f'\\label{{tab:table{label_num}}}'

    label_num += 1

    return template.format(column_spec, body, caption_tex)

def plot_df(df, value, caption):
    """Save a grouped bar chart of ``df`` and return its LaTeX figure snippet.

    ``df`` is melted to long form (index -> 'Model name', columns ->
    'Language', cells -> ``value``) and drawn with one bar group per language
    and one hue per model. The figure is saved as ``<graph_num>.pdf`` under
    ``graph_path`` and then closed.

    Side effect: reads and then increments the module-level ``graph_num``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an unnamed index (``reset_index`` must yield an 'index' column).
    value : str
        Name given to the melted value column; used as the y-axis variable.
    caption : str
        Caption text inserted verbatim into the LaTeX snippet.

    Returns
    -------
    str
        LaTeX ``figure`` environment referencing ``images/<graph_num>``.
    """
    global graph_num

    long_form = (
        df.reset_index()
        .melt(id_vars='index')
        .rename(columns={'index': 'Model name', 'variable': 'Language', 'value': value})
    )

    fig, ax = plt.subplots(figsize=(16, 8))
    sns.barplot(data=long_form, x='Language', y=value, hue='Model name', ax=ax, alpha=0.75, saturation=0.75)

    # Legend goes below the axes; `ax` is the current axes of the fresh figure.
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), borderaxespad=0, ncol=4)
    sns.despine(fig)

    fig.savefig(graph_path + f'{graph_num}.pdf',  bbox_inches='tight')
    plt.close(fig)

    # Built before the counter moves on, so it refers to the file just saved.
    snippet = f'''\
\\begin{{figure}}[h!]
    \\centering
    \\includegraphics[width=\\textwidth]{{images/{graph_num}}}
    \\caption{{{caption}}}\\label{{fig:figure{graph_num}}}
\\end{{figure}}\
'''

    graph_num += 1

    return snippet


def _melt_for_barplot(df, value):
    """Reshape a models-by-languages frame into long form for ``sns.barplot``."""
    return df.reset_index().melt(id_vars='index').rename(
        columns={
            'value': value,
            'index': 'Model name',
            'variable': 'Language'
        }
    )


def plot_dfs(df1, df2, value, caption, first_title, second_title):
    """Overlay two grouped bar charts and return the LaTeX figure snippet.

    ``df1`` is drawn as filled, semi-transparent bars and ``df2`` on the same
    axes as transparent bars with a dark outline. Each series gets its own
    legend below the axes (``first_title`` above ``second_title``). The figure
    is saved as ``<graph_num>.pdf`` under ``graph_path`` and then closed.

    Side effect: reads and then increments the module-level ``graph_num``.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames with an unnamed index (``reset_index`` must yield an 'index' column).
    value : str
        Name given to the melted value column; used as the y-axis variable.
    caption : str
        Caption text inserted verbatim into the LaTeX snippet.
    first_title, second_title : str
        Legend titles for the ``df1`` and ``df2`` series respectively.

    Returns
    -------
    str
        LaTeX ``figure`` environment referencing ``images/<graph_num>``.
    """
    global graph_num

    result = f'''\
\\begin{{figure}}[h!]
    \\centering
    \\includegraphics[width=\\textwidth]{{images/{graph_num}}}
    \\caption{{{caption}}}\\label{{fig:figure{graph_num}}}
\\end{{figure}}\
'''

    fig, ax = plt.subplots(figsize=(16, 8))

    data1 = _melt_for_barplot(df1, value)
    data2 = _melt_for_barplot(df2, value)

    sns.barplot(x='Language', y=value, hue='Model name', data=data1, ax=ax, alpha=0.75, saturation=0.75)
    # Keep a reference: the second barplot / legend call replaces the axes'
    # legend, so this one is re-attached as a plain artist further down.
    leg1 = ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), borderaxespad=0, ncol=4, title=first_title)

    sns.barplot(x='Language', y=value, hue='Model name', data=data2, ax=ax, facecolor=(1, 1, 1, 0), linewidth=2.5, edgecolor='.2')

    handles, labels = ax.get_legend_handles_labels()

    # The handles of the first series come first (one per model in df1); skip
    # them so the second legend lists only the df2 series. Derived from the
    # data instead of a hard-coded 4 so other model counts work too.
    n_first = data1['Model name'].nunique()
    ax.legend(handles[n_first:], labels[n_first:], loc='upper center', bbox_to_anchor=(0.5, -0.3), borderaxespad=0, ncol=4, title=second_title)

    ax.add_artist(leg1)
    sns.despine(fig)

    fig.savefig(graph_path + f'{graph_num}.pdf',  bbox_inches='tight')
    plt.close(fig)

    graph_num += 1

    return result

In [3]:
# One row per evaluated model variant: (display name, only_english, adv_pretrained).
# The two flags are later unpacked into `get_model_attacks(..., *model_arg)`.
# Keeping names and flags in a single table guarantees the two derived lists
# stay aligned position by position.
_MODEL_VARIANTS = [
    ('xlm-r', False, False),
    ('m-bert', False, False),
    ('xlm-r en', True, False),
    ('m-bert en', True, False),
    ('xlm-r adv', False, True),
    ('m-bert adv', False, True),
    ('xlm-r en + adv', True, True),
    ('m-bert en + adv', True, True),
]

model_names = [name for name, _, _ in _MODEL_VARIANTS]

model_args = [(only_english, adv_pretrained) for _, only_english, adv_pretrained in _MODEL_VARIANTS]

In [4]:
def get_model_attacks(language, model_name, only_english: bool = False, adv_pretrained: bool = False):
    """Load the stored results table for one model variant.

    The file is looked up relative to the working directory at
    ``results/<language>/<model_name>_<only_english>_<adv_pretrained>.csv``,
    with both flags rendered as 0 / 1.

    Parameters
    ----------
    language : str
        Sub-directory of ``results`` to read from.
    model_name : str
        Model identifier used as the file-name prefix.
    only_english, adv_pretrained : bool
        Variant flags encoded into the file name.

    Returns
    -------
    pandas.DataFrame
        The CSV contents, with the first CSV column used as the index.
    """
    csv_path = f'results/{language}/{model_name}_{int(only_english)}_{int(adv_pretrained)}.csv'
    return pd.read_csv(csv_path, index_col=0)

In [5]:
# Maps the metric column names of the results CSVs to the human-readable
# metric names used as table / figure titles.
# The keys are looked up verbatim in the loaded frames (`df[key]`), so the
# spelling 'sementic_frame_acc' must stay as it is unless the CSV headers change.
index_renamer = dict(
    intent_acc='Intent accuracy',
    slot_f1='Slots F1 score',
    sementic_frame_acc='Semantic accuracy',
)

In [6]:
# Model comparison on the plain test set (models WITHOUT the defence).
# Result: output1[<metric title>] is a DataFrame with one row per model and one
# column per entry of the results file, plus an 'avg' column.

output1 = {title: {} for title in index_renamer.values()}

for model_name, model_arg in zip(model_names[:4], model_args):
    # The first word of the display name is the model identifier used on disk.
    df = get_model_attacks('test', model_name.split()[0], *model_arg)

    for key in index_renamer.keys():
        values = df[key].to_dict()
        # The average is taken over the loaded entries, before 'avg' is added.
        values['avg'] = np.mean(list(values.values()))
        output1[index_renamer[key]][model_name] = values

output1 = {
    title: pd.DataFrame.from_dict(per_model).rename(index=index_renamer).transpose()
    for title, per_model in output1.items()
}

In [7]:
# Write one LaTeX table and one figure snippet per metric for the plain
# test-set comparison: tables go to `data_path`/1.tex, figure snippets to
# `graph_path`/1.tex.
# encoding='utf-8' is explicit because the captions are Cyrillic and the
# default encoding of open() depends on the platform locale.
with open(data_path + '1.tex', 'w', encoding='utf-8') as f, \
        open(graph_path + '1.tex', 'w', encoding='utf-8') as f2:
    for key in output1.keys():
        print(
            df_to_latex(
                output1[key],
                f'Сравнение моделей между собой \\textbf{{на тестовой выборке}} датасета MultiAtis++ по метрике \\textbf{{{key}}}. По колонкам языки тестовых подвыборок, по рядам тестируемые модели.'
            ),
            file=f,
        )

        print(
            plot_df(
                output1[key],
                key,
                f'Сравнение моделей между собой \\textbf{{на тестовой выборке}} датасета MultiAtis++ по метрике \\textbf{{{key}}}.'
            ),
            file=f2
        )

In [8]:
# Model comparison under the WORD-LEVEL attack (models without the defence).
# Result: output2[<metric title>] is a DataFrame with one row per model and one
# column per bracketed tag found in the selected result rows, plus 'avg'.

output2 = {title: {} for title in index_renamer.values()}

for model_name, model_arg in zip(model_names[:4], model_args):
    df = get_model_attacks('en', model_name.split()[0], *model_arg)

    # Select the result rows whose label contains 'Word'.
    mask = df.index.map(lambda x: 'Word' in x)

    for key in index_renamer.keys():
        values = df[mask][key].to_dict()
        # '[avg]' is bracketed so that it survives the tag extraction below.
        values['[avg]'] = np.mean(list(values.values()))
        # Keep only the text between '[' and ']' of every row label.
        output2[index_renamer[key]][model_name] = {
            label[label.find('[') + 1:label.find(']')]: score
            for label, score in values.items()
        }

output2 = {
    title: pd.DataFrame.from_dict(per_model).rename(index=index_renamer).transpose()
    for title, per_model in output2.items()
}

In [9]:
# Write one LaTeX table and one figure snippet per metric for the word-level
# attack: tables go to `data_path`/2.tex, figure snippets to `graph_path`/2.tex.
# Each figure overlays the un-attacked scores (output1) with the attacked ones
# (output2).
# encoding='utf-8' is explicit because the captions are Cyrillic and the
# default encoding of open() depends on the platform locale.
with open(data_path + '2.tex', 'w', encoding='utf-8') as f, \
        open(graph_path + '2.tex', 'w', encoding='utf-8') as f2:
    for key in output2.keys():
        print(
            df_to_latex(
                output2[key],
                f'Сравнение моделей между собой после \\textbf{{word-level}} атаки на тестовую выборку датасета MultiAtis++ по метрике \\textbf{{{key}}}. По колонкам встраиваемые языки, по рядам тестируемые модели.'
            ),
            file=f,
        )

        print(
            plot_dfs(
                output1[key],
                output2[key],
                key,
                f'Сравнение моделей между собой после \\textbf{{word-level}} атаки на тестовую выборку датасета MultiAtis++ по метрике \\textbf{{{key}}}.',
                'До атаки',
                'После атаки'
            ),
            file=f2
        )

In [10]:
# Model comparison under the ALIGNMENTS (phrase-level) attack, models without
# the defence.
# Result: output3[<metric title>] is a DataFrame with one row per model and one
# column per bracketed tag found in the selected result rows, plus 'avg'.

output3 = {title: {} for title in index_renamer.values()}

for model_name, model_arg in zip(model_names[:4], model_args):
    df = get_model_attacks('en', model_name.split()[0], *model_arg)

    # Select the result rows whose label contains 'Align'.
    mask = df.index.map(lambda x: 'Align' in x)

    for key in index_renamer.keys():
        values = df[mask][key].to_dict()
        # '[avg]' is bracketed so that it survives the tag extraction below.
        values['[avg]'] = np.mean(list(values.values()))
        # Keep only the text between '[' and ']' of every row label.
        output3[index_renamer[key]][model_name] = {
            label[label.find('[') + 1:label.find(']')]: score
            for label, score in values.items()
        }

output3 = {
    title: pd.DataFrame.from_dict(per_model).rename(index=index_renamer).transpose()
    for title, per_model in output3.items()
}

In [11]:
# Write one LaTeX table and one figure snippet per metric for the phrase-level
# attack: tables go to `data_path`/3.tex, figure snippets to `graph_path`/3.tex.
# Each figure overlays the un-attacked scores (output1) with the attacked ones
# (output3).
# encoding='utf-8' is explicit because the captions are Cyrillic and the
# default encoding of open() depends on the platform locale.
with open(data_path + '3.tex', 'w', encoding='utf-8') as f, \
        open(graph_path + '3.tex', 'w', encoding='utf-8') as f2:
    for key in output3.keys():
        print(
            df_to_latex(
                output3[key],
                f'Сравнение моделей между собой после \\textbf{{phrase-level}} атаки на тестовую выборку датасета MultiAtis++ по метрике \\textbf{{{key}}}. По колонкам встраиваемые языки, по рядам тестируемые модели.'
            ),
            file=f,
        )

        print(
            plot_dfs(
                output1[key],
                output3[key],
                key,
                f'Сравнение моделей между собой после \\textbf{{phrase-level}} атаки на тестовую выборку датасета MultiAtis++ по метрике \\textbf{{{key}}}.',
                'До атаки',
                'После атаки'
            ),
            file=f2
        )

In [12]:
# Model comparison on the plain test set (models WITH the defence).
# Result: output4[<metric title>] is a DataFrame with one row per model and one
# column per entry of the results file, plus an 'avg' column.

output4 = {title: {} for title in index_renamer.values()}

for model_name, model_arg in zip(model_names[4:], model_args[4:]):
    # The first word of the display name is the model identifier used on disk.
    df = get_model_attacks('test', model_name.split()[0], *model_arg)

    for key in index_renamer.keys():
        values = df[key].to_dict()
        # The average is taken over the loaded entries, before 'avg' is added.
        values['avg'] = np.mean(list(values.values()))
        output4[index_renamer[key]][model_name] = values

output4 = {
    title: pd.DataFrame.from_dict(per_model).rename(index=index_renamer).transpose()
    for title, per_model in output4.items()
}

In [13]:
# Write one LaTeX table and one figure snippet per metric for the defended
# models on the plain test set: tables go to `data_path`/4.tex, figure snippets
# to `graph_path`/4.tex. Each figure overlays the defended scores (output4)
# with the undefended ones (output1).
# encoding='utf-8' is explicit because the captions are Cyrillic and the
# default encoding of open() depends on the platform locale.
with open(data_path + '4.tex', 'w', encoding='utf-8') as f, \
        open(graph_path + '4.tex', 'w', encoding='utf-8') as f2:
    for key in output4.keys():
        print(
            df_to_latex(
                output4[key],
                f'Сравнение моделей \\textbf{{с защитой}} между собой \\textbf{{на тестовой выборке}} датасета MultiAtis++ по метрике \\textbf{{{key}}}. По колонкам языки тестовых подвыборок, по рядам тестируемые модели.'
            ),
            file=f,
        )

        print(
            plot_dfs(
                output4[key],
                output1[key],
                key,
                f'Сравнение моделей \\textbf{{с защитой}} между собой \\textbf{{на тестовой выборке}} датасета MultiAtis++ по метрике \\textbf{{{key}}}.',
                'С защитой',
                'Без защиты'
            ),
            file=f2
        )

In [14]:
# Model comparison under the WORD-LEVEL attack (models with the defence, i.e.
# the last four entries of `model_names`).
# Result: output5[<metric title>] is a DataFrame with one row per model and one
# column per bracketed tag found in the selected result rows, plus 'avg'.

output5 = {title: {} for title in index_renamer.values()}

for model_name, model_arg in zip(model_names[4:], model_args[4:]):
    df = get_model_attacks('en', model_name.split()[0], *model_arg)

    # Select the result rows whose label contains 'Word'.
    mask = df.index.map(lambda x: 'Word' in x)

    for key in index_renamer.keys():
        values = df[mask][key].to_dict()
        # '[avg]' is bracketed so that it survives the tag extraction below.
        values['[avg]'] = np.mean(list(values.values()))
        # Keep only the text between '[' and ']' of every row label.
        output5[index_renamer[key]][model_name] = {
            label[label.find('[') + 1:label.find(']')]: score
            for label, score in values.items()
        }

output5 = {
    title: pd.DataFrame.from_dict(per_model).rename(index=index_renamer).transpose()
    for title, per_model in output5.items()
}

In [15]:
# Write one LaTeX table and one figure snippet per metric for the defended
# models under the word-level attack: tables go to `data_path`/5.tex, figure
# snippets to `graph_path`/5.tex. Each figure overlays the defended scores
# (output5) with the undefended ones (output2).
# encoding='utf-8' is explicit because the captions are Cyrillic and the
# default encoding of open() depends on the platform locale.
with open(data_path + '5.tex', 'w', encoding='utf-8') as f, \
        open(graph_path + '5.tex', 'w', encoding='utf-8') as f2:
    for key in output5.keys():
        print(
            df_to_latex(
                output5[key],
                f'Сравнение моделей \\textbf{{с защитой}} между собой после \\textbf{{word-level}} атаки на тестовую выборку датасета MultiAtis++ по метрике \\textbf{{{key}}}. По колонкам встраиваемые языки, по рядам тестируемые модели.'
            ),
            file=f,
        )

        print(
            plot_dfs(
                output5[key],
                output2[key],
                key,
                f'Сравнение моделей \\textbf{{с защитой}} между собой после \\textbf{{word-level}} атаки на тестовую выборку датасета MultiAtis++ по метрике \\textbf{{{key}}}.',
                # Legend title spelled with a Cyrillic first letter, matching
                # the same title in the other "with defence" figures.
                'С защитой',
                'Без защиты'
            ),
            file=f2
        )

In [16]:
# Model comparison under the ALIGNMENTS (phrase-level) attack, models with the
# defence (the last four entries of `model_names`).
# Result: output6[<metric title>] is a DataFrame with one row per model and one
# column per bracketed tag found in the selected result rows, plus 'avg'.

output6 = {title: {} for title in index_renamer.values()}

for model_name, model_arg in zip(model_names[4:], model_args[4:]):
    df = get_model_attacks('en', model_name.split()[0], *model_arg)

    # Select the result rows whose label contains 'Align'.
    mask = df.index.map(lambda x: 'Align' in x)

    for key in index_renamer.keys():
        values = df[mask][key].to_dict()
        # '[avg]' is bracketed so that it survives the tag extraction below.
        values['[avg]'] = np.mean(list(values.values()))
        # Keep only the text between '[' and ']' of every row label.
        output6[index_renamer[key]][model_name] = {
            label[label.find('[') + 1:label.find(']')]: score
            for label, score in values.items()
        }

output6 = {
    title: pd.DataFrame.from_dict(per_model).rename(index=index_renamer).transpose()
    for title, per_model in output6.items()
}

In [17]:
# Write one LaTeX table and one figure snippet per metric for the defended
# models under the phrase-level attack: tables go to `data_path`/6.tex, figure
# snippets to `graph_path`/6.tex. Each figure overlays the defended scores
# (output6) with the undefended ones (output3).
# encoding='utf-8' is explicit because the captions are Cyrillic and the
# default encoding of open() depends on the platform locale.
with open(data_path + '6.tex', 'w', encoding='utf-8') as f, \
        open(graph_path + '6.tex', 'w', encoding='utf-8') as f2:
    for key in output6.keys():
        print(
            df_to_latex(
                output6[key],
                f'Сравнение моделей \\textbf{{с защитой}} между собой после \\textbf{{phrase-level}} атаки на тестовую выборку датасета MultiAtis++ по метрике \\textbf{{{key}}}. По колонкам встраиваемые языки, по рядам тестируемые модели.'
            ),
            file=f,
        )

        print(
            plot_dfs(
                output6[key],
                output3[key],
                key,
                f'Сравнение моделей \\textbf{{с защитой}} между собой после \\textbf{{phrase-level}} атаки на тестовую выборку датасета MultiAtis++ по метрике \\textbf{{{key}}}.',
                'С защитой',
                'Без защиты'
            ),
            file=f2
        )

In [18]:
# Data loaded through the project-local `read_atis` helper for the 'test'
# split and the 'en' language, extended with a 'len' column holding the number
# of whitespace-separated tokens of every utterance.
test = read_atis('test', ['en'])
test['len'] = [len(utterance.split()) for utterance in test['utterance']]

from adversarial import AdversarialWordLevel
from adversarial import AdversarialAlignments

In [19]:
from utils import load_config
from utils import save_config

In [20]:
# Candidate attack languages; one is drawn with np.random.choice per example.
# NOTE(review): 'es' is listed twice, so Spanish is drawn twice as often as the
# other languages - possibly a typo for another language code; confirm intent.
languages = ['de', 'es', 'fr', 'es', 'pt']

In [21]:
# Write three example word-level attacks against the base xlm-r model to
# `data_path`/7.tex, one two-row LaTeX table per example.
# encoding='utf-8' is explicit because the captions are Cyrillic and the
# default encoding of open() depends on the platform locale.
# NOTE(review): no random seed is set in the visible cells, so the sampled
# utterances and attack languages differ between runs.
with open(data_path + '7.tex', 'w', encoding='utf-8') as f:
    model_name = 'xlm-r'
    model_arg = (False, False)

    # The model selection is written back to the project config before the
    # attack object is created; presumably the attack classes read the model
    # to load from that saved config - TODO confirm.
    config = load_config()

    config['model_name'] = model_name
    config['only_english'] = model_arg[0]
    config['load_adv_pretrained'] = model_arg[1]

    save_config(config)

    f1 = AdversarialWordLevel(base_language='en')
    f1.port_model()

    for i in range(3):
        f1.change_attack_language(np.random.choice(languages))

        # One random utterance of exactly 9 tokens, so every table has the
        # same number of columns.
        random = test[test['len'] == 9].sample(1).iloc[0]

        # [0][0]: first element of the first returned item; presumably the
        # adversarial token sequence for the single input - TODO confirm.
        adv = ' '.join(
            f1.attack(x=[random['utterance']], y_slots=[random['slot_labels']], y_intent=[random['intent']])[0][0]
        )

        print(attack_to_latex(
            random['utterance'], adv, caption=f'Пример {i + 1} атаки модели XLM-RoBERTa (xlm-r) word-level атакой.'
        ), file=f)

In [22]:
# Write three example phrase-level (alignment) attacks against the base xlm-r
# model to `data_path`/8.tex, one two-row LaTeX table per example.
# encoding='utf-8' is explicit because the captions are Cyrillic and the
# default encoding of open() depends on the platform locale.
# NOTE(review): no random seed is set in the visible cells, so the sampled
# utterances and attack languages differ between runs.
with open(data_path + '8.tex', 'w', encoding='utf-8') as f:
    model_name = 'xlm-r'
    model_arg = (False, False)

    # The model selection is written back to the project config before the
    # attack object is created; presumably the attack classes read the model
    # to load from that saved config - TODO confirm.
    config = load_config()

    config['model_name'] = model_name
    config['only_english'] = model_arg[0]
    config['load_adv_pretrained'] = model_arg[1]

    save_config(config)

    f1 = AdversarialAlignments(base_language='en')
    f1.port_model()

    for i in range(3):
        f1.change_attack_language(np.random.choice(languages))

        # One random utterance of exactly 9 tokens, so every table has the
        # same number of columns.
        random = test[test['len'] == 9].sample(1).iloc[0]
        # The sampled row's index label selects its entry in `f1.alignments`.
        alignments = [f1.alignments[j] for j in [random.name]]

        # [0][0]: first element of the first returned item; presumably the
        # adversarial token sequence for the single input - TODO confirm.
        adv = ' '.join(
            f1.attack([random['utterance']], [random['slot_labels']], [random['intent']], alignments)[0][0]
        )

        print(attack_to_latex(
            random['utterance'], adv, caption=f'Пример {i + 1} атаки модели XLM-RoBERTa (xlm-r) phrase-level атакой.'
        ), file=f)

In [23]:
# Rows of `test` whose token count is 8, 9, 10 or 11 (bounds inclusive); the
# counts are integers, so this equals "greater than 7 and less than 12".
mask = test['len'].between(8, 11)

In [24]:
# For every m-bert variant write one word-level and one phrase-level attack
# example to `data_path`/9.tex, each as a two-row LaTeX table.
# encoding='utf-8' is explicit because the captions are Cyrillic and the
# default encoding of open() depends on the platform locale.
# NOTE(review): no random seed is set in the visible cells, so the sampled
# utterances and attack languages differ between runs.
with open(data_path + '9.tex', 'w', encoding='utf-8') as f:
    for model_name, model_arg in zip(model_names, model_args):
        # Only the m-bert variants belong in this file.
        if 'm-bert' not in model_name:
            continue

        # The variant is written back to the project config before the attack
        # objects are created; presumably they read the model to load from
        # that saved config - TODO confirm.
        config = load_config()

        # First word of the display name is the bare model identifier.
        config['model_name'] = model_name.split()[0]
        config['only_english'] = model_arg[0]
        config['load_adv_pretrained'] = model_arg[1]

        save_config(config)

        # --- word-level example ---
        f1 = AdversarialWordLevel(base_language='en', attack_language=np.random.choice(languages))
        f1.port_model()

        # One random utterance among the rows selected by `mask`.
        random = test[mask].sample(1).iloc[0]

        # [0][0]: first element of the first returned item; presumably the
        # adversarial token sequence for the single input - TODO confirm.
        adv = ' '.join(
            f1.attack(x=[random['utterance']], y_slots=[random['slot_labels']], y_intent=[random['intent']])[0][0]
        )

        print(attack_to_latex(
            random['utterance'], adv,
            caption=f'Пример атаки модели m-BERT ({model_name}) word-level атакой.'
        ), file=f)

        # --- phrase-level (alignment) example ---
        f1 = AdversarialAlignments(base_language='en', attack_language=np.random.choice(languages))
        f1.port_model()

        random = test[mask].sample(1).iloc[0]
        # The sampled row's index label selects its entry in `f1.alignments`.
        alignments = [f1.alignments[j] for j in [random.name]]

        adv = ' '.join(
            f1.attack([random['utterance']], [random['slot_labels']], [random['intent']], alignments)[0][0]
        )

        print(attack_to_latex(
            random['utterance'], adv,
            caption=f'Пример атаки модели m-BERT ({model_name}) phrase-level атакой.'
        ), file=f)

In [25]:
# For every xlm-r variant write one word-level and one phrase-level attack
# example to `data_path`/10.tex, each as a two-row LaTeX table.
# encoding='utf-8' is explicit because the captions are Cyrillic and the
# default encoding of open() depends on the platform locale.
# NOTE(review): no random seed is set in the visible cells, so the sampled
# utterances and attack languages differ between runs.
with open(data_path + '10.tex', 'w', encoding='utf-8') as f:
    for model_name, model_arg in zip(model_names, model_args):
        # Only the xlm-r variants belong in this file.
        if 'xlm-r' not in model_name:
            continue

        # The variant is written back to the project config before the attack
        # objects are created; presumably they read the model to load from
        # that saved config - TODO confirm.
        config = load_config()

        # First word of the display name is the bare model identifier.
        config['model_name'] = model_name.split()[0]
        config['only_english'] = model_arg[0]
        config['load_adv_pretrained'] = model_arg[1]

        save_config(config)

        # --- word-level example ---
        f1 = AdversarialWordLevel(base_language='en', attack_language=np.random.choice(languages))
        f1.port_model()

        # One random utterance among the rows selected by `mask`.
        random = test[mask].sample(1).iloc[0]

        # [0][0]: first element of the first returned item; presumably the
        # adversarial token sequence for the single input - TODO confirm.
        adv = ' '.join(
            f1.attack(x=[random['utterance']], y_slots=[random['slot_labels']], y_intent=[random['intent']])[0][0]
        )

        print(attack_to_latex(
            random['utterance'], adv,
            caption=f'Пример атаки модели XLM-RoBERTa ({model_name}) word-level атакой.'
        ), file=f)

        # --- phrase-level (alignment) example ---
        f1 = AdversarialAlignments(base_language='en', attack_language=np.random.choice(languages))
        f1.port_model()

        random = test[mask].sample(1).iloc[0]
        # The sampled row's index label selects its entry in `f1.alignments`.
        alignments = [f1.alignments[j] for j in [random.name]]

        adv = ' '.join(
            f1.attack([random['utterance']], [random['slot_labels']], [random['intent']], alignments)[0][0]
        )

        print(attack_to_latex(
            random['utterance'], adv,
            caption=f'Пример атаки модели XLM-RoBERTa ({model_name}) phrase-level атакой.'
        ), file=f)