In [1]:
import numpy as np
import pandas as pd

from dataset import read_atis
from utils import load_config


# Project configuration as returned by the local `utils.load_config` helper.
config = load_config()

# Directory that receives every generated `.tex` file. Later cells build file
# names with plain string concatenation, so the trailing slash is required.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
data_path = '/home/lesha/diploma/pieces of paper/reports/main/tables/'

In [2]:
# Running counter used to number the `\label{tab:tableN}` of each generated table;
# `df_to_latex` and `attack_to_latex` read and increment it through `global`.
label_num = 0


def array_fixer(x):
    r"""Render a table cell as a LaTeX inline-math string.

    Args:
        x: A single number, or a sequence of numbers (``list``, ``tuple`` or
            ``numpy.ndarray``) to be summarised.

    Returns:
        ``$mean \pm std$`` for a sequence (standard deviation as computed by
        ``np.std``), or ``$x$`` for a scalar; every number is printed with
        three decimals.
    """
    if isinstance(x, (list, tuple, np.ndarray)):
        # The backslash is doubled on purpose: '\p' is not a valid escape
        # sequence and triggers a SyntaxWarning on Python 3.12+.
        return f'${np.mean(x):.3f} \\pm {np.std(x):.3f}$'
    return f'${x:.3f}$'


def df_to_latex(df, caption: str = None):
    r"""Render a DataFrame as a LaTeX ``table`` environment.

    The first table column holds the row labels (bold), followed by one
    centred column per DataFrame column. Cells are formatted with
    ``array_fixer`` and underscores in the table body are escaped. Every call
    uses the current value of the module-level ``label_num`` counter for the
    ``\label{tab:tableN}`` of the table and then increments it.

    Args:
        df: Table to render. Row and column labels are converted to strings,
            so non-string labels (e.g. an integer index) are accepted.
        caption: Optional caption text, inserted verbatim (not escaped).

    Returns:
        The LaTeX source of the table, without a trailing newline.
    """
    global label_num

    template = """\
\\begin{{table}}[H]
\t\\resizebox{{\\textwidth}}{{!}}{{
\t\t\\begin{{tabular}}{{|>{{\\bfseries}}l|{}}}
\t\t\t\\hline
{}
\t\t\\end{{tabular}}
\t}}{}
\\end{{table}}\
"""

    column_spec = 'c|' * df.shape[1]

    # Header row: empty corner cell followed by the column labels.
    rows = ['& ' + ' & '.join(map(str, df.columns))]

    for position, label in enumerate(df.index):
        cells = ' & '.join(map(array_fixer, df.iloc[position].values))
        rows.append(f'{label}&{cells}')

    # Indent every row, terminate it with `\\ \hline`, then escape underscores
    # (model and metric names contain them) for the whole body at once.
    body = '\n'.join('\t' * 3 + row + ' \\\\ \\hline' for row in rows)
    body = body.replace('_', '\\_')

    # '\\caption' is spelled with a doubled backslash: '\c' is an invalid
    # escape sequence and warns on Python 3.12+.
    caption = '' if caption is None else f'\\caption{{{caption}}}'
    caption += f'\\label{{tab:table{label_num}}}'
    label_num += 1

    return template.format(column_spec, body, caption)


def attack_to_latex(original, adv, caption: str = None):
    r"""Render an utterance and its adversarial version as a LaTeX table.

    Both utterances are split on whitespace and laid out one token per cell
    in two rows labelled ``Utterance en`` and ``Utterance adv``; the shorter
    one is padded with blank cells. Every call uses the current value of the
    module-level ``label_num`` counter for the ``\label{tab:tableN}`` of the
    table and then increments it.

    Args:
        original: Source utterance.
        adv: Utterance produced by the attack.
        caption: Optional caption text, inserted verbatim (not escaped).

    Returns:
        The LaTeX source of the table, without a trailing newline.
    """
    global label_num

    template = """\
\\begin{{table}}[H]
\t\\resizebox{{\\textwidth}}{{!}}{{
\t\t\\begin{{tabular}}{{|>{{\\bfseries}}l|{}|}}
\t\t\t\\hline
{}
\t\t\\end{{tabular}}
\t}}{}
\\end{{table}}\
"""

    original_tokens = original.split()
    adv_tokens = adv.split()
    num_columns = max(len(original_tokens), len(adv_tokens))

    def as_row(label, tokens):
        # A literal '&', '%', '#' or '$' inside a token would be read as LaTeX
        # syntax (extra column, comment, ...) and break the row, so escape it.
        cells = [
            ''.join('\\' + char if char in '&%#$' else char for char in token)
            for token in tokens
        ]
        # Pad the shorter utterance with blank cells.
        cells += [' '] * (num_columns - len(tokens))
        return '\t' * 3 + label + ' &' + ' & '.join(cells) + ' \\\\ \\hline'

    rows = [
        as_row('Utterance en', original_tokens),
        as_row('Utterance adv', adv_tokens),
    ]
    body = '\n'.join(rows).replace('_', '\\_')

    # '\\caption' is spelled with a doubled backslash: '\c' is an invalid
    # escape sequence and warns on Python 3.12+.
    caption = '' if caption is None else f'\\caption{{{caption}}}'
    caption += f'\\label{{tab:table{label_num}}}'
    label_num += 1

    return template.format('c' * num_columns, body, caption)

In [3]:
# Every evaluated model as (display name, (only_english, adv_pretrained)).
# The first word of the display name is what later cells pass on as the model
# identifier; the flag pair is unpacked into `get_model_attacks`.
_models = [
    ('xlm-r', (False, False)),
    ('m-bert', (False, False)),
    ('xlm-r en', (True, False)),
    ('m-bert en', (True, False)),
    ('xlm-r adv', (False, True)),
    ('m-bert adv', (False, True)),
    ('xlm-r en + adv', (True, True)),
    ('m-bert en + adv', (True, True)),
]

model_names = [name for name, _ in _models]

# Same order as `model_names`.
model_args = [flags for _, flags in _models]

In [4]:
def get_model_attacks(language, model_name, only_english: bool = False, adv_pretrained: bool = False):
    """Load the stored results of one model as a DataFrame.

    The file is ``results/<language>/<model_name>_<only_english>_<adv_pretrained>.csv``
    (relative to the working directory), with both flags written as 0/1. The
    first CSV column becomes the index.
    """
    flags = '_'.join(str(int(flag)) for flag in (only_english, adv_pretrained))
    csv_path = 'results/{}/{}_{}.csv'.format(language, model_name, flags)
    return pd.read_csv(csv_path, index_col=0)

In [5]:
# Metric column names of the result tables -> labels shown in the report.
# NOTE(review): 'sementic_frame_acc' (sic) presumably mirrors the spelling used
# in the result files — confirm before "fixing" it.
index_renamer = dict(
    intent_acc='Intent accuracy',
    slot_f1='Slots F1 score',
    sementic_frame_acc='Semantic accuracy',
)

In [6]:
# Comparison of the models on the plain test set (no defence).

# Report label of the metric -> model name -> {row label -> score}.
output = {metric_name: {} for metric_name in index_renamer.values()}

for model_name, model_arg in zip(model_names[:4], model_args[:4]):
    df = get_model_attacks('test', model_name.split()[0], *model_arg)

    for key, metric_name in index_renamer.items():
        values = df[key].to_dict()
        # Extra column with the mean over all rows of the result table.
        values['avg'] = np.mean(list(values.values()))
        output[metric_name][model_name] = values

# One DataFrame per metric: models as rows, result-table rows as columns.
output = {
    metric_name: pd.DataFrame.from_dict(per_model).rename(index=index_renamer).transpose()
    for metric_name, per_model in output.items()
}

In [7]:
# Write one LaTeX table per metric into `1.tex`.
# The captions are Cyrillic, so the encoding is fixed explicitly instead of
# depending on the platform's locale default.
with open(data_path + '1.tex', 'w', encoding='utf-8') as f:
    for key, table in output.items():
        caption = f'Сравнение моделей между собой на тестовой выборке датасета MultiAtis++ по метрике \\textbf{{{key}}}. По колонкам языки тестовых подвыборок, по рядам тестируемые модели.'
        print(df_to_latex(table, caption), file=f)

In [8]:
# Comparison of the models under the word-level attack.

# Report label of the metric -> model name -> {language tag -> score}.
output = {metric_name: {} for metric_name in index_renamer.values()}

for model_name, model_arg in zip(model_names[:4], model_args[:4]):
    df = get_model_attacks('en', model_name.split()[0], *model_arg)

    # Keep only the rows whose label mentions 'Word'.
    mask = df.index.map(lambda row_label: 'Word' in row_label)
    attacked = df[mask]

    for key, metric_name in index_renamer.items():
        values = attacked[key].to_dict()
        values['[avg]'] = np.mean(list(values.values()))
        # Reduce every row label to the text between '[' and ']'.
        output[metric_name][model_name] = {
            row_label[row_label.find('[') + 1:row_label.find(']')]: score
            for row_label, score in values.items()
        }

# One DataFrame per metric: models as rows, bracketed tags as columns.
output = {
    metric_name: pd.DataFrame.from_dict(per_model).rename(index=index_renamer).transpose()
    for metric_name, per_model in output.items()
}

In [9]:
# Write one LaTeX table per metric into `2.tex`.
# The captions are Cyrillic, so the encoding is fixed explicitly instead of
# depending on the platform's locale default.
with open(data_path + '2.tex', 'w', encoding='utf-8') as f:
    for key, table in output.items():
        caption = f'Сравнение моделей между собой после word-level атаки на тестовую выборку датасета MultiAtis++ по метрике \\textbf{{{key}}}. По колонкам встраиваемые языки, по рядам тестируемые модели.'
        print(df_to_latex(table, caption), file=f)

In [10]:
# Comparison of the models under the alignments attack.

# Report label of the metric -> model name -> {language tag -> score}.
output = {metric_name: {} for metric_name in index_renamer.values()}

for model_name, model_arg in zip(model_names[:4], model_args[:4]):
    df = get_model_attacks('en', model_name.split()[0], *model_arg)

    # Keep only the rows whose label mentions 'Align'.
    mask = df.index.map(lambda row_label: 'Align' in row_label)
    attacked = df[mask]

    for key, metric_name in index_renamer.items():
        values = attacked[key].to_dict()
        values['[avg]'] = np.mean(list(values.values()))
        # Reduce every row label to the text between '[' and ']'.
        output[metric_name][model_name] = {
            row_label[row_label.find('[') + 1:row_label.find(']')]: score
            for row_label, score in values.items()
        }

# One DataFrame per metric: models as rows, bracketed tags as columns.
output = {
    metric_name: pd.DataFrame.from_dict(per_model).rename(index=index_renamer).transpose()
    for metric_name, per_model in output.items()
}

In [11]:
# Write one LaTeX table per metric into `3.tex`.
# The captions are Cyrillic, so the encoding is fixed explicitly instead of
# depending on the platform's locale default.
with open(data_path + '3.tex', 'w', encoding='utf-8') as f:
    for key, table in output.items():
        caption = f'Сравнение моделей между собой после phrase-level атаки на тестовую выборку датасета MultiAtis++ по метрике \\textbf{{{key}}}. По колонкам встраиваемые языки, по рядам тестируемые модели.'
        print(df_to_latex(table, caption), file=f)

In [12]:
# Comparison of the models on the plain test set (with defence).

# Report label of the metric -> model name -> {row label -> score}.
output = {metric_name: {} for metric_name in index_renamer.values()}

for model_name, model_arg in zip(model_names[4:], model_args[4:]):
    df = get_model_attacks('test', model_name.split()[0], *model_arg)

    for key, metric_name in index_renamer.items():
        values = df[key].to_dict()
        # Extra column with the mean over all rows of the result table.
        values['avg'] = np.mean(list(values.values()))
        output[metric_name][model_name] = values

# One DataFrame per metric: models as rows, result-table rows as columns.
output = {
    metric_name: pd.DataFrame.from_dict(per_model).rename(index=index_renamer).transpose()
    for metric_name, per_model in output.items()
}

In [13]:
# Write one LaTeX table per metric into `4.tex`.
# The captions are Cyrillic, so the encoding is fixed explicitly instead of
# depending on the platform's locale default.
with open(data_path + '4.tex', 'w', encoding='utf-8') as f:
    for key, table in output.items():
        caption = f'Сравнение моделей с защитой между собой на тестовой выборке датасета MultiAtis++ по метрике \\textbf{{{key}}}. По колонкам языки тестовых подвыборок, по рядам тестируемые модели.'
        print(df_to_latex(table, caption), file=f)

In [14]:
# Comparison of the models under the word-level attack.

# Report label of the metric -> model name -> {language tag -> score}.
output = {metric_name: {} for metric_name in index_renamer.values()}

for model_name, model_arg in zip(model_names[4:], model_args[4:]):
    df = get_model_attacks('en', model_name.split()[0], *model_arg)

    # Keep only the rows whose label mentions 'Word'.
    mask = df.index.map(lambda row_label: 'Word' in row_label)
    attacked = df[mask]

    for key, metric_name in index_renamer.items():
        values = attacked[key].to_dict()
        values['[avg]'] = np.mean(list(values.values()))
        # Reduce every row label to the text between '[' and ']'.
        output[metric_name][model_name] = {
            row_label[row_label.find('[') + 1:row_label.find(']')]: score
            for row_label, score in values.items()
        }

# One DataFrame per metric: models as rows, bracketed tags as columns.
output = {
    metric_name: pd.DataFrame.from_dict(per_model).rename(index=index_renamer).transpose()
    for metric_name, per_model in output.items()
}

In [15]:
# Write one LaTeX table per metric into `5.tex`.
# The captions are Cyrillic, so the encoding is fixed explicitly instead of
# depending on the platform's locale default.
with open(data_path + '5.tex', 'w', encoding='utf-8') as f:
    for key, table in output.items():
        caption = f'Сравнение моделей с защитой между собой после word-level атаки на тестовую выборку датасета MultiAtis++ по метрике \\textbf{{{key}}}. По колонкам встраиваемые языки, по рядам тестируемые модели.'
        print(df_to_latex(table, caption), file=f)

In [16]:
# Comparison of the models under the alignments attack.

# Report label of the metric -> model name -> {language tag -> score}.
output = {metric_name: {} for metric_name in index_renamer.values()}

for model_name, model_arg in zip(model_names[4:], model_args[4:]):
    df = get_model_attacks('en', model_name.split()[0], *model_arg)

    # Keep only the rows whose label mentions 'Align'.
    mask = df.index.map(lambda row_label: 'Align' in row_label)
    attacked = df[mask]

    for key, metric_name in index_renamer.items():
        values = attacked[key].to_dict()
        values['[avg]'] = np.mean(list(values.values()))
        # Reduce every row label to the text between '[' and ']'.
        output[metric_name][model_name] = {
            row_label[row_label.find('[') + 1:row_label.find(']')]: score
            for row_label, score in values.items()
        }

# One DataFrame per metric: models as rows, bracketed tags as columns.
output = {
    metric_name: pd.DataFrame.from_dict(per_model).rename(index=index_renamer).transpose()
    for metric_name, per_model in output.items()
}

In [17]:
# Write one LaTeX table per metric into `6.tex`.
# The captions are Cyrillic, so the encoding is fixed explicitly instead of
# depending on the platform's locale default.
with open(data_path + '6.tex', 'w', encoding='utf-8') as f:
    for key, table in output.items():
        caption = f'Сравнение моделей с защитой между собой после phrase-level атаки на тестовую выборку датасета MultiAtis++ по метрике \\textbf{{{key}}}. По колонкам встраиваемые языки, по рядам тестируемые модели.'
        print(df_to_latex(table, caption), file=f)

In [18]:
# Data returned by the project's `read_atis` helper for ('test', ['en']) —
# presumably the English test split; verify against `dataset.read_atis`.
test = read_atis('test', ['en'])

# Utterance length in whitespace-separated tokens; later cells use it to pick
# example sentences of a convenient size.
token_counts = test['utterance'].apply(lambda utterance: len(utterance.split()))
test['len'] = token_counts

from adversarial import AdversarialWordLevel
from adversarial import AdversarialAlignments

In [19]:
from utils import load_config
from utils import save_config

In [20]:
# Languages the attacks are switched to, in the order the examples are generated.
languages = [
    'de',
    'es',
    'fr',
]

In [21]:
# Three word-level attack examples against the plain xlm-r model, one per
# attack language, written to `7.tex`.
# The captions are Cyrillic, so the encoding is fixed explicitly instead of
# depending on the platform's locale default.
# NOTE(review): `.sample(1)` is unseeded, so every run picks different utterances.
with open(data_path + '7.tex', 'w', encoding='utf-8') as f:
    model_name = 'xlm-r'
    model_arg = (False, False)

    # The model choice is written to the saved project config before the
    # attack object is built — presumably that is where the attack reads it
    # from when loading the model; TODO confirm.
    config = load_config()
    config['model_name'] = model_name
    config['only_english'] = model_arg[0]
    config['load_adv_pretrained'] = model_arg[1]
    save_config(config)

    f1 = AdversarialWordLevel(base_language='en')
    f1.port_model()

    # Only utterances of exactly nine tokens are used as examples.
    candidates = test[test['len'] == 9]

    for i, language in enumerate(languages[:3]):
        f1.change_attack_language(language)

        # Named `example` rather than `random` to avoid shadowing the
        # standard-library module name.
        example = candidates.sample(1).iloc[0]

        adv = ' '.join(
            f1.attack(x=[example['utterance']], y_slots=[example['slot_labels']], y_intent=[example['intent']])[0][0]
        )

        print(attack_to_latex(
            example['utterance'], adv, caption=f'Пример {i + 1} атаки модели XLM-RoBERTa (xlm-r) word-level атакой.'
        ), file=f)

In [22]:
# Three phrase-level (alignments) attack examples against the plain xlm-r
# model, one per attack language, written to `8.tex`.
# The captions are Cyrillic, so the encoding is fixed explicitly instead of
# depending on the platform's locale default.
# NOTE(review): `.sample(1)` is unseeded, so every run picks different utterances.
with open(data_path + '8.tex', 'w', encoding='utf-8') as f:
    model_name = 'xlm-r'
    model_arg = (False, False)

    # The model choice is written to the saved project config before the
    # attack object is built — presumably that is where the attack reads it
    # from when loading the model; TODO confirm.
    config = load_config()
    config['model_name'] = model_name
    config['only_english'] = model_arg[0]
    config['load_adv_pretrained'] = model_arg[1]
    save_config(config)

    f1 = AdversarialAlignments(base_language='en')
    f1.port_model()

    # Only utterances of exactly nine tokens are used as examples.
    candidates = test[test['len'] == 9]

    for i, language in enumerate(languages[:3]):
        f1.change_attack_language(language)

        # Named `example` rather than `random` to avoid shadowing the
        # standard-library module name.
        example = candidates.sample(1).iloc[0]
        # The alignment entry is looked up by the row's index label.
        alignments = [f1.alignments[example.name]]

        adv = ' '.join(
            f1.attack([example['utterance']], [example['slot_labels']], [example['intent']], alignments)[0][0]
        )

        print(attack_to_latex(
            example['utterance'], adv, caption=f'Пример {i + 1} атаки модели XLM-RoBERTa (xlm-r) phrase-level атакой.'
        ), file=f)

In [23]:
# Rows whose utterance has more than 7 and fewer than 12 tokens; used to pick
# the examples in the per-model cells below.
lengths = test['len']
mask = (7 < lengths) & (lengths < 12)

In [24]:
# Word-level and phrase-level attack examples for every m-bert variant
# (three per attack type, one per attack language), written to `9.tex`.
# The captions are Cyrillic, so the encoding is fixed explicitly instead of
# depending on the platform's locale default.
# NOTE(review): `.sample(1)` is unseeded, so every run picks different utterances.
with open(data_path + '9.tex', 'w', encoding='utf-8') as f:
    # Pool of example utterances selected by the module-level `mask`.
    candidates = test[mask]

    for model_name, model_arg in zip(model_names, model_args):
        if 'm-bert' not in model_name:
            continue

        # The model choice is written to the saved project config before the
        # attack objects are built — presumably that is where they read it
        # from when loading the model; TODO confirm.
        config = load_config()
        config['model_name'] = model_name.split()[0]
        config['only_english'] = model_arg[0]
        config['load_adv_pretrained'] = model_arg[1]
        save_config(config)

        f1 = AdversarialWordLevel(base_language='en')
        f1.port_model()

        for i, language in enumerate(languages[:3]):
            f1.change_attack_language(language)

            # Named `example` rather than `random` to avoid shadowing the
            # standard-library module name.
            example = candidates.sample(1).iloc[0]

            adv = ' '.join(
                f1.attack(x=[example['utterance']], y_slots=[example['slot_labels']], y_intent=[example['intent']])[0][0]
            )

            print(attack_to_latex(
                example['utterance'], adv,
                caption=f'Пример {i + 1} атаки модели m-BERT ({model_name}) word-level атакой.'
            ), file=f)

        f1 = AdversarialAlignments(base_language='en')
        f1.port_model()

        for i, language in enumerate(languages[:3]):
            f1.change_attack_language(language)

            example = candidates.sample(1).iloc[0]
            # The alignment entry is looked up by the row's index label.
            alignments = [f1.alignments[example.name]]

            adv = ' '.join(
                f1.attack([example['utterance']], [example['slot_labels']], [example['intent']], alignments)[0][0]
            )

            print(attack_to_latex(
                example['utterance'], adv,
                caption=f'Пример {i + 1} атаки модели m-BERT ({model_name}) phrase-level атакой.'
            ), file=f)

In [25]:
# Word-level and phrase-level attack examples for every xlm-r variant
# (three per attack type, one per attack language), written to `10.tex`.
# The captions are Cyrillic, so the encoding is fixed explicitly instead of
# depending on the platform's locale default.
# NOTE(review): `.sample(1)` is unseeded, so every run picks different utterances.
with open(data_path + '10.tex', 'w', encoding='utf-8') as f:
    # Pool of example utterances selected by the module-level `mask`.
    candidates = test[mask]

    for model_name, model_arg in zip(model_names, model_args):
        if 'xlm-r' not in model_name:
            continue

        # The model choice is written to the saved project config before the
        # attack objects are built — presumably that is where they read it
        # from when loading the model; TODO confirm.
        config = load_config()
        config['model_name'] = model_name.split()[0]
        config['only_english'] = model_arg[0]
        config['load_adv_pretrained'] = model_arg[1]
        save_config(config)

        f1 = AdversarialWordLevel(base_language='en')
        f1.port_model()

        for i, language in enumerate(languages[:3]):
            f1.change_attack_language(language)

            # Named `example` rather than `random` to avoid shadowing the
            # standard-library module name.
            example = candidates.sample(1).iloc[0]

            adv = ' '.join(
                f1.attack(x=[example['utterance']], y_slots=[example['slot_labels']], y_intent=[example['intent']])[0][0]
            )

            print(attack_to_latex(
                example['utterance'], adv,
                caption=f'Пример {i + 1} атаки модели XLM-RoBERTa ({model_name}) word-level атакой.'
            ), file=f)

        f1 = AdversarialAlignments(base_language='en')
        f1.port_model()

        for i, language in enumerate(languages[:3]):
            f1.change_attack_language(language)

            example = candidates.sample(1).iloc[0]
            # The alignment entry is looked up by the row's index label.
            alignments = [f1.alignments[example.name]]

            adv = ' '.join(
                f1.attack([example['utterance']], [example['slot_labels']], [example['intent']], alignments)[0][0]
            )

            print(attack_to_latex(
                example['utterance'], adv,
                caption=f'Пример {i + 1} атаки модели XLM-RoBERTa ({model_name}) phrase-level атакой.'
            ), file=f)