# Examples

In this notebook, sentence augmentations examples for both aggregation methods are shown. 

In [1]:
# Make imports from parent directory possible
import sys
sys.path.insert(0, "../")
# Disable verbose warnings
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import logging

# Import libraries
import torch

from transformers.models.bert.tokenization_bert import BertTokenizer
from transformers.models.bert.modeling_bert import BertModel

from transformers.models.roberta.tokenization_roberta import RobertaTokenizer
from transformers.models.roberta.modeling_roberta import RobertaModel
logging.getLogger("transformers").setLevel(logging.ERROR)

from attention_driven_dropout import AttentionDrivenDropout

## Instantiate Tokenizer and Model
You can instantiate either the BERT or RoBERTA tokenizer and model. We show the output for both base models here.

In [2]:
bert_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
bert_model = BertModel.from_pretrained("bert-base-uncased")

roberta_tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
roberta_model = RobertaModel.from_pretrained("roberta-base")

In [3]:
example_sentences = [
    "We should go to the small italian restaurant again!",
    "Two big dogs are running fast in the park.",
    "Mary helped John to style his new apartment.",
    "A brown bear is eating a small fish.",
]

input_ids_bert = torch.tensor(bert_tokenizer(example_sentences, padding=True).input_ids)
input_ids_roberta = torch.tensor(roberta_tokenizer(example_sentences, padding=True).input_ids)

Helper functions for printing the output of the augmentations.

In [4]:
def print_bert(input_ids, altered_input_ids, scores):
    for i in range(len(input_ids)):
        print("Original sentence:", bert_tokenizer.decode(input_ids[i]))
        print("Altered sentence:", bert_tokenizer.decode(altered_input_ids[i]))
        min_index = scores[i][scores[i] > 0].min(0).indices
        print(f"Removed: {bert_tokenizer.decode(input_ids[i][min_index])} ({scores[i][min_index]})")
        print()
    
def print_roberta(input_ids, altered_input_ids, scores):
    for i in range(len(input_ids)):
        print("Original sentence:", roberta_tokenizer.decode(input_ids[i]))
        print("Altered sentence:", roberta_tokenizer.decode(altered_input_ids[i]))
        min_index = scores[i][scores[i] > 0].min(0).indices
        print(f"Removed: {roberta_tokenizer.decode(input_ids[i][min_index])} ({scores[i][min_index]})")
        print()

## Example 1: Naive Aggregation

### BERT

In [7]:
naive_add = AttentionDrivenDropout(bert_model, n_dropout=1, min_tokens=1)
naive_output_ids, scores = naive_add(input_ids_bert, return_scores=True, num_sent=1)

print_bert(input_ids_bert, naive_output_ids, scores)

Original sentence: [CLS] we should go to the small italian restaurant again! [SEP]
Altered sentence: [CLS] we should go to the small restaurant again! [SEP] [PAD]
Removed: i t a l i a n (57.876129150390625)

Original sentence: [CLS] two big dogs are running fast in the park. [SEP]
Altered sentence: [CLS] two dogs are running fast in the park. [SEP] [PAD]
Removed: b i g (54.35097885131836)

Original sentence: [CLS] mary helped john to style his new apartment. [SEP] [PAD]
Altered sentence: [CLS] mary helped john to style his apartment. [SEP] [PAD] [PAD]
Removed: n e w (55.61100769042969)

Original sentence: [CLS] a brown bear is eating a small fish. [SEP] [PAD]
Altered sentence: [CLS] a brown bear is eating a fish. [SEP] [PAD] [PAD]
Removed: s m a l l (51.31556701660156)



### RoBERTa

In [5]:
naive_add = AttentionDrivenDropout(roberta_model, n_dropout=1, min_tokens=1)
naive_output_ids, scores = naive_add(input_ids_roberta[1:], return_scores=True, num_sent=1)

print_roberta(input_ids_roberta[1:], naive_output_ids, scores)

Original sentence: <s>Two big dogs are running fast in the park.</s><pad>
Altered sentence: <s>Two dogs are running fast in the park.</s><pad><pad>
Removed:  big (49.18559265136719)

Original sentence: <s>Mary helped John to style his new apartment.</s><pad><pad>
Altered sentence: <s>Mary helped John to style new apartment.</s><pad><pad><pad>
Removed:  his (54.62554931640625)

Original sentence: <s>A brown bear is eating a small fish.</s><pad><pad>
Altered sentence: <s>A bear is eating a small fish.</s><pad><pad><pad>
Removed:  brown (48.095272064208984)



## Example 2: Attention Rollout Aggregation

### BERT

In [9]:
add_rollout = AttentionDrivenDropout(bert_model, n_dropout=1, min_tokens=1, summation_method="rollout")
rollout_output_ids, scores = add_rollout(input_ids_bert, return_scores=True, num_sent=1)

print_bert(input_ids_bert, rollout_output_ids, scores)

Original sentence: [CLS] we should go to the small italian restaurant again! [SEP]
Altered sentence: [CLS] we should go to the italian restaurant again! [SEP] [PAD]
Removed: s m a l l (5.803347110748291)

Original sentence: [CLS] two big dogs are running fast in the park. [SEP]
Altered sentence: [CLS] two dogs are running fast in the park. [SEP] [PAD]
Removed: b i g (5.9078240394592285)

Original sentence: [CLS] mary helped john to style his new apartment. [SEP] [PAD]
Altered sentence: [CLS] mary helped to style his new apartment. [SEP] [PAD] [PAD]
Removed: j o h n (5.947506904602051)

Original sentence: [CLS] a brown bear is eating a small fish. [SEP] [PAD]
Altered sentence: [CLS] a bear is eating a small fish. [SEP] [PAD] [PAD]
Removed: b r o w n (5.942243576049805)



### RoBERTa

In [10]:
add_rollout = AttentionDrivenDropout(roberta_model, n_dropout=1, min_tokens=1, summation_method="rollout")
rollout_output_ids, scores = add_rollout(input_ids_roberta, return_scores=True, num_sent=1)

print_roberta(input_ids_roberta, rollout_output_ids, scores)

Original sentence: <s>We should go to the small italian restaurant again!</s>
Altered sentence: <s>We should go to the italian restaurant again!</s><pad>
Removed:  small (6.072007179260254)

Original sentence: <s>Two big dogs are running fast in the park.</s><pad>
Altered sentence: <s>Two big dogs are fast in the park.</s><pad><pad>
Removed:  running (5.800328731536865)

Original sentence: <s>Mary helped John to style his new apartment.</s><pad><pad>
Altered sentence: <s>Mary helped to style his new apartment.</s><pad><pad><pad>
Removed:  John (5.892853260040283)

Original sentence: <s>A brown bear is eating a small fish.</s><pad><pad>
Altered sentence: <s>A brown bear is eating a fish.</s><pad><pad><pad>
Removed:  small (6.218420028686523)

