In [1]:
from typing import Iterator, List, Dict

import torch
import torch.optim as optim
import numpy as np

from allennlp.data import Instance
from allennlp.data.fields import TextField, SequenceLabelField, LabelField
from allennlp.data.dataset_readers import DatasetReader
from allennlp.common.file_utils import cached_path
from allennlp.data.token_indexers import TokenIndexer, SingleIdTokenIndexer
from allennlp.data.tokenizers import Token
from allennlp.data.vocabulary import Vocabulary
from allennlp.models import Model
from allennlp.modules.text_field_embedders import TextFieldEmbedder, BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding
from allennlp.modules.seq2seq_encoders import Seq2SeqEncoder, PytorchSeq2SeqWrapper
from allennlp.nn.util import get_text_field_mask, sequence_cross_entropy_with_logits
from allennlp.training.metrics import CategoricalAccuracy, Auc
from allennlp.data.iterators import BucketIterator, BasicIterator
from allennlp.training.trainer import Trainer
from allennlp.predictors import TextClassifierPredictor

In [2]:
class InsReader(DatasetReader):
    def text_to_instance(self, sentence: str, label: int = None) -> Instance:
        if not isinstance(sentence, list):
            sentence = sentence.split()
        
        sentence_field = TextField([Token(word) for word in sentence], {"tokens": SingleIdTokenIndexer()})
        fields = {"tokens": sentence_field}
        
        if label is not None:
            label_field = LabelField(label=label, skip_indexing=True)
            fields["label"] = label_field

        return Instance(fields)

    def _read(self, file_path: str) -> Iterator[Instance]:
        text_path = file_path + '.text'
        labels_path = file_path + '.labels'
        
        with open(text_path) as text_f, open(labels_path) as labels_f:
            for line_t, line_l in zip(text_f, labels_f):
                sentence = line_t.strip()
                label = int(line_l.strip())
                yield self.text_to_instance(sentence, label)

In [3]:
data_path = '/mnt/chatbot_models2/fursov/texar/examples/text_style_transfer/data/insurance_cropped/'
reader = InsReader()

train_dataset = reader.read(data_path + 'insurance.train')
dev_dataset = reader.read(data_path + 'insurance.dev')
test_dataset = reader.read(data_path + 'insurance.test')

266051it [00:17, 15091.94it/s]
57011it [00:02, 26556.73it/s]
57012it [00:03, 16875.45it/s]


In [4]:
vocab = Vocabulary.from_instances(train_dataset + dev_dataset)

100%|██████████| 323062/323062 [00:03<00:00, 96643.55it/s] 


In [5]:
iterator = BasicIterator(batch_size=256)
iterator.index_with(vocab)

# Model

In [6]:
from allennlp.models.basic_classifier import BasicClassifier
from allennlp.modules.seq2vec_encoders import BagOfEmbeddingsEncoder

In [7]:
EMBEDDING_DIM = 32
HIDDEN_DIM = 16

token_embedding = Embedding(
    num_embeddings=vocab.get_vocab_size('tokens'),
    embedding_dim=EMBEDDING_DIM
)

word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})
lstm = PytorchSeq2SeqWrapper(torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True))
body = BagOfEmbeddingsEncoder(embedding_dim=HIDDEN_DIM)

In [8]:
class MyModel(BasicClassifier):
    def __init__(self, vocabulary, text_field_embedder, seq2seq_encoder, seq2vec_encoder):
        super().__init__(
            vocab=vocabulary, 
            text_field_embedder=text_field_embedder,
            seq2vec_encoder=seq2vec_encoder, 
            seq2seq_encoder=seq2seq_encoder,
            num_labels=2
        )
        self.auc = Auc()

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        metrics = super().get_metrics()
        metrics.update({"roc_auc": self.auc.get_metric(reset)})
        return metrics

In [9]:
model = MyModel(
    vocabulary=vocab, 
    text_field_embedder=word_embeddings, 
    seq2seq_encoder=lstm,
    seq2vec_encoder=body,
)

In [10]:
cuda_device = 0
model = model.cuda(cuda_device)

In [11]:
model

MyModel(
  (_text_field_embedder): BasicTextFieldEmbedder(
    (token_embedder_tokens): Embedding()
  )
  (_seq2seq_encoder): PytorchSeq2SeqWrapper(
    (_module): LSTM(32, 16, batch_first=True)
  )
  (_seq2vec_encoder): BagOfEmbeddingsEncoder()
  (_classification_layer): Linear(in_features=16, out_features=2, bias=True)
  (_loss): CrossEntropyLoss()
)

In [12]:
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [13]:
trainer = Trainer(
    model=model,
    optimizer=optimizer,
    iterator=iterator,
    train_dataset=train_dataset,
    validation_dataset=dev_dataset,
    patience=3,
    num_epochs=10,
    cuda_device=cuda_device
)

In [14]:
trainer.train()

accuracy: 0.9836, roc_auc: 0.5000, loss: 0.1131 ||: 100%|██████████| 1040/1040 [00:35<00:00, 29.14it/s]
accuracy: 0.9839, roc_auc: 0.5000, loss: 0.0688 ||: 100%|██████████| 223/223 [00:04<00:00, 49.06it/s]
accuracy: 0.9844, roc_auc: 0.5000, loss: 0.0646 ||: 100%|██████████| 1040/1040 [00:25<00:00, 40.12it/s]
accuracy: 0.9844, roc_auc: 0.5000, loss: 0.0654 ||: 100%|██████████| 223/223 [00:03<00:00, 61.46it/s]
accuracy: 0.9846, roc_auc: 0.5000, loss: 0.0605 ||: 100%|██████████| 1040/1040 [00:26<00:00, 39.05it/s]
accuracy: 0.9846, roc_auc: 0.5000, loss: 0.0632 ||: 100%|██████████| 223/223 [00:04<00:00, 62.37it/s]
accuracy: 0.9848, roc_auc: 0.5000, loss: 0.0588 ||: 100%|██████████| 1040/1040 [00:26<00:00, 38.97it/s]
accuracy: 0.9848, roc_auc: 0.5000, loss: 0.0630 ||: 100%|██████████| 223/223 [00:03<00:00, 61.76it/s]
accuracy: 0.9849, roc_auc: 0.5000, loss: 0.0572 ||: 100%|██████████| 1040/1040 [00:24<00:00, 41.90it/s]
accuracy: 0.9849, roc_auc: 0.5000, loss: 0.0636 ||: 100%|██████████| 223

{'best_epoch': 3,
 'peak_cpu_memory_MB': 3693.532,
 'peak_gpu_0_memory_MB': 583,
 'peak_gpu_1_memory_MB': 145,
 'peak_gpu_2_memory_MB': 10,
 'peak_gpu_3_memory_MB': 10633,
 'training_duration': '0:03:09.386899',
 'training_start_epoch': 0,
 'training_epochs': 5,
 'epoch': 5,
 'training_accuracy': 0.9849996890548917,
 'training_roc_auc': 0.5,
 'training_loss': 0.05633270915758868,
 'training_cpu_memory_MB': 3693.532,
 'training_gpu_0_memory_MB': 583,
 'training_gpu_1_memory_MB': 145,
 'training_gpu_2_memory_MB': 10,
 'training_gpu_3_memory_MB': 10633,
 'validation_accuracy': 0.9849832746242724,
 'validation_roc_auc': 0.5,
 'validation_loss': 0.06385008288305169,
 'best_validation_accuracy': 0.9847838804935276,
 'best_validation_roc_auc': 0.5,
 'best_validation_loss': 0.0629554661450231}

## Metrics

In [76]:
from tqdm import tqdm
from sklearn.metrics import roc_auc_score, average_precision_score, confusion_matrix, f1_score

In [69]:
dev_probs = []
dev_labels = []

for batch in tqdm(iterator(dev_dataset, num_epochs=1)):
    
    curr_labels = batch['label']
    to_predict = batch['tokens']
    to_predict['tokens'] = to_predict['tokens'].to(cuda_device)
    
    dev_probs.append(model(to_predict)['probs'].detach().cpu().numpy())
    dev_labels.extend(list(curr_labels.cpu().numpy()))


0it [00:00, ?it/s][A
1it [00:00,  4.94it/s][A
6it [00:00,  6.75it/s][A
12it [00:00,  9.13it/s][A
18it [00:00, 12.12it/s][A
24it [00:00, 15.75it/s][A
30it [00:00, 19.88it/s][A
36it [00:00, 24.34it/s][A
42it [00:01, 28.83it/s][A
47it [00:01, 32.93it/s][A
53it [00:01, 36.90it/s][A
59it [00:01, 40.34it/s][A
65it [00:01, 42.90it/s][A
71it [00:01, 45.04it/s][A
77it [00:01, 46.97it/s][A
83it [00:01, 48.49it/s][A
89it [00:01, 49.27it/s][A
95it [00:02, 49.93it/s][A
101it [00:02, 50.33it/s][A
107it [00:02, 50.67it/s][A
113it [00:02, 50.06it/s][A
119it [00:02, 50.71it/s][A
125it [00:02, 51.11it/s][A
131it [00:02, 51.27it/s][A
137it [00:02, 51.47it/s][A
143it [00:02, 51.97it/s][A
149it [00:03, 51.46it/s][A
155it [00:03, 52.07it/s][A
161it [00:03, 52.25it/s][A
167it [00:03, 51.17it/s][A
173it [00:03, 51.91it/s][A
179it [00:03, 52.49it/s][A
185it [00:03, 52.23it/s][A
191it [00:03, 51.89it/s][A
197it [00:04, 51.52it/s][A
203it [00:04, 50.81it/s][A
209it [00:04, 51

In [70]:
dev_probs = np.vstack(dev_probs)
dev_labels = np.array(dev_labels)

In [72]:
dev_probs.shape, dev_labels.shape

((57011, 2), (57011,))

In [78]:
def calculate_metrics(probs, labels):

    metrics = dict()
    metrics['roc_auc'] = roc_auc_score(labels, probs[:, 1])
    metrics['aver_pr'] = average_precision_score(labels, probs[:, 1])
    metrics['f1'] = max(
        [f1_score(y_true=labels, y_pred=(probs[:, 1] > threshold).astype(int))
            for threshold in np.linspace(0.001, 0.99)]
    )

    return metrics

In [79]:
# resulted metrics

calculate_metrics(dev_probs, dev_labels)

  'precision', 'predicted', average, warn_for)


{'roc_auc': 0.8378391125882765,
 'aver_pr': 0.13977888279548203,
 'f1': 0.21110555277638818}

## Adversarial examples

In [22]:
adversarial_examples = []
original_examples = []

with open('/mnt/chatbot_models2/fursov/texar/examples/text_style_transfer/samples_cropped/val.12') as file:
    for i, line in enumerate(file):
        if i % 2 == 0:
            original_examples.append(line.strip())
        else:
            adversarial_examples.append(line.strip())

In [None]:
predictor = TextClassifierPredictor(model=model, dataset_reader=reader)

In [26]:
len(adversarial_examples), len(original_examples)

(1254243, 1254243)

In [28]:
max_examples = 100000

In [29]:
original_probs = []
for example in tqdm(original_examples[:max_examples]):
    original_probs.append(predictor.predict(example)['probs'])

100%|██████████| 100000/100000 [03:25<00:00, 499.16it/s]


In [32]:
adversarial_probs = []
for example in tqdm(adversarial_examples[:max_examples]):
    if example:
        adversarial_probs.append(predictor.predict(example)['probs'])

100%|██████████| 100000/100000 [04:02<00:00, 412.25it/s]


In [33]:
original_probs = np.array(original_probs)
adversarial_probs = np.array(adversarial_probs)

In [34]:
original_probs.shape, adversarial_probs.shape

((100000, 2), (99677, 2))

In [37]:
print(f'Original mean prob = {original_probs[:, 0].mean()}, median = {np.median(original_probs[:, 0])}')
print(f'Original max prob = {original_probs[:, 0].max()}, min = {original_probs[:, 0].min()}')

Original mean prob = 0.9868909656191617, median = 0.9957349002361298
Original max prob = 1.0, min = 0.02725524827837944


In [38]:
print(f'Adversarial mean prob = {adversarial_probs[:, 0].mean()}, median = {np.median(adversarial_probs[:, 0])}')
print(f'Adversarial max prob = {adversarial_probs[:, 0].max()}, min = {adversarial_probs[:, 0].min()}')

Adversarial mean prob = 0.19455415233026777, median = 4.211214036331512e-05
Adversarial max prob = 1.0, min = 1.528894633624645e-23
