In [1]:
from typing import Iterator, List, Dict

import torch
import torch.optim as optim
import numpy as np

from allennlp.data import Instance
from allennlp.data.fields import TextField, SequenceLabelField, LabelField
from allennlp.data.dataset_readers import DatasetReader
from allennlp.common.file_utils import cached_path
from allennlp.data.token_indexers import TokenIndexer, SingleIdTokenIndexer
from allennlp.data.tokenizers import Token
from allennlp.data.vocabulary import Vocabulary
from allennlp.models import Model
from allennlp.modules.text_field_embedders import TextFieldEmbedder, BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding
from allennlp.modules.seq2seq_encoders import Seq2SeqEncoder, PytorchSeq2SeqWrapper
from allennlp.nn.util import get_text_field_mask, sequence_cross_entropy_with_logits
from allennlp.training.metrics import CategoricalAccuracy, Auc
from allennlp.data.iterators import BucketIterator, BasicIterator
from allennlp.training.trainer import Trainer
from allennlp.predictors import TextClassifierPredictor

In [2]:
import sys
# Add the parent directory to the import path before importing from `adat`
# (presumably the repository root that contains the `adat` package — confirm).
sys.path.append('..')
from adat.models import get_basic_classification_model
from adat.dataset import InsuranceReader

In [3]:
!ls data

dataset.zip		     test.text	       transactions_test.csv
small_group_description.csv  train.labels      transactions_train.csv
test.csv		     train_target.csv
test.labels		     train.text


In [4]:
# Read the train and test splits with the project reader; each path is the
# shared `data_path` prefix followed by the split name.
data_path = 'data/'
reader = InsuranceReader()

train_dataset, test_dataset = (
    reader.read(f'{data_path}{split_name}') for split_name in ('train', 'test')
)

220739it [00:09, 24075.95it/s]
47224it [00:01, 24121.97it/s]


In [6]:
# Build the vocabulary from the train and test instances together.
# NOTE(review): this lets test-split tokens into the vocabulary — confirm that is intended.
vocab = Vocabulary.from_instances(train_dataset + test_dataset)

100%|██████████| 267963/267963 [00:01<00:00, 137751.69it/s]


In [14]:
# Iterator that yields batches of 4096 instances; it is given the vocabulary
# built above via `index_with` before it is used.
iterator = BasicIterator(batch_size=4096)
iterator.index_with(vocab)

## Model

In [15]:
# Build the classifier with the project helper, using the vocabulary built above.
model = get_basic_classification_model(vocab)

In [16]:
# Move the model to GPU 0; the returned module is displayed as the cell output.
model.cuda(0)

BasicClassifier(
  (_text_field_embedder): BasicTextFieldEmbedder(
    (token_embedder_tokens): Embedding()
  )
  (_seq2seq_encoder): PytorchSeq2SeqWrapper(
    (_module): LSTM(32, 16, batch_first=True)
  )
  (_seq2vec_encoder): BagOfEmbeddingsEncoder()
  (_classification_layer): Linear(in_features=16, out_features=2, bias=True)
  (_loss): CrossEntropyLoss()
)

In [17]:
# Adam over all model parameters with a learning rate of 0.003.
optimizer = optim.Adam(model.parameters(), lr=0.003)

In [18]:
# Configure training on GPU 0 for at most 30 epochs.
trainer = Trainer(
    model=model,
    optimizer=optimizer,
    iterator=iterator,
    train_dataset=train_dataset,
    validation_dataset=test_dataset,  # NOTE(review): the test split doubles as the validation set
    patience=2,  # presumably early-stopping patience in epochs — confirm against the AllenNLP Trainer docs
    num_epochs=30,
    cuda_device=0
)

In [19]:
# Run training; the returned metrics dict is displayed as the cell output.
trainer.train()

accuracy: 0.9772, loss: 0.2432 ||: 100%|██████████| 54/54 [00:12<00:00,  5.17it/s]
accuracy: 0.9893, loss: 0.1234 ||: 100%|██████████| 12/12 [00:02<00:00,  5.87it/s]
accuracy: 0.9891, loss: 0.0855 ||: 100%|██████████| 54/54 [00:11<00:00,  5.23it/s]
accuracy: 0.9888, loss: 0.0688 ||: 100%|██████████| 12/12 [00:03<00:00,  2.51it/s]
accuracy: 0.9890, loss: 0.0592 ||: 100%|██████████| 54/54 [00:10<00:00,  5.27it/s]
accuracy: 0.9891, loss: 0.0575 ||: 100%|██████████| 12/12 [00:02<00:00,  5.63it/s]
accuracy: 0.9892, loss: 0.0522 ||: 100%|██████████| 54/54 [00:12<00:00,  5.39it/s]
accuracy: 0.9891, loss: 0.0539 ||: 100%|██████████| 12/12 [00:02<00:00,  6.31it/s]
accuracy: 0.9893, loss: 0.0488 ||: 100%|██████████| 54/54 [00:10<00:00,  4.97it/s]
accuracy: 0.9891, loss: 0.0527 ||: 100%|██████████| 12/12 [00:02<00:00,  5.62it/s]
accuracy: 0.9894, loss: 0.0469 ||: 100%|██████████| 54/54 [00:11<00:00,  5.41it/s]
accuracy: 0.9891, loss: 0.0520 ||: 100%|██████████| 12/12 [00:02<00:00,  5.67it/s]
accu

{'best_epoch': 7,
 'peak_cpu_memory_MB': 3255.652,
 'peak_gpu_0_memory_MB': 2363,
 'peak_gpu_1_memory_MB': 573,
 'peak_gpu_2_memory_MB': 10,
 'peak_gpu_3_memory_MB': 577,
 'training_duration': '0:02:06.671360',
 'training_start_epoch': 0,
 'training_epochs': 8,
 'epoch': 8,
 'training_accuracy': 0.9895623337969276,
 'training_loss': 0.043179076406414864,
 'training_cpu_memory_MB': 3255.652,
 'training_gpu_0_memory_MB': 2363,
 'training_gpu_1_memory_MB': 573,
 'training_gpu_2_memory_MB': 10,
 'training_gpu_3_memory_MB': 577,
 'validation_accuracy': 0.9888827714721328,
 'validation_loss': 0.05132229377826055,
 'best_validation_accuracy': 0.9883957309842453,
 'best_validation_loss': 0.05109726885954539}

In [21]:
# Optional (currently disabled): save the trained weights and the vocabulary to disk.
# with open("model_classification2.th", 'wb') as f:
#     torch.save(model.state_dict(), f)

# vocab.save_to_files("vocab_classification2")

## Metrics

In [76]:
from tqdm import tqdm
from sklearn.metrics import roc_auc_score, average_precision_score, confusion_matrix, f1_score

In [69]:
# Collect the predicted class probabilities and the gold labels for every
# instance in the dev set, batch by batch.
# NOTE(review): `dev_dataset` is not defined in any cell visible here — confirm
# it is created before this cell when running on a fresh kernel.
dev_probs = []
dev_labels = []

# Take the target device from the model itself so this cell does not depend on
# a separately defined `cuda_device` variable.
model_device = next(model.parameters()).device

# Inference only: switch off training-time behaviour and gradient tracking.
model.eval()
with torch.no_grad():
    for batch in tqdm(iterator(dev_dataset, num_epochs=1)):

        curr_labels = batch['label']
        to_predict = batch['tokens']
        # The token id tensor must be on the same device as the model.
        to_predict['tokens'] = to_predict['tokens'].to(model_device)

        dev_probs.append(model(to_predict)['probs'].detach().cpu().numpy())
        dev_labels.extend(list(curr_labels.cpu().numpy()))


0it [00:00, ?it/s][A
1it [00:00,  4.94it/s][A
6it [00:00,  6.75it/s][A
12it [00:00,  9.13it/s][A
18it [00:00, 12.12it/s][A
24it [00:00, 15.75it/s][A
30it [00:00, 19.88it/s][A
36it [00:00, 24.34it/s][A
42it [00:01, 28.83it/s][A
47it [00:01, 32.93it/s][A
53it [00:01, 36.90it/s][A
59it [00:01, 40.34it/s][A
65it [00:01, 42.90it/s][A
71it [00:01, 45.04it/s][A
77it [00:01, 46.97it/s][A
83it [00:01, 48.49it/s][A
89it [00:01, 49.27it/s][A
95it [00:02, 49.93it/s][A
101it [00:02, 50.33it/s][A
107it [00:02, 50.67it/s][A
113it [00:02, 50.06it/s][A
119it [00:02, 50.71it/s][A
125it [00:02, 51.11it/s][A
131it [00:02, 51.27it/s][A
137it [00:02, 51.47it/s][A
143it [00:02, 51.97it/s][A
149it [00:03, 51.46it/s][A
155it [00:03, 52.07it/s][A
161it [00:03, 52.25it/s][A
167it [00:03, 51.17it/s][A
173it [00:03, 51.91it/s][A
179it [00:03, 52.49it/s][A
185it [00:03, 52.23it/s][A
191it [00:03, 51.89it/s][A
197it [00:04, 51.52it/s][A
203it [00:04, 50.81it/s][A
209it [00:04, 51

In [70]:
# Stack the per-batch probability arrays row-wise and turn the label list into an array.
dev_probs = np.vstack(dev_probs)
dev_labels = np.array(dev_labels)

In [72]:
# Sanity check: both arrays should have the same number of rows.
dev_probs.shape, dev_labels.shape

((57011, 2), (57011,))

In [78]:
def calculate_metrics(probs, labels, thresholds=None):
    """Compute ROC AUC, average precision and the best F1 over a threshold grid.

    Args:
        probs: 2-D array-like of per-class probabilities; column 1 is used as
            the score passed to the metrics.
        labels: array-like of ground-truth labels, one per row of ``probs``.
        thresholds: optional iterable of decision thresholds scanned for the
            best F1. Defaults to ``np.linspace(0.001, 0.99)``.

    Returns:
        dict with the keys ``'roc_auc'``, ``'aver_pr'`` and ``'f1'``, where
        ``'f1'`` is the maximum F1 score over the scanned thresholds.
    """
    probs = np.asarray(probs)
    labels = np.asarray(labels)
    positive_scores = probs[:, 1]
    if thresholds is None:
        thresholds = np.linspace(0.001, 0.99)

    metrics = dict()
    metrics['roc_auc'] = roc_auc_score(labels, positive_scores)
    metrics['aver_pr'] = average_precision_score(labels, positive_scores)

    f1_values = []
    for threshold in thresholds:
        predicted = (positive_scores > threshold).astype(int)
        # With no positive predictions F1 is reported as 0 together with an
        # "ill-defined" warning; skipping such thresholds keeps the maximum
        # unchanged (see the 0.0 default below) without flooding the output.
        if predicted.any():
            f1_values.append(f1_score(y_true=labels, y_pred=predicted))
    metrics['f1'] = max(f1_values, default=0.0)

    return metrics

In [79]:
# Resulting metrics on the dev set

calculate_metrics(dev_probs, dev_labels)

  'precision', 'predicted', average, warn_for)


{'roc_auc': 0.8378391125882765,
 'aver_pr': 0.13977888279548203,
 'f1': 0.21110555277638818}

## Adversarial examples

In [22]:
# The samples file interleaves the two kinds of text: even-numbered lines
# (0-based) hold the original example, odd-numbered lines the adversarial one.
with open('/mnt/chatbot_models2/fursov/texar/examples/text_style_transfer/samples_cropped/val.12') as file:
    stripped_lines = [raw_line.strip() for raw_line in file]

original_examples = stripped_lines[0::2]
adversarial_examples = stripped_lines[1::2]

In [None]:
# Wrap the trained model and the dataset reader in a predictor; it is used below to score text strings.
predictor = TextClassifierPredictor(model=model, dataset_reader=reader)

In [26]:
# Sanity check: the two lists should be the same length, one adversarial text per original.
len(adversarial_examples), len(original_examples)

(1254243, 1254243)

In [28]:
# Upper bound on how many examples from each list are scored below.
max_examples = 100000

In [29]:
# Score the first `max_examples` original texts one by one, keeping only the
# 'probs' entry of each prediction.
original_probs = [
    predictor.predict(text)['probs']
    for text in tqdm(original_examples[:max_examples])
]

100%|██████████| 100000/100000 [03:25<00:00, 499.16it/s]


In [32]:
# Score the first `max_examples` adversarial texts. Empty strings are skipped,
# so the result can be shorter than `max_examples`.
adversarial_probs = [
    predictor.predict(text)['probs']
    for text in tqdm(adversarial_examples[:max_examples])
    if text
]

100%|██████████| 100000/100000 [04:02<00:00, 412.25it/s]


In [33]:
# Convert the lists of per-example probabilities into arrays so columns can be sliced.
original_probs = np.array(original_probs)
adversarial_probs = np.array(adversarial_probs)

In [34]:
# The row counts can differ: empty adversarial strings were skipped when scoring,
# so the rows of the two arrays are not aligned pairwise.
original_probs.shape, adversarial_probs.shape

((100000, 2), (99677, 2))

In [37]:
# Summary statistics of column 0 of the original-example probabilities
# (presumably the probability of the label at index 0 — confirm against the vocabulary).
print(f'Original mean prob = {original_probs[:, 0].mean()}, median = {np.median(original_probs[:, 0])}')
print(f'Original max prob = {original_probs[:, 0].max()}, min = {original_probs[:, 0].min()}')

Original mean prob = 0.9868909656191617, median = 0.9957349002361298
Original max prob = 1.0, min = 0.02725524827837944


In [38]:
# Same summary statistics for column 0 of the adversarial-example probabilities
# (presumably the probability of the label at index 0 — confirm against the vocabulary).
print(f'Adversarial mean prob = {adversarial_probs[:, 0].mean()}, median = {np.median(adversarial_probs[:, 0])}')
print(f'Adversarial max prob = {adversarial_probs[:, 0].max()}, min = {adversarial_probs[:, 0].min()}')

Adversarial mean prob = 0.19455415233026777, median = 4.211214036331512e-05
Adversarial max prob = 1.0, min = 1.528894633624645e-23
