In [1]:
from typing import Iterator, List, Dict

import torch
import torch.optim as optim
import numpy as np

from allennlp.data import Instance
from allennlp.data.fields import TextField, SequenceLabelField, LabelField
from allennlp.data.dataset_readers import DatasetReader
from allennlp.common.file_utils import cached_path
from allennlp.data.token_indexers import TokenIndexer, SingleIdTokenIndexer
from allennlp.data.tokenizers import Token
from allennlp.data.vocabulary import Vocabulary
from allennlp.models import Model
from allennlp.modules.text_field_embedders import TextFieldEmbedder, BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding
from allennlp.modules.seq2seq_encoders import Seq2SeqEncoder, PytorchSeq2SeqWrapper
from allennlp.nn.util import get_text_field_mask, sequence_cross_entropy_with_logits
from allennlp.training.metrics import CategoricalAccuracy, Auc
from allennlp.data.iterators import BucketIterator, BasicIterator
from allennlp.training.trainer import Trainer
from allennlp.predictors import TextClassifierPredictor

In [2]:
import sys
# Extend the import path so the project-local `adat` package imported below resolves
# (assumes the package lives two directories above this notebook — TODO confirm).
sys.path.append('../..')
from adat.models import get_basic_classification_model
from adat.dataset import InsuranceReader, CsvReader

In [4]:
# NOTE(review): `data_path` is not referenced by any visible cell — confirm it is still needed.
data_path = 'data/'
# reader = InsuranceReader()
reader = CsvReader()

# Both splits must exist: later cells build the vocabulary from
# `train_dataset + test_dataset` and pass `test_dataset` to the trainer as the
# validation set. Leaving the test read commented out makes those cells fail with
# a NameError on a fresh kernel.
train_dataset = reader.read('../../data/kaggle_transactions_data/train.csv')
# NOTE(review): test.csv is assumed to sit next to train.csv — confirm the path.
test_dataset = reader.read('../../data/kaggle_transactions_data/test.csv')


0it [00:00, ?it/s][A
2496it [00:00, 13319.30it/s][A
5247it [00:00, 15757.21it/s][A
6507it [00:00, 11266.72it/s][A
9474it [00:00, 13842.43it/s][A
11048it [00:00, 11439.18it/s][A
12875it [00:00, 12883.18it/s][A
14647it [00:00, 14031.09it/s][A
16222it [00:01, 10095.51it/s][A
18711it [00:01, 12285.97it/s][A
21288it [00:01, 14572.32it/s][A
23213it [00:01, 9874.73it/s] [A
25302it [00:01, 11730.01it/s][A
26991it [00:01, 12822.21it/s][A
28993it [00:02, 9873.14it/s] [A
31810it [00:02, 12262.36it/s][A
34692it [00:02, 14815.28it/s][A
37400it [00:02, 17143.81it/s][A
39700it [00:02, 11070.45it/s][A
42564it [00:03, 13567.09it/s][A
45591it [00:03, 16258.19it/s][A
48498it [00:03, 18734.72it/s][A
51041it [00:03, 11531.82it/s][A
53628it [00:03, 13824.80it/s][A
56647it [00:03, 16509.24it/s][A
59551it [00:03, 18963.09it/s][A
62109it [00:04, 20511.77it/s][A
64660it [00:04, 21661.99it/s][A
67193it [00:04, 11485.81it/s][A
69975it [00:04, 13941.23it/s][A
72909it [00:04, 16546.49

In [4]:
# Build the vocabulary from the concatenation of the train and test instances.
vocab = Vocabulary.from_instances(train_dataset + test_dataset)

100%|██████████| 306998/306998 [00:04<00:00, 72581.22it/s]


In [5]:
# Batches of 1024 instances; the vocabulary is attached so the iterator can index
# the instances it batches.
iterator = BasicIterator(batch_size=1024)
iterator.index_with(vocab)

# Model

In [6]:
# Inspect current GPU memory/utilisation before choosing `cuda_device` below.
!nvidia-smi

Sat Jan 18 21:14:06 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.87.00    Driver Version: 418.87.00    CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 108...  Off  | 00000000:02:00.0 Off |                  N/A |
|  0%   28C    P8     9W / 280W |  10277MiB / 11178MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  GeForce GTX 108...  Off  | 00000000:03:00.0 Off |                  N/A |
|  0%   29C    P8     8W / 280W |  10806MiB / 11178MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   2  GeForce GTX 108...  Off  | 00000000:83:00.0 Off |                  N/A |
|  0%   

In [7]:
# Project helper that builds the classifier from the vocabulary. The second argument
# is presumably the number of target classes (the printed model ends in a
# Linear layer with out_features=2) — confirm against `adat.models`.
model = get_basic_classification_model(vocab, 2)

In [8]:
# GPU index used for the model, the trainer and the evaluation batches below.
cuda_device = 2

In [9]:
# Move the model to the selected GPU; as the last expression of the cell, the
# returned model is echoed, which prints its architecture.
model.cuda(cuda_device)

BasicClassifier(
  (_text_field_embedder): BasicTextFieldEmbedder(
    (token_embedder_tokens): Embedding()
  )
  (_seq2seq_encoder): PytorchSeq2SeqWrapper(
    (_module): LSTM(128, 64, batch_first=True, bidirectional=True)
  )
  (_seq2vec_encoder): BoWMaxAndMeanEncoder(
    (maxer): BoWMaxEncoder()
    (meaner): BagOfEmbeddingsEncoder()
  )
  (_classification_layer): Linear(in_features=256, out_features=2, bias=True)
  (_loss): CrossEntropyLoss()
)

In [10]:
# Adam over all model parameters with a learning rate of 0.003.
optimizer = optim.Adam(model.parameters(), lr=0.003)

In [11]:
# Training schedule: upper bound on epochs and the patience value handed to the trainer.
num_epochs, patience = 10, 2

trainer = Trainer(
    # what is trained and how it is optimised
    model=model,
    optimizer=optimizer,
    # data: batching plus the train / validation splits
    iterator=iterator,
    train_dataset=train_dataset,
    validation_dataset=test_dataset,
    # schedule
    num_epochs=num_epochs,
    patience=patience,
    # runtime: target GPU and where the run is serialized
    cuda_device=cuda_device,
    serialization_dir='experiments/kaggle_exp_4',
)

In [12]:
# Run training; the metrics dict returned by train() is echoed as the cell output.
trainer.train()

accuracy: 0.6737, loss: 0.6001 ||: 100%|██████████| 285/285 [01:38<00:00,  2.88it/s]
accuracy: 0.6880, loss: 0.5863 ||: 100%|██████████| 15/15 [00:02<00:00,  5.11it/s]
accuracy: 0.6928, loss: 0.5781 ||: 100%|██████████| 285/285 [01:30<00:00,  3.15it/s]
accuracy: 0.6912, loss: 0.5833 ||: 100%|██████████| 15/15 [00:02<00:00,  5.69it/s]
accuracy: 0.6973, loss: 0.5699 ||: 100%|██████████| 285/285 [01:31<00:00,  3.11it/s]
accuracy: 0.6913, loss: 0.5814 ||: 100%|██████████| 15/15 [00:04<00:00,  3.03it/s]
accuracy: 0.7020, loss: 0.5629 ||: 100%|██████████| 285/285 [01:33<00:00,  3.06it/s]
accuracy: 0.6903, loss: 0.5840 ||: 100%|██████████| 15/15 [00:02<00:00,  5.96it/s]
accuracy: 0.7057, loss: 0.5557 ||: 100%|██████████| 285/285 [01:34<00:00,  3.01it/s]
accuracy: 0.6920, loss: 0.5839 ||: 100%|██████████| 15/15 [00:02<00:00,  5.85it/s]


{'best_epoch': 2,
 'peak_cpu_memory_MB': 4200.432,
 'peak_gpu_0_memory_MB': 10277,
 'peak_gpu_1_memory_MB': 10806,
 'peak_gpu_2_memory_MB': 9547,
 'peak_gpu_3_memory_MB': 10,
 'training_duration': '0:06:28.322201',
 'training_start_epoch': 0,
 'training_epochs': 3,
 'epoch': 3,
 'training_accuracy': 0.7019660686855387,
 'training_loss': 0.5629137595494588,
 'training_cpu_memory_MB': 4200.432,
 'training_gpu_0_memory_MB': 10277,
 'training_gpu_1_memory_MB': 10806,
 'training_gpu_2_memory_MB': 9547,
 'training_gpu_3_memory_MB': 10,
 'validation_accuracy': 0.6902931596091205,
 'validation_loss': 0.5840107679367066,
 'best_validation_accuracy': 0.6913355048859935,
 'best_validation_loss': 0.58143124183019}

In [13]:
# NOTE(review): second train() call on the same name. The recorded progress bars show
# 72 training batches per epoch (the previous run shows 285), so the trainer/iterator
# was presumably re-created with other settings in a cell that is no longer present,
# and this run ends in a KeyboardInterrupt — confirm whether this cell should be kept.
trainer.train()

accuracy: 0.6263, loss: 0.6473 ||: 100%|██████████| 72/72 [01:54<00:00,  1.59s/it]
accuracy: 0.6818, loss: 0.6039 ||: 100%|██████████| 4/4 [00:04<00:00,  1.00s/it]
accuracy: 0.6820, loss: 0.5961 ||: 100%|██████████| 72/72 [01:36<00:00,  1.35s/it]
accuracy: 0.6798, loss: 0.5999 ||: 100%|██████████| 4/4 [00:03<00:00,  1.11it/s]
accuracy: 0.6847, loss: 0.5898 ||: 100%|██████████| 72/72 [01:47<00:00,  1.49s/it]
accuracy: 0.6866, loss: 0.5919 ||: 100%|██████████| 4/4 [00:05<00:00,  1.41s/it]
accuracy: 0.6872, loss: 0.5863 ||: 100%|██████████| 72/72 [01:42<00:00,  1.43s/it]
accuracy: 0.6852, loss: 0.5920 ||: 100%|██████████| 4/4 [00:03<00:00,  1.09it/s]
accuracy: 0.6928, loss: 0.5791 ||:  29%|██▉       | 21/72 [00:26<01:12,  1.43s/it]

KeyboardInterrupt: 

In [21]:
# Saving the trained weights and the vocabulary is currently disabled;
# uncomment the lines below to write them to disk.
# with open("model_classification2.th", 'wb') as f:
#     torch.save(model.state_dict(), f)

# vocab.save_to_files("vocab_classification2")

## Metrics

In [76]:
from tqdm import tqdm
from sklearn.metrics import roc_auc_score, average_precision_score, confusion_matrix, f1_score

In [69]:
# Score the dev set batch by batch, collecting class probabilities and gold labels.
# NOTE(review): `dev_dataset` is not defined in any visible cell — confirm its origin.
dev_probs = []
dev_labels = []

# Switch to inference mode: a training run interrupted mid-epoch leaves the model in
# train mode, where layers such as dropout make the predicted probabilities stochastic.
model.eval()

# No gradients are needed for scoring; disabling autograd avoids building a graph
# for every batch.
with torch.no_grad():
    for batch in tqdm(iterator(dev_dataset, num_epochs=1)):
        # Device-side copy of every tensor of the text field, rather than mutating
        # the batch in place and moving only the 'tokens' entry.
        to_predict = {
            name: tensor.to(cuda_device)
            for name, tensor in batch['tokens'].items()
        }

        dev_probs.append(model(to_predict)['probs'].cpu().numpy())
        dev_labels.extend(list(batch['label'].cpu().numpy()))


0it [00:00, ?it/s][A
1it [00:00,  4.94it/s][A
6it [00:00,  6.75it/s][A
12it [00:00,  9.13it/s][A
18it [00:00, 12.12it/s][A
24it [00:00, 15.75it/s][A
30it [00:00, 19.88it/s][A
36it [00:00, 24.34it/s][A
42it [00:01, 28.83it/s][A
47it [00:01, 32.93it/s][A
53it [00:01, 36.90it/s][A
59it [00:01, 40.34it/s][A
65it [00:01, 42.90it/s][A
71it [00:01, 45.04it/s][A
77it [00:01, 46.97it/s][A
83it [00:01, 48.49it/s][A
89it [00:01, 49.27it/s][A
95it [00:02, 49.93it/s][A
101it [00:02, 50.33it/s][A
107it [00:02, 50.67it/s][A
113it [00:02, 50.06it/s][A
119it [00:02, 50.71it/s][A
125it [00:02, 51.11it/s][A
131it [00:02, 51.27it/s][A
137it [00:02, 51.47it/s][A
143it [00:02, 51.97it/s][A
149it [00:03, 51.46it/s][A
155it [00:03, 52.07it/s][A
161it [00:03, 52.25it/s][A
167it [00:03, 51.17it/s][A
173it [00:03, 51.91it/s][A
179it [00:03, 52.49it/s][A
185it [00:03, 52.23it/s][A
191it [00:03, 51.89it/s][A
197it [00:04, 51.52it/s][A
203it [00:04, 50.81it/s][A
209it [00:04, 51

In [70]:
# Stack the per-batch probability arrays row-wise and convert the label list to an array.
dev_probs = np.vstack(dev_probs)
dev_labels = np.array(dev_labels)

In [72]:
# Sanity check: there should be one probability row per label.
dev_probs.shape, dev_labels.shape

((57011, 2), (57011,))

In [78]:
def calculate_metrics(probs, labels, thresholds=None):
    """Compute ranking and thresholded metrics for a binary classifier.

    Parameters
    ----------
    probs : array-like, indexable as ``probs[:, 1]``
        Per-example class scores; column 1 is used as the positive-class score.
    labels : array-like
        Ground-truth binary labels, one per row of ``probs``.
    thresholds : iterable of float, optional
        Decision thresholds scanned for the best F1. Defaults to
        ``np.linspace(0.001, 0.99)``, the grid the function has always used.

    Returns
    -------
    dict
        ``'roc_auc'``: ROC AUC of the positive-class score;
        ``'aver_pr'``: average precision of the positive-class score;
        ``'f1'``: the highest F1 obtained over ``thresholds`` (``0.0`` when no
        threshold yields a positive prediction).
    """
    labels = np.asarray(labels)
    # Slice the positive-class column once instead of once per metric and threshold.
    positive_scores = np.asarray(probs)[:, 1]
    if thresholds is None:
        thresholds = np.linspace(0.001, 0.99)

    metrics = dict()
    metrics['roc_auc'] = roc_auc_score(labels, positive_scores)
    metrics['aver_pr'] = average_precision_score(labels, positive_scores)

    f1_per_threshold = []
    for threshold in thresholds:
        predicted = (positive_scores > threshold).astype(int)
        # With no positive prediction precision is undefined: f1_score warns and
        # reports 0. Skipping such thresholds keeps the maximum unchanged and
        # avoids the warning noise.
        if predicted.any():
            f1_per_threshold.append(f1_score(y_true=labels, y_pred=predicted))
    metrics['f1'] = max(f1_per_threshold, default=0.0)

    return metrics

In [79]:
# Resulting metrics on the dev set (the returned dict is echoed as the cell output).

calculate_metrics(dev_probs, dev_labels)

  'precision', 'predicted', average, warn_for)


{'roc_auc': 0.8378391125882765,
 'aver_pr': 0.13977888279548203,
 'f1': 0.21110555277638818}

## Adversarial examples

In [22]:
# The samples file interleaves the two corpora: lines at even 0-based positions are
# stored as original texts, lines at odd positions as adversarial texts.
with open('/mnt/chatbot_models2/fursov/texar/examples/text_style_transfer/samples_cropped/val.12') as file:
    stripped_lines = [line.strip() for line in file]

original_examples = stripped_lines[0::2]
adversarial_examples = stripped_lines[1::2]

In [None]:
# Wrap the model together with the dataset reader; the cells below call
# predictor.predict() on raw strings and read the 'probs' entry of the result.
predictor = TextClassifierPredictor(model=model, dataset_reader=reader)

In [26]:
# The two counts are equal when the samples file has an even number of lines.
len(adversarial_examples), len(original_examples)

(1254243, 1254243)

In [28]:
# Upper bound on how many examples from each list are scored below.
max_examples = 100000

In [29]:
# Class probabilities for the first `max_examples` original texts, one predictor call each.
original_probs = [
    predictor.predict(text)['probs']
    for text in tqdm(original_examples[:max_examples])
]

100%|██████████| 100000/100000 [03:25<00:00, 499.16it/s]


In [32]:
# Class probabilities for the first `max_examples` adversarial texts. Empty strings
# are skipped, so this list can be shorter than `original_probs` and is not
# index-aligned with it.
adversarial_probs = [
    predictor.predict(text)['probs']
    for text in tqdm(adversarial_examples[:max_examples])
    if text
]

100%|██████████| 100000/100000 [04:02<00:00, 412.25it/s]


In [33]:
# Convert the collected probability lists to arrays for column-wise statistics.
original_probs = np.array(original_probs)
adversarial_probs = np.array(adversarial_probs)

In [34]:
# Row counts differ when empty adversarial examples were skipped during scoring.
original_probs.shape, adversarial_probs.shape

((100000, 2), (99677, 2))

In [37]:
# Summary statistics of column 0 (the first class) of the probabilities predicted
# for the original texts.
print(f'Original mean prob = {original_probs[:, 0].mean()}, median = {np.median(original_probs[:, 0])}')
print(f'Original max prob = {original_probs[:, 0].max()}, min = {original_probs[:, 0].min()}')

Original mean prob = 0.9868909656191617, median = 0.9957349002361298
Original max prob = 1.0, min = 0.02725524827837944


In [38]:
# Same summary statistics of column 0 (the first class) for the adversarial texts.
print(f'Adversarial mean prob = {adversarial_probs[:, 0].mean()}, median = {np.median(adversarial_probs[:, 0])}')
print(f'Adversarial max prob = {adversarial_probs[:, 0].max()}, min = {adversarial_probs[:, 0].min()}')

Adversarial mean prob = 0.19455415233026777, median = 4.211214036331512e-05
Adversarial max prob = 1.0, min = 1.528894633624645e-23
