### ATIS example

In [9]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import pandas as pd
import warnings
import os
import sys

# Put the parent directory on the import path so the `modules` package imported below resolves.
sys.path.append("../")
# Suppress all warnings for the cells that follow.
warnings.filterwarnings("ignore")

Download the ATIS dataset from [here](https://github.com/Microsoft/CNTK/tree/master/Examples/LanguageUnderstanding/ATIS/Data).

### Run NER model

In [10]:
import os


# Directory holding the filtered train/validation CSV splits.
data_path = "/datadrive/JointSLU/data/"
train_path, valid_path = (
    os.path.join(data_path, csv_name)
    for csv_name in ("train_filtered.csv", "valid_filtered.csv")
)

# Directory holding the BERT checkpoint, its config and its vocabulary;
# every model file is resolved relative to it.
model_dir = "/datadrive/models/multilingual_L-12_H-768_A-12/"
init_checkpoint_pt, bert_config_file, vocab_file = (
    os.path.join(model_dir, file_name)
    for file_name in ("pytorch_model.bin", "bert_config.json", "vocab.txt")
)

In [11]:
import torch
# Make GPU 0 the current CUDA device.
torch.cuda.set_device(0)
# Sanity check: show whether CUDA is available and which device index is current.
torch.cuda.is_available(), torch.cuda.current_device()

(True, 0)

#### Create data loaders

In [12]:
from modules import BertNerData as NerData

In [13]:
# Build the data object from the train/validation CSVs and the BERT vocabulary file;
# later cells read its `train_dl` / `valid_dl` loaders and its label mappings.
# NOTE(review): `is_cls=True` presumably turns on the sentence-level (intent) labels — confirm in BertNerData.create.
data = NerData.create(train_path, valid_path, vocab_file, data_type="bert_uncased", is_cls=True)

In [14]:
# Sizes of the datasets behind the training and validation loaders.
len(data.train_dl.dataset), len(data.valid_dl.dataset)

(9445, 888)

In [15]:
# Number of entries in the token-label mapping and in the class (intent) mapping.
len(data.label2idx), len(data.id2cls)

(144, 17)

In [16]:
# Label list read from the `slots` column of slt_flt.csv; it is handed to the learner
# as `sup_labels` further down.
# The file is resolved from `data_path`, like the train/valid CSVs, instead of
# repeating the absolute directory.
sup_labels = list(pd.read_csv(os.path.join(data_path, "slt_flt.csv")).slots)
len(sup_labels)

106

#### Create NER model

Set the encoder and decoder parameters as proposed [here](https://arxiv.org/pdf/1609.01454.pdf).

In [17]:
from modules.models.bert_models import BertBiLSTMAttnCRFJoint

In [18]:
# Build the joint model. Arguments: number of token labels, number of intent classes,
# BERT config file, and the pretrained PyTorch checkpoint.
# NOTE(review): `enc_hidden_dim=256` presumably sets the encoder hidden size — confirm in BertBiLSTMAttnCRFJoint.create.
model = BertBiLSTMAttnCRFJoint.create(len(data.label2idx), len(data.cls2idx),
                                      bert_config_file, init_checkpoint_pt, enc_hidden_dim=256)

In [19]:
# Display the decoder's module structure.
model.decoder

AttnCRFJointDecoder(
  (attn): MultiHeadAttention(
    (attention): _MultiHeadAttention(
      (attention): ScaledDotProductAttention(
        (softmax): Softmax()
        (dropout): Dropout(p=0.5)
      )
    )
    (proj): Linear(in_features=192, out_features=256, bias=True)
    (dropout): Dropout(p=0.5)
    (layer_norm): LayerNormalization()
  )
  (linear): Linears(
    (linears): ModuleList(
      (0): Linear(in_features=256, out_features=128, bias=True)
    )
    (output_linear): Linear(in_features=128, out_features=144, bias=True)
  )
  (crf): CRF()
  (intent_out): PoolingLinearClassifier(
    (dropout): Dropout(p=0.5)
    (linear): Linears(
      (linears): ModuleList(
        (0): Linear(in_features=768, out_features=128, bias=True)
      )
      (output_linear): Linear(in_features=128, out_features=17, bias=True)
    )
  )
  (intent_loss): CrossEntropyLoss()
)

#### Create learner

In [24]:
from modules import NerLearner

In [25]:
# Number of training epochs; also used to derive `t_total` below.
num_epochs = 100
# Wrap the model and data in the training helper. `t_total` is
# num_epochs * len(data.train_dl), i.e. the total number of training batches.
# NOTE(review): presumably the best checkpoint is written to `best_model_path` — confirm in NerLearner.
learner = NerLearner(model, data,
                     best_model_path="/datadrive/models/atis/joint.cpt",
                     lr=0.01, clip=1.0, sup_labels=sup_labels,
                     t_total=num_epochs * len(data.train_dl))

INFO:root:Don't use lr scheduler...


In [65]:
# Run training for `num_epochs` epochs.
# NOTE(review): `target_metric='prec'` presumably selects the best model by precision (the log prints `max_prec`) — confirm.
learner.fit(num_epochs, target_metric='prec')

INFO:root:Resuming train... Current epoch 43.


HBox(children=(IntProgress(value=0, max=591), HTML(value='')))

INFO:root:
epoch 44, average train epoch loss=0.095276



HBox(children=(IntProgress(value=0, max=56), HTML(value='')))

INFO:root:on epoch 10 by max_prec: 0.877
INFO:root:on epoch {} classification report:


                              precision    recall  f1-score   support

           B_toloc.city-name      0.971     0.996     0.983       712
                B_round-trip      1.000     0.973     0.986        73
                I_round-trip      1.000     1.000     1.000        71
                         I_O      0.976     0.992     0.984       123
             B_cost-relative      1.000     0.973     0.986        37
               B_fare-amount      1.000     1.000     1.000         2
               I_fare-amount      1.000     1.000     1.000         2
    B_arrive-date.month-name      0.714     0.833     0.769         6
    B_arrive-date.day-number      0.714     0.833     0.769         6
         I_fromloc.city-name      0.967     1.000     0.983       235
         B_stoploc.city-name      1.000     1.000     1.000        20
 B_arrive-time.time-relative      0.967     0.935     0.951        31
          B_arrive-time.time      0.892     0.971     0.930        34
          I_arrive-

### Get best results

In [27]:
# NOTE(review): presumably restores the checkpoint stored at the learner's `best_model_path` — confirm in NerLearner.load_model.
learner.load_model()

#### Get span results for the validation dataset (where train support > 3)

In [28]:
import pandas as pd
# Slot list read from the `sup_slots` column of sup_slots.csv.
# The file is resolved from `data_path`, like the train/valid CSVs, instead of
# repeating the absolute directory.
sup_slots = list(pd.read_csv(os.path.join(data_path, "sup_slots.csv")).sup_slots)

In [44]:
from modules.data.bert_data import get_bert_data_loader_for_predict

In [45]:
# Prediction loader over the validation CSV. `valid_path` is the same file, already
# built with os.path.join; the string concatenation `data_path + "valid_filtered.csv"`
# only worked while `data_path` ended with a slash.
dl = get_bert_data_loader_for_predict(valid_path, learner)

In [55]:
# Run the learner over the prediction loader; it returns two results.
# NOTE(review): presumably token-level predictions and intent-class predictions — confirm in NerLearner.predict.
preds, preds_cls = learner.predict(dl)

HBox(children=(IntProgress(value=0, max=56), HTML(value='')))

In [56]:
from modules.train.train import validate_step


# Evaluate the loaded model on the validation loader; two text reports come back
# (parsed line by line in later cells).
# NOTE(review): presumably the first covers token labels (using `learner.sup_labels`)
# and the second the intent classes — confirm in validate_step.
clf_report, clf_report_cls = validate_step(
    learner.data.valid_dl, learner.model, learner.data.id2label, learner.sup_labels, learner.data.id2cls)

HBox(children=(IntProgress(value=0, max=56), HTML(value='')))

Mean IOB precision

In [57]:
import numpy as np


# Mean precision (report column 1) over IOB labels whose support (last column) is
# positive. The slice [2:-5] drops the first two and the last five lines of the
# report text, presumably leaving only the per-label rows.
# The original filter referenced `ss`, which no cell of this notebook defines, so the
# cell raised NameError after Restart & Run All.
# NOTE(review): assumes `ss` held the label list `sup_labels` (the same list handed to
# the learner and to `validate_step`); if it was `sup_slots`, substitute it — confirm.
kept_labels = set(sup_labels)
iob_rows = (line.split() for line in clf_report.split("\n")[2:-5])
np.mean([float(row[1]) for row in iob_rows if int(row[-1]) > 0 and row[0] in kept_labels])

0.8768301886792453

Mean span precision

In [61]:
from modules.utils.plot_metrics import get_bert_span_report


# Span-level report built from the prediction loader and the predictions.
# NOTE(review): this rebinds `clf_report`, replacing the report returned by
# `validate_step`; re-running the earlier IOB-precision cell after this one would
# average this report instead.
clf_report = get_bert_span_report(dl, preds)
# Mean of column 1 (precision) over rows whose last column (support) is positive;
# [2:-5] drops the first two and the last five lines of the report text.
span_rows = (row_text.split() for row_text in clf_report.split("\n")[2:-5])
np.mean([float(fields[1]) for fields in span_rows if int(fields[-1]) > 0])

0.8235797101449275

Mean classification precision

In [64]:
# Mean of column 1 (precision) over the classification-report rows whose last column
# (support) is positive; [2:-5] drops the first two and the last five lines of the text.
cls_rows = (row_text.split() for row_text in clf_report_cls.split("\n")[2:-5])
np.mean([float(fields[1]) for fields in cls_rows if int(fields[-1]) > 0])

0.8943125000000001