In [1]:
# download SNIPS and ATIS datasets for intent recognition

import requests
import pandas as pd


# Base URL of the repository folder that holds one sub-folder per corpus
# (atis / snips), each with train / valid / test parts.
data_url = 'https://raw.githubusercontent.com/ZephyrChenzf/SF-ID-Network-For-NLU/master/data'


def _fetch_lines(url, timeout=30):
    """Download a text file and return its lines as a list of strings.

    Raises ``requests.HTTPError`` on a non-2xx response so that an error page
    is never silently parsed as data, and gives up after ``timeout`` seconds
    instead of hanging the notebook.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    lines = response.text.split('\n')
    # Only drop the empty string produced by a trailing newline; a file without
    # a trailing newline keeps its last example.
    if lines and lines[-1] == '':
        lines.pop()
    return lines


dfs = []
for corpus in ['atis', 'snips']:
    for part in ['train', 'valid', 'test']:
        # `seq.in` holds one utterance per line, `label` the matching intent per line.
        columns = {
            column: _fetch_lines('/'.join([data_url, corpus, part, filename]))
            for column, filename in [('text', 'seq.in'), ('intent', 'label')]
        }
        # pd.DataFrame raises ValueError if the two files have different line counts.
        part_df = pd.DataFrame(columns)
        part_df['part'] = part
        part_df['corpus'] = corpus
        dfs.append(part_df)
# One frame for both corpora with a fresh 0..N-1 index.
df = pd.concat(dfs).reset_index(drop=True)
df

Unnamed: 0,text,intent,part,corpus
0,i want to fly from baltimore to dallas round trip,atis_flight,train,atis
1,round trip fares from baltimore to philadelphi...,atis_airfare,train,atis
2,show me the flights arriving on baltimore on j...,atis_flight,train,atis
3,what are the flights which depart from san fra...,atis_flight,train,atis
4,which airlines fly from boston to washington d...,atis_airline,train,atis
...,...,...,...,...
20350,find a tv series called armageddon summer,SearchCreativeWork,test,snips
20351,find politicsnation with al sharpton,SearchCreativeWork,test,snips
20352,rate this album 0 points out of 6,RateBook,test,snips
20353,add leah kauffman to my uncharted 4 nathan dra...,AddToPlaylist,test,snips


In [2]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import tensorflow as tf
import tensorflow.keras.backend as K

# Print floating point arrays in fixed-point notation rather than scientific notation.
np.set_printoptions(suppress=True)
# Show the TensorFlow version this notebook is running against.
print(tf.__version__)

2.1.0


In [3]:
# Maximum number of space-separated words found in any utterance of `df.text`.
# NOTE(review): presumably used to pick `max_length` for tokenization; a subword
# tokenizer may emit more tokens than words, so treat this as a rough guide.

df.text.str.split(' ').str.len().max()

46

In [0]:
def return_id(tokenizer, max_length, str1, str2=None, truncation_strategy='longest_first'):
    """Encode one text (or a text pair) into fixed-length transformer inputs.

    Calls ``tokenizer.encode_plus`` with special tokens added and padding to
    ``max_length`` enabled, then returns the available features as a list in
    the fixed order ``input_ids``, ``attention_mask``, ``token_type_ids``.
    Features the tokenizer does not produce are left out.

    NOTE(review): `pad_to_max_length` / `truncation_strategy` look like keyword
    names from older transformers releases - confirm against the installed version.
    """
    encoded = tokenizer.encode_plus(
        str1,
        str2,
        max_length=max_length,
        truncation_strategy=truncation_strategy,
        pad_to_max_length=True,
        add_special_tokens=True,
    )

    features = []
    for key in ("input_ids", "attention_mask", "token_type_ids"):
        if key in encoded:
            features.append(encoded[key])
    return features

def compute_input_arrays(df, tokenizer, max_length):
    """Encode every row of ``df.text`` and regroup the results per feature.

    Each text is encoded with ``return_id`` (a progress bar is shown while
    iterating). The per-example feature lists are then transposed so the result
    is a list with one ``int32`` array per feature, each holding one row per
    example.
    """
    per_example = [return_id(tokenizer, max_length, text) for text in tqdm(df.text)]
    # Transpose: list of examples -> tuple of examples for each feature.
    per_feature = zip(*per_example)
    return [np.asarray(column, dtype=np.int32) for column in per_feature]

def compute_output_arrays(df, intents_dict):
    """Map each value of ``df.intent`` to its integer id.

    Intents missing from ``intents_dict`` are mapped to 0. Returns a NumPy
    array with one entry per row of ``df``.
    """
    def to_index(intent):
        return intents_dict.get(intent, 0)

    encoded = df.intent.apply(to_index)
    return np.asarray(encoded)

In [0]:
from sklearn.metrics import accuracy_score
from transformers import AutoConfig, TFAutoModel, AutoTokenizer


class PositiveDense(tf.keras.layers.Layer):
    """Keras layer for linear combination of multiple layer outputs (last dimension)
    with positive multipliers.

    A single trainable vector `kernel`, with one entry per position of the
    input's last axis, is passed through a softmax; the input is multiplied by
    these weights and summed over the last axis. The softmax keeps every
    multiplier positive and makes them sum to 1.

    Keyword arguments (e.g. `name`) are forwarded to `tf.keras.layers.Layer`.
    """
    def __init__(self, **kwargs):
        super(PositiveDense, self).__init__(**kwargs)

    def my_init(self, shape, dtype=None):
        """Initializer for `kernel`: all zeros except the last entry, which is -3.

        After the softmax in `call`, the last input therefore starts with a
        weight e^-3 (about 0.05) times that of each other input.
        """
        x = np.zeros(shape)
        x[-1] = -3
        return K.variable(value=x, dtype=dtype)

    def build(self, input_shape):
        """Create the 1-D `kernel` weight sized to the input's last dimension."""
        input_dim = input_shape[-1]
        # `(input_dim,)` is a real 1-tuple; `(input_dim)` was just a bare int.
        self.kernel = self.add_weight(shape=(input_dim,),
                                      initializer=self.my_init,
                                      name='kernel')
        self.built = True

    def call(self, inputs):
        """Return the softmax-weighted sum of `inputs` over its last axis."""
        return K.sum(inputs * K.softmax(self.kernel), axis=-1)


class Print(tf.keras.layers.Layer):
    """Debugging layer: prints its input with `tf.print` and passes it through unchanged."""

    def __init__(self):
        super().__init__()

    def call(self, inputs, *args, **kwargs):
        # Extra positional/keyword arguments are accepted and ignored.
        tf.print("var: ", inputs)
        return inputs


def create_model(n_inputs, n_outputs, max_length, model_name=None):
    """BERT (or other transformer) model with:
    - positive linear combination of all layer outputs
    - dense layer on top
    - multisample dropout https://arxiv.org/abs/1905.09788

    Args:
        n_inputs: number of int32 input tensors fed to the transformer
            (as produced by `compute_input_arrays`).
        n_outputs: number of units of the dense classification head.
        max_length: length of every input sequence.
        model_name: name/path of the pretrained model to load. When omitted,
            the notebook-level global `model_name` is used, which is what this
            function always did before the parameter existed.

    Returns:
        An uncompiled `tf.keras.models.Model` mapping the list of inputs to a
        `batch_size x n_outputs` tensor.
    """
    if model_name is None:
        # Backward compatibility for callers that relied on the global variable.
        model_name = globals()['model_name']

    x_in = [tf.keras.layers.Input((max_length,), dtype=tf.int32) for _ in range(n_inputs)]

    # The config flag is spelled `output_attentions` (plural).
    config = AutoConfig.from_pretrained(model_name, output_attentions=False, output_hidden_states=True)

    bert_model = TFAutoModel.from_pretrained(model_name, config=config)

    # (layers + 1) x batch_size x sequence x hidden_state
    # NOTE(review): index 2 assumes the model returns the hidden states as its
    # third output, as BERT does - confirm before using another architecture.
    hidden_layers = bert_model(x_in)[2]

    dropout = tf.keras.layers.Dropout(0.2)

    # Take the first token of every layer's output and stack the layers last:
    # batch_size x hidden_state x (layers + 1)
    cls_outputs = K.stack(
        [dropout(layer[:, 0, :]) for layer in hidden_layers],
        axis=2)

    # batch_size x hidden_state
    cls_output = PositiveDense()(cls_outputs)

    high_dropout = tf.keras.layers.Dropout(0.5)

    # NOTE(review): `train_test` pairs this sigmoid head with
    # `sparse_categorical_crossentropy`; softmax is the conventional activation
    # for single-label classification. Left unchanged to keep trained behaviour.
    top_classifier = tf.keras.layers.Dense(n_outputs, activation='sigmoid')

    # multisample dropout https://arxiv.org/abs/1905.09788
    # The same classifier is applied to 5 independent dropout masks and averaged.
    # batch_size x n_outputs
    scores = K.mean(K.stack([
        top_classifier(high_dropout(cls_output))
        for _ in range(5)
    ]), axis=0)

    model = tf.keras.models.Model(inputs=x_in, outputs=scores)

    return model


def train_test(model_name, batch_size, max_length, learning_rate, epochs, patience, data):
    """training and testing model on dataset, using early stopping with best model by validation accuracy

    Args:
        model_name: pretrained model name, used for BOTH the tokenizer and the model.
        batch_size, learning_rate, epochs: training hyper-parameters.
        max_length: sequence length the texts are padded/truncated to.
        patience: epochs without `val_accuracy` improvement before stopping.
        data: DataFrame with `text`, `intent` and `part` columns, where `part`
            is one of 'train', 'valid', 'test'.

    Prints the accuracy on each part; returns nothing.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Class ids come from the training intents only; id 0 ('_unk') is reserved
    # for intents that never occur in the training part.
    intents = np.concatenate([['_unk'], np.sort(data.intent[data.part == 'train'].unique())])
    intents_dict = {intent: ind for ind, intent in enumerate(intents)}

    outputs = compute_output_arrays(data, intents_dict)
    inputs = compute_input_arrays(data, tokenizer, max_length)

    # Positional boolean masks, computed once. `data` may carry a non-zero-based
    # index, so plain NumPy arrays are used to index the NumPy inputs/outputs.
    masks = {part: (data.part == part).values for part in ['train', 'valid', 'test']}

    train_inputs = [inp[masks['train']] for inp in inputs]
    train_outputs = outputs[masks['train']]

    valid_inputs = [inp[masks['valid']] for inp in inputs]
    valid_outputs = outputs[masks['valid']]

    K.clear_session()
    # Pass the name explicitly so the model always matches the tokenizer above.
    model = create_model(len(train_inputs), len(intents_dict), max_length, model_name=model_name)

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    checkpoint_path = 'bert'

    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    history = model.fit(train_inputs, train_outputs, epochs=epochs, batch_size=batch_size, callbacks=[
        tf.keras.callbacks.ModelCheckpoint(
            checkpoint_path, monitor='val_accuracy', save_best_only=True, save_weights_only=True, verbose=1),
        tf.keras.callbacks.EarlyStopping(
            monitor='val_accuracy', patience=patience, restore_best_weights=True, verbose=1)
    ], validation_data=(valid_inputs, valid_outputs))

    # EarlyStopping restores the best weights only when it actually stops the run.
    # If all `epochs` complete, the model would otherwise keep last-epoch weights,
    # so reload the best checkpoint. Skipped if `val_accuracy` was never recorded.
    if history.history.get('val_accuracy'):
        model.load_weights(checkpoint_path)

    preds = model.predict(inputs)

    for part in ['train', 'valid', 'test']:
        acc = accuracy_score(y_true=outputs[masks[part]],
                             y_pred=np.argmax(preds[masks[part]], axis=-1))
        print(f'{part:7} acc = {acc:.2%}')

In [7]:
# Fine-tune and evaluate one model per corpus (ATIS, then SNIPS).

# Hyper-parameters shared by both runs.
model_name = 'bert-base-uncased'
batch_size = 8
max_length = 64
learning_rate = 1e-5
epochs = 20
patience = 3

for dataset in ('atis', 'snips'):
    print(dataset)
    # Arguments follow the positional order of train_test's signature;
    # the last one is the slice of `df` belonging to the current corpus.
    train_test(
        model_name, batch_size, max_length, learning_rate, epochs, patience,
        df.loc[df['corpus'] == dataset],
    )

atis


HBox(children=(FloatProgress(value=0.0, max=5871.0), HTML(value='')))


Train on 4478 samples, validate on 500 samples
Epoch 1/20
Epoch 00001: val_accuracy improved from -inf to 0.93800, saving model to bert
Epoch 2/20
Epoch 00002: val_accuracy improved from 0.93800 to 0.97400, saving model to bert
Epoch 3/20
Epoch 00003: val_accuracy did not improve from 0.97400
Epoch 4/20
Epoch 00004: val_accuracy improved from 0.97400 to 0.97600, saving model to bert
Epoch 5/20
Epoch 00005: val_accuracy did not improve from 0.97600
Epoch 6/20
Epoch 00006: val_accuracy improved from 0.97600 to 0.98000, saving model to bert
Epoch 7/20
Epoch 00007: val_accuracy did not improve from 0.98000
Epoch 8/20
Epoch 00008: val_accuracy did not improve from 0.98000
Epoch 9/20
Epoch 00009: val_accuracy did not improve from 0.98000
Restoring model weights from the end of the best epoch.
Epoch 00009: early stopping
train   acc = 99.71%
valid   acc = 98.00%
test    acc = 97.65%
snips


HBox(children=(FloatProgress(value=0.0, max=14484.0), HTML(value='')))


Train on 13084 samples, validate on 700 samples
Epoch 1/20
Epoch 00001: val_accuracy improved from -inf to 0.97857, saving model to bert
Epoch 2/20
Epoch 00002: val_accuracy improved from 0.97857 to 0.98857, saving model to bert
Epoch 3/20
Epoch 00003: val_accuracy did not improve from 0.98857
Epoch 4/20
Epoch 00004: val_accuracy did not improve from 0.98857
Epoch 5/20
Epoch 00005: val_accuracy improved from 0.98857 to 0.99143, saving model to bert
Epoch 6/20
Epoch 00006: val_accuracy did not improve from 0.99143
Epoch 7/20
Epoch 00007: val_accuracy did not improve from 0.99143
Epoch 8/20
Epoch 00008: val_accuracy did not improve from 0.99143
Restoring model weights from the end of the best epoch.
Epoch 00008: early stopping
train   acc = 99.89%
valid   acc = 99.14%
test    acc = 98.29%
