In [None]:
# Use one seed for every random number source touched in this notebook
# (Python's `random`, NumPy's global generator and TensorFlow).
seed_value = 42
import os

# Exported before `random`, `numpy` and `tensorflow` are imported.
# NOTE(review): CPython reads PYTHONHASHSEED at interpreter start-up, so this
# assignment presumably only reaches child processes, not the running kernel
# -- confirm.
os.environ['PYTHONHASHSEED'] = str(seed_value)
# NOTE(review): the line below is disabled. TF_DETERMINISTIC_OPS looks like an
# on/off switch rather than a seed, so "42" is probably not the value wanted if
# it is ever re-enabled -- confirm against the TensorFlow documentation.
# os.environ["TF_DETERMINISTIC_OPS"] = str(seed_value)
import random

# Seed Python's built-in `random` module.
random.seed(seed_value)
import numpy as np

# Seed NumPy's global random state.
np.random.seed(seed_value)
import tensorflow as tf

# Seed TensorFlow's global random generator.
tf.random.set_seed(seed_value)

In [None]:
import pandas as pd
from pathlib import Path

# Fixed row width: every encoded sentence and every slot-label row is
# zero-padded to this many positions.
MAX_LENGTH = 50

In [None]:
# Load the augmented utterances, remove the 'Unnamed: 0.1', 'Unnamed: 0' and
# 'index' columns, and keep only the rows whose 'sentence' value is present.
augment_df = (
    pd.read_csv('final_augment.csv')
    .drop(columns=['Unnamed: 0.1', 'Unnamed: 0', 'index'])
    .dropna(subset=['sentence'])
)
augment_df

Unnamed: 0,slots,intent_label,sentence
0,O O O O O,balance_query,بگو چند ریال پول دارم.
1,O O O O O,balance_query,پول خرجی تو حسابم چقدره؟
2,O O O O O,balance_query,تو حسابم چقدر پول هست؟
3,O O B-Bankname O O O,balance_query,توی کارت ملتم چقدر پول دارم؟
4,O O B-Bankname O O O,balance_query,توی کارت ملتم چقدر پول دارم؟
...,...,...,...
3302,O O O O B-Amount I-Amount I-Amount O O O O O O...,transaction_card,من قصد دارم مبلغ 200 هزار تومان را از حساب بان...
3303,O O B-Billname I-Billname O O O O O O O O O O,bill_payment,قصد دارم قبض آب ماه جاری رو با استفاده از کارت...
3304,O O O O O O O O O O O O O O,other,می‌خواهم از برنامه بانک برای مدیریت هزینه روزا...
3305,O O O O O O O O O B-Amount I-Amount I-Amount O O,transaction_card,من میخوام برای خرید طلا از حساب بانکیم مبلغ 5 ...


In [None]:
# Number of rows per intent label, most frequent first (shows class imbalance).
augment_df['intent_label'].value_counts()

bill_payment        1036
transaction_card     926
introduce            471
balance_query        397
transaction_paya     332
other                145
Name: intent_label, dtype: int64

In [None]:
from transformers import BertTokenizer, AutoTokenizer, TFAutoModel

# Name of the pretrained checkpoint used for both the tokenizer and the encoder.
model_name_or_path = "hooshvarelab/bert-fa-base-uncased"
# Tokenizer that ships with the checkpoint above.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
# Stand-alone copy of the pretrained encoder.
# NOTE(review): no later cell visible here reads `base_bert_model`; the joint
# model loads its own copy from the same checkpoint -- confirm whether this
# second load is still needed.
base_bert_model = TFAutoModel.from_pretrained(model_name_or_path)

In [None]:
import numpy as np

def encode_dataset(tokenizer, text_sequences, max_length):
    """Turn whitespace-tokenised sentences into padded BERT input arrays.

    Every sentence is split on whitespace and each word is looked up in the
    tokenizer vocabulary as one token (no sub-word splitting). The ids are
    wrapped as ``[CLS] w1 ... wn [SEP]`` and right-padded to ``max_length``.
    The leading ``[CLS]`` is what keeps the words aligned with the slot labels,
    which ``encode_token_labels`` writes starting at column 1.

    Sentences with more than ``max_length - 2`` words are truncated so that the
    two special tokens always fit.

    Args:
        tokenizer: ParsBERT Tokenizer; must provide ``convert_tokens_to_ids``
            and the ``cls_token_id`` / ``sep_token_id`` / ``pad_token_id``
            attributes.
        text_sequences: Input Sentences (a sized iterable of strings).
        max_length: Number of positions per row, special tokens included.

    Returns:
        dictionary: ``'input_ids'`` and ``'attention_masks'``, both int32
        arrays of shape ``(len(text_sequences), max_length)``. The mask is 1 on
        every real position (special tokens included) and 0 on padding.

    Raises:
        ValueError: If ``max_length`` is too small to hold ``[CLS]`` and ``[SEP]``.
    """
    if max_length < 2:
        raise ValueError("max_length must be at least 2 to hold [CLS] and [SEP]")

    # Fall back to 0 (the value the original zero-initialised array used) when
    # the tokenizer does not define a padding id.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = 0

    num_rows = len(text_sequences)
    token_ids = np.full((num_rows, max_length), pad_id, dtype=np.int32)
    attention_masks = np.zeros((num_rows, max_length), dtype=np.int32)

    # Two positions per row are reserved for the special tokens.
    max_words = max_length - 2

    for i, text_sequence in enumerate(text_sequences):
        words = text_sequence.split()[:max_words]
        row = [tokenizer.cls_token_id]
        row.extend(tokenizer.convert_tokens_to_ids(words))
        row.append(tokenizer.sep_token_id)

        token_ids[i, :len(row)] = row
        # Build the mask from the real length instead of testing ids against 0,
        # so it stays correct whatever id a word happens to map to.
        attention_masks[i, :len(row)] = 1

    return {'input_ids': token_ids, 'attention_masks': attention_masks}

In [None]:
# Encode every training sentence into fixed-length id / attention-mask arrays.
encoded_train = encode_dataset(tokenizer, augment_df["sentence"], MAX_LENGTH)

In [None]:
# The intents file is read as whitespace-separated names; an intent's integer id
# is its position in that list.
intent_names = Path("dataset/intents.txt").read_text(encoding="utf-8").split()
intent_map = {label: idx for idx, label in enumerate(intent_names)}
intent_map

{'transaction_card': 0,
 'transaction_paya': 1,
 'bill_payment': 2,
 'balance_query': 3,
 'introduce': 4,
 'other': 5}

In [None]:
# Integer intent id for every row of `augment_df`, in row order.
# NOTE(review): `Series.map` presumably yields NaN for any label missing from
# `intent_map`, which would silently turn this into a float array -- confirm
# that every label in the data is listed in dataset/intents.txt.
intent_train = augment_df["intent_label"].map(intent_map).values

In [None]:
# Slot vocabulary: "[PAD]" comes first so that it receives id 0 (the value the
# label arrays are padded with), followed by one slot name per line of the file.
slot_names = [
    "[PAD]",
    *Path("dataset/slots.txt").read_text(encoding="utf-8").strip().splitlines(),
]

# Each name is given the current size of the map as its id.
slot_map = {}
for label in slot_names:
    slot_map[label] = len(slot_map)
slot_map

{'[PAD]': 0,
 'B-Bankname': 1,
 'I-Bankname': 2,
 'B-Billname': 3,
 'I-Billname': 4,
 'B-Cardnumber': 5,
 'B-Accountnumber': 6,
 'B-Username': 7,
 'I-Username': 8,
 'B-Iban': 9,
 'B-Method': 10,
 'I-Method': 11,
 'B-Amount': 12,
 'I-Amount': 13,
 'B-Exprdate': 14,
 'O': 15}

In [None]:
def encode_token_labels(text_sequences, slot_names, tokenizer, slot_map, max_length):
    """Encode the per-word slot labels of each sentence into a padded integer matrix.

    Sentences and label strings are both split on whitespace and paired word by
    word, so every word contributes exactly one label. Labels are written from
    column 1 onwards: column 0 is left as 0 ("[PAD]") for the leading special
    token of the input ids, and the final column is kept free for the trailing
    one. Rows with more labels than fit are truncated instead of raising.

    The label 'I-Accountnumber' is stored as 'B-Accountnumber'.

    Args:
        text_sequences: a list of sentences
        slot_names: a list of space-separated slot-label strings, one per sentence
        tokenizer: ParsBERT Tokenizer. Not used, because words are not split
            into sub-word pieces here; kept so existing calls keep working.
        slot_map: a dictionary used for encoding slot names
        max_length: Maximum length needed for zero padding

    Returns:
        array: int32 array of shape (len(text_sequences), max_length) holding
        the encoded slot names, 0 wherever there is no label

    Raises:
        KeyError: if a label (after the 'I-Accountnumber' rewrite) is missing
            from slot_map
    """
    encoded = np.zeros(shape=(len(text_sequences), max_length), dtype=np.int32)

    # Column 0 and the last column are reserved for special tokens.
    max_labels = max(max_length - 2, 0)

    for i, (text_sequence, word_labels) in enumerate(zip(text_sequences, slot_names)):
        encoded_labels = []
        # zip() stops at the shorter side, so surplus words or labels are ignored.
        for _word, word_label in zip(text_sequence.split(), word_labels.split()):
            if word_label == 'I-Accountnumber':
                word_label = 'B-Accountnumber'
            encoded_labels.append(slot_map[word_label])

        # Truncate instead of letting the slice assignment fail on long sentences.
        encoded_labels = encoded_labels[:max_labels]
        encoded[i, 1:len(encoded_labels) + 1] = encoded_labels
    return encoded

In [None]:
# Encode the slot labels of every training sentence into a fixed-length matrix
# with the same row order and width as the encoded sentences.
slot_train = encode_token_labels(augment_df["sentence"], augment_df["slots"], tokenizer, slot_map, MAX_LENGTH)

In [None]:
from keras.layers import *
from keras.layers.core.embedding import Embedding
from transformers import TFAutoModel, TFAutoModelForTokenClassification

class TransformerEncoderLayer(tf.keras.layers.Layer):
    """Self-attention sub-block followed by a feed-forward sub-block.

    Each sub-block applies dropout to its result, adds it back onto its own
    input and layer-normalises the sum.
    """

    def __init__(self, d_model, num_heads, dff, rate=0.1):
        """Create the sub-layers.

        Args:
            d_model: Passed to the attention layer as ``key_dim`` and used as
                the output width of the feed-forward block. Because that output
                is added to the block input, it has to equal the width of the
                tensors fed to this layer.
            num_heads: Number of attention heads.
            dff: Width of the hidden feed-forward layer.
            rate: Dropout rate applied after both sub-blocks.
        """
        super().__init__()

        self.multihead_attention = MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
        self.dropout1 = Dropout(rate)
        self.layernorm1 = LayerNormalization(epsilon=1e-6)

        # Two-layer feed-forward network: expand to dff, project back to d_model.
        feed_forward = tf.keras.Sequential()
        feed_forward.add(Dense(dff, activation='relu'))
        feed_forward.add(Dense(d_model))
        self.ffn = feed_forward
        self.dropout2 = Dropout(rate)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)

    def call(self, inputs, training=False, mask=None):
        """Run both sub-blocks on ``inputs``.

        Args:
            inputs: Tensor used as both query and value of the attention layer.
            training: Forwarded to the two dropout layers.
            mask: Forwarded unchanged as the attention layer's ``attention_mask``.

        Returns:
            The layer-normalised output of the feed-forward sub-block.
        """
        # Self-attention sub-block with residual connection.
        attended = self.multihead_attention(inputs, inputs, attention_mask=mask)
        attended = self.dropout1(attended, training=training)
        attention_block = self.layernorm1(inputs + attended)

        # Feed-forward sub-block with residual connection.
        transformed = self.ffn(attention_block)
        transformed = self.dropout2(transformed, training=training)
        return self.layernorm2(attention_block + transformed)

class JointIntentAndSlotFillingModel(tf.keras.Model):
    """Pretrained BERT encoder with a slot-tagging head and an intent head.

    Slot head: BERT sequence output -> Masking -> bidirectional LSTM -> dropout
    -> dense layer producing one logit vector per position.

    Intent head: three parallel Conv1D branches over the BERT sequence output,
    each max-pooled, concatenated, passed through a transformer encoder layer
    and three dense layers, averaged over the sequence axis and classified.
    """

    def __init__(self, intent_num_labels=None, slot_num_labels=None,
                 model_name=model_name_or_path):
        """Create all sub-layers.

        Args:
            intent_num_labels: Number of output units of the intent classifier.
            slot_num_labels: Number of output units of the slot classifier.
            model_name: Checkpoint name handed to ``TFAutoModel.from_pretrained``.
        """
        super().__init__(name="joint_intent_slot")

        self.bert = TFAutoModel.from_pretrained(model_name)
        # d_model must equal the width of the concatenated conv branches
        # (3 branches x 256 filters = 768) because the encoder layer adds its
        # sub-block outputs back onto its input.
        self.encoder_layer = TransformerEncoderLayer(d_model=768, num_heads=8, dff=2048, rate=0.1)
        self.dropout = Dropout(0.2)
        self.globalpoll = GlobalAveragePooling1D()
        self.Dense1 = Dense(128, activation='relu')
        self.Dense2 = Dense(64, activation='relu')
        self.Dense3 = Dense(32, activation='relu')

        # Both classifiers have no activation: they return raw logits.
        self.intent_classifier = Dense(intent_num_labels,
                                       name="intent_classifier")
        self.slot_classifier = Dense(slot_num_labels,
                                     name="slot_classifier")

        self.intent_cnn_1 = Conv1D(filters=256, kernel_size=7, activation='relu', padding='same')
        self.intent_cnn_2 = Conv1D(filters=256, kernel_size=5, activation='relu', padding='same')
        self.intent_cnn_3 = Conv1D(filters=256, kernel_size=1, activation='relu', padding='same')
        # Pool window of 62 positions; stride 1 with 'same' padding keeps the
        # sequence length unchanged.
        self.intent_pooling = MaxPooling1D(pool_size=(64 - 3 + 1), strides=1, padding='same')

        self.mask = Masking()
        self.blstm = Bidirectional(LSTM(128, return_sequences=True))
        self.drp = Dropout(0.2)

    def call(self, inputs, **kwargs):
        """Compute slot and intent logits.

        Args:
            inputs: Mapping with the keys ``'input_ids'`` and
                ``'attention_masks'``.
            **kwargs: Forwarded unchanged to the BERT encoder.

        Returns:
            Tuple ``(slot_logits, intent_logits)``.
        """
        bert_outputs = self.bert(input_ids=inputs['input_ids'],
                                 attention_mask=inputs['attention_masks'],
                                 **kwargs)
        # Take the first output (the per-token hidden states) by position.
        # Unpacking `.values()` into exactly two names raised as soon as the
        # encoder returned anything other than two fields.
        sequence_output = bert_outputs[0]

        # --- slot head ---
        # NOTE(review): Masking() is created with its default mask value, so it
        # presumably only masks positions whose features are all 0.0; BERT
        # hidden states at padded positions are unlikely to be exactly zero, so
        # this probably masks nothing. Consider passing
        # inputs['attention_masks'] to the LSTM instead -- confirm.
        masked_sequence = self.mask(sequence_output)
        recurrent_features = self.blstm(masked_sequence)
        recurrent_features = self.drp(recurrent_features)
        slot_logits = self.slot_classifier(recurrent_features)

        # --- intent head ---
        # Three kernel sizes (7, 5, 1) look at the same sequence; the single
        # pooling layer is shared by all branches.
        intent_pool_1 = self.intent_pooling(self.intent_cnn_1(sequence_output))
        intent_pool_2 = self.intent_pooling(self.intent_cnn_2(sequence_output))
        intent_pool_3 = self.intent_pooling(self.intent_cnn_3(sequence_output))
        intent_output = tf.keras.layers.concatenate([intent_pool_1, intent_pool_2, intent_pool_3])

        intent_output = self.encoder_layer(intent_output)

        intent_output = self.Dense1(intent_output)
        intent_output = self.dropout(intent_output)
        intent_output = self.Dense2(intent_output)
        intent_output = self.Dense3(intent_output)

        # Average over the sequence axis to get one vector per sentence.
        # NOTE(review): no mask is supplied here, so padded positions presumably
        # take part in the average -- confirm this is intended.
        sentence_features = self.globalpoll(intent_output)

        intent_logits = self.intent_classifier(sentence_features)

        return slot_logits, intent_logits


# Build the joint model with one output unit per intent and one per slot label
# (the slot vocabulary includes the "[PAD]" entry).
joint_model = JointIntentAndSlotFillingModel(intent_num_labels=len(intent_map), slot_num_labels=len(slot_map))

Some layers from the model checkpoint at hooshvarelab/bert-fa-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at hooshvarelab/bert-fa-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
# joint_model.layers[0].trainable = False
# joint_model.intent_classifier.trainable = False

In [None]:
from keras.metrics import SparseCategoricalAccuracy
from keras.losses import SparseCategoricalCrossentropy
from keras.optimizers import Adam

# Both heads return raw logits, hence from_logits=True. One loss object per
# model output, in output order: slot head first, intent head second.
adam = Adam(epsilon=1e-08, learning_rate=3e-5)
losses = [SparseCategoricalCrossentropy(from_logits=True) for _ in range(2)]
metrics = [SparseCategoricalAccuracy(name='accuracy')]
joint_model.compile(loss=losses, metrics=metrics, optimizer=adam)

In [None]:
# Train on the whole augmented set. The targets are ordered like the model
# outputs (slot labels, then intent labels). No validation_split is passed, so
# nothing is held out for validation.
history = joint_model.fit(
    x=encoded_train,
    y=(slot_train, intent_train),
    batch_size=8,
    epochs=10,
    shuffle=True,
)

Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
