<a href="https://colab.research.google.com/github/chararchter/intent-detection/blob/master/test_embeddings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [15]:
import os
from pathlib import Path
from typing import List

import tensorflow as tf
from keras.layers import (Dense, Conv1D)
from keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from transformers import BertTokenizer, TFBertModel

In [16]:
# define model and tokenizer
# The tokenizer and the encoder are both loaded from the same pretrained
# checkpoint name. `model_bert` is only called to produce hidden states
# further down; it is not the model that gets compiled and fitted.
model_name = "bert-base-multilingual-cased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model_bert = TFBertModel.from_pretrained(model_name)

Some layers from the model checkpoint at bert-base-multilingual-cased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-multilingual-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [17]:
# download the intent labels, unless the file is already in the working directory
if not os.path.isfile("chatbot_test_ans.txt"):
  !wget https://raw.githubusercontent.com/tilde-nlp/NLU-datasets/master/chatbot/chatbot_test_ans.txt

In [18]:
# Read one intent label per line. readlines() keeps the trailing "\n" on
# every entry, so labels must be stripped before they are compared.
# The encoding is given explicitly so the result does not depend on the
# platform default.
with open("chatbot_test_ans.txt", "r", encoding="utf-8") as f:
  test_answers = f.readlines()

test_answers

['FindConnection\n',
 'DepartureTime\n',
 'DepartureTime\n',
 'FindConnection\n',
 'FindConnection\n',
 'FindConnection\n',
 'FindConnection\n',
 'FindConnection\n',
 'FindConnection\n',
 'FindConnection\n',
 'FindConnection\n',
 'DepartureTime\n',
 'FindConnection\n',
 'FindConnection\n',
 'FindConnection\n',
 'FindConnection\n',
 'FindConnection\n',
 'DepartureTime\n',
 'FindConnection\n',
 'FindConnection\n',
 'FindConnection\n',
 'DepartureTime\n',
 'FindConnection\n',
 'FindConnection\n',
 'FindConnection\n',
 'FindConnection\n',
 'DepartureTime\n',
 'FindConnection\n',
 'FindConnection\n',
 'FindConnection\n',
 'FindConnection\n',
 'FindConnection\n',
 'DepartureTime\n',
 'FindConnection\n',
 'DepartureTime\n',
 'FindConnection\n',
 'DepartureTime\n',
 'FindConnection\n',
 'FindConnection\n',
 'FindConnection\n',
 'FindConnection\n',
 'FindConnection\n',
 'FindConnection\n',
 'FindConnection\n',
 'DepartureTime\n',
 'FindConnection\n',
 'DepartureTime\n',
 'FindConnection\n',
 'F

In [19]:
# Quick check whether the questions file is already on disk; when it is,
# the guarded download in the next cell does nothing.
os.path.isfile("chatbot_test_q.txt")

True

In [20]:
# download the English test questions if missing
# (this notebook fits the classification head on these questions)
if not os.path.isfile("chatbot_test_q.txt"):
  !wget https://raw.githubusercontent.com/tilde-nlp/NLU-datasets/master/chatbot/en/chatbot_test_q.txt

In [21]:
# Read one question per line (each entry keeps its trailing "\n").
# The questions contain non-ASCII characters (e.g. "ß", "ö"), so the file is
# decoded explicitly as UTF-8 instead of relying on the platform default.
with open("chatbot_test_q.txt", "r", encoding="utf-8") as f:
  en_test = f.readlines()

en_test

['i want to go marienplatz\n',
 'when is the next train in muncher freiheit?\n',
 'when does the next u-bahn leaves from garching forschungszentrum?\n',
 'from olympia einkaufszentrum to hauptbahnhof\n',
 'when is the next train from winterstraße 12 to kieferngarten\n',
 'when is the next rocket from winterstraße 12 to kieferngarte\n',
 'can you find a connection from garching to hauptbahnhof?\n',
 'how to get from untere strassäcker 21 to fröttmaning\n',
 'how i can get from marienplatz to garching\n',
 'connection from boltzmannstraße to kieferngarten\n',
 'how to get from bonner platz to freimann?\n',
 'when is the next s-bahn leaving at garching?\n',
 'how do i get from oez to hbf?\n',
 'how to get from winterstrasse 12 to fröttmaning\n',
 'how do i get from garching forschungszentrum to pasing\n',
 'theresienstraße to assling\n',
 'how can i get from theresienstraße to munich east?\n',
 'when does the next bus starts from garching?\n',
 'from quiddestraße to garching?\n',
 'can yo

In [22]:
# Questions and labels are paired by index in the training loop, so the two
# files must contain the same number of lines.
assert len(test_answers) == len(en_test)

In [23]:
def create_model_one_layer(units: int, batch_size: int, sentence_length: int, **kwargs):
    """
    Build the classification head as a Keras Sequential model.

    Each sample fed to the model has shape (batch_size, sentence_length, 768).
    The stack is a softmax Dense layer with `units` outputs followed by a
    softmax Conv1D with `units` filters, a kernel of size `sentence_length`
    and "valid" padding.

    Calling the returned model on one such sample gives
    <tf.Tensor: shape=(1, batch_size, 1, units), dtype=float32
    e.g. <tf.Tensor: shape=(1, 4, 1, 2), dtype=float32
    where 4 = batch_size, 2 = units

    Extra keyword arguments are accepted but not used.
    """
    # 768 is the size of the last axis of the embeddings, e.g. shape=(1, 9, 768)
    input_shape = (batch_size, sentence_length, 768)
    return Sequential([
        tf.keras.Input(shape=input_shape),
        Dense(units, activation='softmax'),
        Conv1D(units, sentence_length, padding="valid", activation="softmax"),
    ])


def create_adam_optimizer(lr=0.001, beta_1=0.9, beta_2=0.999, decay=0, epsilon=None, amsgrad=False):
    """Return a legacy Keras Adam optimizer configured from the given arguments.

    `lr` is forwarded as the optimizer's `learning_rate`; every other argument
    is passed through under its own name.
    """
    adam_settings = {
        "learning_rate": lr,
        "beta_1": beta_1,
        "beta_2": beta_2,
        "epsilon": epsilon,
        "decay": decay,
        "amsgrad": amsgrad,
    }
    return tf.keras.optimizers.legacy.Adam(**adam_settings)


def encode_labels(test_answers: List) -> List:
    """ Encode labels in one hot-encoding
    'FindConnection' corresponds to [[1, 0]]
    'DepartureTime' corresponds to [[0, 1]]

    Surrounding whitespace is stripped before comparing, because labels read
    with ``readlines()`` still carry their trailing newline; without the strip
    'FindConnection\\n' would never match and every label would be encoded as
    [[0, 1]].

    Any label other than 'FindConnection' is encoded as [[0, 1]].

    :param test_answers: list of label strings, one per sample
    :return: list with one [[a, b]] one-hot entry per input label
    """
    return [
        [[1, 0]] if answer.strip() == 'FindConnection' else [[0, 1]]
        for answer in test_answers
    ]


def expand_dimensions(y_train: List):
    """Convert `y_train` to a float tensor and prepend a new axis at position 0."""
    return tf.expand_dims(tf.convert_to_tensor(y_train, dtype=float), axis=0)

In [24]:
# Hyperparameters for the classification head.
# batch_size: number of sentences per step; it is also part of the model's
#   input shape, so the training loop can only feed full batches.
batch_size = 32
# sentence_length: number of tokens every sentence is padded/truncated to.
sentence_length = 20
learning_rate = 0.03
# number_of_epochs: epochs per fit() call (fit is called once per batch below).
number_of_epochs = 5

optimizer = create_adam_optimizer(lr=learning_rate)
# units=2: one output per intent class in the one-hot label encoding.
classification_model = create_model_one_layer(units=2, batch_size=batch_size, sentence_length=sentence_length)

classification_model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    # loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
    metrics=['accuracy']
)

In [25]:
# The encoded labels do not change between batches, so encode them once
# instead of re-encoding the whole label list on every iteration.
labels = encode_labels(test_answers)

# Only full batches are used: batch_size is part of the classifier's input
# shape, and the integer division drops a trailing partial batch.
for index in range(len(test_answers)//batch_size):
    # Same index window for the questions and for their labels.
    batch_slice = slice(index*batch_size, (index+1)*batch_size)

    text = en_test[batch_slice]
    encoded_input = tokenizer(text, padding='max_length', max_length=sentence_length, truncation=True, return_tensors='tf')
    # Token-level hidden states from BERT, with a new leading axis of size 1
    # so the whole batch is presented to the classifier as a single sample.
    classification_input = tf.expand_dims(model_bert(encoded_input)["last_hidden_state"], axis=0)

    labels_expanded = expand_dimensions(labels[batch_slice])
    classification_model.fit(classification_input, y=labels_expanded, epochs=number_of_epochs)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
