# Intent Recognition with BERT using Keras and TensorFlow 2

In [3]:
!pip install tqdm  >> /dev/null
!pip install bert-for-tf2 >> /dev/null
!pip install sentencepiece >> /dev/null

In [4]:
import os
import math
import datetime

from tqdm import tqdm

import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow import keras

import bert
from bert import BertModelLayer
from bert.loader import StockBertConfig, map_stock_config_to_params, load_stock_weights
from bert.tokenization.bert_tokenization import FullTokenizer

import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from matplotlib import rc

from sklearn.metrics import confusion_matrix, classification_report

%matplotlib inline
%config InlineBackend.figure_format='retina'

sns.set(style='whitegrid', palette='muted', font_scale=1.2)

HAPPY_COLORS_PALETTE = ["#01BEFE", "#FFDD00", "#FF7D00", "#FF006D", "#ADFF02", "#8F00FF"]

sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))

rcParams['figure.figsize'] = 12, 8

RANDOM_SEED = 42

np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# Data


In [None]:
# !gdown --id 1OlcvGWReJMuyYQuOZm149vHWwPtlboR6 --output train.csv
# !gdown --id 1Oi5cRlTybuIF2Fl5Bfsr-KkqrXrdt77w --output valid.csv
# !gdown --id 1ep9H6-HvhB4utJRLVcLzieWNUSG3P_uF --output test.csv

In [6]:
# read data

train = pd.read_csv("Designintent1_train.csv")
valid = pd.read_csv("Designintent1_valid.csv")
test = pd.read_csv("Designintent1_test.csv")

train = train.append(valid).reset_index(drop = True)
train.shape

(450, 2)

In [7]:
train.head()

Unnamed: 0,body,intent_1
0,Hi piston! My crankshaft depends on your bore ...,Providing information
1,thank you!,NAN
2,How low can you go on the piston bore diameter?,Asking for information
3,"all my fos values are in the hundreds, i suspe...",Asking for information
4,Hi flywheel! My crankshaft depends on your fly...,Providing information


# Intent Recognition with BERT

In [8]:
!wget https://storage.googleapis.com/bert_models/2018_10_18/uncased_L-12_H-768_A-12.zip

/bin/bash: wget: command not found


In [9]:
!unzip uncased_L-12_H-768_A-12.zip

unzip:  cannot find or open uncased_L-12_H-768_A-12.zip, uncased_L-12_H-768_A-12.zip.zip or uncased_L-12_H-768_A-12.zip.ZIP.


In [10]:
os.makedirs("model", exist_ok = True)
!mv uncased_L-12_H-768_A-12/ model

mv: rename uncased_L-12_H-768_A-12/ to model/uncased_L-12_H-768_A-12/: No such file or directory


In [11]:
bert_model_name="uncased_L-12_H-768_A-12"

bert_ckpt_dir = os.path.join("model/", bert_model_name)
bert_ckpt_file = os.path.join(bert_ckpt_dir, "bert_model.ckpt")
bert_config_file = os.path.join(bert_ckpt_dir, "bert_config.json")

## Preprocessing

In [12]:
class IntentDetectionData:
  DATA_COLUMN = "text"
  LABEL_COLUMN = "intent"

  def __init__(self, train, test, tokenizer: FullTokenizer, classes, max_seq_len=192):
    self.tokenizer = tokenizer
    self.max_seq_len = 0
    self.classes = classes
    
    ((self.train_x, self.train_y), (self.test_x, self.test_y)) = map(self._prepare, [train, test])

    print("max seq_len", self.max_seq_len)
    self.max_seq_len = min(self.max_seq_len, max_seq_len)
    self.train_x, self.test_x = map(self._pad, [self.train_x, self.test_x])

  def _prepare(self, df):
    x, y = [], []
    
    for _, row in tqdm(df.iterrows()):
      text, label = row[IntentDetectionData.DATA_COLUMN], row[IntentDetectionData.LABEL_COLUMN]
      tokens = self.tokenizer.tokenize(text)
      tokens = ["[CLS]"] + tokens + ["[SEP]"]
      token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
      self.max_seq_len = max(self.max_seq_len, len(token_ids))
      x.append(token_ids)
      y.append(self.classes.index(label))

    return np.array(x), np.array(y)

  def _pad(self, ids):
    x = []
    for input_ids in ids:
      input_ids = input_ids[:min(len(input_ids), self.max_seq_len - 2)]
      input_ids = input_ids + [0] * (self.max_seq_len - len(input_ids))
      x.append(np.array(input_ids))
    return np.array(x)

In [13]:
tokenizer = FullTokenizer(vocab_file=os.path.join(bert_ckpt_dir, "vocab.txt"))

tokenizer.tokenize("I can't wait to visit Bulgaria again!")

NotFoundError: model/uncased_L-12_H-768_A-12/vocab.txt; No such file or directory

In [None]:
tokens = tokenizer.tokenize("I can't wait to visit Bulgaria again!")
tokenizer.convert_tokens_to_ids(tokens)

In [None]:
def create_model(max_seq_len, bert_ckpt_file):

  with tf.io.gfile.GFile(bert_config_file, "r") as reader:
      bc = StockBertConfig.from_json_string(reader.read())
      bert_params = map_stock_config_to_params(bc)
      bert_params.adapter_size = None
      bert = BertModelLayer.from_params(bert_params, name = "bert")
        
  input_ids = keras.layers.Input(shape = (max_seq_len, ), dtype = 'int32', name = "input_ids")
  bert_output = bert(input_ids)

  print("bert shape", bert_output.shape)

  cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(bert_output)
  cls_out = keras.layers.Dropout(0.5)(cls_out)
  logits = keras.layers.Dense(units = 768, activation = "tanh")(cls_out)
  logits = keras.layers.Dropout(0.5)(logits)
  logits = keras.layers.Dense(units = len(classes), activation = "softmax")(logits)

  model = keras.Model(inputs = input_ids, outputs = logits)
  model.build(input_shape = (None, max_seq_len))

  load_stock_weights(bert, bert_ckpt_file)
        
  return model

## Training

In [None]:
classes = train.intent.unique().tolist()

data = IntentDetectionData(train, test, tokenizer, classes, max_seq_len = 128)

In [None]:
data.train_x.shape

In [None]:
data.train_x[0]

In [None]:
model = create_model(data.max_seq_len, bert_ckpt_file)

In [None]:
model.summary()

In [None]:
model.compile(
  optimizer = keras.optimizers.Adam(1e-5),
  loss = keras.losses.SparseCategoricalCrossentropy(from_logits = True),
  metrics = [keras.metrics.SparseCategoricalAccuracy(name = "acc")]
)

In [None]:
history = model.fit(
  x=data.train_x, 
  y=data.train_y,
  validation_split = 0.1,
  batch_size = 16,
  shuffle = True,
  epochs = 3
)

## Evaluation

In [None]:
ax = plt.figure().gca()
ax.xaxis.set_major_locator(MaxNLocator(integer=True))

ax.plot(history.history['loss'])
ax.plot(history.history['val_loss'])
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['train', 'test'])
plt.title('Loss over training epochs')
plt.show();

In [None]:
ax = plt.figure().gca()
ax.xaxis.set_major_locator(MaxNLocator(integer=True))

ax.plot(history.history['acc'])
ax.plot(history.history['val_acc'])
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['train', 'test'])
plt.title('Accuracy over training epochs')
plt.show();

In [None]:
_, train_acc = model.evaluate(data.train_x, data.train_y)
_, test_acc = model.evaluate(data.test_x, data.test_y)

print("train acc", train_acc)
print("test acc", test_acc)

In [None]:
y_pred = model.predict(data.test_x).argmax(axis = -1)

In [None]:
print(classification_report(data.test_y, y_pred, target_names = classes))

In [None]:
cm = confusion_matrix(data.test_y, y_pred)
df_cm = pd.DataFrame(cm, index = classes, columns = classes)

In [None]:
hmap = sns.heatmap(df_cm, annot = True, fmt = "d")
hmap.yaxis.set_ticklabels(hmap.yaxis.get_ticklabels(), rotation = 0, ha = 'right')
hmap.xaxis.set_ticklabels(hmap.xaxis.get_ticklabels(), rotation = 30, ha = 'right')
plt.ylabel('True label')
plt.xlabel('Predicted label');

In [None]:
sentences = [
  "Play my favourite song right now on the player. I haven't listened the song from many days",
  "Rate this book as awful because the content of this book is really horrible",
  "Can someone tell me the temperature of Delhi. I am shivering even in 3 layers.",
  "I am too much hungry. Can we go to restaurant for eating. I can't wait for long.",
  "I wrote this new book could you take a second to rate it?",
  "I love the mood, lets play some good songs",
  "I am starving, let's eat outside already",
  "Let me check how cold it is outside",
  "lets put on some songs, any suggestions?",
  "play the one you played last night",
  "Let me add play that song next",
  "check this artwork out",
  "where is that movie playing? Is it even still playing?",
  "Let's eat some waffles outside!",
  "This book needs a lot of feedback",
  "I want to watch that movie",
  "The dal is very cold",
  "Where is that art mueseum",
  "Who made this painting?",
  "The art is done by kadinsky isnt it?",
  "Help me buy this book",
  "lets but this earphones",
  "I need motivation to write a book"
]

pred_tokens = map(tokenizer.tokenize, sentences)
pred_tokens = map(lambda tok: ["[CLS]"] + tok + ["[SEP]"], pred_tokens)
pred_token_ids = list(map(tokenizer.convert_tokens_to_ids, pred_tokens))

pred_token_ids = map(lambda tids: tids + [0] * (data.max_seq_len-len(tids)), pred_token_ids)
pred_token_ids = np.array(list(pred_token_ids))

In [None]:
predictions = model.predict(pred_token_ids).argmax(axis = -1)

for text, label in zip(sentences, predictions):
  print("text:", text, "\nintent:", classes[label])
  print()