In [1]:
import pandas as pd
from transformers import AutoTokenizer

In [2]:
from IPython.display import display

In [3]:
df = pd.read_csv('flyeasy_dataset.csv')
df.head()

Unnamed: 0,id,user_query,intent,entities,response_template
0,10,Do I have any pending travel approvals?,CheckTravelApprovals,{},Checking your travel approval status...
1,3,Show me my hotel bookings,ViewHotelBookings,{},Here are your current hotel bookings: [dummy_b...
2,5,"Yes, book the return ticket too",BookReturnFlight,{},Your return flight has also been booked.
3,7,Remind me to apply for a visa next Monday,SetReminder,"{'task': 'apply for a visa', 'date': 'next Mon...",Reminder set to {task} on {date}.
4,6,I need a hotel in Tokyo from 20th to 25th August,BookHotel,"{'location': 'Tokyo', 'check_in': '20th August...",A hotel in {location} from {check_in} to {chec...


# Subword Tokenization using AutoTokenzer

In [4]:
# Initialize the Auto Tokenzier

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

In [7]:
queries = df["user_query"].tolist()

In [9]:
output_queries = [tokenizer.tokenize(q) for q in queries]

In [11]:
queries[0]

'Do I have any pending travel approvals?'

In [10]:
output_queries[0]

['do', 'i', 'have', 'any', 'pending', 'travel', 'approval', '##s', '?']

# Attention View using BERT Transformer

In [12]:
from transformers import BertTokenizer, BertModel
from bertviz import head_view
import torch

In [14]:
# Initialize the BERT Tokenizer and BERT Transformer Model

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertModel.from_pretrained("bert-base-uncased", output_attentions=True)
model.eval()

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
  

In [15]:
# Input Query

query = "Remind me to apply for a visa next Monday"

In [16]:
inputs = tokenizer(query, return_tensors="pt")
inputs

{'input_ids': tensor([[  101, 10825,  2033,  2000,  6611,  2005,  1037,  9425,  2279,  6928,
           102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [18]:
# Extract Input_Ids and keep it at Var

input_ids = inputs['input_ids']
input_ids

tensor([[  101, 10825,  2033,  2000,  6611,  2005,  1037,  9425,  2279,  6928,
           102]])

In [21]:
input_ids[0]

tensor([  101, 10825,  2033,  2000,  6611,  2005,  1037,  9425,  2279,  6928,
          102])

In [22]:
# Forward Pass with Attention

with torch.no_grad():
    outputs = model(**inputs)

In [26]:
attentions = outputs.attentions

In [25]:
tokens = tokenizer.convert_ids_to_tokens(input_ids[0])

In [27]:
head_view(attentions, tokens)


<IPython.core.display.Javascript object>

In [28]:
attentions

(tensor([[[[5.3146e-02, 3.6904e-02, 4.8703e-02,  ..., 7.2474e-02,
            4.6568e-02, 2.7284e-01],
           [8.9567e-02, 4.7705e-02, 1.3263e-01,  ..., 1.1301e-01,
            9.6432e-02, 1.0351e-01],
           [1.3090e-01, 7.9804e-02, 1.2607e-01,  ..., 8.4707e-02,
            9.3145e-02, 1.1896e-01],
           ...,
           [4.4270e-02, 7.6238e-02, 9.9403e-02,  ..., 6.9652e-02,
            9.1596e-02, 6.7446e-02],
           [7.9133e-02, 8.5030e-02, 2.0121e-01,  ..., 8.1532e-02,
            7.5521e-02, 8.8379e-02],
           [9.5132e-02, 5.9942e-02, 1.2461e-01,  ..., 8.6469e-02,
            5.0959e-02, 1.4679e-01]],
 
          [[4.0883e-01, 1.5790e-03, 2.0262e-03,  ..., 4.7177e-03,
            1.7036e-03, 4.1907e-03],
           [4.9225e-02, 2.4729e-02, 7.0518e-02,  ..., 9.1814e-02,
            2.1735e-01, 1.2451e-01],
           [2.6915e-02, 1.7665e-01, 3.3394e-02,  ..., 1.3118e-01,
            1.6739e-01, 2.7669e-02],
           ...,
           [1.1904e-02, 8.7475e-02, 3.

# BERT Embeddings

In [43]:
query1 = "Deep learning is origin for Transformers"
query2 = "Cancel my ticket to movie next friday"

In [44]:
input1 = tokenizer(query1, return_tensors="pt")
input2 = tokenizer(query2, return_tensors="pt")

In [45]:
input1

{'input_ids': tensor([[  101,  2784,  4083,  2003,  4761,  2005, 19081,   102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1]])}

In [46]:
with torch.no_grad():
    outputs1 = model(**input1)
    outputs2 = model(**input2)

In [47]:
embeddings1 = outputs1.last_hidden_state[0][0]
embeddings2 = outputs2.last_hidden_state[0][0]

In [54]:
torch.nn.functional.cosine_similarity(embeddings1, embeddings2, dim=0).item()

0.8562809228897095

In [56]:
embeddings2.shape

torch.Size([768])