In [1]:
from transformers import AutoModelWithLMHead, AutoTokenizer, BertTokenizer, BertForMaskedLM
import torch
import torch.nn as nn
import pytorch_lightning as pl 
from torch.utils.data import Dataset, DataLoader

import warnings
warnings.filterwarnings("ignore")

def print_model_info(model):
    """Print a short overview of a model's named parameters.

    The total number of named parameters is reported first. Three slices of
    the parameter list are then printed as ``name  shape`` rows: the first
    five entries, entries 5 through 20, and the final four entries. The
    section banners assume the parameter ordering of a BERT model.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs whose parameters have ``size()``.
    """
    named_params = list(model.named_parameters())
    row_template = "{:<55} {:>12}"

    print('The BERT model has {:} different named parameters.\n'.format(len(named_params)))

    # Each entry pairs the banner to print with the slice of parameters
    # that is listed underneath it.
    sections = (
        ('==== Embedding Layer ====\n', slice(0, 5)),
        ('\n==== First Transformer ====\n', slice(5, 21)),
        ('\n==== Output Layer ====\n', slice(-4, None)),
    )
    for banner, span in sections:
        print(banner)
        for name, tensor in named_params[span]:
            print(row_template.format(name, str(tuple(tensor.size()))))

# def tokenize_sentences(sentences):
#     # Tokenize all of the sentences and map the tokens to their word IDs.
#     input_ids = []
#     attention_masks = []

#     # For every sentence...
#     for sent in sentences:
#         # encode_plus will:
#         #   (1) Tokenize the sentence.
#         #   (2) Prepend the [CLS] token to the start.
#         #   (3) Append the [SEP] token to the end.
#         #   (4) Map tokens to their IDs.
#         #   (5) Pad or truncate the sentence to max_length
#         #   (6) Create attention masks for [PAD] tokens.
#         encoded_dict = tokenizer.encode_plus(
#                             sent,                      # Sentence to encode.
#                             add_special_tokens = True, # Add '[CLS]' and '[SEP]'
#                             max_length = 320,           # Pad & truncate all sentences.
#                             padding='max_length',
#                             return_attention_mask = True,   # Construct attn. masks.
#                             return_tensors = 'pt',     # Return pytorch tensors.
#                     )
        
#         # Add the encoded sentence to the list.    
#         input_ids.append(encoded_dict['input_ids'])
        
#         # And its attention mask (simply differentiates padding from non-padding).
#         attention_masks.append(encoded_dict['attention_mask'])

#     # Convert the lists into tensors.
#     input_ids = torch.cat(input_ids, dim=0)
#     attention_masks = torch.cat(attention_masks, dim=0)
#     return input_ids,attention_masks        

# Fetch the pretrained cased BERT tokenizer, then register '[BLK]' on it as an
# additional special token.
print('Loading BERT tokenizer...')
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
special_tokens_dict = dict(additional_special_tokens=['[BLK]'])
tokenizer.add_special_tokens(special_tokens_dict)

class BertPred(nn.Module):
    """Thin wrapper around a pretrained ``BertForMaskedLM``.

    The wrapped model is stored as ``self.bert``; that attribute name is part
    of the ``state_dict`` key prefix, so it must not be renamed if existing
    checkpoints are to keep loading.
    """

    def __init__(self):
        super().__init__()
        self.bert = BertForMaskedLM.from_pretrained('bert-base-cased')
        # Match the embedding matrix to the module-level tokenizer, which has
        # had an extra special token added to its vocabulary.
        self.bert.resize_token_embeddings(len(tokenizer))
        print_model_info(self.bert)

    def forward(self, input_ids, attention_mask, labels=None):
        """Run the masked-LM model and optionally compute a loss.

        Args:
            input_ids: Token ids passed positionally to the wrapped model.
            attention_mask: Attention mask forwarded as ``attention_mask``.
            labels: Optional target token ids, one per input position.
                Positions labelled ``-100`` are skipped by the loss (the
                default ``ignore_index`` of ``cross_entropy``).

        Returns:
            ``(loss, output)``. ``output`` is whatever the wrapped model
            returns. ``loss`` is the integer ``0`` when ``labels`` is ``None``
            (unchanged from before); otherwise it is the token-level
            cross-entropy between the logits and ``labels``.
        """
        output = self.bert(input_ids, attention_mask=attention_mask)
        loss = 0
        if labels is not None:
            # Previously `labels` was accepted but silently ignored.
            # output[0] is taken to be the token logits, the same way the
            # inference code in this notebook indexes the output.
            logits = output[0]
            loss = nn.functional.cross_entropy(
                logits.reshape(-1, logits.size(-1)),
                labels.reshape(-1),
            )
        return loss, output

# Choose the device before loading, so the checkpoint tensors can be mapped
# onto it. Without map_location, a checkpoint saved from a GPU fails to load
# on a CPU-only machine.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# NOTE(review): not referenced by the loading code below, which reads
# 'saved.bin'; kept so any later cell that uses this name still works.
chk_path = "/h/elau/Conv_BERT/BERT_MLM_OOD/checkpoints_OOD/best-checkpoints.ckpt"

new_model = BertPred()
# NOTE(review): torch.load unpickles the file; only load checkpoints that come
# from a trusted source.
new_model.load_state_dict(torch.load('saved.bin', map_location=device))
new_model.eval()  # inference mode: disables dropout
new_model.to(device)
model = new_model

Loading BERT tokenizer...


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


The BERT model has 202 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28997, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

In [None]:
# Text appended to every question: a prompt followed by six [MASK] slots for
# the masked-LM to fill in.
INTENT_PROMPT = " the reason of the question is " + " [MASK]" * 6
# Only the first few [MASK] slots are reported as the predicted intent.
NUM_INTENT_TOKENS = 2
# Length every encoded input is padded to.
MAX_INPUT_TOKENS = 300


def predict_intent(question):
    """Return the model's top prediction for the leading [MASK] slots.

    ``question`` is extended with ``INTENT_PROMPT``, encoded with the
    module-level ``tokenizer`` and run through the module-level ``model`` on
    ``device``. For each of the first ``NUM_INTENT_TOKENS`` mask positions the
    highest-scoring token is decoded.

    Returns:
        The decoded tokens, each preceded by a single space (so the result
        starts with a space whenever at least one mask position is found).
    """
    inputs = tokenizer.encode_plus(
        question + INTENT_PROMPT,
        add_special_tokens=True,
        max_length=MAX_INPUT_TOKENS,
        return_token_type_ids=False,
        padding="max_length",
        return_attention_mask=True,
        return_tensors='pt',
    )
    # Column indices of the [MASK] tokens in the single encoded sequence.
    mask_positions = torch.where(
        inputs['input_ids'] == tokenizer.mask_token_id
    )[1].tolist()

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        output = model(
            inputs['input_ids'].to(device),
            inputs['attention_mask'].to(device),
        )[1]
    # The first element of the model output holds the per-token logits.
    logits = output[0]

    predicted_intent = ''
    for position in mask_positions[:NUM_INTENT_TOKENS]:
        best_token = torch.topk(logits[0, position], 1).indices.item()
        predicted_intent = predicted_intent + " " + tokenizer.decode([best_token])
    return predicted_intent


while True:
    try:
        question = input("How may I help you? Question: ")
    except (EOFError, KeyboardInterrupt):
        # Interrupting the prompt ends the session cleanly instead of leaving
        # a traceback in the cell output.
        break
    print("I see. The problem is related to: " + predict_intent(question))
    print('='*80)

How may I help you? Question: I want to do a cash top-up
I see. The problem is related to:  topping cash
How may I help you? Question: How long until my card is delivered?
I see. The problem is related to:  visa card
