In [1]:
import random
import json
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
import nltk 

In [2]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [3]:
intents = json.loads(open('intents.json').read())

In [4]:
words = []
classes = []
documents = []
ignore_letters = ['?', '!', '.', ',']

for intent in intents['intents']:
    for pattern in intent['patterns']:
        word_list = nltk.word_tokenize(pattern)
        words.extend(word_list)
        documents.append((word_list, intent['tag']))
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

In [5]:
words = [word for word in words if word not in ignore_letters]
words = sorted(set(words))
classes = sorted(set(classes))

### **Calculate X**

In [6]:
question = []
for doc in intents['intents']:
  question.append(doc['patterns'])

In [7]:
question_list = []
arr = [[question_list.append(x) for x in q] for q in question]

In [8]:
question_list

['Hi there',
 'How are you',
 'How are you?',
 'Is anyone there?',
 'Hey',
 'Hola',
 'Hello',
 'Good day',
 'Bye',
 'See you later',
 'Goodbye',
 'Nice chatting to you, bye',
 'Till next time',
 'Thanks',
 'Thank you',
 "That's helpful",
 'Awesome, thanks',
 'Thanks for helping me',
 'what is your name',
 'what should I call you',
 'whats your name?',
 'who are you?',
 'how old',
 'old',
 'how old is florian',
 'what is your age',
 'old are you',
 'age?',
 'How you could help me?',
 'What you can do?',
 'What help you provide?',
 'How you can be helpful?',
 'What support is offered',
 'Can a student who has received a temporary certificate of completion be able to return to education?',
 'Can a graduate student who has a temporary certificate return to study?',
 'Can a graduate associate who has a temporary certificate return to study?',
 "What is the email address of the university's educational administration?",
 "What is the university's educational management email?",
 'Educational

### **Calculate Y**

In [9]:
tag = []
for doc in documents:
  tag.append(doc[1])

In [10]:
Y = pd.get_dummies(tag)

In [11]:
num_unique_tags = len(Y.nunique())

In [12]:
num_unique_tags

133

### **X and Y**

In [13]:
X_train = np.array(question_list)[0:50]
y_train = np.array(Y)[0:50]
X_test = np.array(question_list)[50:]
y_test = np.array(Y)[50:]

In [14]:
print(len(X_train))
print(len(y_train))
print(len(X_test))
print(len(y_test))

50
50
356
356


### **Fine Tune on BERT**

In [15]:
!pip install -q transformers

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m85.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m110.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.8/199.8 KB[0m [31m23.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [16]:
from transformers import AutoTokenizer, TFBertModel
tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
bert = TFBertModel.from_pretrained('bert-base-cased')

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Downloading tf_model.h5:   0%|          | 0.00/527M [00:00<?, ?B/s]

Some layers from the model checkpoint at bert-base-cased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [17]:
train_encodings = tokenizer(
    text=X_train.tolist(),
    add_special_tokens=True,
    max_length=9,
    truncation=True,
    padding=True, 
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = True,
    verbose = True)

test_encodings = tokenizer(
    text=X_test.tolist(),
    add_special_tokens=True,
    max_length=9,
    truncation=True,
    padding=True, 
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = True,
    verbose = True)

In [18]:
train_encodings

{'input_ids': <tf.Tensor: shape=(50, 9), dtype=int32, numpy=
array([[  101,  8790,  1175,   102,     0,     0,     0,     0,     0],
       [  101,  1731,  1132,  1128,   102,     0,     0,     0,     0],
       [  101,  1731,  1132,  1128,   136,   102,     0,     0,     0],
       [  101,  2181,  2256,  1175,   136,   102,     0,     0,     0],
       [  101,  4403,   102,     0,     0,     0,     0,     0,     0],
       [  101,  9800,  1742,   102,     0,     0,     0,     0,     0],
       [  101,  8667,   102,     0,     0,     0,     0,     0,     0],
       [  101,  2750,  1285,   102,     0,     0,     0,     0,     0],
       [  101, 17774,   102,     0,     0,     0,     0,     0,     0],
       [  101,  3969,  1128,  1224,   102,     0,     0,     0,     0],
       [  101, 15938,   102,     0,     0,     0,     0,     0,     0],
       [  101,  8835, 25132,  1106,  1128,   117, 11901,   102,     0],
       [  101, 22430,  1397,  1159,   102,     0,     0,     0,     0],
   

In [19]:
y_train[0:10]

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [20]:
print(len(train_encodings['input_ids']))
print(len(y_train))

50
50


In [21]:
input_ids = train_encodings['input_ids']
attention_mask = train_encodings['attention_mask']

### **Model Building**

In [22]:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Input, Dense

In [30]:
max_len = 9
input_ids = Input(shape=(max_len,), dtype=tf.int32, name="input_ids")
input_mask = Input(shape=(max_len,), dtype=tf.int32, name="attention_mask")
embeddings = bert(input_ids, attention_mask=input_mask)[0] 
out = tf.keras.layers.GlobalMaxPool1D()(embeddings)
out = Dense(128, activation='relu')(out)
out = tf.keras.layers.Dropout(0.1)(out)
out = Dense(32, activation = 'relu')(out)
y = Dense(num_unique_tags, activation='sigmoid')(out)
model = tf.keras.Model(inputs=[input_ids, input_mask], outputs=y)
model.layers[2].trainable = True

In [31]:
optimizer = Adam(learning_rate=5e-05)
loss =CategoricalCrossentropy(from_logits=True)
metric = CategoricalAccuracy('balanced_accuracy'),
# Compile the model
model.compile(optimizer=optimizer, loss=loss, metrics=metric)

In [32]:
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, 9)]          0           []                               
                                                                                                  
 attention_mask (InputLayer)    [(None, 9)]          0           []                               
                                                                                                  
 tf_bert_model (TFBertModel)    multiple             108310272   ['input_ids[0][0]',              
                                                                  'attention_mask[0][0]']         
                                                                                                  
 global_max_pooling1d_2 (Global  (None, 768)         0           ['tf_bert_model[2][0]']    

In [33]:
train_history = model.fit(
    x = {'input_ids':train_encodings['input_ids'],'attention_mask':train_encodings['attention_mask']},
    y = y_train,
    validation_data = (
    {'input_ids': test_encodings['input_ids'], 'attention_mask': test_encodings['attention_mask']}, y_test
    ),
    epochs=30,
    batch_size=36
)

Epoch 1/30


  output, from_logits = _get_logits(


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [34]:
model.save('chatbot_BERT_model.h5')

### **Generetor**

In [50]:
from transformers import MT5ForConditionalGeneration, T5Tokenizer

In [51]:
mt5 = MT5ForConditionalGeneration.from_pretrained("google/mt5-base")
mt5_tok = T5Tokenizer.from_pretrained("google/mt5-base")

Downloading (…)lve/main/config.json:   0%|          | 0.00/702 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.33G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

ImportError: ignored

In [None]:
input_ids = mt5_tok("explain sentiment: I went to see this movie with my husband, and we both thought the acting was terrible!", return_tensors="pt").input_ids  # in the language of your choice
labels = mt5_tok("negative explanation: the acting was terrible.", return_tensors="pt").input_ids  # in the language of your choice

loss = mt5(input_ids=input_ids, labels=labels).loss

### **Prediction**

In [41]:
import pickle

In [42]:
intents = json.loads(open('intents.json').read())

In [43]:
from keras.models import load_model
from transformers import AutoTokenizer, TFBertModel
# load model
model = load_model('chatbot_BERT_model.h5', custom_objects={'TFBertModel': TFBertModel})

In [44]:
model

<keras.engine.functional.Functional at 0x7f014d7c1100>

In [45]:
def prediction(text):
  x_val = tokenizer(
    text=text,
    add_special_tokens=True,
    max_length=9,
    truncation=True,
    padding='max_length', 
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = True,
    verbose = True) 
  
  validation = model.predict({'input_ids': x_val['input_ids'], 'attention_mask': x_val['attention_mask']})
  tag_index = np.where(validation[0] == max(validation[0]))[0][0]
  result = Y.columns[tag_index]
  return result

In [46]:
prediction('how are you?')



'greeting'

In [47]:
def get_response(text):
  tag = prediction(text)
  for data in intents['intents']:
    if data['tag'] == tag:
      result = random.choice(data['responses'])
      break
  return result

In [48]:
get_response('hi')



'Hi there, how can I help?'

In [None]:
while True:
	message = input("Enter a message:")
	res = get_response(message)
	print(res)