In [1]:
import tensorflow

In [2]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers.legacy import SGD

In [3]:
import random
import json
import pickle
import numpy as np
import nltk
from nltk.stem import WordNetLemmatizer
nltk.download('omw-1.4')
nltk.download('wordnet')
nltk.download('punkt')
import tensorflow as tf
from tensorflow.keras import datasets, layers, models


# Shared lemmatizer instance used by the cells below to reduce words to their root form.
lemmatizer = WordNetLemmatizer()

# Load the intents definition; the context manager guarantees the file handle is
# closed, and the explicit encoding avoids depending on the platform default.
with open('intents.json', encoding='utf-8') as intents_file:
    intents = json.load(intents_file)

[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


This simple chatbot is built on a very simple intents file in order to illustrate the typical structure of a chatbot project.
The steps followed can be found in the following article: 

https://www.projectpro.io/article/python-chatbot-project-learn-to-build-a-chatbot-from-scratch/429#toc-10

### **STEP 1: Create a JSON file of intents:**



*   The patterns correspond to sentences expected from the users, on which the model will be trained
*   Responses correspond to the exact responses the chatbot will generate once it recognizes the corresponding tag (chosen at random from the list of provided responses)


*   Tags are single words which help us classify the patterns; they are the output of a learned model which associates patterns with tags.






In [4]:
# Display the intents loaded from intents.json (each entry has a tag, patterns and responses):
intents

{'intents': [{'tag': 'greetings',
   'patterns': ['hello',
    'hi',
    'hey',
    'good morning',
    'good day',
    'greetings',
    "what's up?",
    'How is it going?'],
   'responses': ['Hello', 'Hey!', 'What can I do for you?']},
  {'tag': 'goodbye',
   'patterns': ['bye', 'I am leaving', 'see ya', 'goodbye', 'see you later'],
   'responses': ['Goodbye!', 'Talk to you later!']},
  {'tag': 'name',
   'patterns': ["What's your name?",
    'What should I call you?',
    'Do you have a name?',
    'who are you?',
    'tell me about yourself'],
   'responses': ['You can call me Kiwi!', "I'm Kiwi, Farida's assitant :)"]},
  {'tag': 'order',
   'patterns': ["I'd like to order something",
    'I want to buy something',
    'I want to place an order',
    'What do you recommend?'],
   'responses': ['Let me direct you to our page of currently available restaurants you can order from, at this moment!',
    'There are a ton of options! Let me direct you to them!']},
  {'tag': 'complaint',


### **STEP 2: Prepare the training data:**

In [5]:

###########################################################################################################################
####                                                                                                                   ####
#### STEP 2.1: Tokenize and Lemmatize the needed vocabularies and prepare the inputs and targets of the training data: #### 
####                                                                                                                   ####
###########################################################################################################################

words = []                                    # vocabulary for the patterns (lower-cased, lemmatized single words)
classes = []                                  # vocabulary for the tags 

data_X = []                                   # To store the patterns as they are
data_y = []                                   # To store the tags corresponding to the patterns in data_X
ignore_letters = ["?", "!", ",", "."]         # Punctuation tokens excluded from the vocabulary

for intent in intents["intents"]:
    for pattern in intent["patterns"]:
        # word_tokenize splits the sentence into word and punctuation tokens:
        words.extend(nltk.word_tokenize(pattern))
        data_X.append(pattern)
        data_y.append(intent["tag"])

    if intent['tag'] not in classes:
        classes.append(intent['tag'])


# Lower-case each token before lemmatizing so the vocabulary is case-insensitive
# ("What" and "what" become one entry), skipping over punctuation:
words = [lemmatizer.lemmatize(word.lower()) for word in words if word not in ignore_letters]
words = sorted(set(words))
classes = sorted(set(classes))


##################################################################################################
####                                                                                          ####
#### STEP 2.2: Create the Bag-of-Words Model: from words to zeros and ones to feed the model: #### 
####                                                                                          ####
##################################################################################################

training = []
output_empty = [0]*len(classes)

for idx, doc in enumerate(data_X):
    # The lemmatizer works on single words, so the pattern is tokenized first and each
    # token is lower-cased and lemmatized on its own. Matching is done per token (set
    # membership) rather than by substring, so a short vocabulary word such as "a" or
    # "i" is only flagged when it really occurs as a word in the pattern. This is the
    # same token-level matching that is applied to user queries at prediction time.
    doc_tokens = {lemmatizer.lemmatize(token.lower()) for token in nltk.word_tokenize(doc)}

    # Bag-of-words vector: one entry per vocabulary word, 1 if the pattern contains it.
    bag = [1 if word in doc_tokens else 0 for word in words]

    # One-hot vector: 1 only at the index of the pattern's tag.
    output_row = list(output_empty)
    output_row[classes.index(data_y[idx])] = 1
    training.append([bag, output_row])

random.shuffle(training)
training = np.array(training, dtype=object)

train_x = list(training[:, 0])                  # contains the bag-of-words list for the patterns.
train_y = list(training[:, 1])                  # contains the one-hot list for the corresponding tags/classes. 


### **STEP 3: Build and train the model using keras:**

In [6]:
# Feed-forward classifier: two ReLU hidden layers, each followed by dropout, and a
# softmax output layer with one unit per class (the length of a one-hot target row).
model = Sequential([
    Dense(128, activation='relu', input_shape=(len(train_x[0]),)),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(len(train_y[0]), activation='softmax'),
])

# SGD with Nesterov momentum; Adam or another optimizer could be used instead.
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True, decay=1e-6)

model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(x=np.array(train_x), y=np.array(train_y), batch_size=5, epochs=200, verbose=1)


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.callbacks.History at 0x7f40ecbf3190>

### **STEP 4: Process user input and generate response:**

In [7]:
# Preprocessing the input: 

def clean_text(text): 
  """ Tokenizes, lower-cases and lemmatizes the incoming user query

        The tokens are lower-cased because the training patterns are lower-cased
        before being matched against the vocabulary; without it, "Hi" and "hi"
        would produce different features.
      
        Parameters: 
          text(str): The user query
        Returns: 
          tokens(list): a list of separate words' roots from the query
  """          
  tokens = nltk.word_tokenize(text)
  tokens = [lemmatizer.lemmatize(word.lower()) for word in tokens]
  return tokens


def bag_of_words(text, vocab): 
  """ Encodes a user query as a 0/1 vector over the vocabulary (bag-of-words model)

        The query is first passed through clean_text; each vocabulary entry is then
        flagged with 1 when it equals one of the resulting tokens, and 0 otherwise.
      
        Parameters: 
          text(str): The raw user query
          vocab(list): list of vocabulary words
        Returns: 
          (array): a numpy array of 0 and 1, of the same size as vocab
  """     
  query_tokens = set(clean_text(text))
  flags = [1 if entry in query_tokens else 0 for entry in vocab]
  return np.array(flags)


def pred_class(text, vocab, labels, thresh=0.5):
  """ Predicts the tags of a user query using the module-level trained `model`

        Parameters: 
          text(str): The user query
          vocab(list): list of vocabulary words used to build the bag-of-words input
          labels(list): list of tags, indexed like the outputs of the model
          thresh(float): minimum probability a class needs to be kept (default 0.5)
        Returns: 
          (list): the tags whose probability exceeds thresh, from most to least 
                  likely; empty when no class passes the threshold
  """
  bow = bag_of_words(text, vocab)
  # The model expects a batch, so wrap the single vector and take the first result:
  result = model.predict(np.array([bow]))[0] 
  # Keep only the (index, probability) pairs above the threshold:
  y_pred = [[indx, res] for indx, res in enumerate(result) if res > thresh]
  # Sort the remaining classes from highest to lowest probability:
  y_pred.sort(key=lambda x: x[1], reverse=True)
  # Map the class indices back to their tags:
  return [labels[indx] for indx, _ in y_pred]

def get_response(intents_list, intents_json):
  """ Generates a response randomly, based on the tag given by pred_class function, from the corresponding responses 
      in the json file of intents

        Parameters: 
          intents_list(list): predicted tags, most likely first (may be empty)
          intents_json(dict): the intents definition, with an "intents" list whose 
                              entries carry a "tag" and a list of "responses"
        Returns: 
          (string): a response to output to the user; the fallback message is returned
                    when no tag was predicted, when the predicted tag is not present
                    in intents_json, or when the matching intent has no responses
  """  
  fallback = "Sorry! I don't understand"
  if len(intents_list) == 0:
    return fallback

  # Only the most likely tag is used to choose the response.
  tag = intents_list[0]
  for intent in intents_json["intents"]:
    if intent["tag"] == tag:
      responses = intent['responses']
      if responses:
        return random.choice(responses)
      # Matching intent has nothing to say: stop searching and use the fallback.
      break
  # Unknown tag or empty responses: never leave the result undefined.
  return fallback

In [8]:
# Interacting with the chatbot:
# Load the intents with a context manager so the file handle is closed right away.
with open('intents.json', encoding='utf-8') as intents_file:
  intents_json = json.load(intents_file)

print("Press 0 if you don't want to chat with our Chatbot")
while True:

  message = input("")
  # "0" is the exit command announced above.
  if message == "0":
    break
  # Predict the tag(s) of the message, then pick one of the matching responses.
  intents_list = pred_class(message, words, classes)
  result = get_response(intents_list, intents_json)
  print(result)

Press 0 if you don't want to chat with our Chatbot
Hi
Hey!
How are you?
I'm Kiwi, Farida's assitant :)
Nice to meet you, Kiwi
You can call me Kiwi!
I wanted to buy something...
There are a ton of options! Let me direct you to them!
I actually bought something earlier and wanted to return it
Sorry! I don't understand
I want to complain about an item
Sorry! I don't understand
I do not like the item I bought
Oh no! Allow me to fix that right away!
Ok, have a good day, bye
Talk to you later!
0
