In [6]:
# Mount Google Drive so the notebook can read the project files stored there.
from google.colab import drive

drive.mount("/content/drive")

# Folder on Drive that holds the chatbot assets. Upload the files to your own
# Drive and change this path to point at them.
data_root = "/content/drive/My Drive/projects/chat-bot"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
#2 Importing Relevant Libraries
import json
import string
import random

import nltk
import numpy as np
from nltk.stem import WordNetLemmatizer
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
# Fetch the NLTK resources this notebook relies on: tokenizer data for
# nltk.word_tokenize and the WordNet corpus for WordNetLemmatizer.
nltk.download("punkt")
nltk.download("wordnet")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [8]:
# Loading the dataset: intents_new.json
# A context manager guarantees the file handle is closed, and the explicit
# encoding keeps the read independent of the platform's default locale.
with open(data_root + '/intents_new.json', encoding='utf-8') as intents_fp:
  data_file = intents_fp.read()  # raw JSON text, kept under its original name
data = json.loads(data_file)     # parsed intents dictionary

In [9]:
# Display the parsed intents dictionary to inspect its structure.
data

{'intents': [{'tag': 'google',
   'patterns': ['google', 'search', 'internet'],
   'responses': ['Redirecting to Google...']},
  {'tag': 'greeting',
   'patterns': ['Hi there',
    'How are you',
    'Is anyone there?',
    'Hey',
    'Hola',
    'Hello',
    'Good day',
    'Namaste',
    'yo'],
   'responses': ['Hello',
    'Good to see you again',
    'Hi there, how can I help?'],
   'context': ['']},
  {'tag': 'goodbye',
   'patterns': ['Bye',
    'See you later',
    'Goodbye',
    'Get lost',
    'Till next time',
    'bbye'],
   'responses': ['See you!', 'Have a nice day', 'Bye! Come back again soon.'],
   'context': ['']},
  {'tag': 'thanks',
   'patterns': ['Thanks',
    'Thank you',
    "That's helpful",
    'Awesome, thanks',
    'Thanks for helping me'],
   'responses': ['Happy to help!', 'Any time!', 'My pleasure'],
   'context': ['']},
  {'tag': 'noanswer',
   'patterns': [],
   'responses': ["Sorry, can't understand you",
    'Please give me more info',
    'Not sure I u

In [10]:
# Build the vocabulary, the label set, and the (pattern, tag) training pairs.

words = []    # every token seen in any pattern (vocabulary of the BoW model)
classes = []  # distinct intent tags (labels of the BoW model)
data_X = []   # raw pattern strings
data_y = []   # tag of the pattern stored at the same index in data_X

for intent in data["intents"]:
  tag = intent["tag"]

  # Register the tag once, even for intents that define no patterns.
  if tag not in classes:
    classes.append(tag)

  for pattern in intent["patterns"]:
    words += nltk.word_tokenize(pattern)  # collect the pattern's tokens
    data_X.append(pattern)
    data_y.append(tag)

# Lemmatizer used to reduce words to their base form.
lemmatizer = WordNetLemmatizer()

# Skip tokens found in string.punctuation, lowercase and lemmatize the rest,
# then de-duplicate and sort alphabetically.
words = sorted({
    lemmatizer.lemmatize(token.lower())
    for token in words
    if token not in string.punctuation
})
classes = sorted(set(classes))

In [11]:
#5 Text to Numbers
# Turn every pattern into a binary bag-of-words vector over `words` and its
# tag into a one-hot vector over `classes`.
training = []
out_empty = [0] * len(classes)

for idx, doc in enumerate(data_X):
  # Normalise the pattern token by token, the same way `words` was built
  # (tokenize -> lowercase -> lemmatize). Membership is then tested per token:
  # a substring test on the raw sentence would wrongly fire for vocabulary
  # entries embedded in longer words (e.g. "hi" inside "this").
  doc_tokens = {
      lemmatizer.lemmatize(token.lower())
      for token in nltk.word_tokenize(doc)
  }
  bow = [1 if word in doc_tokens else 0 for word in words]

  # One-hot encode the tag associated with the current pattern.
  output_row = list(out_empty)
  output_row[classes.index(data_y[idx])] = 1

  # Keep features and target together so they are shuffled as a pair.
  training.append([bow, output_row])

# Shuffle the samples and convert them to an array.
random.shuffle(training)
training = np.array(training, dtype=object)

# Split the features and the target labels.
train_X = np.array(list(training[:, 0]))
train_Y = np.array(list(training[:, 1]))

In [12]:
#6 The Neural Network Model
# Feed-forward classifier: bag-of-words vector in, probability per tag out.
model = Sequential()
model.add(Dense(128, input_shape=(len(train_X[0]),), activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(64, activation="relu"))
model.add(Dropout(0.5))
# One output unit per tag; softmax makes the outputs sum to 1.
model.add(Dense(len(train_Y[0]), activation="softmax"))

# NOTE(review): `optimizers.legacy` is only present in some TensorFlow
# releases -- confirm it exists in the installed version.
adam = tf.keras.optimizers.legacy.Adam(learning_rate=0.01, decay=1e-6)
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=["accuracy"])

# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()
model.fit(x=train_X, y=train_Y, epochs=150, verbose=1)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               16640     
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 31)                2015      
                                                                 
Total params: 26911 (105.12 KB)
Trainable params: 26911 (105.12 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
Epoch 1/150
E

<keras.src.callbacks.History at 0x7a0690f359c0>

In [13]:
#7 Preprocessing the Input
def clean_text(text):
  """Tokenize `text` and return its lowercased, lemmatized tokens.

  The vocabulary (`words`) is built from lowercased lemmas, so the input is
  lowercased here as well; otherwise capitalised input such as "Hello" would
  never match a vocabulary entry.

  Args:
    text: raw user message.

  Returns:
    List of normalised tokens, in their original order.
  """
  tokens = nltk.word_tokenize(text)
  return [lemmatizer.lemmatize(word.lower()) for word in tokens]

def bag_of_words(text, vocab):
  """Encode `text` as a binary bag-of-words vector over `vocab`.

  Args:
    text: raw message; it is tokenized with `clean_text`.
    vocab: sequence of vocabulary entries.

  Returns:
    numpy array with one element per vocabulary entry: 1 where that entry
    equals one of the message's tokens, 0 elsewhere.
  """
  present = set(clean_text(text))
  return np.array([1 if entry in present else 0 for entry in vocab])

def pred_class(text, vocab, labels, thresh=0.5):
  """Predict the tags for `text`, most probable first.

  Args:
    text: raw user message.
    vocab: vocabulary used to build the bag-of-words vector.
    labels: tag names, indexed like the model's output units.
    thresh: minimum probability (exclusive) a tag needs to be returned.
      Defaults to 0.5, the value that was previously hard-coded.

  Returns:
    List of tags whose predicted probability exceeds `thresh`, sorted by
    decreasing probability. Empty when no tag is confident enough.
  """
  bow = bag_of_words(text, vocab)
  result = model.predict(np.array([bow]))[0]  # probabilities for one sample
  y_pred = [(indx, res) for indx, res in enumerate(result) if res > thresh]
  y_pred.sort(key=lambda pair: pair[1], reverse=True)  # highest probability first
  return [labels[indx] for indx, _ in y_pred]

def get_response(intents_list, intents_json):
  """Pick a reply for the best predicted intent.

  Args:
    intents_list: predicted tags, most probable first (may be empty).
    intents_json: dict with an "intents" list; each entry has a "tag" and a
      list of candidate "responses".

  Returns:
    A randomly chosen response of the first intent whose tag equals
    `intents_list[0]`. The fallback message is returned when there is no
    prediction, when the tag is not present in `intents_json`, or when the
    matching intent has no responses.
  """
  fallback = "Sorry! I don't understand."
  if len(intents_list) == 0:
    return fallback

  tag = intents_list[0]
  for intent in intents_json["intents"]:
    if intent["tag"] == tag:
      responses = intent.get("responses", [])
      # random.choice raises on an empty sequence, so guard against it.
      if responses:
        return random.choice(responses)
      break  # only the first matching intent is considered

  # Unknown tag or no usable response: never leave the result undefined.
  return fallback

In [14]:
#8 Interacting with the chatbot
# The prompt must name the key the loop actually checks for ("0").
print("Press 0 if you don't want to chat with our ChatBot.")
while True:
  message = input("")
  # Ignore surrounding whitespace so " 0" or "0 " also ends the chat.
  if message.strip() == "0":
    break
  intents = pred_class(message, words, classes)
  result = get_response(intents, data)
  print(result)

Press Ø if you don't want to chat with our ChatBot.
google
Redirecting to Google...
images
Sorry! I don't understand.
google
Redirecting to Google...
google
Redirecting to Google...

Sorry! I don't understand.
hey
Good to see you again
bye
See you!
.
Sorry! I don't understand.
who are you
I am Ted, a Deep-Learning chatbot
whats your name
All good..What about you?
what is your name
I was made in 2020, if that's what you are asking!
what you can do
I am a general purpose chatbot. My capabilities are : 
 1. I can chat with you. Try asking me for jokes or riddles! 
 2. Ask me the date and time 
 3. I can google search for you. Use format google: your query 
 4. I can get the present weather for any city. Use format weather: city name 
 5. I can get you the top 10 trending news in India. Use keywords 'Latest News' 
 6. I can get you the top 10 trending songs globally. Type 'songs' 
 7. I can set a timer for you. Enter 'set a timer: minutes to timer' 
 8. I can get the present Covid stats fo