In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import os
import io
import nltk
import json
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Input, Embedding, LSTM , Dense,GlobalAveragePooling1D,Flatten, Dropout , GRU
from tensorflow.keras.models import Sequential
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Conv1D, MaxPool1D



In [2]:
# Load the raw intents definition. The encoding is given explicitly so the
# file is decoded the same way on every platform (the default text encoding
# is locale-dependent, e.g. cp1252 on Windows, which mangles non-ASCII text).
with open('intents.json', 'r', encoding='utf-8') as json_data:
    dataset = json.load(json_data)

In [3]:
def processing_json_dataset(dataset):
  """Flatten the intents structure into parallel lists plus a response lookup.

  Args:
    dataset: mapping with an 'intents' list; every entry provides the keys
      'intent', 'text' (iterable of example utterances) and 'responses'.

  Returns:
    A list [tags, inputs, responses] where tags[k] is the intent name of the
    utterance inputs[k], and responses maps each intent name to that
    entry's 'responses' value.
  """
  entries = dataset['intents']
  # Intent name -> candidate replies (a later duplicate intent name wins).
  responses = {entry['intent']: entry['responses'] for entry in entries}
  # One (intent, utterance) pair per training sentence, in file order.
  pairs = [(entry['intent'], utterance)
           for entry in entries
           for utterance in entry['text']]
  tags = [tag for tag, _ in pairs]
  inputs = [utterance for _, utterance in pairs]
  return [tags, inputs, responses]

In [4]:
# Split the parsed intents into labels, utterances and the reply lookup.
tags, inputs, responses = processing_json_dataset(dataset)

In [5]:
# One row per training utterance, paired with its intent label.
# Note: `dataset` is rebound here from the raw JSON dict to a DataFrame.
dataset = pd.DataFrame(dict(inputs=inputs, tags=tags))

In [6]:
# Preview the first rows of the utterance/label table.
dataset.head()

Unnamed: 0,inputs,tags
0,Hi,Greeting
1,Hi there,Greeting
2,Hola,Greeting
3,Hello,Greeting
4,Hello there,Greeting


In [7]:
# Shuffle all rows. The seed is fixed so every run produces the same order:
# the tokenizer is fitted on these rows afterwards, and a different order can
# change which index tied-frequency words receive, which would no longer match
# a previously trained/saved model.
dataset = dataset.sample(frac=1, random_state=42)

In [8]:
# Preview after shuffling: the original row index is kept, so it is no longer sequential.
dataset.head()

Unnamed: 0,inputs,tags
160,What causes mental illness?,MentalHealthFAQs
153,What are the benefits of art therapy and creat...,MentalHealthFAQs
445,How can I troubleshoot driver-related issues i...,UserFAQs
179,What causes seasons on Earth?,ScienceQuery
271,What is the capital of Indonesia?,CountriesKnowledgeInquiry


In [9]:
import string


def _lowercase_without_punctuation(text):
  """Return the characters of `text`, lower-cased, skipping ASCII punctuation."""
  return [char.lower() for char in text if char not in string.punctuation]


# Each utterance becomes a list of single characters here; the list is glued
# back into a string in a later step.
dataset['inputs'] = dataset['inputs'].apply(_lowercase_without_punctuation)

In [10]:
# Preview: at this point every 'inputs' value is a list of lower-cased characters.
dataset.head()

Unnamed: 0,inputs,tags
160,"[w, h, a, t, , c, a, u, s, e, s, , m, e, n, ...",MentalHealthFAQs
153,"[w, h, a, t, , a, r, e, , t, h, e, , b, e, ...",MentalHealthFAQs
445,"[h, o, w, , c, a, n, , i, , t, r, o, u, b, ...",UserFAQs
179,"[w, h, a, t, , c, a, u, s, e, s, , s, e, a, ...",ScienceQuery
271,"[w, h, a, t, , i, s, , t, h, e, , c, a, p, ...",CountriesKnowledgeInquiry


In [11]:
# Concatenate each list of characters back into a single cleaned string.
dataset['inputs'] = dataset['inputs'].apply(''.join)

In [12]:
# Preview the cleaned utterances (lower-case, punctuation removed).
dataset.head()

Unnamed: 0,inputs,tags
160,what causes mental illness,MentalHealthFAQs
153,what are the benefits of art therapy and creat...,MentalHealthFAQs
445,how can i troubleshoot driverrelated issues in...,UserFAQs
179,what causes seasons on earth,ScienceQuery
271,what is the capital of indonesia,CountriesKnowledgeInquiry


In [13]:
# Same check on the last rows of the cleaned table.
dataset.tail()

Unnamed: 0,inputs,tags
169,how does the human brain work,ScienceQuery
57,i am not talking to you,NotTalking2U
29,who do you think i am,CurrentHumanQuery
25,great thanks i am bella,CourtesyGreetingResponse
490,how can i fix high cpu usage in windows 10,UserFAQs


In [14]:
# --- Targets: map every intent name to an integer class id.
le = LabelEncoder()
labels = le.fit_transform(dataset['tags'])

# --- Features: map words to integer ids, then pad to a common length.
# num_words caps how many of the most frequent words are kept when
# converting texts to sequences.
tokenizer = Tokenizer(num_words=13200)
tokenizer.fit_on_texts(dataset['inputs'])
train = tokenizer.texts_to_sequences(dataset['inputs'])
# With no maxlen given, every row is padded to the longest sequence.
features = pad_sequences(train)

In [15]:
# Length of the first padded sequence (all rows share it after pad_sequences).
len(features[0])

22

In [16]:
# Number of timesteps per padded sequence (last axis of the 2-D feature
# matrix); reused later to pad user queries to the same length.
input_shape = features.shape[-1]
print(input_shape)

22


In [19]:
# (number of utterances, padded sequence length)
features.shape

(500, 22)

In [18]:
# Sizes that drive the model dimensions: distinct words seen by the tokenizer
# and distinct intent classes known to the label encoder.
vocabulary = len(tokenizer.word_index)
output_length = len(le.classes_)

print("number of unique words : ",vocabulary)
print("output length: ",output_length)

number of unique words :  871
output length:  28


In [20]:
# Inspect the word -> integer id mapping learned by the tokenizer (ids start at 1).
tokenizer.word_index

{'the': 1,
 'what': 2,
 'i': 3,
 'how': 4,
 'of': 5,
 'in': 6,
 'can': 7,
 'is': 8,
 'and': 9,
 'are': 10,
 'do': 11,
 'learning': 12,
 'me': 13,
 'you': 14,
 'to': 15,
 'for': 16,
 'my': 17,
 'tell': 18,
 'windows': 19,
 'a': 20,
 'about': 21,
 'it': 22,
 'language': 23,
 'with': 24,
 'im': 25,
 'machine': 26,
 'mental': 27,
 'capital': 28,
 'official': 29,
 'ai': 30,
 'data': 31,
 'unique': 32,
 'explain': 33,
 'whats': 34,
 'some': 35,
 'python': 36,
 'deep': 37,
 'concept': 38,
 'model': 39,
 'use': 40,
 'health': 41,
 'on': 42,
 'neural': 43,
 'models': 44,
 'computer': 45,
 'role': 46,
 'networks': 47,
 'thanks': 48,
 'describe': 49,
 'techniques': 50,
 'system': 51,
 'does': 52,
 'work': 53,
 'nlp': 54,
 'name': 55,
 'issues': 56,
 'your': 57,
 'this': 58,
 'should': 59,
 'working': 60,
 'them': 61,
 'error': 62,
 'using': 63,
 'natural': 64,
 'not': 65,
 'challenges': 66,
 'am': 67,
 'address': 68,
 'or': 69,
 'if': 70,
 'performance': 71,
 'tasks': 72,
 'transfer': 73,
 'strat

In [22]:
# Build the intent classifier:
#   Embedding -> Conv1D -> Dropout -> LSTM -> LSTM -> Dense -> Dropout -> softmax
#
# The sizes are derived from the fitted tokenizer / label encoder instead of
# being hard-coded, so the model keeps matching the data when intents.json
# gains or loses words or intents.
vocabulary = len(tokenizer.word_index)  # distinct words known to the tokenizer
output_length = len(le.classes_)        # number of intent classes

EMBEDDING_DIM = 200  # must equal the dimensionality of the pretrained vectors loaded later
L2_FACTOR = 0.0001   # shared strength for every L2 penalty below

m = Sequential()
# `shape` must be a tuple; `(n)` is just the integer n, `(n,)` is a 1-tuple.
m.add(Input(shape=(features.shape[1],)))
# +1 because tokenizer ids start at 1 and id 0 is used for padding.
m.add(Embedding(vocabulary + 1, EMBEDDING_DIM))
m.add(Conv1D(
    filters=32,
    kernel_size=5,
    activation="relu",
    kernel_initializer=tf.keras.initializers.GlorotNormal(),
    bias_regularizer=tf.keras.regularizers.L2(L2_FACTOR),
    kernel_regularizer=tf.keras.regularizers.L2(L2_FACTOR),
    activity_regularizer=tf.keras.regularizers.L2(L2_FACTOR),
))
m.add(Dropout(0.3))
# First LSTM returns the full sequence so the second LSTM can consume it;
# the second returns only its final state, which feeds the dense head.
m.add(LSTM(64, dropout=0.3, return_sequences=True))
m.add(LSTM(32, dropout=0.3, return_sequences=False))
m.add(Dense(256, activation="relu",
            activity_regularizer=tf.keras.regularizers.L2(L2_FACTOR)))
m.add(Dropout(0.6))
# One probability per intent class.
m.add(Dense(output_length, activation="softmax",
            activity_regularizer=tf.keras.regularizers.L2(L2_FACTOR)))

In [25]:
import sklearn
import scipy
import requests
import zipfile

In [None]:
# Folder the GloVe archive is extracted into.
# NOTE(review): machine-specific absolute path - consider a configurable,
# relative data directory instead.
# Every backslash is escaped explicitly: an unescaped "\K" is an invalid
# escape sequence (accepted with a warning today); the runtime value is
# unchanged.
destination_folder = "C:\\Users\\Kaleem\\"


url = "https://nlp.stanford.edu/data/glove.6B.zip"
file_name = "glove.6B.zip"

if os.path.exists(file_name):
    # The archive is large; do not fetch it again when the notebook is re-run.
    print("File already present, skipping download.")
else:
    # Stream the body to disk in chunks instead of holding the whole archive
    # in memory, and fail instead of hanging forever if the server stalls.
    with requests.get(url, stream=True, timeout=60) as response:
        if response.status_code == 200:
            # Write to a temporary name first so an interrupted download is
            # never mistaken for a complete archive on the next run.
            partial_name = file_name + ".part"
            with open(partial_name, 'wb') as file:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    file.write(chunk)
            os.replace(partial_name, file_name)
            print("File downloaded successfully.")
        else:
            print("Failed to download the file. Status code:", response.status_code)



In [31]:
# Extract every member of the downloaded archive into destination_folder.
with zipfile.ZipFile(file_name) as glove_archive:
    glove_archive.extractall(path=destination_folder)


In [34]:
# Parse the GloVe text file into {word: vector}. Each line holds a word
# followed by its vector components, separated by whitespace.
glove_dir = "glove.6B.200d.txt"  # path of the vectors file (a file, despite the name)
embeddings_index = {}
# `with` guarantees the handle is closed even if a line fails to parse.
with open(glove_dir, encoding='utf-8') as file_:  # Specify the encoding as UTF-8
    for line in file_:
        arr = line.split()
        if not arr:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        single_word = arr[0]
        embeddings_index[single_word] = np.asarray(arr[1:], dtype='float32')
print('Found %s word vectors.' % len(embeddings_index))


Found 400000 word vectors.


In [35]:
# Build the (max_words, 200) weight matrix for the Embedding layer: row i holds
# the pretrained vector of the word whose tokenizer id is i. Row 0 (padding)
# and words missing from the pretrained vectors stay all-zero.
max_words = vocabulary + 1
word_index = tokenizer.word_index
# Keep a numeric dtype; an object array stores one Python object per cell and
# has to be converted again before it can be used as layer weights.
embedding_matrix = np.zeros((max_words, 200), dtype='float32')
for word, i in word_index.items():
    if i >= max_words:
        # Never write past the matrix if the tokenizer knows more words
        # than `vocabulary` accounts for.
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

In [36]:
# Load the pretrained vectors into the first layer of the model and freeze it
# so training does not modify them.
embedding_layer = m.layers[0]
embedding_layer.set_weights([embedding_matrix])
embedding_layer.trainable = False

In [92]:
# Labels are integer class ids, hence the *sparse* categorical cross-entropy.
m.compile(
    optimizer='adam',
    loss="sparse_categorical_crossentropy",
    metrics=['accuracy'],
)

In [38]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
m.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 22, 200)           174400    
                                                                 
 conv1d_1 (Conv1D)           (None, 18, 32)            32032     
                                                                 
 dropout_2 (Dropout)         (None, 18, 32)            0         
                                                                 
 lstm_2 (LSTM)               (None, 18, 64)            24832     
                                                                 
 lstm_3 (LSTM)               (None, 32)                12416     
                                                                 
 dense_2 (Dense)             (None, 256)               8448      
                                                                 
 dropout_3 (Dropout)         (None, 256)              

In [41]:
# Take the callbacks from tensorflow.keras, the same Keras implementation the
# layers and model come from; mixing in the standalone `keras` package can
# pair objects from two different Keras installations.
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping
# Stop once the training loss has not improved for `patience` epochs and
# restore the weights of the best epoch seen.
# NOTE(review): this monitors the training loss (no validation data is used)
# and the patience is large relative to the epoch budget - confirm intended.
earlyStopping = EarlyStopping(monitor = 'loss', patience = 400, mode = 'min', restore_best_weights = True)

In [43]:
# Train on the full dataset; early stopping may end the run before the epoch limit.
history_training = m.fit(
    features,
    labels,
    batch_size=64,
    epochs=1000,
    callbacks=[earlyStopping],
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000


KeyboardInterrupt: 

In [47]:
# Returns [loss, accuracy] for the compiled loss and metric.
# NOTE(review): features/labels are the same arrays the model was fitted on,
# so this measures fit on the training data, not generalization.
m.evaluate(features, labels, batch_size = 64)



[0.03561866283416748, 0.9980000257492065]

In [48]:
#Testing our chatbot
import random
def generate_answer(query):
  """Return a randomly chosen reply for the intent predicted from `query`.

  The query gets the same cleaning as the training utterances (lower-case,
  ASCII punctuation removed), is converted to word ids with the fitted
  `tokenizer`, padded to `input_shape` timesteps and classified by model `m`.
  The winning class id is mapped back to its intent name through `le`, and
  one of that intent's entries in `responses` is picked at random.

  Args:
    query: raw user text.

  Returns:
    One element of responses[<predicted intent>].
  """
  cleaned = ''.join(ch.lower() for ch in query if ch not in string.punctuation)
  # Single-item batch -> flat 1-D array of word ids for this query.
  token_ids = np.array(tokenizer.texts_to_sequences([cleaned])).reshape(-1)
  model_input = pad_sequences([token_ids], input_shape)
  scores = m.predict(model_input)
  # The batch holds one row, so the flat argmax is the class id.
  response_tag = le.inverse_transform([scores.argmax()])[0]
  return random.choice(responses[response_tag])

In [106]:
# Smoke-test the chatbot on a handful of hand-written queries.
list_que = [
    "hello",
    "tell me May Name",
    "shit",
    "what is your real name?",
    "tell me a gossip",
    "write some poetry,",
    "YO ARE VERY INTELLIGENT",
    "i feel sick today",
    "cough",
    "can you give me sAfety Tips?",
]
for question in list_que:
  print("you: {}".format(question))
  print(generate_answer(question))

you: hello
Hi! I'm here to assist you. What's on your mind?
you: tell me May Name
Your name is Anas, how can I help you?
you: shit
I am sorry Boss
you: what is your real name?
My name is M.A.R.C, short for Multifunctional AI Response Companion
you: tell me a gossip
John said he follow a saying to get a friend I must be a friend.
you: write some poetry,
Footprints in the sand, a journey shared, Memories created, for those who cared
you: YO ARE VERY INTELLIGENT
Thank you, I was trained that way
you: i feel sick today
I see. It's important to address your health concerns. What symptoms are you experiencing?
you: cough
I read you loud and clear!
you: can you give me sAfety Tips?
Of course, taking precautions is important. In general, it's essential to maintain good hygiene, wash your hands regularly, and avoid close contact with sick individuals. If you have specific symptoms, consult a healthcare professional for guidance.


In [105]:
#saving the model
!pip install pyyaml h5py

from tensorflow import keras



In [100]:
# Persist the trained model to a single file (the .h5 extension presumably selects HDF5 - confirm).
# NOTE(review): generate_answer also depends on `tokenizer`, `le`,
# `input_shape` and `responses`, none of which are saved here.
m.save("Marc_chatbot.h5")