In [2]:
import random
import json
import numpy as np
import pickle
import tensorflow as tf
import pandas as pd

import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD

In [14]:
lemmatizer = WordNetLemmatizer()
intents = json.loads(open('./intents.json').read())
intents

{'intents': [{'tag': 'greetings',
   'patterns': ['hello', 'hai', 'hi', 'hey', 'halo', 'good day'],
   'responses': ['Hello', 'Hi', 'What can i do for you?']},
  {'tag': 'goodbye',
   'patterns': ['bye', 'cya', 'see you later', 'daa'],
   'responses': ['Ok bye', 'Bye', 'Nice to talk with you', 'See ya']},
  {'tag': 'age',
   'patterns': ['how old are you',
    'how old r u',
    'how old',
    'your age?',
    'age?'],
   'responses': ['I have created since 2023']},
  {'tag': 'name',
   'patterns': ['what is your name',
    'who are you',
    'your name?',
    'do you have a name',
    'what should i call you',
    'tell me your name'],
   'responses': ['You can call me bataraprabu',
    "I'm bataraprabu",
    'My name is bataraprabu']}]}

In [4]:
intents_df = pd.read_json('./intents.json')

In [5]:
intents_df.head()

Unnamed: 0,intents
0,"{'tag': 'greetings', 'patterns': ['hello', 'ha..."
1,"{'tag': 'goodbye', 'patterns': ['bye', 'cya', ..."
2,"{'tag': 'age', 'patterns': ['how old are you',..."
3,"{'tag': 'name', 'patterns': ['what is your nam..."


In [62]:
# Flatten data
df_nested = pd.json_normalize(intents, record_path =['intents'])
df_nested = df_nested[['tag', 'patterns']]
df_nested = df_nested.explode(['patterns'])
# df_nested = pd.DataFrame(df_nested)
df_nested

Unnamed: 0,tag,patterns
0,greetings,hello
0,greetings,hai
0,greetings,hi
0,greetings,hey
0,greetings,halo
0,greetings,good day
1,goodbye,bye
1,goodbye,cya
1,goodbye,see you later
1,goodbye,daa


In [65]:
df_nested[['greetings', 'goodbye', 'name']] = pd.get_dummies(df_nested['tag'], drop_first=True)
df_nested

Unnamed: 0,tag,patterns,greetings,goodbye,name
0,greetings,hello,0,1,0
0,greetings,hai,0,1,0
0,greetings,hi,0,1,0
0,greetings,hey,0,1,0
0,greetings,halo,0,1,0
0,greetings,good day,0,1,0
1,goodbye,bye,1,0,0
1,goodbye,cya,1,0,0
1,goodbye,see you later,1,0,0
1,goodbye,daa,1,0,0


In [3]:
words=[]
classes=[]
documents=[]
ignore_letters=['?', '!', '.', ',']

In [4]:
tokenizer = word_tokenize

In [5]:
'intents.json-> intents-> {tag, patterns, responses}' "ket: -> berarti berisi"

'intents.json-> intents-> {tag, patterns, responses}ket: -> berarti berisi'

In [6]:
for intent in intents['intents']:
    for pattern in intent['patterns']:
        word_list = tokenizer(pattern)
        words.extend(word_list)
        documents.append((word_list, intent['tag']))
        if intent['tag'] not in classes:
            classes.append(intent['tag'])
        

In [7]:
print(documents)

[(['hello'], 'greetings'), (['hai'], 'greetings'), (['hi'], 'greetings'), (['hey'], 'greetings'), (['halo'], 'greetings'), (['good', 'day'], 'greetings'), (['bye'], 'goodbye'), (['cya'], 'goodbye'), (['see', 'you', 'later'], 'goodbye'), (['daa'], 'goodbye'), (['how', 'old', 'are', 'you'], 'age'), (['how', 'old', 'r', 'u'], 'age'), (['how', 'old'], 'age'), (['your', 'age', '?'], 'age'), (['age', '?'], 'age'), (['what', 'is', 'your', 'name'], 'name'), (['who', 'are', 'you'], 'name'), (['your', 'name', '?'], 'name'), (['do', 'you', 'have', 'a', 'name'], 'name'), (['what', 'should', 'i', 'call', 'you'], 'name'), (['tell', 'me', 'your', 'name'], 'name')]


In [8]:
print(words)

['hello', 'hai', 'hi', 'hey', 'halo', 'good', 'day', 'bye', 'cya', 'see', 'you', 'later', 'daa', 'how', 'old', 'are', 'you', 'how', 'old', 'r', 'u', 'how', 'old', 'your', 'age', '?', 'age', '?', 'what', 'is', 'your', 'name', 'who', 'are', 'you', 'your', 'name', '?', 'do', 'you', 'have', 'a', 'name', 'what', 'should', 'i', 'call', 'you', 'tell', 'me', 'your', 'name']


In [9]:
words = [lemmatizer.lemmatize(word) for word in words if word not in ignore_letters]
words = sorted(set(words))

In [10]:
print(words)

['a', 'age', 'are', 'bye', 'call', 'cya', 'daa', 'day', 'do', 'good', 'hai', 'halo', 'have', 'hello', 'hey', 'hi', 'how', 'i', 'is', 'later', 'me', 'name', 'old', 'r', 'see', 'should', 'tell', 'u', 'what', 'who', 'you', 'your']


In [11]:
classes = sorted(set(classes))

In [12]:
pickle.dump(words,open('words.pkl', 'wb'))
pickle.dump(classes,open('classes.pkl', 'wb'))

In [13]:
training = []
output_empty = [0]*len(classes)
print(list(output_empty))

[0, 0, 0, 0]


# one hot encoding

In [14]:
classes.index('greetings')

2

In [15]:
for doc in documents:
    bag = []
    word_pattern = doc[0]
    word_pattern = [lemmatizer.lemmatize(word.lower()) for word in word_pattern]
    for word in words:
        bag.append(1) if word in word_pattern else bag.append(0) #one hot encoding
    
    label_class = list(output_empty)
    label_class[classes.index(doc[1])]=1
    training.append([bag, label_class])
        

In [16]:
random.shuffle(training)
training = np.array(training)

  training = np.array(training)


In [17]:
X_train = list(training[:, 0])
y_train = list(training[:, 1])

In [18]:
print(len(X_train[0]), len(X_train[6]))

32 32


In [67]:
type(np.array(X_train))

numpy.ndarray

In [93]:
model = Sequential()
model.add(Dense(128, input_shape=(len(X_train[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation= 'relu'))
model.add(Dropout(0.5))
model.add(Dense(len(y_train[0]), activation='softmax'))

In [94]:
print(model.summary())

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 128)               4224      
                                                                 
 dropout_4 (Dropout)         (None, 128)               0         
                                                                 
 dense_7 (Dense)             (None, 64)                8256      
                                                                 
 dropout_5 (Dropout)         (None, 64)                0         
                                                                 
 dense_8 (Dense)             (None, 4)                 260       
                                                                 
Total params: 12,740
Trainable params: 12,740
Non-trainable params: 0
_________________________________________________________________
None


In [95]:
sgd= SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

In [96]:
model.fit(np.array(X_train), np.array(y_train), epochs=200, verbose=1)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


<keras.callbacks.History at 0x1b0451eca00>

In [91]:
words = pickle.load(open('words.pkl', 'rb'))
classes = pickle.load(open('classes.pkl', 'rb'))

In [37]:
def cleansing(text):
    word = word_tokenize(text)
    word = [lemmatizer.lemmatize(w) for w in word]
    return word  

In [106]:
def bagofword(text):
    word = cleansing(text)
    bag = [0]*len(words)
    for w in word:
        for i, t in enumerate(words):
            if t == w:
                bag[i] = 1
#     return print(type(tf.convert_to_tensor(bag, dtype=tf.float32)))
#     return print(type(np.array(bag)))
    return np.array(bag).reshape(1,32)
    
    

In [112]:
def predict(t):
    bow = bagofword(t)
    res = model.predict(np.array(bow))[0]
    error_treshold = 0.25
    result = [[i,r] for i,r in enumerate(res) if r > error_treshold]
    
    result.sort(key=lambda x: x[1],reverse=True)
    return_list =[]
    for r in result:
        return_list.append({'intents': classes[r[0]], 'probability' : str(r[1])})
    return return_list

In [113]:
def get_response(intent_list, intent_json):
    tag = intent_list[0]['intents']
    list_of_intent = intent_json['intents']
    for i in list_of_intent:
        if i['tag'] == tag:
            result=random.choice(i['responses'])
            break
    return result

In [114]:
print('Bot is Running')

Bot is Running


In [None]:
while True:
    message=input("")
    ints = predict(message)
    res= get_response(ints, intents)
    print(res)

halo
Hello
name
My name is bataraprabu
how
I have created since 2023
hoe are you
You can call me bataraprabu
how old are you
I have created since 2023
how old are you
I have created since 2023
your name
You can call me bataraprabu


In [108]:
h='halo'
bagofword(h).shape

(1, 32)

In [103]:
np.array(X_train).shape

(21, 32)