In [14]:
# libs
import random
import json
import pickle
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import SGD
import nltk
from nltk.stem import WordNetLemmatizer
# nltk.download('punkt')
# nltk.download('wordnet')

In [15]:
# intents dictionary
dictionary = open('../intents/intents.json').read()
intents = json.loads(dictionary)

In [16]:
# ignored characters
# words -> list of tokenized words
# tags -> tags defined from dictionary
# documents -> word (tokenized) and tag relation
exclude = ['.', ',', '!', '?']
words = []  # list for each tokenized words (words are separated from each other in a statement/phrase)
classes = []  # class or label (tags)
documents = []  # list for the combinations, where each tokenized words belong in relation to tags

exclude

['.', ',', '!', '?']

In [17]:
# iterates over the dictionary
for intent in intents['intents']:
    for pattern in intent['patterns']:
        tokens = nltk.word_tokenize(pattern)
        words.extend(tokens)
        documents.append((tokens, intent['tag']))
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

# output         
print("Words: ")
for i in words:
    print(i)

print("Classes: ")
for i in classes:
    print(i)
    
print(documents)

Words: 
hi
hello
hey
good
morning
good
evening
greetings
what
's
up
?
how
are
you
doing
?
good
day
what
are
you
called
?
who
are
you
?
what
is
your
name
?
requirements
form
138
form
137
report
card
grades
marks
subjects
courses
documents
remaining
requirements
course
schedule
subject
schedule
course
schedule
what
is
my
schedule
for
this
subject
?
what
is
my
schedule
for
this
course
?
what
time
does
my
class
start
?
how
can
I
change
my
schedule
?
i
need
to
change
my
schedule
how
can
I
enroll
on
a
subject
?
how
can
I
drop
a
subject
?
i
need
to
pass
requirements
where
do
I
pass
requirements
?
what
are
my
missing
documents
?
payment
pay
bill
alternative
payment
gcash
cash
bank
online
payment
balance
installment
how
much
is
my
remaining
balance
?
how
do
I
pay
?
how
can
I
pay
?
where
can
I
pay
?
what
are
the
ways
I
can
pay
?
can
I
pay
online
?
can
I
pay
onsite
?
where
is
the
cashier
?
what
are
the
office
hours
of
cashier
?
can
I
pay
another
way
?
how
much
do
I
need
to
pay
?
do
I
need
to
pay


In [18]:
lem = WordNetLemmatizer()
words = [lem.lemmatize(word) for word in words if word not in exclude]  # if word is not in exclude, lemmatize word

print(words)

['hi', 'hello', 'hey', 'good', 'morning', 'good', 'evening', 'greeting', 'what', "'s", 'up', 'how', 'are', 'you', 'doing', 'good', 'day', 'what', 'are', 'you', 'called', 'who', 'are', 'you', 'what', 'is', 'your', 'name', 'requirement', 'form', '138', 'form', '137', 'report', 'card', 'grade', 'mark', 'subject', 'course', 'document', 'remaining', 'requirement', 'course', 'schedule', 'subject', 'schedule', 'course', 'schedule', 'what', 'is', 'my', 'schedule', 'for', 'this', 'subject', 'what', 'is', 'my', 'schedule', 'for', 'this', 'course', 'what', 'time', 'doe', 'my', 'class', 'start', 'how', 'can', 'I', 'change', 'my', 'schedule', 'i', 'need', 'to', 'change', 'my', 'schedule', 'how', 'can', 'I', 'enroll', 'on', 'a', 'subject', 'how', 'can', 'I', 'drop', 'a', 'subject', 'i', 'need', 'to', 'pas', 'requirement', 'where', 'do', 'I', 'pas', 'requirement', 'what', 'are', 'my', 'missing', 'document', 'payment', 'pay', 'bill', 'alternative', 'payment', 'gcash', 'cash', 'bank', 'online', 'paymen

In [19]:
words = sorted(set(words))  # removes duplicated words
classes = sorted(set(classes))  # removes duplicate tags

#output
print(words)
print(classes)

["'m", "'s", '137', '138', 'BSCS', 'BSIT', 'BSTM', 'I', 'ID', 'RFID', 'STI', 'a', 'access', 'account', 'alternative', 'an', 'and', 'another', 'anything', 'are', 'balance', 'bank', 'bill', 'broken', 'ca', 'called', 'can', 'cant', 'card', 'cash', 'cashier', 'change', 'class', 'computer', 'course', 'day', 'did', 'displaying', 'do', 'document', 'doe', "doens't", 'doing', 'drop', 'eLMS', 'elm', 'enroll', 'error', 'evening', 'for', 'form', 'gate', 'gcash', 'getting', 'go', 'good', 'grade', 'greeting', 'ha', 'have', 'having', 'hello', 'help', 'hey', 'hi', 'hospitality', 'hour', 'how', 'i', 'im', 'in', 'information', 'installment', 'is', 'log', 'login', 'management', 'mark', 'me', 'microsoft', 'missing', 'month', 'morning', 'much', 'my', "n't", 'name', 'need', 'not', 'of', 'offer', 'office', 'on', 'one', 'online', 'onsite', 'open', 'pas', 'password', 'pay', 'payment', 'portal', 'problem', 'program', 'reflect', 'remaining', 'replacement', 'report', 'requirement', 'reset', 'schedule', 'science',

In [20]:
# serializes each element | wb -> writing binary | outputs a pickle file (.pkl)
pickle.dump(words, open('../pkl/words.pkl', 'wb'))
pickle.dump(classes, open('../pkl/tags.pkl', 'wb'))

<============= MACHINE LEARNING =============>

In [21]:
training = []
outputEmpty = [0] * len(classes)  # template of zeroes (0), however many classes there are

# output
print(outputEmpty)

[0, 0, 0, 0, 0, 0]


In [22]:
for document in documents:
    bag = []  # for each combination(documents), creates an empty bag of words
    wordPatterns = document[0]
    wordPatterns = [lem.lemmatize(word.lower()) for word in wordPatterns]  # lemmatize each word in wordPatters, which consist of the index 0 (words) in each document element
    
    # inputs 1 or 0 into the bag of words depending whether it occurs in the pattern or not, respectively
    for word in words:
        if word in wordPatterns:
            bag.append(1)
        else:
            bag.append(0)

In [23]:
outputRow = list(outputEmpty)
outputRow[classes.index(document[1])] = 1
training.append(bag + outputRow)  # stores the value of bag (document[0]) and outputRow (document[1]) to training list which is either 1 or 0

# output
print(document)
print(bag)
print(outputRow)

(['hospitality', 'and', 'management'], 'programs')
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 1]


In [24]:
random.shuffle(training)  # shuffles training data
training = np.array(training)  # converts to numpy array

In [25]:
# splits the array into two dimensions, x for words and y for classes
trainX = training[:, :len(words)]
trainY = training[:, len(words):]

In [26]:
# building the neural network
model = Sequential([
    # input layer with 128 neurons
    # input shape is dependent to the shape of the training data for x
    # activation function = rectified linear unit | if feature is determined to be significant label as 1, otherwise 0
    Dense(128, input_shape=(len(trainX[0]),), activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    
    # output layer
    # activation function = softmax | returns the probability that a certain input belongs to a specific class (tag)
    Dense(len(trainY[0]), activation='softmax')  
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [27]:
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

print("Building model...")
history = model.fit(trainX, trainY, epochs=250, batch_size=5, verbose=1)
model.save('./model/chatbotModel.h5', history) 
print('Model created.')

Building model...
Epoch 1/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 951ms/step - accuracy: 0.0000e+00 - loss: 1.6443
Epoch 2/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.0000e+00 - loss: 1.8287
Epoch 3/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 1.0000 - loss: 1.6008
Epoch 4/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 1.0000 - loss: 1.2524
Epoch 5/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 1.0000 - loss: 1.0652
Epoch 6/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 1.0000 - loss: 1.3123
Epoch 7/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 1.0000 - loss: 1.1482
Epoch 8/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 1.0000 - loss: 0.7491
Epoch 9/250
[1m1/1[0m [32m



Model created.
