In [1]:
# Import necessary libraries
import random
from tensorflow.keras.optimizers import SGD
from keras.layers import Dense, Dropout
from keras.models import load_model, Sequential
import numpy as np
import pickle
import json
import nltk
from nltk.stem import WordNetLemmatizer

In [2]:
# Create the lemmatizer and make sure the NLTK resources used by this notebook
# (tokenizer models and WordNet data) are present locally.
NLTK_RESOURCES = ("omw-1.4", "punkt", "wordnet")
lemmatizer = WordNetLemmatizer()
download_results = [nltk.download(resource) for resource in NLTK_RESOURCES]
# The value displayed by this cell is the status returned by the last download.
download_results[-1]

[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\Abdulmunim\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Abdulmunim\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Abdulmunim\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [3]:
# Set up the accumulators filled in by later cells and load the intent
# definitions from custom-intents.json.
words = []                 # tokens collected from every pattern
classes = []               # distinct intent tags
documents = []             # (token list, tag) pairs, one per pattern
ignore_words = ["?", "!"]  # tokens left out of the vocabulary

# Read through a context manager so the file handle is closed as soon as the
# text has been loaded (a bare open(...).read() never closes it explicitly).
# JSON text is UTF-8 by specification, so decode it as such instead of relying
# on the platform's default encoding.
with open("custom-intents.json", encoding="utf-8") as intents_file:
    data_file = intents_file.read()
intents = json.loads(data_file)

In [4]:
# Walk every pattern of every intent: tokenize it, grow the global token list,
# remember the (tokens, tag) pair, and register each tag the first time it is seen.
for intent in intents["intents"]:
    for pattern in intent["patterns"]:
        tag = intent["tag"]
        tokens = nltk.word_tokenize(pattern)

        words += tokens
        documents.append((tokens, tag))

        if tag in classes:
            continue
        classes.append(tag)

In [5]:
# Build the vocabulary: drop ignored tokens, lowercase and lemmatize the rest,
# then keep one copy of each lemma in sorted order.
lemmas = set()
for token in words:
    if token in ignore_words:
        continue
    lemmas.add(lemmatizer.lemmatize(token.lower()))
words = sorted(lemmas)


In [6]:
# Deduplicate the intent tags and put them in sorted order
unique_classes = set(classes)
classes = sorted(unique_classes)

In [7]:
# Report how many documents, classes and vocabulary entries were collected
document_count = len(documents)
class_count = len(classes)
vocabulary_size = len(words)

print(document_count, "documents")
print(class_count, "classes", classes)
print(vocabulary_size, "unique lemmatized words", words)

106 documents
34 classes ['about', 'academic_status', 'admission_committees', 'admission_fees', 'advanced_standing_admission', 'extension_distance_undergraduate_admission', 'extension_programs', 'goodbye', 'grading_system', 'graduation_fees', 'graduation_requirements', 'greetings', 'history', 'history1', 'history2', 'history3', 'history4', 'in_service_undergraduate_admission', 'introduction', 'masters_programs', 'other_fees', 'postgraduate_programs', 'readmission_transfer_fees', 'reexamination', 'regular_undergraduate_admission', 'thanks', 'tuition_fees_ethiopian_evening', 'tuition_fees_foreign_postgraduate', 'tuition_fees_foreign_undergraduate', 'undergraduate_admission_process', 'undergraduate_graduation', 'undergraduate_programs', 'unknown', 'withdrawal_readmission']
159 unique lemmatized words ["'m", "'s", ',', '.', 'a', 'aait', 'aau', 'ababa', 'about', 'academic', 'achievement', 'addis', 'additional', 'admission', 'advanced', 'afternoon', 'allow', 'am', 'an', 'and', 'application',

In [8]:
# Persist the vocabulary and the class list as pickle files.
# Each file is written inside a context manager so the handle is flushed and
# closed right away; passing a bare open(...) to pickle.dump never closes it.
with open("words.pkl", "wb") as words_file:
    pickle.dump(words, words_file)
with open("classes.pkl", "wb") as classes_file:
    pickle.dump(classes, classes_file)

In [9]:
# Start with no training rows and an all-zero label template (one slot per class)
training = list()
output_empty = [0 for _ in classes]

In [10]:
# Turn every document into a [bag-of-words vector, one-hot label] training row.
for doc in documents:
    pattern_tokens, tag = doc[0], doc[1]

    # Lemmatize the pattern's tokens once and keep them in a set, so each
    # vocabulary lookup below is a constant-time membership test.
    pattern_lemmas = {lemmatizer.lemmatize(token.lower()) for token in pattern_tokens}

    # 1 where the vocabulary word occurs in this pattern, 0 otherwise.
    bag = [1 if vocab_word in pattern_lemmas else 0 for vocab_word in words]

    # Copy the zero template so rows do not share one list, then mark the
    # position of this document's class.
    output_row = list(output_empty)
    output_row[classes.index(tag)] = 1
    training.append([bag, output_row])

In [11]:
# Shuffle training data in place.
# A dedicated, seeded generator makes the row order repeatable from run to run
# without touching the global `random` state. Nothing else in this notebook
# sets a seed, so model training itself remains non-deterministic.
SHUFFLE_SEED = 42
random.Random(SHUFFLE_SEED).shuffle(training)

In [12]:
# Split each training row into its feature vector (bag of words) and its label
train_x = []
train_y = []
for row in training:
    train_x.append(row[0])
    train_y.append(row[1])

In [13]:
# Turn the feature and label lists into NumPy arrays
train_x, train_y = np.array(train_x), np.array(train_y)

print("Training data created")


Training data created


In [14]:
# Assemble the classifier: two ReLU hidden layers, each followed by dropout,
# and a softmax output with one unit per label column.
input_width = len(train_x[0])
output_width = len(train_y[0])

model = Sequential(
    [
        Dense(128, input_shape=(input_width,), activation="relu"),
        Dropout(0.5),
        Dense(64, activation="relu"),
        Dropout(0.5),
        Dense(output_width, activation="softmax"),
    ]
)
model.summary()

# Compile with SGD using Nesterov momentum
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(
    optimizer=sgd,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               20480     
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 34)                2210      
                                                                 
Total params: 30946 (120.88 KB)
Trainable params: 30946 (120.88 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [15]:
# Train the network on the prepared arrays and keep the returned history object
features = np.array(train_x)
labels = np.array(train_y)
hist = model.fit(
    features,
    labels,
    epochs=200,
    batch_size=5,
    verbose=1,
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [16]:
# Save the trained model to an HDF5 file.
# The second positional parameter of Model.save is `overwrite`, so the training
# history must not be passed there; it was only accepted because the History
# object is truthy. Spell out overwrite=True to keep the same effective
# behavior (an existing file is replaced without prompting).
model.save("chatbot_model_latest.h5", overwrite=True)

  saving_api.save_model(
