<a href="https://colab.research.google.com/github/dr-mushtaq/Chatbot-in-e-learning-system/blob/main/Model_Training_Mushtaq_V1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#<p style="font-family:JetBrains Mono; font-weight:normal; letter-spacing: 1px; color:#207d06; font-size:100%; text-align:left;padding: 0px; border-bottom: 3px solid #207d06;">**Table of Content**</p>



1.   Import Library
2.   Import Dataset
3.   Data Preprocessing
4.   Neural Network




#<p style="font-family:JetBrains Mono; font-weight:normal; letter-spacing: 1px; color:#207d06; font-size:100%; text-align:left;padding: 0px; border-bottom: 3px solid #207d06;">**1-Import library**</p>

In [1]:
import nltk
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
import json
import pickle
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD
import random

In [2]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

#<p style="font-family:JetBrains Mono; font-weight:normal; letter-spacing: 1px; color:#207d06; font-size:100%; text-align:left;padding: 0px; border-bottom: 3px solid #207d06;">**2-Import Dataset**</p>

In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [4]:
words=[]
classes = []
documents = []
ignore_words = ['?', '!']
data_file = open('/content/gdrive/MyDrive/Research /Research Projects/Chatbot System in Education /Dataset/intents.json').read()
intents = json.loads(data_file)


#<p style="font-family:JetBrains Mono; font-weight:normal; letter-spacing: 1px; color:#207d06; font-size:100%; text-align:left;padding: 0px; border-bottom: 3px solid #207d06;">**3-Data Preprocessing**</p>

In [5]:
import nltk
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [6]:
for intent in intents['intents']:
    for pattern in intent['patterns']:

        #tokenize each word
        w = nltk.word_tokenize(pattern)
        words.extend(w)
        #add documents in the corpus
        documents.append((w, intent['tag']))

        # add to our classes list
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

In [7]:
 import nltk
 nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [8]:
# lemmaztize and lower each word and remove duplicates
words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words]
words = sorted(list(set(words)))
# sort classes
classes = sorted(list(set(classes)))
# documents = combination between patterns and intents
print (len(documents), "documents")
# classes = intents
print (len(classes), "classes", classes)
# words = all words, vocabulary
print (len(words), "unique lemmatized words", words)



235 documents
93 classes [' ADMISSION OF FOREIGN/DUAL NATIONAL', ' Additional Specialization Certificate', ' Admission Status', ' Alumni Association ', ' Citizen complaint portal ', ' Club and Societies ', ' Courses Catalogue', ' Degree/Transcript Verification ', ' Digiskills ', ' Disabled Student Fee structure(Local)', ' Disabled Student Fee structure(Overseas)', ' Exam Superintendent Registration ', ' FACULTY OF SCIENCE & TECHNOLOGY Programs', ' Faculty of Arts Programs', ' Faculty of Education Programs', ' Faculty of Management Programs', ' Grading Scheme', ' How to Apply Online', ' Life at VU', ' New Registration', ' ORIC ', ' PROFESSIONAL COURSES DEVELOPMENT', ' PROTECTION AGAINST HARASSMENT  ', ' Prospectus', ' RIGHT OF ACCESS TO INFORMATION ', ' Recent Advertisements of VU', ' Schedule of other Charges', ' Short Certificate Course', ' Student Startup', ' Study Scheme', ' Suitability of  VU', ' Tender', ' Vendor Registration ', ' Zero Semester', 'Academic Calendar', 'Acceptable a

In [9]:
pickle.dump(words,open('texts.pkl','wb'))
pickle.dump(classes,open('labels.pkl','wb'))

**Create our training data**

In [10]:
# create our training data
training = []
# create an empty array for our output
output_empty = [0] * len(classes)
# training set, bag of words for each sentence
for doc in documents:
    # initialize our bag of words
    bag = []
    # list of tokenized words for the pattern
    pattern_words = doc[0]
    # lemmatize each word - create base word, in attempt to represent related words
    pattern_words = [lemmatizer.lemmatize(word.lower()) for word in pattern_words]
    # create our bag of words array with 1, if word match found in current pattern
    for w in words:
        bag.append(1) if w in pattern_words else bag.append(0)

    # output is a '0' for each tag and '1' for current tag (for each pattern)
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1

    training.append([bag, output_row])
# shuffle our features and turn into np.array
random.shuffle(training)

# Padding to ensure consistent shape for bag-of-words representations
max_len = max(len(sample[0]) for sample in training)  # Find the maximum length
padded_training = []
for sample in training:
    bag, output_row = sample
    # Check if padding is actually necessary
    if len(bag) < max_len:
        padded_bag = bag + [0] * (max_len - len(bag))  # Pad with zeros if needed
    else:
        padded_bag = bag  # No padding needed
    padded_training.append([padded_bag, output_row])

training = np.array(padded_training, dtype=object) # Convert the padded list to NumPy array, allow object dtype


# create train and test lists. X - patterns, Y - intents
train_x = list(training[:,0])
train_y = list(training[:,1])
print("Training data created")


Training data created


**Suggested Optimization**

1. Padding is unnecessary for bag-of-words

Bag-of-words (BoW) representations are already of fixed length because the word list (words) defines the dimensionality. The for w in words loop ensures each vector has the same length. Thus, the explicit padding step is redundant.

2. Potential Issue with np.array Conversion

np.array(training, dtype=object) is used, but NumPy arrays are better suited for numerical data. Consider using np.array(training, dtype=np.float32) if all values are numerical.

3. Shuffling Before NumPy Conversion

You correctly shuffle the data before converting it to a NumPy array, which is good practice to ensure randomness in training.

In [11]:
import random
import numpy as np

# Create our training data
training = []

# Create an empty array for output (one-hot encoded)
output_empty = [0] * len(classes)

# Training set: bag of words for each sentence
for doc in documents:
    # Initialize our bag of words
    bag = [1 if w in [lemmatizer.lemmatize(word.lower()) for word in doc[0]] else 0 for w in words]

    # Output is '1' for current tag, '0' for others
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1

    training.append([bag, output_row])

# Shuffle and convert to NumPy array
random.shuffle(training)
training = np.array(training, dtype=object)

# Split features and labels
train_x = list(training[:, 0])  # Input patterns
train_y = list(training[:, 1])  # Corresponding intents

print("Training data created")


Training data created


#<p style="font-family:JetBrains Mono; font-weight:normal; letter-spacing: 1px; color:#207d06; font-size:100%; text-align:left;padding: 0px; border-bottom: 3px solid #207d06;">**4-Model Training**</p>

#<p style="font-family:JetBrains Mono; font-weight:normal; letter-spacing: 1px; color:#207d06; font-size:100%; text-align:left;padding: 0px; border-bottom: 3px solid #207d06;">**4.1-Neural Netowrk**</p>


##<p style="font-family:JetBrains Mono; font-weight:normal; letter-spacing: 1px; color:#207d06; font-size:100%; text-align:left;padding: 0px; border-bottom: 3px solid #207d06;">**4.1.1 Model Training**</p>

In [None]:
# Create model - 3 layers. First layer 128 neurons, second layer 64 neurons and 3rd output layer contains number of neurons
# equal to number of intents to predict output intent with softmax
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

# Compile model. Stochastic gradient descent with Nesterov accelerated gradient gives good results for this model
sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True) # Changed 'lr' to 'learning_rate'
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

#fitting and saving the model
hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)

**Observations & Suggestions**

1. **Dropout Rate**

You are using 0.5 dropout after both hidden layers, which is quite aggressive. This might slow down training or cause underfitting. Try 0.3 instead of 0.5.

2- **Optimizer Choice**

SGD with Nesterov momentum works well, but Adam optimizer (optimizer='adam') is often better for deep learning models in NLP tasks.

If you want to stick with SGD, consider learning rate decay scheduling to improve training convergence.

3. **Epochs & Batch Size Tuning**

epochs=200 might be excessive unless you have a large dataset. Try 100 epochs first, then increase if needed.

batch_size=5 is quite small. Try 16 or 32 for better efficiency.

4. **Convert Lists to NumPy Arrays Before Training**

Ensure train_x and train_y are explicitly converted to NumPy arrays before feeding them into model.fit().



In [13]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import SGD, Adam
import numpy as np

# Create the model
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.3))  # Reduced dropout
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))  # Reduced dropout
model.add(Dense(len(train_y[0]), activation='softmax'))

# Choose optimizer (SGD with decay or Adam)
optimizer = Adam(learning_rate=0.001)  # Adam usually performs better
# optimizer = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)  # Optional

# Compile model
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Convert to NumPy arrays
train_x = np.array(train_x)
train_y = np.array(train_y)

# Train the model
hist = model.fit(train_x, train_y, epochs=100, batch_size=16, verbose=1)

# Save the trained model
model.save("chatbot_model.h5")

print("Model training complete and saved as chatbot_model.h5")

Epoch 1/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 109ms/step - accuracy: 0.0168 - loss: 4.5451
Epoch 2/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.0250 - loss: 4.4858    
Epoch 3/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.0350 - loss: 4.4431     
Epoch 4/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0869 - loss: 4.3896 
Epoch 5/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1504 - loss: 4.3196 
Epoch 6/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1643 - loss: 4.2314 
Epoch 7/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1344 - loss: 4.1026 
Epoch 8/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1796 - loss: 3.9737 
Epoch 9/100
[1m15/15[0m [32m━



Model training complete and saved as chatbot_model.h5


##<p style="font-family:JetBrains Mono; font-weight:normal; letter-spacing: 1px; color:#207d06; font-size:100%; text-align:left;padding: 0px; border-bottom: 3px solid #207d06;">**4.1.2 Model Saving**</p>

In [None]:
model.save('chat_model.keras', hist) # Changed 'chat_model' to 'chat_model.keras' to include the .keras extension
print("model created")

model created



#<p style="font-family:JetBrains Mono; font-weight:normal; letter-spacing: 1px; color:#207d06; font-size:100%; text-align:left;padding: 0px; border-bottom: 3px solid #207d06;">**5.1-Random Forest**</p>

In [15]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import numpy as np
import joblib

# Convert lists to NumPy arrays
train_x = np.array(train_x)

# Convert one-hot labels to categorical labels (integer encoding)
label_encoder = LabelEncoder()
train_y_labels = label_encoder.fit_transform([np.argmax(y) for y in train_y])  # Convert one-hot to integer

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(train_x, train_y_labels, test_size=0.2, random_state=42)

# Initialize and train the Random Forest model
rf_model = RandomForestClassifier(n_estimators=500, max_depth=None, random_state=42, class_weight="balanced")
rf_model.fit(X_train, y_train)

# Predict on test set
y_pred = rf_model.predict(X_test)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"✅ Improved Random Forest Model Accuracy: {accuracy:.2f}")

# Save the model and label encoder
joblib.dump(rf_model, "chatbot_rf_model.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")

print("Random Forest model training complete and saved as chatbot_rf_model.pkl")


✅ Improved Random Forest Model Accuracy: 0.53
Random Forest model training complete and saved as chatbot_rf_model.pkl


#<p style="font-family:JetBrains Mono; font-weight:normal; letter-spacing: 1px; color:#207d06; font-size:100%; text-align:left;padding: 0px; border-bottom: 3px solid #207d06;">**4.2- LSTM**</p>

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense,Flatten,Dropout, BatchNormalization
from tensorflow.keras.layers import Bidirectional

##<p style="font-family:JetBrains Mono; font-weight:normal; letter-spacing: 1px; color:#207d06; font-size:100%; text-align:left;padding: 0px; border-bottom: 3px solid #207d06;">**4.2.1 - Model Architecture 1**</p>


Stacked LSTM (multi-layer LSTM) with Batch Normalization

In [None]:
from tensorflow.keras.optimizers.schedules import ExponentialDecay # Import ExponentialDecay
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense,Flatten,Dropout, BatchNormalization
from tensorflow.keras.layers import Bidirectional

#Create the LSTM network
model = Sequential()
model.add(LSTM(64, input_shape=(len(train_x[0]),1), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(LSTM(32,return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(LSTM(16,return_sequences=False))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Dense(len(train_y[0]),activation="softmax"))
model.summary()
# Compile model. Stochastic gradient descent with Nesterov accelerated gradient gives good results for this model
lr_schedule = ExponentialDecay( # Now you can use ExponentialDecay
    initial_learning_rate=0.01,
    decay_steps=10000,
    decay_rate=0.9)
#adam = Adam(learning_rate=lr_schedule)
sgd = SGD(learning_rate=0.01 ,momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
#fitting and saving the model
hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=128, verbose=1)

  super().__init__(**kwargs)


Epoch 1/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 74ms/step - accuracy: 0.0137 - loss: 4.6464
Epoch 2/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 73ms/step - accuracy: 0.0054 - loss: 4.7265
Epoch 3/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - accuracy: 0.0054 - loss: 4.7115
Epoch 4/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - accuracy: 0.0028 - loss: 4.8089    
Epoch 5/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.0166 - loss: 4.6691
Epoch 6/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.0057 - loss: 4.6717    
Epoch 7/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.0355 - loss: 4.6880
Epoch 8/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.0057 - loss: 4.6615    
Epoch 9/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━

##<p style="font-family:JetBrains Mono; font-weight:normal; letter-spacing: 1px; color:#207d06; font-size:100%; text-align:left;padding: 0px; border-bottom: 3px solid #207d06;">**4.2.2 - Model Architecture 2**</p>

Deeper Stacked LSTM architecture

In [None]:
#Create the LSTM network
model = Sequential()
model.add(LSTM(128, input_shape=(len(train_x[0]),1), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(64,return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(LSTM(32,return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(LSTM(16,return_sequences=False))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Dense(len(train_y[0]),activation="softmax"))
model.summary()
# Compile model. Stochastic gradient descent with Nesterov accelerated gradient gives good results for this model
lr_schedule = ExponentialDecay(
    initial_learning_rate=0.01,
    decay_steps=10000,
    decay_rate=0.9)
#adam = Adam(learning_rate=lr_schedule)
sgd = SGD(learning_rate=0.001 ,momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
#fitting and saving the model
hist = model.fit(np.array(train_x), np.array(train_y), epochs=100, batch_size=128, verbose=1)

Epoch 1/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 104ms/step - accuracy: 0.0140 - loss: 4.6680
Epoch 2/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step - accuracy: 0.0220 - loss: 4.6230
Epoch 3/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - accuracy: 0.0111 - loss: 4.7041
Epoch 4/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - accuracy: 0.0166 - loss: 4.6874
Epoch 5/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 0.0166 - loss: 4.7398
Epoch 6/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - accuracy: 0.0109 - loss: 4.6744
Epoch 7/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 0.0000e+00 - loss: 4.6417
Epoch 8/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - accuracy: 0.0054 - loss: 4.6869
Epoch 9/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[

##<p style="font-family:JetBrains Mono; font-weight:normal; letter-spacing: 1px; color:#207d06; font-size:100%; text-align:left;padding: 0px; border-bottom: 3px solid #207d06;">**4.2.3 - Model Architecture 3**</p>


Bidirectional LSTMs

In [None]:
model = Sequential()
model.add(Bidirectional(LSTM(units=64,input_shape=(len(train_x[0]),1),return_sequences=True)))
model.add(Dropout(0.2))
model.add(Bidirectional(LSTM(units=32,return_sequences=True)))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Bidirectional(LSTM(units=16,return_sequences=False)))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Dense(len(train_y[0]),activation="softmax"))

# Build the model
model.build(input_shape=(None, len(train_x[0]), 1))

model.summary()

In [None]:
sgd = SGD(learning_rate=0.001 ,momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

#fitting and saving the model
hist = model.fit(np.array(train_x), np.array(train_y), epochs=100, batch_size=128, verbose=1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
model.save( path_to_save_model+'chat_model_3', hist)
print("model created and saved")

model created and saved


##<p style="font-family:JetBrains Mono; font-weight:normal; letter-spacing: 1px; color:#207d06; font-size:100%; text-align:left;padding: 0px; border-bottom: 3px solid #207d06;">**4.2.4 - Model Architecture 4**</p>


In [None]:
model = Sequential()
model.add(Bidirectional(LSTM(units=128,input_shape=(len(train_x[0]),1),return_sequences=True)))
model.add(Dropout(0.2))
model.add(Bidirectional(LSTM(units=64,return_sequences=True)))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Bidirectional(LSTM(units=32,return_sequences=True)))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Bidirectional(LSTM(units=16,return_sequences=False)))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Dense(len(train_y[0]),activation="softmax"))

# Build the model
model.build(input_shape=(None, len(train_x[0]), 1))

model.summary()

sgd = SGD(learning_rate=0.001 ,momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

#fitting and saving the model
hist = model.fit(np.array(train_x), np.array(train_y), epochs=100, batch_size=128, verbose=1)

Epoch 1/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 136ms/step - accuracy: 0.0166 - loss: 4.8668
Epoch 2/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 117ms/step - accuracy: 0.0083 - loss: 4.8738
Epoch 3/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 132ms/step - accuracy: 0.0166 - loss: 4.8061
Epoch 4/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step - accuracy: 0.0054 - loss: 4.7760
Epoch 5/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step - accuracy: 0.0000e+00 - loss: 4.6855
Epoch 6/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step - accuracy: 0.0194 - loss: 4.8023
Epoch 7/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step - accuracy: 0.0111 - loss: 4.7610
Epoch 8/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step - accuracy: 0.0113 - loss: 4.7219
Epoch 9/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━

In [None]:
model.save( path_to_save_model+'chat_model_4', hist)
print("model created and saved")

model created and saved


In [None]:
model = Sequential()
model.add(Bidirectional(LSTM(units=128,input_shape=(len(train_x[0]),1),return_sequences=True)))
model.add(BatchNormalization())
model.add(Bidirectional(LSTM(units=64,return_sequences=True)))
model.add(Dropout(0.5))
model.add(Bidirectional(LSTM(units=32,return_sequences=True)))
model.add(Dropout(0.5))
model.add(BatchNormalization())
model.add(Bidirectional(LSTM(units=16,return_sequences=False)))
model.add(Dropout(0.5))
model.add(BatchNormalization())
model.add(Dense(len(train_y[0]),activation="softmax"))

# Build the model
model.build(input_shape=(None, len(train_x[0]), 1))

model.summary()

sgd = SGD(learning_rate=0.001 ,momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

#fitting and saving the model
hist = model.fit(np.array(train_x), np.array(train_y), epochs=100, batch_size=128, verbose=1)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional (Bidirection  (None, 391, 256)          133120    
 al)                                                             
                                                                 
 batch_normalization (Batch  (None, 391, 256)          1024      
 Normalization)                                                  
                                                                 
 bidirectional_1 (Bidirecti  (None, 391, 128)          164352    
 onal)                                                           
                                                                 
 dropout (Dropout)           (None, 391, 128)          0         
                                                                 
 bidirectional_2 (Bidirecti  (None, 391, 64)           41216     
 onal)                                                  

In [None]:
model.save( path_to_save_model+'chat_model_5', hist)
print("model created and saved")


#<p style="font-family:JetBrains Mono; font-weight:normal; letter-spacing: 1px; color:#207d06; font-size:100%; text-align:left;padding: 0px; border-bottom: 3px solid #207d06;">**BERT**</p>

In [None]:
!pip install transformers torch datasets scikit-learn nltk


In [None]:
!git clone https://github.com/dr-mushtaq/Chatbot-in-e-learning-system.git

Cloning into 'Chatbot-in-e-learning-system'...
remote: Enumerating objects: 377, done.[K
remote: Counting objects: 100% (88/88), done.[K
remote: Compressing objects: 100% (86/86), done.[K
remote: Total 377 (delta 56), reused 2 (delta 2), pack-reused 289 (from 2)[K
Receiving objects: 100% (377/377), 1.45 MiB | 20.65 MiB/s, done.
Resolving deltas: 100% (179/179), done.


In [None]:
import json
import torch
import random
import numpy as np
import nltk
from nltk.tokenize import word_tokenize
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Download NLTK tokenizer if not available
nltk.download('punkt')



MODEL_PATH = "./Chatbot-in-e-learning-system/private_bert_model"  # Update this to the correct path

from transformers import BertTokenizer, BertForSequenceClassification

# Load tokenizer and model
tokenizer = BertTokenizer.from_pretrained(MODEL_PATH)
model = BertForSequenceClassification.from_pretrained(MODEL_PATH, num_labels=len(classes))

# Initialize tokenizer from private BERT model
#MODEL_PATH = "./private_bert_model"
#tokenizer = BertTokenizer.from_pretrained(MODEL_PATH)

# Extract patterns and labels
sentences = []
labels = []

for intent in intents['intents']:
    for pattern in intent['patterns']:
        sentences.append(pattern)
        labels.append(intent['tag'])

# Encode labels
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)
num_classes = len(set(labels_encoded))

# Split dataset into training and testing
train_texts, test_texts, train_labels, test_labels = train_test_split(sentences, labels_encoded, test_size=0.2, random_state=42)

# Tokenization function
def tokenize_function(texts):
    return tokenizer(texts, padding="max_length", truncation=True, return_tensors="pt", max_length=64)

# Create Dataset Class
class ChatbotDataset(Dataset):
    def __init__(self, texts, labels):
        self.encodings = tokenize_function(texts)
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return {key: val[idx] for key, val in self.encodings.items()}, self.labels[idx]

# Prepare datasets
train_dataset = ChatbotDataset(train_texts, train_labels)
test_dataset = ChatbotDataset(test_texts, test_labels)

# Load the model from private directory
model = BertForSequenceClassification.from_pretrained(MODEL_PATH, num_labels=num_classes)

# Define Training Arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01,
    logging_dir="./logs",
)

# Define Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

# Train the model
trainer.train()

# Save the trained model
model.save_pretrained("./trained_chatbot_model")
tokenizer.save_pretrained("./trained_chatbot_model")

print("BERT chatbot model trained and saved successfully!")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


OSError: Incorrect path_or_model_id: './Chatbot-in-e-learning-system/private_bert_model'. Please provide either the path to a local folder or the repo_id of a model on the Hub.

#<p style="font-family:JetBrains Mono; font-weight:normal; letter-spacing: 1px; color:#207d06; font-size:100%; text-align:left;padding: 0px; border-bottom: 3px solid #207d06;">**References**</p>

[Chatgpt Roadmap](https://chatgpt.com/c/67dd0d37-384c-800e-8fb3-e12a2efcaab1)