## Import Library

In [6]:
import json
import pickle
import random

import nltk
import numpy as np
import tensorflow as tf
from nltk.stem import WordNetLemmatizer
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Seed every random number generator the notebook relies on (the Python
# `random` module shuffles the training samples; NumPy and TensorFlow are used
# by Keras during training) so that Restart & Run All reproduces the same model.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Download necessary NLTK data: 'punkt' for nltk.word_tokenize and 'wordnet'
# for WordNetLemmatizer. Looping (instead of ending the cell on a bare call)
# also keeps the stray `True` return value out of the cell output.
for resource in ('punkt', 'wordnet'):
    nltk.download(resource)

[nltk_data] Downloading package punkt to C:\Users\Imam Bari
[nltk_data]     Setiawan\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\Imam Bari
[nltk_data]     Setiawan\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

## Load Data

In [2]:
# Load JSON file
# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# platform locale (e.g. cp1252 on Windows), which can mis-decode or reject
# non-ASCII characters in the dataset.
with open('dataset_chatbot.json', 'r', encoding='utf-8') as file:
    intents = json.load(file)

## Preprocessing Data

In [3]:
# One lemmatizer instance is shared by the vocabulary and the bag-of-words steps
lemmatizer = WordNetLemmatizer()

# Punctuation tokens that must not end up in the vocabulary
ignore_letters = ['?', '!', '.', ',']

# documents: (raw token list, intent tag) for every pattern in the dataset
documents = []
vocabulary = set()
tags = set()

# Tokenize each pattern once, recording the sample, its tag and its lemmas
for intent in intents['intents']:
    tag = intent['tag']
    for pattern in intent['patterns']:
        tokens = nltk.word_tokenize(pattern)
        documents.append((tokens, tag))
        tags.add(tag)
        vocabulary.update(
            lemmatizer.lemmatize(token.lower())
            for token in tokens
            if token not in ignore_letters
        )

# Sorted, de-duplicated vocabulary and class labels; their order defines the
# feature and output indices used by the following cells
words = sorted(vocabulary)
classes = sorted(tags)

## Membuat Data Pelatihan

In [4]:
# Each training sample is a [bag_of_words, one_hot_label] pair
training = []
output_empty = [0] * len(classes)

for tokens, tag in documents:
    # Lemmatize the pattern's tokens the same way the vocabulary was built
    lemmas = {lemmatizer.lemmatize(token.lower()) for token in tokens}

    # 1 where the vocabulary word occurs in this pattern, 0 elsewhere
    bag = [1 if vocab_word in lemmas else 0 for vocab_word in words]

    # One-hot encode the intent tag by its position in `classes`
    output_row = output_empty.copy()
    output_row[classes.index(tag)] = 1

    training.append([bag, output_row])

# Randomize sample order in place before the arrays are built
random.shuffle(training)

## Membuat Urutan Pad dan Array Pelatihan

In [7]:
# Build the training arrays.
# Every bag already has exactly len(words) entries (one slot per vocabulary
# word), so padding each sample individually was a no-op; the arrays are
# built directly instead. int32 matches what pad_sequences used to return.
if not training:
    raise ValueError(
        "No training samples were built; check that the dataset defines patterns."
    )

# Kept for any later cell that still refers to the feature width by this name
max_len = len(words)

train_x = np.array([bag for bag, _ in training], dtype=np.int32)
train_y = np.array([output_row for _, output_row in training])

# Sanity check: one fixed-width feature row per training sample
assert train_x.shape == (len(training), max_len), train_x.shape

## Training Model

In [8]:
# Build model: a small feed-forward classifier over the bag-of-words vectors.
# The feature width is declared with an explicit Input object; passing
# `input_shape` to the first Dense layer of a Sequential model makes Keras 3
# emit a UserWarning.
model = Sequential([
    tf.keras.Input(shape=(train_x.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    # One softmax probability per intent class
    Dense(train_y.shape[1], activation='softmax'),
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
# Compile model
# The legacy `decay` keyword is rejected by the TF 2.11+ optimizers and is
# ignored (with a warning) by Keras 3. The same lr / (1 + decay * step)
# schedule is therefore expressed with InverseTimeDecay.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.01,
    decay_steps=1,
    decay_rate=1e-6,
)
sgd = SGD(learning_rate=lr_schedule, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# Train model
# train_x / train_y are already NumPy arrays, so they are passed as-is.
hist = model.fit(train_x, train_y, epochs=200, batch_size=5, verbose=1)



Epoch 1/200
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 0.0576 - loss: 2.4576 
Epoch 2/200
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.1564 - loss: 2.3173  
Epoch 3/200
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.3382 - loss: 2.1949
Epoch 4/200
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.2683 - loss: 2.0883
Epoch 5/200
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.3849 - loss: 2.0019
Epoch 6/200
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.4627 - loss: 1.6893
Epoch 7/200
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6198 - loss: 1.4131
Epoch 8/200
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.4766 - loss: 1.4351
Epoch 9/200
[1m22/22[0m [32m━━━━━━━━━━━━━━

## Evaluasi Model

In [10]:
# Score the same samples the model was trained on, then collapse both the
# softmax outputs and the one-hot targets to integer class indices.
# NOTE(review): there is no held-out split here, so the resulting metrics
# measure fit to the training data rather than generalization.
predictions = model.predict(train_x)
predicted_classes = predictions.argmax(axis=1)
true_classes = train_y.argmax(axis=1)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step


In [11]:
# Build the per-class precision / recall / F1 summary, labelling each row
# with the corresponding intent tag from `classes`
report = classification_report(
    y_true=true_classes,
    y_pred=predicted_classes,
    target_names=classes,
)

In [13]:
# Show the heading and the report text on separate lines
print("Classification Report:", report, sep="\n")

Classification Report:
                         precision    recall  f1-score   support

                   diet       1.00      1.00      1.00        10
                 gejala       1.00      1.00      1.00        10
             hipertensi       1.00      1.00      1.00        10
                   opsi       1.00      1.00      1.00        10
             pencegahan       1.00      1.00      1.00        10
             pengobatan       1.00      1.00      1.00        10
penyakit_kardiovaskular       1.00      1.00      1.00        10
                  salam       1.00      1.00      1.00        10
        selamat_tinggal       1.00      1.00      1.00        10
           terima_kasih       1.00      1.00      1.00        10
            tidak_jelas       1.00      1.00      1.00        10

               accuracy                           1.00       110
              macro avg       1.00      1.00      1.00       110
           weighted avg       1.00      1.00      1.00       110


## Save model

In [10]:
# Save model
# Model.save() takes (filepath, overwrite, ...): the History object that was
# passed as the second argument was interpreted as the `overwrite` flag and
# was never written to disk, so it is dropped here.
model.save('chatbot_model.h5')



In [None]:
# Save tokenizer and classes
# NOTE: 'tokenizer.pkl' stores the sorted vocabulary list (`words`), not a
# tokenizer object; 'classes.pkl' stores the sorted intent tags. Presumably
# both are reloaded by the inference code to rebuild the bag-of-words input
# and to map prediction indices back to tags.
# `pickle` is imported with the other modules at the top of the notebook.
with open('tokenizer.pkl', 'wb') as file:
    pickle.dump(words, file)
with open('classes.pkl', 'wb') as file:
    pickle.dump(classes, file)