In [None]:
import json
import numpy as np
import random
import nltk
from nltk.stem import WordNetLemmatizer
from sklearn.preprocessing import LabelEncoder
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')

# Load the intents dataset.
# JSON text is UTF-8, so the encoding is passed explicitly rather than relying
# on the platform's locale default (which is not UTF-8 everywhere).
with open('/content/dataset_indonesiaa.json', encoding='utf-8') as file:
    data = json.load(file)

# Initialize the lemmatizer and the label encoder
lemmatizer = WordNetLemmatizer()
label_encoder = LabelEncoder()

# Data preparation:
#   patterns  -> one token list per training pattern
#   tags      -> the intent tag for the pattern at the same position
#   responses -> intent tag mapped to its list of candidate responses
patterns, tags, responses = [], [], {}

for intent in data['intents']:
    responses[intent['tag']] = intent['responses']
    for pattern in intent['patterns']:
        word_list = nltk.word_tokenize(pattern)
        patterns.append(word_list)
        tags.append(intent['tag'])

# Lematisasi dan encoding
lemmatized_patterns = [[lemmatizer.lemmatize(word.lower()) for word in pattern] for pattern in patterns]
tag_labels = label_encoder.fit_transform(tags)

# Membuat vocabulary
vocabulary = sorted(set(word for pattern in lemmatized_patterns for word in pattern))
vocab_size = len(vocabulary)
output_size = len(set(tags))

# One-hot encoding
def one_hot_encode(words, vocab):
    """Encode a token list as a binary presence vector over ``vocab``.

    Args:
        words: Iterable of hashable tokens (e.g. strings) to mark as present.
        vocab: Sequence of vocabulary tokens; its order defines the layout of
            the returned vector.

    Returns:
        A list of ``len(vocab)`` ints. Position ``i`` is 1 when ``vocab[i]``
        occurs in ``words`` and 0 otherwise. If ``vocab`` contains duplicate
        tokens, only the first occurrence is marked.
    """
    # Build a token -> first-position lookup once, instead of scanning the
    # vocabulary list twice (``in`` and ``.index``) for every word.
    first_index = {}
    for position, token in enumerate(vocab):
        first_index.setdefault(token, position)

    encoding = [0] * len(vocab)
    for word in words:
        position = first_index.get(word)
        if position is not None:
            encoding[position] = 1
    return encoding

# Encode every lemmatized pattern as one row of the training matrix, and keep
# the integer tag labels as the matching target array.
encoded_patterns = np.array(
    [one_hot_encode(tokens, vocabulary) for tokens in lemmatized_patterns]
)
encoded_tags = np.array(tag_labels)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Build the model: two ReLU hidden layers, each followed by dropout, and a
# softmax output with one unit per intent tag.
model = Sequential()
model.add(Dense(128, input_shape=(vocab_size,), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(output_size, activation='softmax'))

# Compile the model. The sparse loss is used because the targets are integer
# class ids rather than one-hot vectors.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the model
model.fit(
    encoded_patterns,
    encoded_tags,
    epochs=200,
    batch_size=5,
    verbose=1,
)


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.src.callbacks.History at 0x78ec4437cdc0>

In [None]:
def clean_up_sentence(sentence):
    """Tokenize ``sentence`` and return its lowercased, lemmatized tokens.

    Uses ``nltk.word_tokenize`` and the module-level ``lemmatizer``, the same
    preprocessing that was applied to the training patterns.
    """
    return [
        lemmatizer.lemmatize(token.lower())
        for token in nltk.word_tokenize(sentence)
    ]

def bag_of_words(sentence, words):
    """Return a binary bag-of-words vector for ``sentence`` over ``words``.

    Args:
        sentence: Raw input text; it is preprocessed with
            ``clean_up_sentence``.
        words: Sequence of vocabulary tokens; its order defines the layout of
            the returned vector.

    Returns:
        A NumPy array of length ``len(words)`` whose entry ``i`` is 1 when
        ``words[i]`` appears among the cleaned sentence tokens, else 0.
    """
    # A set gives constant-time membership tests, replacing the nested loop
    # that compared every sentence token against every vocabulary entry.
    present = set(clean_up_sentence(sentence))
    return np.array([1 if w in present else 0 for w in words])

def predict_class(sentence):
    """Predict the candidate intents for ``sentence``.

    The sentence is encoded against the module-level ``vocabulary`` and scored
    by ``model``. Classes whose score does not exceed the threshold are
    dropped, and the rest are ordered from highest to lowest score.

    Returns:
        A list of dicts with keys ``"intent"`` (the decoded tag) and
        ``"probability"`` (the score converted to ``str``). The list is empty
        when no class exceeds the threshold.
    """
    ERROR_THRESHOLD = 0.25

    features = bag_of_words(sentence, vocabulary)
    scores = model.predict(np.array([features]))[0]

    # Keep (class index, score) pairs above the threshold, best first.
    candidates = sorted(
        ([idx, score] for idx, score in enumerate(scores) if score > ERROR_THRESHOLD),
        key=lambda pair: pair[1],
        reverse=True,
    )

    predictions = []
    for idx, score in candidates:
        predictions.append({
            "intent": label_encoder.inverse_transform([idx])[0],
            "probability": str(score),
        })
    return predictions

def get_response(intents_list, intents_json, fallback="Maaf, saya tidak mengerti."):
    """Pick a random response for the top-ranked predicted intent.

    Args:
        intents_list: Predictions ordered best first; each item is a dict with
            an ``'intent'`` key holding the tag. May be empty.
        intents_json: Dataset dict with an ``'intents'`` list whose items have
            ``'tag'`` and ``'responses'`` keys.
        fallback: Value returned when no response can be chosen.

    Returns:
        A randomly chosen response of the matching intent. ``fallback`` is
        returned when ``intents_list`` is empty, when no intent in the dataset
        has the predicted tag, or when the matching intent has no responses.
    """
    # An empty prediction list previously raised IndexError here.
    if not intents_list:
        return fallback

    tag = intents_list[0]['intent']
    for intent in intents_json['intents']:
        if intent['tag'] == tag:
            candidates = intent['responses']
            # random.choice raises IndexError on an empty sequence.
            return random.choice(candidates) if candidates else fallback

    # Unknown tag: return the fallback instead of an implicit None.
    return fallback

def chatbot_response(text):
    """Return the chatbot's reply to ``text``.

    Predicts the intents for ``text`` and picks a response for the top one
    from the module-level ``data`` dataset.
    """
    return get_response(predict_class(text), data)

# Smoke-test the chatbot with a few sample messages
for message in ("hai", "Selamat pagi", "Terima kasih untuk bantuannya"):
    print(chatbot_response(message))


Halo yang disana.Senang melihat Anda kembali.Apa yang terjadi di dunia Anda saat ini?
Selamat pagi.Saya harap Anda tidur nyenyak.Bagaimana perasaan Anda hari ini?
Dengan senang hati
