# Import library dan modul yang diperlukan

In [16]:
import nltk
from nltk.stem import WordNetLemmatizer
import json
import pickle
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import SGD
import random

# Pra-Pemrosesan Data

In [17]:
# Lemmatizer used to reduce every word to its base form.
lemmatizer = WordNetLemmatizer()

# Read the intents definition from its JSON file. The context manager closes
# the file handle once the text is read, and the encoding is pinned to UTF-8
# so parsing does not depend on the platform's default locale.
with open('data/intents.json', encoding='utf-8') as intents_file:
    data_file = intents_file.read()
intents = json.loads(data_file)

# Accumulators filled while walking the intents:
#   words     - every token seen in any pattern (duplicates included)
#   classes   - distinct intent tags, in first-seen order
#   documents - (token list, tag) pairs, one per pattern
words = []
classes = []
documents = []
ignore_words = ['?', '!']

for intent in intents['intents']:
    for pattern in intent['patterns']:
        # Split the pattern sentence into individual tokens.
        tokenized_words = nltk.word_tokenize(pattern)
        words.extend(tokenized_words)

        # Keep the tokenized pattern together with the tag it belongs to.
        documents.append((tokenized_words, intent['tag']))

        # Register the tag the first time it is seen. Because this runs inside
        # the pattern loop, an intent without patterns contributes no class.
        if intent['tag'] not in classes:
            classes.append(intent['tag'])


In [18]:
# Build the final vocabulary: skip the ignored punctuation tokens, lowercase
# and lemmatize everything else, then keep the unique entries in sorted order.
words = sorted(
    {
        lemmatizer.lemmatize(token.lower())
        for token in words
        if token not in ignore_words
    }
)

# Unique intent tags, sorted.
classes = sorted(set(classes))

# Persist the vocabulary and the class list as pickle files.
for pickle_path, payload in (('texts.pkl', words), ('labels.pkl', classes)):
    with open(pickle_path, 'wb') as sink:
        pickle.dump(payload, sink)

# Load both files back and print them to check what was written.
with open('texts.pkl', 'rb') as source:
    texts_read = pickle.load(source)

with open('labels.pkl', 'rb') as source:
    labels_read = pickle.load(source)

print(texts_read)
print(labels_read)
    
    

['/help', 'agar', 'aku', 'apa', 'apaan', 'babi', 'bagaimana', 'baiklah', 'bantu', 'bantuan', 'berobat', 'bertanya', 'bingung', 'bisa', 'boleh', 'bot', 'botdino', 'bye', 'cara', 'cegah', 'daah', 'dadah', 'dah', 'dawdawidjaiwldj', 'demam', 'diagnobot', 'diagnobott', 'diano', 'diare', 'dignobot', 'digobot', 'dinobot', 'diobatin', 'dong', 'emang', 'flu', 'gatal', 'gejala', 'hai', 'halo', 'help', 'hi', 'hidung', 'influenza', 'insomnia', 'itu', 'kabar', 'kak', 'kamu', 'kena', 'kepala', 'kerja', 'kesulitan', 'ketik', 'lo', 'lu', 'makasih', 'malam', 'mata', 'mau', 'mencegah', 'mencegahnya', 'merah', 'minta', 'monyet', 'nama', 'ngapain', 'obat', 'obatin', 'ok', 'p', 'pagi', 'pekerjaan', 'pengobatan', 'pilek', 'ping', 'punggung', 'ringan', 'sakit', 'salam', 'sape', 'saran', 'sariawan', 'saya', 'selamat', 'si', 'siang', 'siapa', 'sih', 'susah', 'terkena', 'tersumbat', 'tidak', 'tidur', 'tinggal', 'tolong', 'tugas', 'tuh', 'wah', 'ya']
['bye', 'cegah_demam', 'cegah_diare', 'cegah_gatal', 'cegah_in

# TRAINING DATA [Melatih Data]

In [19]:
# Build the training set: one [bag-of-words, one-hot label] pair per document.
training = []
output_empty = [0] * len(classes)

for doc in documents:
    # Lowercase and lemmatize the document's tokens. A set is used so that the
    # membership test below is a constant-time lookup per vocabulary word.
    pattern_words = {lemmatizer.lemmatize(word.lower()) for word in doc[0]}

    # 1 where the vocabulary word occurs in this pattern, 0 otherwise.
    bag = [1 if w in pattern_words else 0 for w in words]

    # One-hot row marking the position of this document's tag in `classes`.
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1

    training.append([bag, output_row])

# Shuffle the samples, then convert to an object-dtype array so the feature
# and label columns can be sliced apart afterwards.
random.shuffle(training)
training = np.array(training, dtype=object)

In [20]:
# Split the training array into features (bags of words) and one-hot labels.
train_x = list(training[:, 0])
train_y = list(training[:, 1])

# Feed-forward classifier: two hidden ReLU layers (128 and 64 units), each
# followed by 50% dropout, and a softmax output with one unit per label entry.
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

# Compile with SGD using Nesterov momentum.
# NOTE(review): the `decay` keyword is not accepted by every Keras release -
# confirm it against the installed version before upgrading.
sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# Train the model and keep the returned history object in `hist`.
hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)

# Save the trained model. Only the path is passed: the second positional
# parameter of `save` is `overwrite`, so handing it the history object neither
# stored the history nor meant what it appeared to.
model.save('model.h5')

# Confirmation message that the model has been created.
print("Model berhasil dibuat")

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78