In [1]:
!pip install PySastrawi



In [2]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\dzikr\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
factory = StemmerFactory()
stemmer = factory.create_stemmer()

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import random
import json
import pickle
from tensorflow.keras.callbacks import EarlyStopping
import nltk
from sklearn.model_selection import KFold
from tensorflow.keras import layers, models, callbacks, optimizers
import numpy as np

In [4]:
# Load the intents file
with open('nlp_dataset_faq.json') as data_file:
    intents = json.load(data_file)

In [5]:
# Download the 'punkt_tab' data package
nltk.download('punkt_tab')

# Pre-processing
words = []
classes = []
documents = []
ignore = ['','!', '"', "'", '(', ')', ',', '-', '.', ':', ';', '?', '[', ']', '_',
          'adalah', 'akan', 'aku', 'anda', 'atau', 'dalam', 'dan',
          'dari', 'dengan', 'di', 'dia', 'harus', 'ini', 'itu', 'jika', 'kami', 'kamu', 'ke',
          'kita', 'mereka', 'oleh', 'pada', 'saya', 'sebuah', 'sedang', 'sementara', 'tanpa',
          'tapi', 'telah', 'untuk', 'yang', '{', '}', 'merasa']



#tokenize
for intent in intents:
    for pattern in intent['patterns']:
        w = nltk.word_tokenize(pattern.lower())  # Tokenizing
        words.extend([word for word in w if word not in ignore])  #Ignore
        documents.append((w, intent['tag']))
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\dzikr\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [6]:
# Melakukan stemming dan normalisasi data
words = [stemmer.stem(w.lower()) for w in words if w not in ignore]
words = sorted(list(set(words)))

# Menghapus class duplikat dengan 'set'
classes = sorted(list(set(classes)))

print(len(documents), "documents")
print(len(classes), "classes", classes)
print(len(words), "unique stemmed words", words)

1535 documents
76 classes ['apa_itu_gangguan_mental', 'badmood_dampak', 'badmood_efek_jangka_panjang', 'badmood_gejala', 'badmood_pengobatan', 'badmood_penyebab', 'badmood_preventif', 'badmood_solusi_pribadi', 'badmood_terhadap_kesehatan_fisik', 'badmood_terhadap_pekerjaan', 'bunuh_diri', 'cara_mengatasi_benci', 'cara_mengatasi_bingung', 'cara_mengatasi_cemburu', 'cara_mengatasi_dendam', 'cara_mengatasi_frustasi', 'cara_mengatasi_gangguan_mental', 'cara_mengatasi_hampa', 'cara_mengatasi_iri', 'cara_mengatasi_kecewa', 'cara_mengatasi_kesal', 'cara_mengatasi_malu', 'cara_mengatasi_marah', 'cara_mengatasi_merasa_sendiri', 'cara_mengatasi_sedih', 'cara_mengatasi_takut', 'cara_mengatasi_takut_gagal', 'cara_mengatasi_tidak_dihargai', 'diagnosis_gangguan_mental', 'dukungan_mental_terapi', 'efek_gangguan_mental', 'emosi_bingung', 'emosi_frustrasi', 'emosi_hampa', 'emosi_kecewa', 'emosi_lelah', 'emosi_marah', 'emosi_sedih', 'emosi_senang', 'emosi_takut', 'emosi_takut_gagal', 'emosi_terbebani', 

In [7]:
import random
import numpy as np

training = []
output_empty = [0] * len(classes)

for doc in documents:
    pattern_words = [stemmer.stem(word.lower()) for word in doc[0]]
    bag = [1 if w in pattern_words else 0 for w in words]

    output_row = output_empty[:]
    output_row[classes.index(doc[1])] = 1

    training.append([bag, output_row])

random.shuffle(training)
training = np.array(training, dtype=object)

train_x = np.array([item[0] for item in training])
train_y = np.array([item[1] for item in training])


## MODEL

In [8]:
model = models.Sequential([
        layers.Input(shape=(len(train_x[0]),), name='input_layer'),
        layers.Dense(256, activation='relu', kernel_regularizer='l2', name='hidden_layer1'),
        layers.BatchNormalization(),
        layers.Dropout(0.3, name='dropout1'),
        layers.Dense(128, activation='relu', kernel_regularizer='l2', name='hidden_layer2'),
        layers.BatchNormalization(),
        layers.Dropout(0.3, name='dropout2'),
        layers.Dense(len(train_y[0]), activation='softmax', name='output_layer')
    ])

In [9]:
model.summary()

## K_FOLD

In [10]:
tf.compat.v1.reset_default_graph()

# Hyperparameters
num_folds = 5
batch_size = 8
learning_rate = 1e-4
epochs = 100


train_x = np.array(train_x)
train_y = np.array(train_y)


kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)
fold_accuracies = []

for fold, (train_index, val_index) in enumerate(kf.split(train_x)):
    print(f"\n=== Training Fold {fold + 1}/{num_folds} ===")
    
    X_train, X_val = train_x[train_index], train_x[val_index]
    y_train, y_val = train_y[train_index], train_y[val_index]

    tensorboard_callback = callbacks.TensorBoard(log_dir=f'./logs/fold_{fold+1}')
    early_stopping_callback = callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    
    optimizer = optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[tensorboard_callback, early_stopping_callback],
        verbose=1
    )

    val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)
    print(f"Fold {fold + 1} - Validation Accuracy: {val_accuracy:.4f}")
    fold_accuracies.append(val_accuracy)
    
avg_accuracy = np.mean(fold_accuracies)
print(f"\n=== Average Validation Accuracy: {avg_accuracy:.4f} ===")



=== Training Fold 1/5 ===
Epoch 1/100
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.0158 - loss: 9.9675 - val_accuracy: 0.0261 - val_loss: 8.6633
Epoch 2/100
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0277 - loss: 8.8465 - val_accuracy: 0.0293 - val_loss: 8.2045
Epoch 3/100
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0488 - loss: 8.2125 - val_accuracy: 0.0586 - val_loss: 7.6769
Epoch 4/100
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0919 - loss: 7.6294 - val_accuracy: 0.1140 - val_loss: 7.2250
Epoch 5/100
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.1037 - loss: 7.1962 - val_accuracy: 0.1498 - val_loss: 6.8651
Epoch 6/100
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.1675 - loss: 6.7964 - val_accuracy: 0.1824 - val_loss: 6

## Callbacks

In [11]:
early_stopping_callback = callbacks.EarlyStopping(monitor='accuracy', patience=10, restore_best_weights=True)

## Training tanpa split

In [12]:
model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train and save the final model
model.fit(train_x, train_y, epochs=100, batch_size=8, callbacks=[tensorboard_callback, early_stopping_callback], verbose=1)
model.save('final_model_trained_on_full_data.h5')

Epoch 1/100
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8728 - loss: 0.7374
Epoch 2/100
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8580 - loss: 0.7293
Epoch 3/100
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8966 - loss: 0.6350
Epoch 4/100
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9230 - loss: 0.5432
Epoch 5/100
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9278 - loss: 0.5508
Epoch 6/100
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9173 - loss: 0.5456
Epoch 7/100
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9545 - loss: 0.4656
Epoch 8/100
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9533 - loss: 0.4770
Epoch 9/100
[1m192/192[0m [32



In [13]:
import json

# Save
try:
    with open("training_data.json", "w") as file:
        # Convert non-serializable data like NumPy arrays or other objects to lists
        data = {'words': words, 'classes': classes, 'train_x': train_x.tolist(), 'train_y': train_y.tolist()}
        json.dump(data, file)
except IOError as e:
    print(f"Error saving training data: {e}")

In [14]:
# Save
try:
    with open("training_data", "wb") as file:
        pickle.dump({'words': words, 'classes': classes, 'train_x': train_x, 'train_y': train_y}, file)
except IOError as e:
    print(f"Error saving training data: {e}")

In [15]:
# Load
try:
    with open("training_data", "rb") as file:
        data = pickle.load(file)

    words = data['words']
    classes = data['classes']
    train_x = np.array(data['train_x'])
    train_y = np.array(data['train_y'])
except IOError as e:
    print(f"Error loading training data: {e}")

# Load model
try:
    model = keras.models.load_model('final_model_trained_on_full_data.h5')
except IOError as e:
    print(f"Error loading model: {e}")




In [16]:
def clean_up_sentence(sentence):
    sentence_words = nltk.word_tokenize(sentence)
    sentence_words = [stemmer.stem(word.lower()) for word in sentence_words]
    return sentence_words

def bow(sentence, words, show_details=False):
    sentence_words = clean_up_sentence(sentence)
    bag = np.zeros(len(words), dtype=np.float32)
    for idx, w in enumerate(words):
        if w in sentence_words:
            bag[idx] = 1
            if show_details:
                print(f"Found in bag: {w}")
    return bag

ERROR_THRESHOLD = 0.40



def classify(sentence):
    input_data = bow(sentence, words)
    results = model.predict(np.array([input_data]))[0]
    results = [[i, r] for i, r in enumerate(results) if r >= ERROR_THRESHOLD]
    results.sort(key=lambda x: x[1], reverse=True)
    return [(classes[r[0]], r[1]) for r in results]

def response(sentence, userID='123', show_details=False):
    results = classify(sentence)
    if results:
        for intent in intents:
            if intent['tag'] == results[0][0]:
                return random.choice(intent['responses'])
    return "Maaf, saya belum mengerti apa yang Anda bicarakan, perlu bantuan?."

In [None]:
print("0 to close")
while True:
    message = input("")
    if message == "0":
        break
    result = response(message)

    if result is not None and "~" in result:
        order = (result[1:])
        action(order)
    else:
        print(result)


0 to close


 cara mengatasi rasa cemas


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
Kadang-kadang kecemasan bisa sangat mengganggu. Jangan ragu untuk mencari cara untuk menenangkan diri atau berbicara dengan seorang profesional.


 bagaimana cara mengatasi kesedihan


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Luangkan waktu untuk diri sendiri dan lakukan hal-hal yang kamu nikmati.


 bagaimana cara mengatasi kesedihan?


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Luangkan waktu untuk diri sendiri dan lakukan hal-hal yang kamu nikmati.


 bagaimana cara mengatasi kesedihan?


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Luangkan waktu untuk menangis jika perlu, itu adalah cara yang sehat untuk melepaskan emosi.


In [None]:
model = tf.keras.models.load_model('final_model_trained_on_full_data.h5')

converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()


with open('model.tflite', 'wb') as f:
  f.write(tflite_model)