In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import json
import string
import nltk
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Flatten
from tensorflow.keras.models import Sequential, load_model
import pickle
import random

# Load the intent definitions from intents.json.
# The context manager closes the file handle deterministically (the previous
# bare open(...).read() left it to the garbage collector), and the explicit
# UTF-8 encoding keeps decoding independent of the platform default.
with open('intents.json', encoding='utf-8') as intents_file:
    intents = json.load(intents_file)

# Flatten the intents into training pairs:
#   inputs[i]  -> one example pattern
#   tags[i]    -> the intent tag that pattern belongs to
#   responses  -> tag -> list of candidate replies for that intent
tags = []
inputs = []
responses = {}
for intent in intents['intents']:
    responses[intent['tag']] = intent["responses"]
    for lines in intent['patterns']:
        inputs.append(lines)
        tags.append(intent['tag'])

# Debug output of the collected data.
print("this is tags", tags)
print("this is input", inputs)
print("this is responses", responses)

# Build the dataframe: one row per pattern together with its intent tag.
data = pd.DataFrame({"inputs": inputs, "tags": tags})


def _strip_punctuation_and_lowercase(text):
    """Return *text* without ASCII punctuation, each kept character lowercased."""
    kept = [char.lower() for char in text if char not in string.punctuation]
    return ''.join(kept)


# Normalize every pattern before it is handed to the tokenizer.
data["inputs"] = data["inputs"].apply(_strip_punctuation_and_lowercase)

# Tokenize and pad: fit the vocabulary on the cleaned patterns, convert each
# pattern to a sequence of word indices, then pad them to a common length.
tokenizer = Tokenizer(num_words=2000)
corpus = data["inputs"]
tokenizer.fit_on_texts(corpus)
train = tokenizer.texts_to_sequences(corpus)
x_train = pad_sequences(train)

# Encode the intent tags as integer class ids.
le = LabelEncoder()
le.fit(data["tags"])
y_train = le.transform(data["tags"])

# Sizes the model needs: padded sequence length, vocabulary size, class count.
_, input_shape = x_train.shape
unique_words = len(tokenizer.word_index)
output_length = len(le.classes_)
print("unique words: ", unique_words)
print("output length: ", output_length)

# Build the LSTM intent classifier.
model = Sequential()
# The explicit Input layer fixes the (padded) sequence length, so the
# Embedding layer does not need `input_length` -- that argument was being
# passed a tuple and is deprecated in Keras 3.
model.add(Input(shape=(input_shape,)))
# Vocabulary size is unique_words + 1: Tokenizer word indices start at 1 and
# index 0 is what pad_sequences uses for padding.
model.add(Embedding(unique_words + 1, 10))
# return_sequences=True keeps the per-timestep outputs, which Flatten then
# concatenates into a single feature vector for the dense head.
model.add(LSTM(10, return_sequences=True))
model.add(Flatten())
model.add(Dense(units=10, activation='relu'))
model.add(Dense(units=5, activation='relu'))
# One softmax output per intent tag.
model.add(Dense(units=output_length, activation='softmax'))

# Compile the model. The sparse loss matches the integer class ids produced
# by the LabelEncoder (no one-hot encoding required).
model.compile(loss="sparse_categorical_crossentropy", optimizer='adam', metrics=['accuracy'])

# Fit the network on the complete pattern set for 300 epochs.
model.fit(x_train, y_train, epochs=300)

# Persist the trained network in HDF5 (.h5) format.
model.save("Chatbot.h5")

# Persist the fitted LabelEncoder so class ids can be mapped back to tags.
# NOTE(review): the fitted tokenizer and the padded length are not written
# anywhere in this script; the prediction code below reuses the in-memory
# objects -- confirm whether they should be saved as well.
with open('label_encoder.pkl', 'wb') as encoder_out:
    pickle.dump(le, encoder_out)

# Example: reload the saved artifacts and classify new sentences.

# Load the trained model back from disk.
loaded_model = load_model("Chatbot.h5")

# Load the fitted LabelEncoder.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only
# load label_encoder.pkl when it was produced by this script or another
# trusted source.
with open('label_encoder.pkl', 'rb') as le_file:
    le = pickle.load(le_file)

# Sentences to classify.
new_inputs = ["example input 1", "example input 2"]

# Apply the same normalization that was applied to the training patterns
# (strip string.punctuation, lowercase) so inference sees text in the same
# form the tokenizer was fitted on. The Tokenizer's own default filter does
# not remove apostrophes, so skipping this step could map a word to a
# different (or unknown) token than during training.
cleaned_inputs = [
    ''.join(ch.lower() for ch in text if ch not in string.punctuation)
    for text in new_inputs
]
processed_inputs = tokenizer.texts_to_sequences(cleaned_inputs)
# Pad/truncate to the sequence length the model was trained with.
padded_inputs = pad_sequences(processed_inputs, maxlen=input_shape)
prediction = loaded_model.predict(padded_inputs)

# Pick the most probable class per sentence and map the id back to its tag.
predicted_labels = np.argmax(prediction, axis=1)
predicted_tags = le.inverse_transform(predicted_labels)

print("Predicted Tags:", predicted_tags)


this is tags ['salam', 'salam', 'salam', 'salam', 'salam', 'salam', 'selamat tinggal', 'selamat tinggal', 'selamat tinggal', 'selamat tinggal', 'selamat tinggal', 'terima kasih', 'terima kasih', 'terima kasih', 'terima kasih', 'terima kasih', 'pilihan', 'pilihan', 'pilihan', 'pilihan', 'pilihan', 'konservasi_hewan', 'konservasi_hewan', 'konservasi_hewan', 'konservasi_hewan', 'konservasi_hewan', 'fakta_konservasi', 'fakta_konservasi', 'fakta_konservasi', 'fakta_konservasi', 'fakta_konservasi', 'informasi_kunjungan', 'informasi_kunjungan', 'informasi_kunjungan', 'informasi_kunjungan', 'informasi_kunjungan', 'hewan_langka', 'hewan_langka', 'hewan_langka', 'hewan_langka', 'kegiatan_edukasi', 'kegiatan_edukasi', 'kegiatan_edukasi', 'kegiatan_edukasi']
this is input ['Halo', 'Apa kabar?', 'Ada yang bisa saya bantu?', 'Hai', 'Hello', 'Selamat siang', 'Selamat tinggal', 'Sampai jumpa nanti', 'Sampai jumpa', 'Terima kasih atas kunjungannya, sampai jumpa nanti', 'Sampai ketemu lain waktu', 'Teri

  saving_api.save_model(


Predicted Tags: ['salam' 'salam']
