In [None]:
!pip install sastrawi

Collecting sastrawi
  Downloading Sastrawi-1.0.1-py2.py3-none-any.whl.metadata (909 bytes)
Downloading Sastrawi-1.0.1-py2.py3-none-any.whl (209 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/209.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.7/209.7 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sastrawi
Successfully installed sastrawi-1.0.1


In [None]:
import json
import pickle
import random
import string

import numpy as np
import pandas as pd
import tensorflow as tf
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import (TextVectorization, Embedding,
                                     Input, LSTM, Dropout, Dense)
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical

In [None]:
# Read the intents file; each row of the 'intents' column holds one intent record
# (the later cells read its 'tag', 'patterns' and 'responses' keys).
data = pd.read_json(path_or_buf='chatbot-intents-variasi-unik.json')
data

Unnamed: 0,intents
0,"{'tag': 'salam', 'patterns': ['Halo', 'Saya ma..."
1,"{'tag': 'nama', 'patterns': ['lu sape ?', 'nam..."
2,"{'tag': 'bye', 'patterns': ['Ceritakan tentang..."
3,"{'tag': 'konsultasi_dokter', 'patterns': ['Cer..."
4,"{'tag': 'pergerakan_bayi', 'patterns': ['Seber..."
5,"{'tag': 'kontraksi_palsu', 'patterns': ['Aku s..."
6,"{'tag': 'istirahat', 'patterns': ['Berapa bany..."
7,"{'tag': 'persalinan', 'patterns': ['Saya mau t..."
8,"{'tag': 'suplemen', 'patterns': ['Perlukah say..."
9,"{'tag': 'aktivitas_seksual', 'patterns': ['Apa..."


In [None]:
# Flatten the nested intents into parallel lists: one entry per example pattern,
# paired with the tag of the intent that pattern belongs to.
pattern_tag_pairs = [
    (pattern, intent['tag'])
    for intent in data['intents']
    for pattern in intent['patterns']
]
text_input = [pattern for pattern, _ in pattern_tag_pairs]
intents = [tag for _, tag in pattern_tag_pairs]

# One row per (pattern, tag) pair.
df = pd.DataFrame({'text_input': text_input, 'intents': intents})
df

Unnamed: 0,text_input,intents
0,Halo,salam
1,Saya mau tanya tentang salam,salam
2,Ceritakan tentang salam dong,salam
3,Hai,salam
4,Ping,salam
...,...,...
758,Gizi tambahan untuk ibu anemia apa aja?,makanan_tambahan_kek_anemia
759,Apa aja yang perlu diketahui tentang makanan?,makanan_tambahan_kek_anemia
760,Suplemen atau makanan tambahan KEK?,makanan_tambahan_kek_anemia
761,Makanan tambahan untuk ibu hamil KEK apa saja?,makanan_tambahan_kek_anemia


In [None]:
# Count how many training patterns each intent tag has, to check class balance.
df['intents'].value_counts()

Unnamed: 0_level_0,count
intents,Unnamed: 1_level_1
deteksi_keluhan,20
rekomendasi_nutrisi,20
salam,15
trimester_ketiga,15
postpartum,15
istirahat,15
suplemen,15
imunisasi,15
aktivitas_seksual,15
kaki_bengkak,15


In [None]:
# Data Preprocessing
# Build a single Sastrawi stemmer up front; clean_and_stem() reuses this instance
# for every sentence instead of creating a new one per call.
factory = StemmerFactory()
stemmer = factory.create_stemmer()

def clean_and_stem(text):
    """Normalise one sentence for the tokenizer.

    The text is lower-cased, every character listed in ``string.punctuation``
    is removed, and the result is passed through the module-level ``stemmer``.

    Args:
        text: The raw sentence (a ``str``).

    Returns:
        The stemmed, punctuation-free, lower-case sentence.
    """
    lowered = text.lower()
    # Drop all ASCII punctuation characters in a single translate pass.
    without_punctuation = lowered.translate(str.maketrans('', '', string.punctuation))
    return stemmer.stem(without_punctuation)

# Overwrite the raw patterns with their cleaned and stemmed form.
# NOTE(review): this replaces the column in place, so the original text is no longer
# available in `df` after this cell runs.
df['text_input'] = df['text_input'].apply(clean_and_stem)

In [None]:
# Inspect the frame after cleaning and stemming.
df

Unnamed: 0,text_input,intents
0,halo,salam
1,saya mau tanya tentang salam,salam
2,cerita tentang salam dong,salam
3,hai,salam
4,ping,salam
...,...,...
758,gizi tambah untuk ibu anemia apa aja,makanan_tambahan_kek_anemia
759,apa aja yang perlu tahu tentang makan,makanan_tambahan_kek_anemia
760,suplemen atau makan tambah kek,makanan_tambahan_kek_anemia
761,makan tambah untuk ibu hamil kek apa saja,makanan_tambahan_kek_anemia


In [None]:
# Map each intent tag to an integer class id and keep the ids both as a NumPy array
# (used for training) and as a column on the frame.
le = LabelEncoder()
y_encoded = le.fit_transform(df['intents'])
df['encoded'] = y_encoded
# One-hot version of the labels; the visible cells train on the integer ids instead.
y_onehot = to_categorical(y_encoded)

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Fit a tokenizer on the cleaned patterns and convert every pattern into an
# integer sequence, then pad the sequences into the feature matrix X.
# NOTE(review): 429 and 10 are repeated as bare literals in the model definition and in
# predict_tflite(); they must stay in sync if either value changes.
tokenizer = Tokenizer(num_words=429)
tokenizer.fit_on_texts(df['text_input'])
sequences = tokenizer.texts_to_sequences(df['text_input'])
# presumably pads/truncates every sequence to exactly 10 ids — confirm against the Keras docs
X = pad_sequences(sequences, maxlen=10)

In [None]:
# Persist the fitted tokenizer to tokenizer.json.
# NOTE(review): to_json() presumably already returns a JSON string, so json.dump() would
# store it as one quoted string; a reader would need json.load() before parsing — confirm.
tokenizer_json = tokenizer.to_json()

with open('tokenizer.json', 'w') as f:
    json.dump(tokenizer_json, f)

In [None]:
# NOTE(review): this cell repeats the previous cell verbatim and rewrites the same
# tokenizer.json file; it looks safe to delete.
tokenizer_json = tokenizer.to_json()

with open('tokenizer.json', 'w') as f:
    json.dump(tokenizer_json, f)

In [None]:
from tensorflow.keras import layers, models

# 1D-CNN intent classifier over padded token-id sequences.
# The input shape is declared with an explicit Input layer instead of Embedding's
# `input_length` argument, which newer Keras releases deprecate; declaring it up front
# also means the model is already built when summary() is called.
model = models.Sequential([
    layers.Input(shape=(10,)),                        # 10 = maxlen passed to pad_sequences
    layers.Embedding(input_dim=429, output_dim=128),  # 429 = Tokenizer num_words
    # NOTE(review): 258 filters looks like a typo for 256; kept as-is so the
    # architecture is unchanged — confirm.
    layers.Conv1D(258, kernel_size=3),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.GlobalMaxPooling1D(),
    layers.Dropout(0.5),
    layers.Dense(128, activation='relu'),
    layers.Dense(len(le.classes_), activation='softmax'),  # one output per intent tag
])

# Sparse loss: labels are fed as integer class ids, not one-hot vectors.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()




In [None]:
# 'balanced' weights, one per distinct class id found in y_encoded.
class_weights_arr = compute_class_weight(
    class_weight='balanced', classes=np.unique(y_encoded), y=y_encoded
)
# Keras expects a {class_id: weight} mapping; pair each class id with its weight.
class_weights = dict(zip(np.unique(y_encoded), class_weights_arr))

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the patterns for validation. A fixed random_state makes the split
# (and therefore the reported validation metrics) reproducible across kernel restarts.
# TODO(review): consider stratify=y_encoded so every intent appears in both splits;
# it requires at least two patterns per intent — confirm before enabling.
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)



In [None]:
# Wrap the integer class ids as int32 tensors; these are the labels passed to model.fit().
y_train_tensor = tf.convert_to_tensor(y_train, dtype=tf.int32)
y_val_tensor = tf.convert_to_tensor(y_val, dtype=tf.int32)

In [None]:
# EarlyStopping callback: watches val_loss with a patience of 10 epochs and is asked to
# restore the best weights, so the 200-epoch limit below is only an upper bound.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Training the model with EarlyStopping and class_weight
# (class_weights is the per-class mapping computed from y_encoded in an earlier cell).
history = model.fit(
    X_train, y_train_tensor,
    validation_data=(X_val, y_val_tensor),
    epochs=200,
    verbose=1,
    callbacks=[early_stop],
    class_weight=class_weights
)

Epoch 1/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 45ms/step - accuracy: 0.0264 - loss: 4.4152 - val_accuracy: 0.0523 - val_loss: 3.9303
Epoch 2/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.0843 - loss: 3.6860 - val_accuracy: 0.0719 - val_loss: 3.9233
Epoch 3/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.2096 - loss: 3.3547 - val_accuracy: 0.1176 - val_loss: 3.9038
Epoch 4/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.3124 - loss: 2.8038 - val_accuracy: 0.2484 - val_loss: 3.8689
Epoch 5/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.4233 - loss: 2.2944 - val_accuracy: 0.2941 - val_loss: 3.8093
Epoch 6/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.6212 - loss: 1.6705 - val_accuracy: 0.4183 - val_loss: 3.7372
Epoch 7/200
[1m20/20[0m [

In [None]:
# Convert the trained Keras model to TFLite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the TFLite model; a later cell reloads this file through tf.lite.Interpreter.
with open('chatbot_model.tflite', 'wb') as f:
    f.write(tflite_model)

Saved artifact at '/tmp/tmppp5yi8o1'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 10), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 51), dtype=tf.float32, name=None)
Captures:
  132381284095504: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132381284095120: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132381284093392: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132381284095696: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132381284091088: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132381284090128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132381284090704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132381283791696: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132381283794768: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132381283791888: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132381283791504: Tenso

In [None]:
import pickle

# Save the fitted LabelEncoder so predicted class ids can be mapped back to intent tags.
with open('label_encoder.pickle', 'wb') as le_file:
    pickle.dump(le, le_file)

In [None]:
# Reload the LabelEncoder from label_encoder.pickle. The context manager closes the file
# even if unpickling raises, which a manual open()/close() pair does not guarantee.
# NOTE: pickle.load can execute arbitrary code — only load files this notebook created.
with open("label_encoder.pickle", "rb") as le_file:
    le = pickle.load(le_file)

In [None]:
# Load the exported TFLite model and allocate its tensors before any inference call.
interpreter = tf.lite.Interpreter(model_path="chatbot_model.tflite")
interpreter.allocate_tensors()

In [None]:
# Fetch the input/output tensor descriptors; predict_tflite() reads their 'index' entries.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

In [None]:
# NOTE(review): this re-creates and re-fits the tokenizer with the same num_words and the
# same df['text_input'] as the earlier training cell, replacing that instance instead of
# loading the saved tokenizer.json — looks redundant; confirm before removing.
tokenizer = Tokenizer(num_words=429)
tokenizer.fit_on_texts(df['text_input'])

In [None]:
# Load the intents dataset from its JSON file.
# NOTE(review): `dataset` is only displayed in the next cell; chat() reads `data` instead.
dataset = pd.read_json('chatbot-intents-variasi-unik.json')

In [None]:
# Inspect the reloaded intents frame.
dataset

Unnamed: 0,intents
0,"{'tag': 'salam', 'patterns': ['Halo', 'Saya ma..."
1,"{'tag': 'nama', 'patterns': ['lu sape ?', 'nam..."
2,"{'tag': 'bye', 'patterns': ['Ceritakan tentang..."
3,"{'tag': 'konsultasi_dokter', 'patterns': ['Cer..."
4,"{'tag': 'pergerakan_bayi', 'patterns': ['Seber..."
5,"{'tag': 'kontraksi_palsu', 'patterns': ['Aku s..."
6,"{'tag': 'istirahat', 'patterns': ['Berapa bany..."
7,"{'tag': 'persalinan', 'patterns': ['Saya mau t..."
8,"{'tag': 'suplemen', 'patterns': ['Perlukah say..."
9,"{'tag': 'aktivitas_seksual', 'patterns': ['Apa..."


In [None]:
def predict_tflite(text):
    """Classify one sentence with the TFLite interpreter.

    Args:
        text: Input sentence. It is tokenized as-is, so callers are expected to
            have applied clean_and_stem() first (chat() does).

    Returns:
        A ``(label, probability)`` tuple: the intent tag decoded by the
        LabelEncoder and the highest score in the model output for this input.
    """
    # Turn the text into token ids, pad to length 10, and cast to float32 for the
    # interpreter's input tensor.
    token_ids = tokenizer.texts_to_sequences([text])
    model_input = pad_sequences(token_ids, maxlen=10).astype(np.float32)

    # Feed the input tensor and run one inference pass.
    interpreter.set_tensor(input_details[0]['index'], model_input)
    interpreter.invoke()

    # Only one sentence was fed in, so take the first (and only) row of scores.
    scores = interpreter.get_tensor(output_details[0]['index'])[0]
    best_class = scores.argmax()
    best_score = scores.max()

    # Use the loaded LabelEncoder to turn the numeric class id back into its tag.
    return le.inverse_transform([best_class])[0], best_score

In [None]:
def chat(threshold=0.1):
    """Run an interactive console chat loop until the 'bye' intent is predicted.

    Each user line is cleaned with clean_and_stem(), classified with
    predict_tflite(), and answered with a random response of the predicted
    intent taken from ``data['intents']``.

    Args:
        threshold: Minimum top probability required to answer from the intents
            file; below it the bot prints a fallback message instead.
            Defaults to 0.1, the value previously hard-coded.

    Returns:
        None. All interaction happens through input() and print().
    """
    fallback = "Bot : Maaf kak, untuk pertanyaan tersebut baiknya ditanyakan langsung ke Dokter Spesialis."

    print("Anda akan dihubungkan ke bot kami, mohon ditunggu...")
    done = False  # named `done` rather than `exit` to avoid shadowing the builtin
    while not done:
        inp = input("Anda : ")
        inp_clean = clean_and_stem(inp)  # same preprocessing as the training data

        result, prob_max = predict_tflite(inp_clean)
        print(f"Prediksi kelas: {result}, Probabilitas: {prob_max}")

        # Low-confidence prediction: do not guess, refer the user to a specialist.
        if float(prob_max) < threshold:
            print(fallback)
            continue

        # Look up the responses for the predicted tag. Reset on every turn so a tag with
        # no match cannot reuse the previous turn's responses (or hit an unbound name).
        responses = None
        for tg in data['intents']:
            if tg['tag'] == result:
                responses = tg['responses']

        if result == 'bye':
            done = True
            print("END CHAT")

        if responses:
            print(f"Bot : {random.choice(responses)}")
        else:
            # Predicted tag has no (or empty) responses in the intents file.
            print(fallback)

# Start the conversation (blocks on input() until the 'bye' intent is predicted).
chat()

Anda akan dihubungkan ke bot kami, mohon ditunggu...
Anda : hai
Prediksi kelas: salam, Probabilitas: 0.9001975655555725
Bot : Halo, ada yang bisa saya bantu?
Anda : apa itu stunting?
Prediksi kelas: stunting_pengertian, Probabilitas: 0.6272615194320679
Bot : Stunting menunjukkan bahwa seorang anak mengalami hambatan pertumbuhan fisik dan juga dapat memengaruhi perkembangan otaknya.
Anda : apa ciri-ciri stunting?
Prediksi kelas: stunting_pengertian, Probabilitas: 0.39758649468421936
Bot : Stunting adalah pertumbuhan tubuh yang terhambat karena gizi buruk, infeksi berulang, dan kurangnya perawatan selama masa awal kehidupan anak.
Anda : apa itu ibu hamil kek?
Prediksi kelas: ibu_hamil_kek, Probabilitas: 0.847233772277832
Bot : Kekurangan Energi Kronis (KEK) pada ibu hamil adalah kondisi di mana ibu mengalami kekurangan asupan energi dan protein secara terus-menerus, ditandai dengan Lingkar Lengan Atas (LILA) < 23,5 cm.
Anda : ok
Prediksi kelas: bye, Probabilitas: 0.41025954484939575
END 

# Interface untuk mobile

In [None]:
import android.app.Activity;
import android.content.res.AssetFileDescriptor;
import android.graphics.Bitmap;

import org.tensorflow.lite.DataType;
import org.tensorflow.lite.Interpreter;
import org.tensorflow.lite.support.common.FileUtil;
import org.tensorflow.lite.support.common.TensorOperator;
import org.tensorflow.lite.support.common.TensorProcessor;
import org.tensorflow.lite.support.common.ops.NormalizeOp;
import org.tensorflow.lite.support.image.ImageProcessor;
import org.tensorflow.lite.support.image.TensorImage;
import org.tensorflow.lite.support.image.ops.ResizeOp;
import org.tensorflow.lite.support.image.ops.ResizeWithCropOrPadOp;
import org.tensorflow.lite.support.label.TensorLabel;
import org.tensorflow.lite.support.tensorbuffer.TensorBuffer;

import java.io.FileInputStream;
import java.io.IOException;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.Collections;
import java.util.List;
import java.util.Map;

/**
 * Helper that loads a TensorFlow Lite image-classification model from the app's assets,
 * runs it on a {@link Bitmap}, and maps the output scores to labels.
 *
 * <p>Expected call order: {@link #init()}, then {@link #classifyImage(Bitmap)}, then
 * {@link #showresult()}.
 *
 * <p>NOTE(review): this class loads "vegs.tflite" / "vegs.txt" and feeds image tensors,
 * whereas the notebook above exports "chatbot_model.tflite" with a token-id input —
 * confirm this is the intended mobile interface for the chatbot model.
 */
public class TFLiteHelper {

    // Input image size expected by the model; filled in from the input tensor shape.
    private int imageSizeX;
    private int imageSizeY;

    private List<String> labels;
    private Interpreter tflite;

    private MappedByteBuffer tfliteModel;
    private TensorImage inputImageBuffer;
    private TensorBuffer outputProbabilityBuffer;
    private TensorProcessor probabilityProcessor;

    // Normalisation applied to the input image: (pixel - mean) / std.
    private static final float IMAGE_MEAN = 0.0f;
    private static final float IMAGE_STD = 1.0f;

    // Normalisation applied to the output scores before they are labelled.
    private static final float PROBABILITY_MEAN = 0.0f;
    private static final float PROBABILITY_STD = 255.0f;

    private Activity context;

    TFLiteHelper(Activity context) {
        this.context = context;
    }

    // ---- TensorFlow Lite Interpreter initialisation ----

    /**
     * Creates the interpreter from the bundled model file.
     * Failures are printed and leave the interpreter unset.
     */
    void init() {
        try {
            Interpreter.Options opt = new Interpreter.Options();
            tflite = new Interpreter(loadmodelfile(context), opt);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    // ----------------------------------------------------

    // ---- Image preprocessing ----

    /**
     * Loads the bitmap into the input buffer, centre crops/pads it to a square,
     * resizes it to the model's input size and normalises it.
     */
    private TensorImage loadImage(final Bitmap bitmap) {
        // Loads bitmap into a TensorImage.
        inputImageBuffer.load(bitmap);

        // Creates processor for the TensorImage.
        int cropSize = Math.min(bitmap.getWidth(), bitmap.getHeight());
        // TODO(b/143564309): Fuse ops inside ImageProcessor.
        ImageProcessor imageProcessor =
                new ImageProcessor.Builder()
                        .add(new ResizeWithCropOrPadOp(cropSize, cropSize))
                        .add(new ResizeOp(imageSizeX, imageSizeY, ResizeOp.ResizeMethod.NEAREST_NEIGHBOR))
                        .add(getPreprocessNormalizeOp())
                        .build();
        return imageProcessor.process(inputImageBuffer);
    }
    // ----------------------------------------------------

    // ---- TFLite model loading and invocation ----

    /**
     * Memory-maps the model file from the activity's assets.
     *
     * <p>The descriptor and stream are closed before returning; a mapping, once
     * established, does not depend on the channel that created it.
     *
     * @throws IOException if the asset cannot be opened or mapped
     */
    private MappedByteBuffer loadmodelfile(Activity activity) throws IOException {
        String MODEL_NAME = "vegs.tflite";
        try (AssetFileDescriptor fileDescriptor = activity.getAssets().openFd(MODEL_NAME);
             FileInputStream inputStream = new FileInputStream(fileDescriptor.getFileDescriptor())) {
            FileChannel fileChannel = inputStream.getChannel();
            long startoffset = fileDescriptor.getStartOffset();
            long declaredLength = fileDescriptor.getDeclaredLength();
            return fileChannel.map(FileChannel.MapMode.READ_ONLY, startoffset, declaredLength);
        }
    }

    /**
     * Runs the model on the given bitmap and stores the raw scores in the output buffer;
     * call {@link #showresult()} afterwards to obtain the label.
     * Requires a successful {@link #init()}.
     */
    void classifyImage(Bitmap bitmap){
        int imageTensorIndex = 0;
        int[] imageShape = tflite.getInputTensor(imageTensorIndex).shape(); // {1, height, width, 3}
        imageSizeY = imageShape[1];
        imageSizeX = imageShape[2];
        DataType imageDataType = tflite.getInputTensor(imageTensorIndex).dataType();

        int probabilityTensorIndex = 0;
        int[] probabilityShape =
                tflite.getOutputTensor(probabilityTensorIndex).shape(); // {1, NUM_CLASSES}
        DataType probabilityDataType = tflite.getOutputTensor(probabilityTensorIndex).dataType();

        inputImageBuffer = new TensorImage(imageDataType);
        outputProbabilityBuffer = TensorBuffer.createFixedSize(probabilityShape, probabilityDataType);
        probabilityProcessor = new TensorProcessor.Builder().add(getPostprocessNormalizeOp()).build();

        inputImageBuffer = loadImage(bitmap);

        tflite.run(inputImageBuffer.getBuffer(),outputProbabilityBuffer.getBuffer().rewind());

    }

    private TensorOperator getPreprocessNormalizeOp() {
        return new NormalizeOp(IMAGE_MEAN, IMAGE_STD);
    }
    // ----------------------------------------------------

    // ---- Postprocessing ----

    /**
     * Returns the label with the highest score from the last {@link #classifyImage(Bitmap)}
     * call.
     *
     * @return the best label, or {@code null} if no classification has been run yet or
     *         the label file cannot be loaded
     */
    public String showresult() {
        if (probabilityProcessor == null || outputProbabilityBuffer == null) {
            // classifyImage() has not produced any output yet; nothing to label.
            return null;
        }
        try {
            labels = FileUtil.loadLabels(context, "vegs.txt");
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
        Map<String, Float> labeledProbability =
                new TensorLabel(labels, probabilityProcessor.process(outputProbabilityBuffer))
                        .getMapWithFloatValue();
        float maxValueInMap = (Collections.max(labeledProbability.values()));
        String result = null;
        // If several labels share the maximum score, the last one iterated wins.
        for (Map.Entry<String, Float> entry : labeledProbability.entrySet()) {
            if (entry.getValue() == maxValueInMap) {
                result = entry.getKey();
            }
        }

        return result;
    }

    private TensorOperator getPostprocessNormalizeOp() {
        return new NormalizeOp(PROBABILITY_MEAN, PROBABILITY_STD);
    }
    // ----------------------------------------------------

}