<a href="https://colab.research.google.com/github/bryanbayup/Machine-Learning/blob/main/PetpointWithGeneralConver.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install Sastrawi

Collecting Sastrawi
  Downloading Sastrawi-1.0.1-py2.py3-none-any.whl.metadata (909 bytes)
Downloading Sastrawi-1.0.1-py2.py3-none-any.whl (209 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.7/209.7 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Sastrawi
Successfully installed Sastrawi-1.0.1


Membaca Dataset dan Preprocessing Teks

In [17]:
import re
import string
import pandas as pd
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory

# Load both CSV files from the working directory:
#   - disease_classification.csv -> df_disease
#   - general_conversation.csv   -> df_conversation
df_disease, df_conversation = (
    pd.read_csv('disease_classification.csv'),
    pd.read_csv('general_conversation.csv'),
)

In [18]:
def clean_text(text):
    """Normalize a raw string before stop-word removal and vectorization.

    The text is lower-cased, digits and ASCII punctuation
    (``string.punctuation``) are removed, and every run of whitespace is
    collapsed to a single space with leading/trailing whitespace dropped.

    Args:
        text (str): Raw input text.

    Returns:
        str: The cleaned text.
    """
    text = text.lower()
    text = re.sub(r'\d+', '', text)  # drop digits
    text = text.translate(str.maketrans('', '', string.punctuation))  # drop punctuation
    # Deleting digits/punctuation can leave consecutive spaces behind (e.g.
    # "a , b" -> "a  b"). strip() alone only trims the ends, so collapse
    # whitespace runs to keep cleaned strings comparable for exact matching.
    return ' '.join(text.split())

def remove_stopwords(text):
    """Remove stop words from ``text`` using Sastrawi's stop-word remover.

    The remover is built once by ``StopWordRemoverFactory`` on the first call
    and cached on the function object. This function is applied row by row to
    whole DataFrame columns, so rebuilding the factory for every row would
    repeat the same setup work each time.

    Args:
        text (str): Text to filter (normally already passed through clean_text).

    Returns:
        str: ``text`` with stop words removed.
    """
    remover = getattr(remove_stopwords, "_remover", None)
    if remover is None:
        remover = StopWordRemoverFactory().create_stop_word_remover()
        remove_stopwords._remover = remover
    return remover.remove(text)

In [19]:
# Add cleaned copies (clean_text followed by remove_stopwords) of the text
# columns of both datasets; the raw columns are left in place.
df_disease = df_disease.assign(
    Cleaned_Gejala=lambda frame: frame['Gejala'].apply(clean_text).apply(remove_stopwords),
    Cleaned_Penanganan=lambda frame: frame['Penanganan Pertama'].apply(clean_text).apply(remove_stopwords),
)

df_conversation = df_conversation.assign(
    Cleaned_Input=lambda frame: frame['Input'].apply(clean_text).apply(remove_stopwords),
    Cleaned_Output=lambda frame: frame['Output'].apply(clean_text).apply(remove_stopwords),
)

In [20]:
from sklearn.preprocessing import LabelEncoder

# Fit one encoder per categorical column, then store the integer codes
# next to the original names.
le_disease = LabelEncoder().fit(df_disease['Nama Penyakit'])
le_animal = LabelEncoder().fit(df_disease['Nama Hewan'])

df_disease['Label_Penyakit'] = le_disease.transform(df_disease['Nama Penyakit'])
df_disease['Label_Hewan'] = le_animal.transform(df_disease['Nama Hewan'])

In [21]:
from sklearn.preprocessing import LabelEncoder

# Encode every distinct response text as an integer class label.
le_output = LabelEncoder()
encoded_output = le_output.fit_transform(df_conversation['Output'])
df_conversation['Label_Output'] = encoded_output

# Show each response text next to the integer label assigned to it.
label_mapping = {
    response_text: label
    for response_text, label in zip(le_output.classes_, le_output.transform(le_output.classes_))
}
print("Label Mapping:", label_mapping)

Label Mapping: {'Beberapa gejala umum termasuk demam, muntah, diare, kelelahan, dan perubahan perilaku. Apakah ada yang lain yang bisa saya bantu?': 0, 'Coba beri makanan yang lembut atau basah, pastikan tidak ada masalah kesehatan, dan konsultasikan dengan dokter hewan jika masalah berlanjut. Apakah ada yang lain yang bisa saya bantu?': 1, 'Gejala dehidrasi pada anjing meliputi mata cekung, gusi kering, penurunan elastisitas kulit, dan lemas. Apakah ada yang lain yang bisa saya bantu?': 2, 'Gejala rabies pada anjing termasuk perubahan perilaku, agresi, kejang, dan kesulitan bernafas. Apakah ada yang lain yang bisa saya bantu?': 3, 'Hai! Apa kabar?': 4, 'Halo! Ada yang bisa saya bantu?': 5, 'Isolasi kucing yang terinfeksi, berikan makanan lunak, bersihkan luka di mulut, dan konsultasikan dengan dokter hewan untuk perawatan medis. Apakah ada yang lain yang bisa saya bantu?': 6, 'Istirahat yang cukup, berikan cairan yang cukup, dan konsultasikan dengan dokter hewan jika batuk berlangsung

In [26]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Target: the encoded disease label of every row.
y_disease = df_disease['Label_Penyakit']

# Corpus: the cleaned symptom description of every row.
all_text1 = list(df_disease['Cleaned_Gejala'])

# Learn the TF-IDF vocabulary on the symptom corpus and vectorize it.
vectorizer = TfidfVectorizer()
X_disease = vectorizer.fit_transform(all_text1)

In [27]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Corpus: the cleaned user inputs of the conversation dataset.
all_text2 = df_conversation['Cleaned_Input'].tolist()

# Use a dedicated name for the conversation vectorizer. Re-binding the global
# `vectorizer` here would discard the vectorizer fitted on the symptom text, and
# predict_disease() would then feed model1 features built from the wrong
# vocabulary (different feature count than model1 was trained on).
vectorizer_conversation = TfidfVectorizer()
X_conversation = vectorizer_conversation.fit_transform(all_text2)

# Target: the encoded response label of every row.
y_conversation = df_conversation['Label_Output']

In [28]:
from sklearn.model_selection import train_test_split

# Split the disease features and labels into training and test sets:
# 20% of the rows are held out (test_size=0.2) and random_state=42 makes the
# split repeatable.
X_train_d, X_test_d, y_train_d, y_test_d = train_test_split(
    X_disease, y_disease, test_size=0.2, random_state=42)

In [29]:
# Split the conversation features and labels into training and test sets:
# 20% of the rows are held out (test_size=0.2) and random_state=42 makes the
# split repeatable.
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(
    X_conversation, y_conversation, test_size=0.2, random_state=42)

In [35]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Seed Python, NumPy and TensorFlow so weight initialisation and dropout are
# repeatable across Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Convert the sparse TF-IDF matrices to dense arrays before passing them to Keras.
X_train_d_dense = X_train_d.toarray()
X_test_d_dense = X_test_d.toarray()

# Number of TF-IDF features = width of the network input.
input_dim1 = X_train_d_dense.shape[1]

# Model 1: disease classifier (one softmax unit per encoded disease).
# The input shape is declared with an explicit Input object instead of
# `input_shape=` on the first Dense layer, which Keras warns against.
model1 = Sequential()
model1.add(tf.keras.Input(shape=(input_dim1,)))
model1.add(Dense(128, activation='relu'))
model1.add(Dropout(0.5))
model1.add(Dense(64, activation='relu'))
model1.add(Dense(len(le_disease.classes_), activation='softmax'))

# Integer class labels -> sparse categorical cross-entropy.
model1.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Print the layer/parameter summary of model 1.
model1.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [36]:
# Training hyper-parameters, kept as module-level names.
epochs, batch_size = 50, 64

# Train the disease classifier; the held-out split is passed as validation data
# so per-epoch validation metrics are reported.
history = model1.fit(
    x=X_train_d_dense,
    y=y_train_d,
    validation_data=(X_test_d_dense, y_test_d),
    batch_size=batch_size,
    epochs=epochs,
)

Epoch 1/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 132ms/step - accuracy: 0.0914 - loss: 2.2843 - val_accuracy: 0.2500 - val_loss: 2.2061
Epoch 2/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.1871 - loss: 2.2215 - val_accuracy: 0.7750 - val_loss: 2.1335
Epoch 3/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.3539 - loss: 2.1488 - val_accuracy: 1.0000 - val_loss: 2.0558
Epoch 4/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.6281 - loss: 2.0686 - val_accuracy: 1.0000 - val_loss: 1.9716
Epoch 5/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.8152 - loss: 1.9822 - val_accuracy: 1.0000 - val_loss: 1.8796
Epoch 6/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.9363 - loss: 1.8886 - val_accuracy: 1.0000 - val_loss: 1.7776
Epoch 7/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━

In [37]:
# Seed Python, NumPy and TensorFlow so weight initialisation and dropout are
# repeatable across Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Convert the sparse TF-IDF matrices to dense arrays before passing them to Keras.
X_train_c_dense = X_train_c.toarray()
X_test_c_dense = X_test_c.toarray()

# Number of TF-IDF features = width of the network input.
input_dim2 = X_train_c_dense.shape[1]

# Model 2: conversation classifier (one softmax unit per encoded response).
# The input shape is declared with an explicit Input object instead of
# `input_shape=` on the first Dense layer, which Keras warns against.
model2 = Sequential()
model2.add(tf.keras.Input(shape=(input_dim2,)))
model2.add(Dense(128, activation='relu'))
model2.add(Dropout(0.5))
model2.add(Dense(64, activation='relu'))
model2.add(Dense(len(le_output.classes_), activation='softmax'))

# Integer class labels -> sparse categorical cross-entropy.
model2.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Print the layer/parameter summary of model 2.
model2.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [38]:
# Train the conversation classifier with the shared `epochs` / `batch_size`
# settings; the held-out split is passed as validation data.
history2 = model2.fit(
    x=X_train_c_dense,
    y=y_train_c,
    validation_data=(X_test_c_dense, y_test_c),
    batch_size=batch_size,
    epochs=epochs,
)

Epoch 1/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 350ms/step - accuracy: 0.0000e+00 - loss: 3.6777 - val_accuracy: 0.0500 - val_loss: 3.6437
Epoch 2/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - accuracy: 0.0000e+00 - loss: 3.6582 - val_accuracy: 0.0500 - val_loss: 3.6281
Epoch 3/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.0573 - loss: 3.6304 - val_accuracy: 0.0500 - val_loss: 3.6116
Epoch 4/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.0354 - loss: 3.6087 - val_accuracy: 0.0500 - val_loss: 3.5950
Epoch 5/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.1198 - loss: 3.5867 - val_accuracy: 0.1500 - val_loss: 3.5788
Epoch 6/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.1469 - loss: 3.5739 - val_accuracy: 0.2000 - val_loss: 3.5632
Epoch 7/50
[1m2/2[0m [32m━━━━━━━━━

In [39]:
# Evaluate model 1 on the held-out disease split and print its accuracy as a
# percentage. NOTE(review): this is the same split that was passed as
# validation_data during training, so it is not an independent test estimate.
loss, accuracy = model1.evaluate(X_test_d_dense, y_test_d)
print(f'Akurasi model1: {accuracy * 100:.2f}%')

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 1.0000 - loss: 0.0041 
Akurasi model1: 100.00%


In [41]:
# Evaluate model 2 on the held-out conversation split and print its accuracy as
# a percentage. NOTE(review): this is the same split that was passed as
# validation_data during training, so it is not an independent test estimate.
# `loss` and `accuracy` are re-bound here and no longer hold model 1's values.
loss, accuracy = model2.evaluate(X_test_c_dense, y_test_c)
print(f'Akurasi model2: {accuracy * 100:.2f}%')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - accuracy: 0.7000 - loss: 2.0886
Akurasi model2: 70.00%


In [44]:
# Lookup table: cleaned user input -> original (uncleaned) response text.
# When the same cleaned input occurs more than once, the last row wins.
conversation_dict = {
    cleaned_input: output
    for cleaned_input, output in zip(df_conversation['Cleaned_Input'], df_conversation['Output'])
}

def get_general_response(user_input):
    """Look up a canned reply for a general-conversation message.

    The message is preprocessed with clean_text and remove_stopwords, then
    matched exactly against the keys of ``conversation_dict``.

    Args:
        user_input (str): Raw message typed by the user.

    Returns:
        The stored response, or None when there is no exact match.
    """
    lookup_key = remove_stopwords(clean_text(user_input))
    return conversation_dict.get(lookup_key)

In [49]:
def detect_animal_type(user_input):
    """Return the supported animal mentioned in ``user_input``, if any.

    The check is a case-insensitive substring search. 'kucing' is tested
    before 'anjing', so it wins when both words appear.

    Args:
        user_input (str): Raw message typed by the user.

    Returns:
        str | None: 'kucing' or 'anjing', or None when neither is mentioned.
    """
    lowered = user_input.lower()
    return next((name for name in ('kucing', 'anjing') if name in lowered), None)

In [51]:
def ask_for_animal_type(max_attempts=1):
    """Ask the user on stdin which animal they own.

    The answer is stripped and lower-cased before validation, so input such as
    " Kucing " is accepted. After every unsupported answer an apology is
    printed. The original ``while True`` loop returned on both branches and so
    never re-prompted; the retry count is now explicit, and the default of one
    attempt keeps the previous single-prompt behaviour.

    Args:
        max_attempts (int): How many times to prompt before giving up.
            Values below 1 are treated as 1.

    Returns:
        str | None: 'kucing' or 'anjing', or None when no supported animal was
        entered within ``max_attempts`` prompts.
    """
    supported_animals = ('kucing', 'anjing')
    for _ in range(max(1, max_attempts)):
        answer = input("Chatbot: Hewan apa yang Anda miliki? (kucing/anjing): ").strip().lower()
        if answer in supported_animals:
            return answer
        print("Chatbot: Maaf, saya hanya bisa memberikan saran untuk kucing atau anjing.")
    return None

In [56]:
def predict_disease(symptom_text, animal_type):
    """Predict the disease described by ``symptom_text`` for the given animal.

    Args:
        symptom_text (str): Raw symptom description typed by the user.
        animal_type (str): 'kucing' or 'anjing'.

    Returns:
        tuple: ``(disease_name, df_animal)`` where ``df_animal`` is the subset
        of ``df_disease`` for that animal, or ``(None, None)`` when the animal
        is unsupported, has no rows in the dataset, or the predicted disease
        is not listed for that animal.
    """
    if animal_type not in ['kucing', 'anjing']:
        return None, None
    # Restrict the dataset to the selected animal.
    df_animal = df_disease[df_disease['Nama Hewan'].str.lower() == animal_type]
    if df_animal.empty:
        return None, None
    # Apply the same preprocessing that was applied to the training text.
    processed_text = clean_text(symptom_text)
    processed_text = remove_stopwords(processed_text)
    # `vectorizer` must be the TF-IDF vectorizer fitted on the symptom corpus,
    # otherwise the feature count will not match model1's input layer.
    vectorized_text = vectorizer.transform([processed_text]).toarray()
    # The trained disease classifier is `model1`; the previous reference to an
    # undefined name `model` raised NameError on a fresh kernel. verbose=0 keeps
    # the Keras progress bar out of the chat output.
    prediction = model1.predict(vectorized_text, verbose=0)
    predicted_label = prediction.argmax(axis=-1)[0]
    disease_name = le_disease.inverse_transform([predicted_label])[0]
    # Only accept the prediction if that disease exists for the selected animal.
    if disease_name not in df_animal['Nama Penyakit'].values:
        return None, None
    return disease_name, df_animal

In [52]:
def get_first_aid(disease_name, df_animal):
    """Return the first-aid text stored for ``disease_name``.

    Args:
        disease_name: Value to match against the 'Nama Penyakit' column.
        df_animal (pandas.DataFrame): Frame with 'Nama Penyakit' and
            'Penanganan Pertama' columns.

    Returns:
        The 'Penanganan Pertama' value of the first matching row.

    Raises:
        IndexError: If no row matches ``disease_name``.
    """
    is_match = df_animal['Nama Penyakit'] == disease_name
    treatments = df_animal.loc[is_match, 'Penanganan Pertama']
    return treatments.iloc[0]

In [45]:
def is_symptom_input(user_input):
    """Tell whether ``user_input`` mentions one of the known symptom keywords.

    The check is a case-insensitive substring search against a fixed list of
    common symptom words.

    Args:
        user_input (str): Raw message typed by the user.

    Returns:
        bool: True if at least one keyword occurs in the message.
    """
    lowered = user_input.lower()
    for keyword in ('muntah', 'demam', 'batuk', 'diare', 'kejang', 'lemas', 'pilek', 'bersin', 'luka'):
        if keyword in lowered:
            return True
    return False

def chatbot_response(user_input):
    """Produce the chatbot's reply to one user message.

    Messages containing a symptom keyword are routed to the disease
    classifier. The animal is taken from the message, or asked for
    interactively when the message does not name one. All other messages are
    answered from the general-conversation lookup.

    Args:
        user_input (str): Raw message typed by the user.

    Returns:
        str: The reply text (a diagnosis with first aid, a canned response, or
        an apology when nothing matches).
    """
    # General conversation: exact lookup with a fallback apology.
    if not is_symptom_input(user_input):
        return get_general_response(user_input) or "Maaf, saya tidak mengerti. Bisa dijelaskan lebih lanjut?"

    # Symptom path: identify the animal, asking the user only if necessary.
    animal_type = detect_animal_type(user_input) or ask_for_animal_type()
    if not animal_type:
        return "Maaf, saya hanya bisa memberikan saran untuk kucing atau anjing."

    disease_name, df_animal = predict_disease(user_input, animal_type)
    if not disease_name:
        return f"Maaf, saya tidak dapat menemukan penyakit yang sesuai untuk gejala tersebut pada {animal_type}."

    first_aid = get_first_aid(disease_name, df_animal)
    return f"Sepertinya {animal_type} Anda sakit {disease_name}. {first_aid}"

In [None]:
# Interactive chat loop: answer each message until the user types an exit word.
while True:
    user_input = input("Anda: ")
    if user_input.lower() not in ['exit', 'quit', 'keluar']:
        response = chatbot_response(user_input)
        print("Chatbot:", response)
        continue
    print("Chatbot: Terima kasih, semoga membantu!")
    break

Anda: hewan saya sakit
Chatbot: Maaf, saya tidak mengerti. Bisa dijelaskan lebih lanjut?
Anda: peliharaan saya sakit
Chatbot: Maaf, saya tidak mengerti. Bisa dijelaskan lebih lanjut?
Anda: gajah saya mual
Chatbot: Maaf, saya tidak mengerti. Bisa dijelaskan lebih lanjut?
