In [78]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**Library yang digunakan**

In [79]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix

import matplotlib.pyplot as plt
import seaborn as sns

import joblib

# **Persiapan data**

**Load Dataset**

In [80]:
df = pd.read_csv('/content/drive/MyDrive/CattleDiag/Dataset/dataset.csv')
weight = pd.read_csv('/content/drive/MyDrive/CattleDiag/Dataset/symptom_severity.csv')

**Missing Value**

In [81]:
# Mendapatkan nama kolom dari data frame
cols = df.columns
data = df[cols].values.flatten()

s = pd.Series(data)
s = s.str.strip()
s = s.values.reshape(df.shape)

df = pd.DataFrame(s, columns = df.columns)
# Mengisi data yang kosong dengan nilai 0
df = df.fillna(value=0)

**Mengisi data gejala (Symptom) dengan nilai bobotnya (weight)**

In [82]:
vals = df.values
symptoms = weight['Gejala'].unique()

for i in range(len(symptoms)):
    vals[vals == symptoms[i]] = weight[weight['Gejala'] == symptoms[i]]['Bobot'].values[0]

#df.head()

**Memisahkan antara data dan label**

In [83]:
(df[cols] == 0).all()

df['Penyakit'].value_counts()

df['Penyakit'].unique()

data = df.iloc[:,1:].values
labels = df['Penyakit'].values

df.head()

Unnamed: 0,Penyakit,Gejala 1,Gejala 2,Gejala 3,Gejala 4,Gejala 5,Gejala 6,Gejala 7,Gejala 8,Gejala 9,Gejala 10,Gejala 11,Gejala 12,Gejala 13,Gejala 14,Gejala 15,Gejala 16,Gejala 17,Gejala 18,Gejala 19
0,Pneumonia,2.657395,2.657395,4.647095,7.657395,2.886182,4.647095,7.657395,3.141945,0,0,0,0,0,0,0,0,0,0,0
1,Pneumonia,2.657395,2.657395,4.647095,7.657395,2.886182,4.647095,7.657395,0.0,0,0,0,0,0,0,0,0,0,0,0
2,Pneumonia,2.657395,2.657395,4.647095,7.657395,2.886182,4.647095,3.141945,0.0,0,0,0,0,0,0,0,0,0,0,0
3,Pneumonia,2.657395,2.657395,4.647095,7.657395,2.886182,7.657395,3.141945,0.0,0,0,0,0,0,0,0,0,0,0,0
4,Pneumonia,2.657395,2.657395,4.647095,7.657395,4.647095,7.657395,3.141945,0.0,0,0,0,0,0,0,0,0,0,0,0


**Membagi dataset**

In [84]:
x_train, x_test, y_train, y_test = train_test_split(data, labels, shuffle=True, train_size = 0.85)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

(3422, 19) (604, 19) (3422,) (604,)


# **Modeling**

**Modeling Support Vector Machine**

In [91]:
svc = SVC()
svc.fit(x_train, y_train)
joblib.dump(svc, "/content/drive/MyDrive/CattleDiag/svc.pkl")

pred_svc = svc.predict(x_test)
# print(pred_svc)

conf_mat = confusion_matrix(y_test, pred_svc)
print('SVC F1-score% =', f1_score(y_test, pred_svc, average='macro')*100, '|', 'SVC Accuracy% =', accuracy_score(y_test, pred_svc)*100)
#SVC F1-score% = 82.15626878016697 | SVC Accuracy% = 82.6158940397351

SVC F1-score% = 82.15626878016697 | SVC Accuracy% = 82.6158940397351


**Modeling Neural Networks**

Konversi vektor ke matriks kelas biner

In [86]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

encoder =  LabelEncoder()
y_train_e = encoder.fit_transform(y_train)
y_train_c = to_categorical(y_train_e, num_classes = 41)

y_test_e = encoder.fit_transform(y_test)
y_test_c = to_categorical(y_test_e, num_classes = 41)

joblib.dump(encoder, "/content/drive/MyDrive/CattleDiag/encoder.pkl")
np.save('/content/drive/MyDrive/CattleDiag/classes.npy', encoder.classes_)

In [87]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

# Initialising the RNN
model = Sequential()

# Adding the first LSTM layer and some Dropout regularisation
# Adding a second LSTM layer and some Dropout regularisation
# Adding a third LSTM layer and some Dropout regularisation
# Adding a fourth LSTM layer and some Dropout regularisation
# Adding the output layer
model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1],1)))
model.add(Dropout(0.2))

model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(units=50))
model.add(Dropout(0.2))

model.add(Dense(units=1))

In [88]:
#Building the RNN
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import datasets, layers, models

# pembuatan model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(41, activation='softmax')
])

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])

# normalisasi tipe data NumPy arrays
x_train = x_train.astype(np.float32)
y_train_c = y_train_c.astype(np.float32)
x_test = x_test.astype(np.float32)
y_test_c = y_test_c.astype(np.float32)

# konversi NumPy arrays ke TensorFlow tensors
x_train_tf = tf.convert_to_tensor(x_train, dtype=tf.float32)
y_train_c_tf = tf.convert_to_tensor(y_train_c, dtype=tf.float32)
x_test_tf = tf.convert_to_tensor(x_test, dtype=tf.float32)
y_test_c_tf = tf.convert_to_tensor(y_test_c, dtype=tf.float32)

# Melatih model dengan data yang telah diproses
epochs = 1000
batch_size = 32
model.fit(x_train_tf, y_train_c_tf, epochs=epochs, batch_size=batch_size)

# Menyimpan model jika diperlukan
model.save('/content/drive/MyDrive/CattleDiag/cattlediag_tf.h5')


Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

  saving_api.save_model(


In [89]:
score = model.evaluate(x_test, y_test_c, batch_size=batch_size)

y_pred = model.predict(x_test)

actual = np.argmax(y_test_c, axis=1)
predicted = np.argmax(y_pred, axis=1)

print(f"Actual: {actual}")
print(f"Predicted: {predicted}")

print('Neural Network F1-scores% =', f1_score(actual, predicted, average='macro') * 100, '|', 'Neural Network Accuracy% =', accuracy_score(actual, predicted) * 100)
print('Score Accuracy = ', score)
#Neural Network F1-scores% = 82.5431270879217 | Neural Network Accuracy% = 85.59602649006622


Actual: [ 7 14 10  0 27 10 11  1 25  8  2  4 12  5  7  0 23  5 21 18 20 23 16 19
 22 22  8  8 11 29  5 28 29 24 32 23  6 11 15 19 19 14  8 10  5 20  6 32
  9 28  1  3 20 17 31  1  3 28 28  8  1 20  3 15  9 32  4 19 25 17 28 21
 31 22 18 17 11 17 22 19  2 14  7 11 31 15 20  2 23 22 29  5 27 14 32 12
  2 32 25 10 27  4 19  2 20 19 23 26  2 14 13 32 18 25 12  1  9 32 13 24
 10 11 21  0 25 16  4  9  3 10 10 14 14 26  7 11  4 19 10 24  4 16 20 20
 31 29  2 18 13 26 26  5  4 25 22 23 13 10  1 28  2 18 32 16  3 23  7 11
 29  5  8 11  8 20 20 11 25 21 26 26 29 24 19 31  7 16  9 27 10 11 21  2
 11  1 23 31 26 19  4 29 14 12 20 12  6 20 32 31 20 30 13 14 24  9  6  5
  3 14  5 32  3 30 26 16 20  1 20  8  0  4 32 17 12 18  5 27 24  7 29  9
  1 15 22  5 25 32 30 19 14 20 18 20 14 22 27  8 30  1 14  6 28 23 28 23
  1  6  5 25  4 15 22 14 25  8 16 13 16 16 14 26 28 20 31 23  4 14 10 26
 29 19 16 17 10 16 19 18  1 18  2  9 21 29 31  8 16 24 32  6 20 31  0  4
  2 31 11 29 20  0  8 30  6  4  9 11 21  5 