In [6]:
import pandas as pd

# Read the animal disease records from the CSV file into a DataFrame.
file_path = 'animal_disease_dataset.csv'
data = pd.read_csv(file_path)

# Preview the first five rows to check the columns that were loaded.
data.head(n=5)

Unnamed: 0,Animal,Age,Temperature,Symptom 1,Symptom 2,Symptom 3,Disease
0,cow,3,103.1,depression,painless lumps,loss of appetite,pneumonia
1,buffalo,13,104.5,painless lumps,loss of appetite,depression,lumpy virus
2,sheep,1,100.5,depression,painless lumps,loss of appetite,lumpy virus
3,cow,14,100.3,loss of appetite,swelling in limb,crackling sound,blackleg
4,sheep,2,103.6,painless lumps,loss of appetite,depression,pneumonia


In [7]:
# Drop the 'Age' and 'Temperature' columns so only 'Animal', the symptom
# columns and 'Disease' remain.
# errors='ignore' keeps this cell idempotent: `data` is rebound below to the
# reduced frame, so on a re-run the two columns are already gone and a plain
# drop() would raise KeyError.
data_modified = data.drop(columns=['Age', 'Temperature'], errors='ignore')

# Continue with the reduced frame under the name `data` and preview it.
data = data_modified
data.head()

Unnamed: 0,Animal,Symptom 1,Symptom 2,Symptom 3,Disease
0,cow,depression,painless lumps,loss of appetite,pneumonia
1,buffalo,painless lumps,loss of appetite,depression,lumpy virus
2,sheep,depression,painless lumps,loss of appetite,lumpy virus
3,cow,loss of appetite,swelling in limb,crackling sound,blackleg
4,sheep,painless lumps,loss of appetite,depression,pneumonia


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Count the null entries in every column; a Series of per-column totals.
missing_values = data.isna().sum()

# Show the per-column null counts.
missing_values

Animal       0
Symptom 1    0
Symptom 2    0
Symptom 3    0
Disease      0
dtype: int64

In [9]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

# Separate the feature columns from the target label.
X = data.drop('Disease', axis=1)
y = data['Disease']

# One-hot encode the categorical feature columns; any other column is passed
# through unchanged.
# handle_unknown='ignore' makes transform() encode a category that was not
# seen during fit() as an all-zero group instead of raising. Without it, a
# value that only occurs in the test split (or in new data fed to the saved
# preprocessor) aborts the transform with a ValueError.
preprocessor = ColumnTransformer(
    transformers=[
        (
            'cat',
            OneHotEncoder(handle_unknown='ignore'),
            ['Animal', 'Symptom 1', 'Symptom 2', 'Symptom 3'],
        )
    ],
    remainder='passthrough'
)

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the encoder on the training rows only, then apply the same fitted
# encoding to the test rows so no test information reaches the fit.
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

# Display the shapes of the encoded train and test matrices.
X_train.shape, X_test.shape


((35022, 76), (8756, 76))

In [10]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Random Forest with 100 trees; the fixed random_state makes training repeatable.
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Fit on the encoded training rows.
model.fit(X_train, y_train)

# Predict labels for the held-out test rows.
y_pred = model.predict(X_test)

# Overall accuracy plus per-class precision / recall / F1.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# classification_report returns a preformatted multi-line string, so print it:
# showing it as part of a tuple renders the escaped repr ("\n" sequences)
# instead of a readable table.
print(f"Accuracy: {accuracy:.4f}")
print(report)


(0.8338282320694381,
 '                precision    recall  f1-score   support\n\n       anthrax       1.00      1.00      1.00      1972\n      blackleg       1.00      1.00      1.00      1963\nfoot and mouth       1.00      1.00      1.00      1963\n   lumpy virus       0.49      0.28      0.36      1443\n     pneumonia       0.49      0.71      0.58      1415\n\n      accuracy                           0.83      8756\n     macro avg       0.80      0.80      0.79      8756\n  weighted avg       0.83      0.83      0.83      8756\n')

In [11]:
import joblib

# Target .pkl files for the trained classifier and the fitted preprocessor.
model_filename = 'animal_disease_model.pkl'
preprocessor_filename = 'preprocessor.pkl'

# Save the model to its .pkl file.
joblib.dump(model, model_filename)

# Save the preprocessor to its .pkl file. The model was trained on this
# preprocessor's output, so new inputs need the same transform before predict().
joblib.dump(preprocessor, preprocessor_filename)


['preprocessor.pkl']