In [1]:
import pandas as pd
import numpy as np


## Load Dataset

In [None]:
# Read the raw kidney-disease table from disk, then display one randomly drawn row.
df = pd.read_csv(filepath_or_buffer='kidney_disease.csv')
df.sample(n=1)

In [3]:
# Remove the 'id' and 'age' columns so neither is used as a model feature.
# errors='ignore' keeps this cell re-runnable: once the columns are gone, a second
# execution is a no-op instead of raising KeyError.
df = df.drop(columns=['id', 'age'], errors='ignore')

In [None]:
# Display one random row of the frame after the column drop.
df.sample()

In [None]:
# Print the per-column dtype and non-null count summary.
df.info()

In [9]:
# Partition the columns by dtype: float64 columns count as numerical,
# every other column is handled as categorical.
numerical = [name for name in df.columns if df[name].dtype == "float64"]
catgcols = [name for name in df.columns if name not in numerical]

# Fill missing values column by column: the median for numerical columns,
# the most frequent value (first mode) for the rest.
for col in df.columns:
    df[col] = df[col].fillna(
        df[col].median() if col in numerical else df[col].mode()[0]
    )

In [None]:
# Columns detected as float64 (these were median-imputed).
numerical

In [None]:
# All non-float64 columns (these were mode-imputed).
catgcols

In [None]:
# Distribution of the raw target labels, before any label cleanup.
df['classification'].value_counts()

In [16]:
# Collapse the tab-suffixed spelling of the 'ckd' label into the plain one.
df['classification'] = df['classification'].replace({'ckd\t': 'ckd'})

In [None]:
# Distribution of the target labels after the tab-suffixed 'ckd' variant was merged.
df['classification'].value_counts()

In [19]:
# Name of the target column, and every other column as a model feature.
dep_col = 'classification'
ind_col = [name for name in df.columns if name != dep_col]

In [None]:
# Class balance of the target column.
df[dep_col].value_counts()

## Transformasi Dataset

In [21]:
from sklearn.preprocessing import LabelEncoder

# Shared encoder instance; a later cell calls le.fit_transform on the target column.
le = LabelEncoder()

# One fitted encoder per categorical column. Re-fitting a single encoder for every
# column would discard each column's label -> code mapping; keeping them here lets
# encoders[col].classes_ be inspected later (codes follow the sorted order of the
# column's unique values).
# NOTE(review): every non-float64 column is label-encoded, including any
# numeric-looking columns that were read as text — confirm that is intended.
encoders = {}

for col in catgcols:
    encoders[col] = LabelEncoder()
    df[col] = encoders[col].fit_transform(df[col])

In [22]:
# Encode the target column with the shared encoder `le`.
# NOTE(review): 'classification' is not float64, so it appears to be part of catgcols
# and already integer-coded by the loop over catgcols in the previous cell. If so, this
# re-fit leaves the values unchanged and le.classes_ ends up holding integer codes
# rather than the original string labels — confirm.
df['classification'] = le.fit_transform(df['classification'])

In [23]:
# Feature matrix and label vector used for training and evaluation.
x = df.loc[:, ind_col]  # feature
y = df.loc[:, dep_col]  # label

In [None]:
# Preview the first rows of the frame after encoding.
df.head()

In [35]:
# Persist the processed frame without the row index.
# The output name 'kidney-disease.csv' (hyphen) differs from the input
# 'kidney_disease.csv' (underscore), so the raw file is not overwritten.
df.to_csv(path_or_buf='kidney-disease.csv', index=False)

In [26]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

## Membuat Model Decision Tree

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

# Entropy-based decision tree limited to depth 4; the remaining arguments are
# spelled out explicitly and the seed is fixed so the fitted tree is repeatable.
dtc = DecisionTreeClassifier(
    ccp_alpha=0.0, class_weight=None, criterion='entropy',
    max_depth=4, max_features=None, max_leaf_nodes=None,
    min_impurity_decrease=0.0, min_samples_leaf=1,
    min_samples_split=2, min_weight_fraction_leaf=0.0,
    random_state=42, splitter='best'
)

# fit() returns the estimator itself, so `model` and `dtc` name the same object.
model = dtc.fit(x_train, y_train)

# Predict once per split and reuse the result for every metric below,
# instead of calling predict() again for each report.
y_pred_train = dtc.predict(x_train)
y_pred_test = dtc.predict(x_test)

dtc_acc = accuracy_score(y_test, y_pred_test)

print(f"Akurasi data training adalah = {accuracy_score(y_train, y_pred_train)}")
print(f"Akurasi data testing adalah = {dtc_acc} \n")

confusion = confusion_matrix(y_test, y_pred_test)
print(f"Confusion Matrix : \n{confusion} \n")
# For a two-class target, ravel() yields the counts in the order
# (true 0, pred 0), (true 0, pred 1), (true 1, pred 0), (true 1, pred 1),
# i.e. the class coded 1 is treated as "positive".
# NOTE(review): with sorted label encoding that class would be 'notckd' — confirm
# before interpreting tp/fn as disease detections.
tn, fp, fn, tp = confusion.ravel()
print(f"Classification report : \n {classification_report(y_test, y_pred_test)}")

## Simulasi Model

In [None]:
# One patient record, given as already-encoded values in the same order as ind_col.
input_data = (80, 1.02, 1, 0, 1, 1, 0, 0, 121, 36, 1.2, 138, 4.4, 15.4, 32, 72, 34, 1, 4, 1, 0, 0, 0)

input_data_as_numpy_array = np.array(input_data)

# Shape (1, n_features): a single sample.
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# The model was fitted on a DataFrame, so predict from a frame carrying the same
# column names; passing a bare array triggers scikit-learn's
# "X does not have valid feature names" warning.
input_frame = pd.DataFrame(input_data_reshaped, columns=ind_col)

prediction = model.predict(input_frame)
print(prediction)

# LabelEncoder assigns codes in sorted label order, so 'ckd' -> 0 and 'notckd' -> 1:
# a prediction of 0 means the patient HAS chronic kidney disease.
if prediction[0] == 0:
    print('Pasien terkena penyakit ginjal')
else:
    print('Pasien tidak terkena penyakit ginjal')

## Visualisasi Pohon Keputusan

In [None]:
import matplotlib.pyplot as plt

# Large canvas so the node labels of the depth-4 tree stay readable.
fig = plt.figure(figsize=(25, 20))

# class_names must be listed in ascending order of the encoded classes.
# LabelEncoder sorts the labels, so code 0 is 'ckd' and code 1 is 'notckd'.
_ = tree.plot_tree(
    model,
    feature_names=ind_col,
    class_names=['ckd', 'notckd'],
    filled=True,
)