# Explorasi Decision Tree Classifier

In [1]:
# Load libraries
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.model_selection import cross_validate
import pickle

In [2]:
# Load the data
breast_cancer = load_breast_cancer()

# split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(breast_cancer.data, breast_cancer.target, test_size=0.2, random_state=42)

In [3]:
# Make an object and fit the data
DTC = DecisionTreeClassifier()
DTC.fit(X_train, y_train)

DecisionTreeClassifier()

In [4]:
# Save the model
with open('dtc_model.pkl', 'wb') as f:
    pickle.dump(DTC, f)

In [5]:
# Load the model
with open('dtc_model.pkl', 'rb') as f:
    DTC = pickle.load(f)

In [6]:
# Predict result
y_pred = DTC.predict(X_test)

In [7]:
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.93      0.91      0.92        43
           1       0.94      0.96      0.95        71

    accuracy                           0.94       114
   macro avg       0.94      0.93      0.93       114
weighted avg       0.94      0.94      0.94       114



In [8]:
# Hasil Tree
dtree = export_text(DTC, feature_names=list(breast_cancer["feature_names"]))
print(dtree)

|--- mean concave points <= 0.05
|   |--- worst radius <= 16.83
|   |   |--- area error <= 48.70
|   |   |   |--- worst smoothness <= 0.18
|   |   |   |   |--- smoothness error <= 0.00
|   |   |   |   |   |--- mean texture <= 19.90
|   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- mean texture >  19.90
|   |   |   |   |   |   |--- class: 0
|   |   |   |   |--- smoothness error >  0.00
|   |   |   |   |   |--- worst texture <= 33.35
|   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- worst texture >  33.35
|   |   |   |   |   |   |--- worst texture <= 33.56
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- worst texture >  33.56
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |--- worst smoothness >  0.18
|   |   |   |   |--- class: 0
|   |   |--- area error >  48.70
|   |   |   |--- symmetry error <= 0.02
|   |   |   |   |--- class: 0
|   |   |   |--- symmetry error >  0.02
|   |   |   |   |--- class: 1
|   |--- worst radius >  16.83
|

In [9]:
cv_result = cross_validate(DecisionTreeClassifier(), X_train, y_train, cv=10, return_estimator=True)

In [10]:
DTC_CV = cv_result["estimator"][cv_result["test_score"].argmax()]

In [11]:
y_pred_cv = DTC_CV.predict(X_test)

In [12]:
report = classification_report(y_test, y_pred_cv)
print(report)

              precision    recall  f1-score   support

           0       0.91      0.95      0.93        43
           1       0.97      0.94      0.96        71

    accuracy                           0.95       114
   macro avg       0.94      0.95      0.94       114
weighted avg       0.95      0.95      0.95       114

