In [6]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

In [7]:
# Load the scikit-learn breast cancer dataset and unpack its feature
# matrix (X) and target vector (y) in one step.
cancer_data = load_breast_cancer()
X, y = cancer_data.data, cancer_data.target

In [8]:
# Peek at the feature matrix: as the cell's last expression, it is echoed
# as the cell output.
X

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [9]:

# Single seed shared by the data split and the estimator so the whole
# experiment is repeatable under Restart & Run All.
RANDOM_STATE = 42

# Hold out 20% of the samples as a test set that the grid search never sees.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Seed the tree as well: scikit-learn permutes the candidate features at
# every split, so an unseeded tree can break ties differently between runs
# and the selected hyperparameters/scores would not be reproducible.
dt_model = DecisionTreeClassifier(random_state=RANDOM_STATE)

In [10]:
# Search space for the decision tree: every combination of these values
# is a candidate configuration for the grid search.
parameters = dict(
    criterion=['gini', 'entropy'],
    max_depth=[None, 5, 10, 15, 20],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)


In [11]:
# Grid search over `parameters`, scoring each candidate by 5-fold
# cross-validated accuracy computed on the training split only.
grid_cv = GridSearchCV(
    estimator=dt_model,
    param_grid=parameters,
    scoring='accuracy',
    cv=5,
)
# Kept as the cell's final expression so the fitted search object is echoed.
grid_cv.fit(X_train, y_train)


In [12]:
# Pull the winning value of each tuned hyperparameter out of the search
# result with a single lookup of `best_params_`.
best_params = grid_cv.best_params_
(best_criterion,
 best_max_depth,
 best_min_samples_split,
 best_min_samples_leaf) = (
    best_params['criterion'],
    best_params['max_depth'],
    best_params['min_samples_split'],
    best_params['min_samples_leaf'],
)

# One print call, one line per hyperparameter.
print(
    f'Best criterion: {best_criterion}',
    f'Best max_depth: {best_max_depth}',
    f'Best min_samples_split: {best_min_samples_split}',
    f'Best min_samples_leaf: {best_min_samples_leaf}',
    sep='\n',
)

Best criterion: entropy
Best max_depth: 20
Best min_samples_split: 10
Best min_samples_leaf: 2


In [13]:
# Keep a handle on the best estimator found by the search.
best_model = grid_cv.best_estimator_
# With the default refit=True, predicting through the search object
# delegates to that same best estimator.
y_pred = grid_cv.predict(X_test)

In [14]:
# Score the held-out predictions: overall accuracy plus the per-class
# precision / recall / F1 breakdown.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)

# Accuracy is rounded to two decimals; the report is printed as returned.
print(f'Accuracy on test set: {accuracy:.2f}')
print(f'Classification Report:\n{report}')

Accuracy on test set: 0.95
Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.91      0.93        43
           1       0.95      0.97      0.96        71

    accuracy                           0.95       114
   macro avg       0.95      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114

