In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix,accuracy_score,precision_score,recall_score,f1_score
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

In [2]:
# load the data set

data = load_breast_cancer()

X = data.data
Y = data.target

In [3]:
X

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]], shape=(569, 30))

In [4]:
Y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [5]:
# train and test the data set

X_train,X_test,y_train,y_test = train_test_split(X,Y,test_size=0.25,random_state=42)

In [6]:
# model 1

dt_entropy  = DecisionTreeClassifier(criterion="entropy")
dt_entropy.fit(X_train,y_train)
y_pred_ent = dt_entropy.predict(X_test)

In [7]:
# model 2 

dt_gini = DecisionTreeClassifier(criterion='gini')
dt_gini.fit(X_train,y_train)
y_pred_gini = dt_gini.predict(X_test)

In [8]:
# confusion metrix 

cm_gini = confusion_matrix(y_test,y_pred_gini)

In [9]:
cm_enrp = confusion_matrix(y_test,y_pred_ent)

In [10]:
# metric function 

def calculate_metric(y_test,y_pred):

    return {
        "Accuracy": accuracy_score(y_test,y_pred),
        "precision score":precision_score(y_test,y_pred),
        "f1 score":f1_score(y_test,y_pred),
        "Recall":recall_score(y_test,y_pred)
    }

In [11]:
metric_enrp = calculate_metric(y_test,y_pred_ent)
metric_gini = calculate_metric(y_test,y_pred_gini)

In [12]:

# Display Results
print("Confusion Matrix - Entropy:")
print(cm_enrp)

print("\nConfusion Matrix - Gini:")
print(cm_gini)

Confusion Matrix - Entropy:
[[50  4]
 [ 2 87]]

Confusion Matrix - Gini:
[[51  3]
 [ 4 85]]


In [13]:
comparison = pd.DataFrame([metric_enrp, metric_gini],
                          index=["Entropy(ID3)", "Gini(CART)"])



In [14]:
print("\nPerformance Comparison:")
print(comparison)


Performance Comparison:
              Accuracy  precision score  f1 score    Recall
Entropy(ID3)  0.958042         0.956044  0.966667  0.977528
Gini(CART)    0.951049         0.965909  0.960452  0.955056
