In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

data = load_breast_cancer()
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [2]:
from sklearn import tree
clf = tree.DecisionTreeClassifier()
clf.fit(X_train, y_train)

# tree.plot_tree(clf) 

DecisionTreeClassifier()

In [3]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

y_pred = clf.predict(X_test)
print("test set")
print("accurcy: %f" % accuracy_score(y_test, y_pred))
print("precision: %f" % precision_score(y_test, y_pred, zero_division=0))
print("recall: %f" % recall_score(y_test, y_pred))
print("f1 scorce: %f" % f1_score(y_test, y_pred))
print("confusion matrix:\n", confusion_matrix(y_test, y_pred))

test set
accurcy: 0.929825
precision: 0.952830
recall: 0.935185
f1 scorce: 0.943925
confusion matrix:
 [[ 58   5]
 [  7 101]]


In [4]:
y_pred = clf.predict(X_train)
print("training set")
print("accurcy: %f" % accuracy_score(y_train, y_pred))
print("precision: %f" % precision_score(y_train, y_pred, zero_division=0))
print("recall: %f" % recall_score(y_train, y_pred))
print("f1 scorce: %f" % f1_score(y_train, y_pred))
print("confusion matrix:\n", confusion_matrix(y_train, y_pred))

training set
accurcy: 1.000000
precision: 1.000000
recall: 1.000000
f1 scorce: 1.000000
confusion matrix:
 [[149   0]
 [  0 249]]


### max depth=4

In [5]:
clf4 = tree.DecisionTreeClassifier(max_depth=4)
clf4.fit(X_train, y_train)
y_pred = clf4.predict(X_test)
print("testing set")
print("accurcy: %f" % accuracy_score(y_test, y_pred))
print("precision: %f" % precision_score(y_test, y_pred, zero_division=0))
print("recall: %f" % recall_score(y_test, y_pred))
print("f1 scorce: %f" % f1_score(y_test, y_pred))
print("confusion matrix:\n", confusion_matrix(y_test, y_pred))
y_pred = clf4.predict(X_train)
print("\ntraining set")
print("accurcy: %f" % accuracy_score(y_train, y_pred))
print("precision: %f" % precision_score(y_train, y_pred, zero_division=0))
print("recall: %f" % recall_score(y_train, y_pred))
print("f1 scorce: %f" % f1_score(y_train, y_pred))
print("confusion matrix:\n", confusion_matrix(y_train, y_pred))

testing set
accurcy: 0.959064
precision: 0.971963
recall: 0.962963
f1 scorce: 0.967442
confusion matrix:
 [[ 60   3]
 [  4 104]]

training set
accurcy: 0.994975
precision: 0.992032
recall: 1.000000
f1 scorce: 0.996000
confusion matrix:
 [[147   2]
 [  0 249]]


### max_depth=7

In [6]:
clf7 = tree.DecisionTreeClassifier(max_depth=7)
clf7.fit(X_train, y_train)
y_pred = clf7.predict(X_test)
print("testing set")
print("accurcy: %f" % accuracy_score(y_test, y_pred))
print("precision: %f" % precision_score(y_test, y_pred, zero_division=0))
print("recall: %f" % recall_score(y_test, y_pred))
print("f1 scorce: %f" % f1_score(y_test, y_pred))
print("confusion matrix:\n", confusion_matrix(y_test, y_pred))
y_pred = clf7.predict(X_train)
print("\ntraining set")
print("accurcy: %f" % accuracy_score(y_train, y_pred))
print("precision: %f" % precision_score(y_train, y_pred, zero_division=0))
print("recall: %f" % recall_score(y_train, y_pred))
print("f1 scorce: %f" % f1_score(y_train, y_pred))
print("confusion matrix:\n", confusion_matrix(y_train, y_pred))

testing set
accurcy: 0.923977
precision: 0.961165
recall: 0.916667
f1 scorce: 0.938389
confusion matrix:
 [[59  4]
 [ 9 99]]

training set
accurcy: 1.000000
precision: 1.000000
recall: 1.000000
f1 scorce: 1.000000
confusion matrix:
 [[149   0]
 [  0 249]]


In [10]:
clf11 = tree.DecisionTreeClassifier(max_depth=10)
clf11.fit(X_train, y_train)
y_pred = clf11.predict(X_test)
print("testing set")
print("accurcy: %f" % accuracy_score(y_test, y_pred))
print("precision: %f" % precision_score(y_test, y_pred, zero_division=0))
print("recall: %f" % recall_score(y_test, y_pred))
print("f1 scorce: %f" % f1_score(y_test, y_pred))
print("confusion matrix:\n", confusion_matrix(y_test, y_pred))
y_pred = clf11.predict(X_train)
print("\ntraining set")
print("accurcy: %f" % accuracy_score(y_train, y_pred))
print("precision: %f" % precision_score(y_train, y_pred, zero_division=0))
print("recall: %f" % recall_score(y_train, y_pred))
print("f1 scorce: %f" % f1_score(y_train, y_pred))
print("confusion matrix:\n", confusion_matrix(y_train, y_pred))

testing set
accurcy: 0.923977
precision: 0.961165
recall: 0.916667
f1 scorce: 0.938389
confusion matrix:
 [[59  4]
 [ 9 99]]

training set
accurcy: 1.000000
precision: 1.000000
recall: 1.000000
f1 scorce: 1.000000
confusion matrix:
 [[149   0]
 [  0 249]]


In [8]:
cls = tree.DecisionTreeClassifier()
param_grid = {
    'max_depth': [4,5,6,7,8,9,10,11,12,13,14]}
model = GridSearchCV(cls, param_grid=param_grid, scoring='accuracy', cv=10, n_jobs=-1)

model.fit(X_train, y_train)

print('Best Params:', model.best_params_)

Best Params: {'max_depth': 11}


In [9]:
y_pred = clf.predict(X_test)
print("testing set")
print("accurcy: %f" % accuracy_score(y_test, y_pred))
print("precision: %f" % precision_score(y_test, y_pred, zero_division=0))
print("recall: %f" % recall_score(y_test, y_pred))
print("f1 scorce: %f" % f1_score(y_test, y_pred))
print("confusion matrix:\n", confusion_matrix(y_test, y_pred))
y_pred = clf.predict(X_train)
print("\ntraining set")
print("accurcy: %f" % accuracy_score(y_train, y_pred))
print("precision: %f" % precision_score(y_train, y_pred, zero_division=0))
print("recall: %f" % recall_score(y_train, y_pred))
print("f1 scorce: %f" % f1_score(y_train, y_pred))
print("confusion matrix:\n", confusion_matrix(y_train, y_pred))

testing set
accurcy: 0.929825
precision: 0.952830
recall: 0.935185
f1 scorce: 0.943925
confusion matrix:
 [[ 58   5]
 [  7 101]]

training set
accurcy: 1.000000
precision: 1.000000
recall: 1.000000
f1 scorce: 1.000000
confusion matrix:
 [[149   0]
 [  0 249]]
