In [1]:
from sklearn.datasets import load_breast_cancer

In [2]:
# Load the breast cancer dataset as a (features, target) pair.
X, y = load_breast_cancer(return_X_y=True)

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
# Hold out 20% of the samples as a test set; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [5]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score, LeaveOneOut, ShuffleSplit

In [7]:
# Seed the tree: scikit-learn randomly permutes candidate features at every
# split, so an unseeded tree can give different cross-validation scores on
# each run of the notebook.
model = DecisionTreeClassifier(random_state=0)

In [8]:
# Five random 75/25 train/validation partitions, seeded for repeatability.
cv = ShuffleSplit(
    n_splits=5,
    test_size=0.25,
    random_state=0,
)

In [9]:
# Evaluate the model on every split produced by `cv`, using training data only.
scores = cross_val_score(
    model,
    X_train,
    y_train,
    cv=cv,
)

In [10]:
# Display the per-split scores.
scores

array([0.92105263, 0.95614035, 0.90350877, 0.87719298, 0.95614035])

In [11]:
# Mean score across the splits.
scores.mean()

0.9228070175438596

In [12]:
# Leave-one-out: each training sample is held out once as the validation set.
scores = cross_val_score(
    model,
    X_train,
    y_train,
    cv=LeaveOneOut(),
)

In [13]:
# Mean leave-one-out score.
scores.mean()

0.9406593406593406

In [14]:
# 10-fold cross validation over tree depths 1..10.
# Each tree is seeded so that differences between depths reflect max_depth
# rather than run-to-run randomness in the tree's feature permutation.
for depth in range(1, 11):
    model = DecisionTreeClassifier(max_depth=depth, random_state=0)
    scores = cross_val_score(model, X_train, y_train, cv=10)
    print(f'Tree depth: {depth}, Average score: {scores.mean()}')

Tree depth: 1, Average score: 0.883671497584541
Tree depth: 2, Average score: 0.9254589371980677
Tree depth: 3, Average score: 0.9363768115942029
Tree depth: 4, Average score: 0.9320289855072463
Tree depth: 5, Average score: 0.9297101449275363
Tree depth: 6, Average score: 0.9317874396135266
Tree depth: 7, Average score: 0.938647342995169
Tree depth: 8, Average score: 0.9383574879227053
Tree depth: 9, Average score: 0.9143961352657005
Tree depth: 10, Average score: 0.9318840579710145


In [15]:
# max_depth=7 had the highest mean score in the recorded depth sweep.
# Seeded so the fitted tree and its train/test scores are reproducible.
model = DecisionTreeClassifier(max_depth=7, random_state=0)

In [16]:
# Fit the final tree on the full training split.
model.fit(X_train, y_train)

In [17]:
# Mean accuracy on the training split (the data the tree was fit on).
model.score(X_train, y_train)

0.9956043956043956

In [18]:
# Mean accuracy on the held-out test split.
model.score(X_test, y_test)

0.9473684210526315