In [1]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.datasets import load_iris

import numpy as np
import pandas as pd

In [2]:
# Load the iris dataset and build a DataFrame with one column per feature
# plus the class id of each sample in a 'label' column.
iris = load_iris()
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(label=iris.target)

# Show how many samples belong to each class.
iris_df['label'].value_counts()

0    50
1    50
2    50
Name: label, dtype: int64

In [3]:
# 3-fold cross validation of a decision tree using a plain KFold splitter.
# No shuffle argument is passed to KFold. The per-fold label counts printed
# by this cell show the consequence on this dataset: each test fold contains
# a single class that is missing from the matching training fold, so the
# classifier cannot predict it and every fold scores 0.0.
Kfold = KFold(n_splits=3)
dt_clf = DecisionTreeClassifier(random_state=156)
kfold_cv_accuracy = []

# The feature/label selection does not depend on the fold; do it once.
feature_df = iris_df[iris.feature_names]
label_sr = iris_df['label']

# enumerate(start=1) numbers the folds 1..n_splits without a manual counter.
for n_iter, (train_index, test_index) in enumerate(Kfold.split(iris_df), start=1):
    # split() yields positional row indices, hence .iloc.
    feature_train = feature_df.iloc[train_index]
    feature_test = feature_df.iloc[test_index]
    label_train = label_sr.iloc[train_index]
    label_test = label_sr.iloc[test_index]

    # Refit the same estimator on this fold's training rows, then score it
    # on the held-out rows.
    dt_clf.fit(feature_train, label_train)
    pred = dt_clf.predict(feature_test)
    # accuracy_score expects (y_true, y_pred) in that order.
    accuracy = accuracy_score(label_test, pred)
    kfold_cv_accuracy.append(accuracy)

    print('Cross Valid Test #{}:'.format(n_iter))
    print('Train data label values:\n', label_train.value_counts(), sep='')
    print('Test data label values:\n', label_test.value_counts(), sep='')
    print('Cross Validation Accuracy: {}'.format(accuracy))
    print('-------------------------')

# Mean accuracy over all folds.
print('Average of Cross Validation Accuraacy:', np.mean(kfold_cv_accuracy))

Cross Valid Test #1:
Train data label values:
1    50
2    50
Name: label, dtype: int64
Test data label values:
0    50
Name: label, dtype: int64
Cross Validation Accuracy: 0.0
-------------------------
Cross Valid Test #2:
Train data label values:
0    50
2    50
Name: label, dtype: int64
Test data label values:
1    50
Name: label, dtype: int64
Cross Validation Accuracy: 0.0
-------------------------
Cross Valid Test #3:
Train data label values:
0    50
1    50
Name: label, dtype: int64
Test data label values:
2    50
Name: label, dtype: int64
Cross Validation Accuracy: 0.0
-------------------------
Average of Cross Validation Accuraacy: 0.0


In [4]:
from sklearn.model_selection import StratifiedKFold

# 3-fold cross validation of a decision tree using StratifiedKFold.
# The labels are passed to split() so the splitter can balance the classes;
# the per-fold label counts printed by this cell show all three classes
# present in every training and test fold.
skf = StratifiedKFold(n_splits=3)

dt_clf = DecisionTreeClassifier(random_state=156)
skf_cv_accuracy = []

# The feature/label selection does not depend on the fold; do it once.
feature_df = iris_df[iris.feature_names]
label_sr = iris_df['label']

# enumerate(start=1) numbers the folds 1..n_splits without a manual counter.
for n_iter, (train_index, test_index) in enumerate(skf.split(iris_df, label_sr), start=1):
    # split() yields positional row indices, hence .iloc.
    feature_train = feature_df.iloc[train_index]
    feature_test = feature_df.iloc[test_index]
    label_train = label_sr.iloc[train_index]
    label_test = label_sr.iloc[test_index]

    # Refit the same estimator on this fold's training rows, then score it
    # on the held-out rows.
    dt_clf.fit(feature_train, label_train)
    pred = dt_clf.predict(feature_test)
    # accuracy_score expects (y_true, y_pred) in that order.
    accuracy = accuracy_score(label_test, pred)
    skf_cv_accuracy.append(accuracy)

    print('Cross Valid Test #{}:'.format(n_iter))
    print('Train data label values:\n', label_train.value_counts(), sep='')
    print('Test data label values:\n', label_test.value_counts(), sep='')
    print('Cross Validation Accuracy: {}'.format(accuracy))
    print('-------------------------')

# Mean accuracy over all folds.
print('Average of Cross Validation Accuraacy:', np.mean(skf_cv_accuracy))


Cross Valid Test #1:
Train data label values:
2    34
0    33
1    33
Name: label, dtype: int64
Test data label values:
0    17
1    17
2    16
Name: label, dtype: int64
Cross Validation Accuracy: 0.98
-------------------------
Cross Valid Test #2:
Train data label values:
1    34
0    33
2    33
Name: label, dtype: int64
Test data label values:
0    17
2    17
1    16
Name: label, dtype: int64
Cross Validation Accuracy: 0.94
-------------------------
Cross Valid Test #3:
Train data label values:
0    34
1    33
2    33
Name: label, dtype: int64
Test data label values:
1    17
2    17
0    16
Name: label, dtype: int64
Cross Validation Accuracy: 0.98
-------------------------
Average of Cross Validation Accuraacy: 0.9666666666666667


In [5]:
from sklearn.model_selection import StratifiedKFold

# Stratified 3-fold cross validation again, this time indexing the feature
# and target arrays from load_iris() directly instead of going through a
# DataFrame.
iris = load_iris()
features = iris.data
label = iris.target
skfold = StratifiedKFold(n_splits=3)

dt_clf = DecisionTreeClassifier(random_state=156)
cv_accuracy = []

# Iterate with the splitter created in this cell (`skfold`) so the cell does
# not depend on a splitter variable defined by a different cell.
# enumerate(start=1) numbers the folds 1..n_splits without a manual counter.
for n_iter, (train_index, test_index) in enumerate(skfold.split(features, label), start=1):
    X_train, X_test = features[train_index], features[test_index]
    Y_train, Y_test = label[train_index], label[test_index]

    # Refit on this fold's training rows and score on the held-out rows.
    dt_clf.fit(X_train, Y_train)
    pred = dt_clf.predict(X_test)
    # accuracy_score expects (y_true, y_pred); the score is rounded to
    # 4 decimals before it is stored and printed.
    accuracy = np.round(accuracy_score(Y_test, pred), 4)
    cv_accuracy.append(accuracy)

    print('Cross Valid Test #{}:'.format(n_iter))
    print('Cross Validation Accuracy: {}'.format(accuracy))
    print('-------------------------')

# Mean of the (already rounded) per-fold accuracies.
print('Average of Cross Validation Accuraacy:', np.mean(cv_accuracy))


Cross Valid Test #1:
Cross Validation Accuracy: 0.98
-------------------------
Cross Valid Test #2:
Cross Validation Accuracy: 0.94
-------------------------
Cross Valid Test #3:
Cross Validation Accuracy: 0.98
-------------------------
Average of Cross Validation Accuraacy: 0.9666666666666667


>cross_val_score()

In [6]:
from sklearn.model_selection import cross_val_score, cross_validate

# Run the same 3-fold accuracy evaluation in one call with cross_val_score,
# using a fresh decision tree and the feature/label arrays loaded above.
dt_clf = DecisionTreeClassifier(random_state=156)

scores = cross_val_score(
    estimator=dt_clf,
    X=features,
    y=label,
    scoring='accuracy',
    cv=3,
)

# Report the per-fold scores and their mean, both rounded to 4 decimals.
mean_score = np.mean(scores)
print('Cross Validation Accuracy:', np.round(scores, 4))
print('Mean Accuracy:', np.round(mean_score, 4))


Cross Validation Accuracy: [0.98 0.94 0.98]
Mean Accuracy: 0.9667
