## Cross Validation

In [2]:
from sklearn import datasets

In [5]:
# Load the iris dataset as a (features, labels) pair instead of a Bunch object.
X, y = datasets.load_iris(return_X_y=True)

In [6]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import KFold, cross_val_score

In [7]:
# Decision tree estimator; random_state is pinned so repeated runs build the same tree.
clf = DecisionTreeClassifier(random_state=42)

In [8]:
# Split the data into five folds and score the classifier once per fold.
# NOTE(review): shuffling is not requested here; if the rows are ordered by
# class, consider shuffle=True together with a random_state — confirm.
k_folds = KFold(n_splits=5)
scores = cross_val_score(estimator=clf, X=X, y=y, cv=k_folds)

In [9]:
# Report the per-fold scores, their mean, and how many scores were averaged.
for label, value in (
    ("Cross Validation Scores :", scores),
    ("Average cv Score :", scores.mean()),
    ("Number of cv scores used in average :", len(scores)),
):
    print(label, value)

Cross Validation Scores : [1.         1.         0.83333333 0.93333333 0.8       ]
Average cv Score : 0.9133333333333333
Number of cv scores used in average : 5


## Stratified K-Fold

In [10]:
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_score

# Stratified 5-fold cross-validation of a decision tree on the iris data.
X, y = datasets.load_iris(return_X_y=True)

# Build the estimator and the splitter first, then score once per fold.
clf = DecisionTreeClassifier(random_state=42)
sk_folds = StratifiedKFold(n_splits=5)
scores = cross_val_score(estimator=clf, X=X, y=y, cv=sk_folds)

# Report the per-fold scores, their mean, and how many scores were averaged.
for label, value in (
    ("Cross Validation scores : ", scores),
    ("Average cv score : ", scores.mean()),
    ("Number of cv scores used in Average : ", len(scores)),
):
    print(label, value)

Cross Validation scores :  [0.96666667 0.96666667 0.9        0.93333333 1.        ]
Average cv score :  0.9533333333333334
Number of cv scores used in Average :  5


## Leave One Out (LOO)

In [11]:
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import LeaveOneOut, cross_val_score

# Leave-one-out cross-validation of a decision tree on the iris data.
X, y = datasets.load_iris(return_X_y=True)

# Build the estimator and the splitter first, then score once per split.
clf = DecisionTreeClassifier(random_state=42)
loo = LeaveOneOut()
scores = cross_val_score(estimator=clf, X=X, y=y, cv=loo)

# Report the per-split scores, their mean, and how many scores were averaged.
for label, value in (
    ("Cross Validation scores : ", scores),
    ("Average cv score : ", scores.mean()),
    ("Number of cv scores used in Average : ", len(scores)),
):
    print(label, value)

Cross Validation scores :  [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1.
 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1.]
Average cv score :  0.94
Number of cv scores used in Average :  150


## Leave P Out (LPO)

In [13]:
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import LeavePOut, cross_val_score

# Leave-p-out cross-validation (p=2) of a decision tree on the iris data.
X, y = datasets.load_iris(return_X_y=True)

# Build the estimator and the splitter first, then score once per split.
clf = DecisionTreeClassifier(random_state=42)
lpo = LeavePOut(p=2)
scores = cross_val_score(estimator=clf, X=X, y=y, cv=lpo)

# Report the per-split scores, their mean, and how many scores were averaged.
for label, value in (
    ("Cross Validation scores : ", scores),
    ("Average cv score : ", scores.mean()),
    ("Number of cv scores used in Average : ", len(scores)),
):
    print(label, value)

Cross Validation scores :  [1. 1. 1. ... 1. 1. 1.]
Average cv score :  0.9382997762863534
Number of cv scores used in Average :  11175


## Shuffle Split

In [14]:
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import ShuffleSplit, cross_val_score

# Shuffle-split cross-validation of a decision tree on the iris data.
X, y = datasets.load_iris(return_X_y=True)

clf = DecisionTreeClassifier(random_state=42)

# Five random partitions, each using 60% of the rows for training and 30% for
# testing. The splitter is seeded (same seed as the classifier) so the random
# partitions, and therefore the printed scores, are repeatable across runs.
ss = ShuffleSplit(train_size=0.6, test_size=0.3, n_splits=5, random_state=42)

scores = cross_val_score(clf, X, y, cv = ss)

# Report the per-split scores, their mean, and how many scores were averaged.
print("Cross Validation Scores: ", scores)
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))



Cross Validation Scores:  [0.95555556 0.93333333 0.91111111 0.93333333 0.93333333]
Average CV Score:  0.9333333333333333
Number of CV Scores used in Average:  5
