## Cross Validation

In [2]:
import numpy as np
from sklearn import metrics
from sklearn.base import clone
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, KFold, LeaveOneOut

In [3]:
# Load the breast-cancer dataset and pull out the feature matrix and the labels.
dataset = load_breast_cancer()
X, y = dataset.data, dataset.target

### Using the Holdout Method

In [4]:
# Hold out 20% of the samples for testing; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [5]:
# Small random forest (10 trees, entropy split criterion) with a fixed seed.
clf = RandomForestClassifier(
    n_estimators=10,
    criterion="entropy",
    random_state=0,
)

In [6]:
# Train on the holdout training portion; fit() returns the estimator, so its
# repr is displayed as the cell output.
clf.fit(X=X_train, y=y_train)

RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)

In [7]:
# Predict labels for the held-out test samples.
y_pred = clf.predict(X=X_test)

In [8]:
# Fraction of held-out samples whose predicted label matches the true label.
acc = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
print("The accuracy is :", acc)

The accuracy is : 0.9473684210526315


### Using the K-Fold Method

In [9]:
# 5-fold cross-validation: the data is cut into 5 consecutive folds (KFold's
# default is no shuffling) and each fold serves as the test set exactly once.
#
# Each fold is indexed directly and fitted on a clone of `clf`, so this cell
# does not overwrite X_train / X_test / y_train / y_test / y_pred or refit the
# model from the holdout section. Those cells can still be re-run afterwards
# and give the same result.
kf = KFold(n_splits=5)
accuracies = []
for train_index, test_index in kf.split(X):
    # clone() copies the hyperparameters (including random_state) into a
    # fresh, unfitted estimator; the shared `clf` is left untouched.
    fold_clf = clone(clf).fit(X[train_index], y[train_index])
    fold_pred = fold_clf.predict(X[test_index])
    accuracies.append(metrics.accuracy_score(y[test_index], fold_pred))
print(accuracies)
print("Cross-validation accuracy = ", np.mean(accuracies))

[0.9122807017543859, 0.9385964912280702, 0.9736842105263158, 0.9649122807017544, 0.9646017699115044]
Cross-validation accuracy =  0.9508150908244062


### Using the Leave-One-Out Method

In [10]:
# Leave-one-out cross-validation: each split tests on a single sample and
# trains on all the others, so every per-split accuracy is either 0.0 or 1.0
# and their mean is the overall fraction of correctly classified samples.
# One model is fitted per sample, which makes this cell comparatively slow.
#
# Each split is indexed directly and fitted on a clone of `clf`, so this cell
# does not overwrite X_train / X_test / y_train / y_test / y_pred or refit the
# model from the holdout section.
loo = LeaveOneOut()
accuracies = []
for train_index, test_index in loo.split(X):
    # clone() copies the hyperparameters (including random_state) into a
    # fresh, unfitted estimator; the shared `clf` is left untouched.
    fold_clf = clone(clf).fit(X[train_index], y[train_index])
    fold_pred = fold_clf.predict(X[test_index])
    accuracies.append(metrics.accuracy_score(y[test_index], fold_pred))
print("Cross-validation accuracy = ", np.mean(accuracies))

Cross-validation accuracy =  0.9578207381370826
