## Cross Validation

In [8]:
import numpy as np
from sklearn import datasets
from sklearn import svm
from sklearn.model_selection import train_test_split, cross_val_score

In [3]:
# Load the iris dataset; `iris.data` holds the features, `iris.target` the labels.
iris = datasets.load_iris()
iris.data.shape, iris.target.shape  # not echoed: only a cell's final expression is displayed

# Hold out 40% of the samples for evaluation; random_state=0 makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.4,
    random_state=0,
)
print(X_train.shape, y_train.shape)

# Fit a linear-kernel SVM on the training portion, then score it on the held-out portion.
clf = svm.SVC(kernel='linear', C=1)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))

(90, 4) (90,)
0.966666666667


In [4]:
# 5-fold cross-validation of `clf` over the full dataset, using the estimator's
# default scorer; `scores` holds one value per fold.
scores = cross_val_score(estimator=clf, X=iris.data, y=iris.target, cv=5)
print(scores)

[ 0.96666667  1.          0.96666667  0.96666667  1.        ]


## K-Fold

In [5]:
import numpy as np
from sklearn.model_selection import KFold
# 5-fold splitter with every other option left at its default (no shuffling is requested).
# NOTE(review): iris labels appear to be ordered by class, so unshuffled folds are
# presumably not class-balanced — compare with the stratified section below.
kf = KFold(n_splits=5)

In [43]:
# For each fold: train a fresh linear SVM on the training indices and print
# its accuracy on the held-out indices.
for index_train, index_test in kf.split(iris.data):
    X_train = iris.data[index_train]
    y_train = iris.target[index_train]
    X_test = iris.data[index_test]
    y_test = iris.target[index_test]

    clf = svm.SVC(kernel='linear', C=1)
    clf.fit(X_train, y_train)
    print(clf.score(X_test, y_test))

1.0
1.0
0.866666666667
1.0
0.866666666667


## Stratified K-Fold (Accuracy, Precision, Recall, F1-Score)

In [46]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import precision_score, recall_score, f1_score
# Stratified 5-fold cross-validation: the splitter is given the labels as well
# as the features. Each fold trains a fresh linear SVM and reports accuracy
# plus macro-averaged precision, recall and F1 on the held-out samples.
skf = StratifiedKFold(n_splits=5)
for index_train, index_test in skf.split(iris.data, iris.target):
    # Slice out this fold's training and held-out samples.
    X_train = iris.data[index_train]
    y_train = iris.target[index_train]
    X_test = iris.data[index_test]
    y_test = iris.target[index_test]

    clf = svm.SVC(kernel='linear', C=1)
    clf.fit(X_train, y_train)
    y_predict = clf.predict(X_test)

    accuracy = clf.score(X_test, y_test)
    # The three metrics share one call signature, so evaluate them in one pass.
    p_score, r_score, f_score = (
        metric(y_test, y_predict, average='macro')
        for metric in (precision_score, recall_score, f1_score)
    )

    print("=======================")
    print("Accuracy: ", accuracy)
    print("Precision: ", p_score)
    print("Recall: ", r_score)
    print("F1-Score: ", f_score)

Accuracy:  0.966666666667
Precision:  0.969696969697
Recall:  0.966666666667
F1-Score:  0.966583124478
Accuracy:  1.0
Precision:  1.0
Recall:  1.0
F1-Score:  1.0
Accuracy:  0.966666666667
Precision:  0.969696969697
Recall:  0.966666666667
F1-Score:  0.966583124478
Accuracy:  0.966666666667
Precision:  0.969696969697
Recall:  0.966666666667
F1-Score:  0.966583124478
Accuracy:  1.0
Precision:  1.0
Recall:  1.0
F1-Score:  1.0


## Leave one out

In [47]:
from sklearn.model_selection import LeaveOneOut
# Leave-one-out cross-validation: every split holds out exactly one sample and
# trains on all the others, so there are as many splits as samples.
loo = LeaveOneOut()
print("How many out ", loo.get_n_splits(iris.data))

loo_scores = []  # one accuracy per split; each is 0.0 or 1.0 (single test sample)
for index_train, index_test in loo.split(iris.data):
    X_train, X_test = iris.data[index_train], iris.data[index_test]
    y_train, y_test = iris.target[index_train], iris.target[index_test]
    # Fit on the training labels. The previous code passed an undefined
    # `x_test` here, which raises NameError on a fresh kernel.
    clf = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)
    y_predict = clf.predict(X_test)
    accuracy = clf.score(X_test, y_test)
    loo_scores.append(accuracy)
    print("Accuracy: ", accuracy)

# Individual splits are all-or-nothing, so the mean is the meaningful summary.
print("Mean accuracy: ", np.mean(loo_scores))

How many out  150
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accuracy:  1.0
Accurac