What is cross-validation?
It splits your data into several groups (folds), then repeatedly trains your model on some of the folds and tests it
on the remaining ones, to make sure a good score is not just luck on one particular split.

1. K-Fold cross-validation

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

# Features and class labels of the bundled iris dataset.
data = load_iris()
x, y = data.data, data.target

# Logistic-regression classifier, capped at 200 solver iterations.
model = LogisticRegression(max_iter=200)

# cv=5 -> five folds, one score per fold.
# NOTE: with an integer cv and a classifier, scikit-learn builds the folds
# with StratifiedKFold, so these splits are already stratified.
scores = cross_val_score(estimator=model, X=x, y=y, cv=5)

print(scores)
mean_accuracy = scores.mean()
print('avg accuracy:', mean_accuracy)

[0.96666667 1.         0.93333333 0.96666667 1.        ]
avg accuracy: 0.9733333333333334


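2. Stratified K-Fold cross-validation
In regular k-fold, the class distribution might not be the same in each fold. That's a problem when you have imbalanced classes (e.g., more cats than dogs). Stratified k-fold builds the folds so that each one keeps roughly the same class proportions as the full dataset.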

In [2]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
import numpy as np

# Five stratified folds over the iris features/labels loaded earlier.
skf = StratifiedKFold(n_splits=5)
model = LogisticRegression(max_iter=200)

scores = []
for train_index, test_index in skf.split(x, y):
    # Fit on this fold's training rows ...
    model.fit(x[train_index], y[train_index])
    # ... and record the accuracy on the held-out rows.
    fold_accuracy = accuracy_score(y[test_index], model.predict(x[test_index]))
    scores.append(fold_accuracy)

# Per-fold accuracies, then their mean.
print(scores)
print(np.mean(scores))

[0.9666666666666667, 1.0, 0.9333333333333333, 0.9666666666666667, 1.0]
0.9733333333333334


3. Leave-One-Out cross-validation (LOOCV)

In [None]:
from sklearn.model_selection import LeaveOneOut

# Leave-one-out: every split holds out a single sample for testing.
loo = LeaveOneOut()
model = LogisticRegression(max_iter=200)

scores = []
for train_index, test_index in loo.split(x, y):
    # Train on everything except the held-out sample.
    model.fit(x[train_index], y[train_index])
    # With one test sample, the accuracy is either 0.0 (miss) or 1.0 (hit).
    held_out_accuracy = accuracy_score(y[test_index], model.predict(x[test_index]))
    scores.append(held_out_accuracy)

# One entry per sample, then the fraction of samples predicted correctly.
print(scores)
print(np.mean(scores))

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
0.9666666666666667


In [None]:
# Scratch cell: prints a fixed string; not part of the cross-validation examples.
print('gokul')