# K-Fold Cross Validation

Reference: scikit-learn user guide on cross-validation — https://scikit-learn.org/stable/modules/cross_validation.html

In [1]:
import numpy as np
from sklearn.model_selection import KFold

In [2]:
# Toy feature matrix: 9 samples x 2 features, drawn uniformly from [0, 1).
# No seed is set, so the values differ on every fresh kernel run.
X = np.random.random_sample(size=(9, 2))
X

array([[0.49742047, 0.68133866],
       [0.29274157, 0.23720758],
       [0.14727213, 0.05929649],
       [0.69248567, 0.30761226],
       [0.90983503, 0.65284709],
       [0.04545768, 0.01713307],
       [0.13395379, 0.92292449],
       [0.87798942, 0.87304084],
       [0.95141114, 0.04688927]])

## Initialize k-fold method

In [3]:
# Unshuffled 5-fold splitter: samples are assigned to folds in their original
# order, so random_state has no effect and is left as None.
kf = KFold(n_splits=5, shuffle=False, random_state=None)

## Call k-fold method

In [4]:
# For every fold, show which rows of X are used for training and which are
# held out for validation (kf.split yields integer index arrays).
for train_idx, val_idx in kf.split(X):
    print('train: ', X[train_idx])
    print('validation: ', X[val_idx], '\n')

train:  [[0.14727213 0.05929649]
 [0.69248567 0.30761226]
 [0.90983503 0.65284709]
 [0.04545768 0.01713307]
 [0.13395379 0.92292449]
 [0.87798942 0.87304084]
 [0.95141114 0.04688927]]
validation:  [[0.49742047 0.68133866]
 [0.29274157 0.23720758]] 

train:  [[0.49742047 0.68133866]
 [0.29274157 0.23720758]
 [0.90983503 0.65284709]
 [0.04545768 0.01713307]
 [0.13395379 0.92292449]
 [0.87798942 0.87304084]
 [0.95141114 0.04688927]]
validation:  [[0.14727213 0.05929649]
 [0.69248567 0.30761226]] 

train:  [[0.49742047 0.68133866]
 [0.29274157 0.23720758]
 [0.14727213 0.05929649]
 [0.69248567 0.30761226]
 [0.13395379 0.92292449]
 [0.87798942 0.87304084]
 [0.95141114 0.04688927]]
validation:  [[0.90983503 0.65284709]
 [0.04545768 0.01713307]] 

train:  [[0.49742047 0.68133866]
 [0.29274157 0.23720758]
 [0.14727213 0.05929649]
 [0.69248567 0.30761226]
 [0.90983503 0.65284709]
 [0.04545768 0.01713307]
 [0.95141114 0.04688927]]
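validation:  [[0.13395379 0.92292449]
 [0.87798942 0.87304084]] 

train:  [[0.49742047 0.68133866]
 [0.29274157 0.23720758]
 [0.14727213 0.05929649]
 [0.69248567 0.30761226]
 [0.90983503 0.65284709]
 [0.04545768 0.01713307]
 [0.13395379 0.92292449]
 [0.87798942 0.87304084]]
validation:  [[0.95141114 0.04688927]] 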



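## Stratified k-fold cross validation

With imbalanced labels (here 45 samples of class 0 followed by 5 of class 1), plain `KFold` on unshuffled data puts every class-1 sample into the last validation fold. `StratifiedKFold` instead keeps the class proportions roughly equal across folds.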

In [5]:
from sklearn.model_selection import StratifiedKFold

In [6]:
# Imbalanced toy dataset: 50 identical one-feature samples whose labels are
# 45 zeros followed by 5 ones.
X = np.ones((50, 1))
y = np.repeat([0, 1], [45, 5])
print('X: ', X)
print('y: ', y)

X:  [[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]]
y:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 1 1 1 1 1]


In [7]:
# Plain 3-fold split: the labels are passed to split() but do not influence
# the folds, so print the validation labels to see how the classes land.
kf = KFold(n_splits=3)
for train_idx, val_idx in kf.split(X, y):
    print(y[val_idx])

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1]


In [8]:
# Stratified 3-fold splitter; shuffle/random_state are spelled out at their
# default values to mirror the KFold configuration used earlier.
skf = StratifiedKFold(n_splits=3, shuffle=False, random_state=None)

In [9]:
# Print the validation labels of each stratified fold to inspect the class mix.
for train_idx, val_idx in skf.split(X, y):
    print(y[val_idx])

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
