In [1]:
import numpy as np

In [2]:
fake_data = np.random.randint(0,100,100).reshape(10,10)
fake_data

array([[ 4, 91,  9, 37, 28, 29,  3, 43, 71, 59],
       [55, 19, 38, 83, 21, 92, 85, 82, 91, 19],
       [ 1, 49, 31, 86, 43, 80, 44, 87, 73, 73],
       [29, 97,  6, 73, 40, 17, 16, 97, 50, 14],
       [13, 10, 56, 21, 24, 47, 43, 96, 57, 35],
       [21, 41,  3, 72, 34, 65, 89, 14, 83, 63],
       [ 0, 70, 39, 80, 83, 67, 81, 36, 26, 79],
       [65,  9, 17, 19, 58, 13,  3, 92, 11, 72],
       [93, 64, 84, 37, 23, 34, 15, 48,  9, 48],
       [13, 28, 93, 54, 77, 58, 75, 72, 37, 48]])

In [211]:
def cross_validation(data, shuffle=True, folds=2, random_state=None, verbose=True):
    '''return train and test sets'''
        
    # error checking
    assert type(shuffle) == bool, 'shuffle must be True or False!'
    assert type(folds) == int, 'must be an integer'
    assert folds > 1, 'must have 2 or more folds'
    assert folds <= len(data), 'can have no more than {} folds!'.format(len(data))
    if random_state != None:
        assert type(random_state) == int, 'random_state must be an integer!'
    
    # reproducibility
    if random_state:
        np.random.seed(random_state)
    
    # setup indices
    indices = np.arange(data.shape[0])
    if shuffle:
        np.random.shuffle(indices)
    
    # setup K-fold
    num_indices = len(indices)
    split_proportion = 1/folds
    split = int( np.floor(split_proportion * num_indices) )
    
    # main logic
    for fold in range(folds):
        mask = np.ones(num_indices, dtype=bool)
        offset = split*fold
        if fold < folds-1:
            test_indices = indices[offset:split+offset]
        else:
            test_indices = indices[offset:]
        mask[offset:split+offset] = False
        train_indices = indices[mask]        
        train = data[train_indices]
        test = data[test_indices]
        if verbose:
            print('Fold:', fold)
            print('Train:\n', train)
            print('Test:\n', test, '\n')
    
    #return train, test

In [216]:
cross_validation(fake_data, shuffle=True, folds=3, random_state=42, verbose=True)

Fold: 0
Train:
 [[ 4 91  9 37 28 29  3 43 71 59]
 [65  9 17 19 58 13  3 92 11 72]
 [ 1 49 31 86 43 80 44 87 73 73]
 [13 28 93 54 77 58 75 72 37 48]
 [13 10 56 21 24 47 43 96 57 35]
 [29 97  6 73 40 17 16 97 50 14]
 [ 0 70 39 80 83 67 81 36 26 79]]
Test:
 [[93 64 84 37 23 34 15 48  9 48]
 [55 19 38 83 21 92 85 82 91 19]
 [21 41  3 72 34 65 89 14 83 63]] 

Fold: 1
Train:
 [[93 64 84 37 23 34 15 48  9 48]
 [55 19 38 83 21 92 85 82 91 19]
 [21 41  3 72 34 65 89 14 83 63]
 [13 28 93 54 77 58 75 72 37 48]
 [13 10 56 21 24 47 43 96 57 35]
 [29 97  6 73 40 17 16 97 50 14]
 [ 0 70 39 80 83 67 81 36 26 79]]
Test:
 [[ 4 91  9 37 28 29  3 43 71 59]
 [65  9 17 19 58 13  3 92 11 72]
 [ 1 49 31 86 43 80 44 87 73 73]] 

Fold: 2
Train:
 [[93 64 84 37 23 34 15 48  9 48]
 [55 19 38 83 21 92 85 82 91 19]
 [21 41  3 72 34 65 89 14 83 63]
 [ 4 91  9 37 28 29  3 43 71 59]
 [65  9 17 19 58 13  3 92 11 72]
 [ 1 49 31 86 43 80 44 87 73 73]
 [ 0 70 39 80 83 67 81 36 26 79]]
Test:
 [[13 28 93 54 77 58 75 72 37 48