In [7]:
import numpy as np

In [9]:
# Toy feature matrix: four samples with two features each
# (the row pair [1, 2], [3, 4] repeated twice).
X = np.tile(np.array([[1, 2], [3, 4]]), (2, 1))
# One integer target per sample: 1, 2, 3, 4.
y = np.arange(1, 5)

In [27]:


    # Each test fold is a consecutive slice: indices[start_index:end_index].
    # For 14 samples split into 3 folds the test slices are:
    #   indices[0:5]
    #   indices[5:10]
    #   indices[10:14]

In [30]:
def split(indices, n_splits=3):
    """Print the train and test indices of every consecutive fold.

    The helper is defined in this cell so the call below runs on a fresh
    kernel. The default of 3 folds reproduces the recorded output for
    14 samples (test slices 0:5, 5:10 and 10:14).

    Parameters
    ----------
    indices : np.ndarray
        1-D array of sample indices to partition.
    n_splits : int, default=3
        Number of consecutive folds.
    """
    n_samples = len(indices)

    # Every fold gets n_samples // n_splits items; the first
    # n_samples % n_splits folds take one extra so nothing is dropped.
    fold_sizes = np.full(n_splits, n_samples // n_splits, dtype=int)
    fold_sizes[: n_samples % n_splits] += 1

    start = 0
    for fold_size in fold_sizes:
        end = start + fold_size
        test = indices[start:end]
        # The training part is everything outside the current test slice.
        train = np.concatenate((indices[:start], indices[end:]))
        print(train, test)
        start = end


split(np.arange(14))

[ 5  6  7  8  9 10 11 12 13] [0 1 2 3 4]
[ 0  1  2  3  4 10 11 12 13] [5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9] [10 11 12 13]


In [35]:
class KFold:
    '''
    K-Fold cross-validator

    Provide train/test indices to split data in train/test sets.
    Split dataset into k consecutive folds (without shuffling by default).
    Each fold is used once as the test set while the remaining folds
    form the training set.

    Parameters
    ----------
    n_splits : int, default=5
        Number of folds. Must be at least 2

    shuffle : bool, default=False
        Whether to shuffle the data before splitting into batches

    random_state : int, default=42
        When 'shuffle' is True, 'random_state' affects the ordering
        of the indices. Otherwise, this parameter has no effect

    Raises
    ------
    ValueError
        If 'n_splits' is smaller than 2 (at construction), or larger
        than the number of samples (when the folds are generated).
    '''
    def __init__(
        self,
        n_splits=5,
        shuffle=False,
        random_state=42
    ):
        # A single fold would leave nothing to train on.
        if n_splits < 2:
            raise ValueError(
                f"n_splits must be at least 2, got n_splits={n_splits}"
            )

        self.n_splits = n_splits
        self.shuffle = shuffle
        self.random_state = random_state

    def _iter_test_indices(self, X):
        '''
        Yield the test indices of each fold, one array per fold.

        When 'shuffle' is True the indices are permuted once (seeded by
        'random_state') before being cut into folds, so the arrays
        yielded here are in shuffled order.
        '''
        # Number of samples to split
        n_samples = len(X)
        n_splits = self.n_splits

        # More folds than samples would produce empty test sets.
        if n_splits > n_samples:
            raise ValueError(
                f"Cannot have number of splits n_splits={n_splits} greater "
                f"than the number of samples: n_samples={n_samples}"
            )

        indices = np.arange(n_samples)
        if self.shuffle:
            # Seeded in-place permutation: the same random_state always
            # gives the same folds.
            np.random.RandomState(self.random_state).shuffle(indices)

        # Every fold gets n_samples // n_splits items; the first
        # n_samples % n_splits folds take one extra so nothing is dropped.
        fold_sizes = np.full(n_splits, n_samples // n_splits, dtype=int)
        fold_sizes[: n_samples % n_splits] += 1

        current = 0
        for fold_size in fold_sizes:
            start, end = current, current + fold_size

            # Select the indices of the validation data
            yield indices[start:end]

            # Move on to the start of the next fold
            current = end

    def split(self, X):
        '''
        Generate (train_index, test_index) pairs, one per fold.

        Parameters
        ----------
        X : sequence or array with a length
            Data to split; only 'len(X)' is used.

        Yields
        ------
        train_index : np.ndarray
            Sorted integer indices of the training samples.
        test_index : np.ndarray
            Sorted integer indices of the test samples.
        '''
        n_samples = len(X)
        indices = np.arange(n_samples)

        for test_index in self._iter_test_indices(X):
            # A boolean mask avoids the quadratic membership scan and keeps
            # an integer dtype for both outputs.
            test_mask = np.zeros(n_samples, dtype=bool)
            test_mask[test_index] = True

            yield (indices[~test_mask], indices[test_mask])


In [36]:
# Demonstrate the splitter on the toy dataset using two folds.
kf = KFold(n_splits=2)

for i, (train_index, test_index) in enumerate(kf.split(X)):
    # Fold header, followed by the training rows and their targets.
    print(
        f"Fold {i}",
        f"    Train: index={train_index}",
        X[train_index],
        y[train_index],
        sep="\n",
    )
    # Indices held out for evaluation in this fold.
    print(f"    Test: index={test_index}")

Fold 0
    Train: index=[2 3]
    Test: index=[0 1]
Fold 1
    Train: index=[0 1]
    Test: index=[2 3]
