In [3]:
import pandas as pd
from sklearn.model_selection import KFold

# Toy dataset: six rows, two numeric features, and an alternating 0/1 target
df = pd.DataFrame(
    {
        "feature1": list(range(1, 7)),
        "feature2": list(range(7, 13)),
        "target": [0, 1] * 3,
    }
)

# Three-way K-fold splitter using the library's default settings
cv = KFold(n_splits=3)

# Each fold yields positional row indices for the train and test partitions
for train_index, test_index in cv.split(df):
    print("TRAIN:", train_index, "TEST:", test_index)


TRAIN: [2 3 4 5] TEST: [0 1]
TRAIN: [0 1 4 5] TEST: [2 3]
TRAIN: [0 1 2 3] TEST: [4 5]


In [4]:
from sklearn.model_selection import StratifiedKFold

# Sample data: two numeric features and a balanced binary target
X = pd.DataFrame({
    'feature1': [1, 2, 3, 4, 5, 6],
    'feature2': [7, 8, 9, 10, 11, 12]
})
y = pd.Series([0, 1, 0, 1, 0, 1])  # Target values (binary classification)

# Initialize StratifiedKFold with 3 splits. Shuffling is enabled and seeded
# through random_state so the printed folds are the same on every run.
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

for train_index, test_index in cv.split(X, y):
    print("TRAIN:", train_index, "TEST:", test_index)

    # Select the rows of each partition by position using the fold's indices
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Display train/test split for verification. sep="\n" puts the label and
    # the object on separate lines; with print's default sep=" " a space is
    # emitted after the label's newline, which shifts the first line of the
    # printed frame/series out of alignment with the rows below it.
    print("X_train:", X_train, sep="\n")
    print("y_train:", y_train, sep="\n")
    print("X_test:", X_test, sep="\n")
    print("y_test:", y_test, sep="\n")
    print("=" * 50)


TRAIN: [1 2 3 4] TEST: [0 5]
X_train:
    feature1  feature2
1         2         8
2         3         9
3         4        10
4         5        11
y_train:
 1    1
2    0
3    1
4    0
dtype: int64
X_test:
    feature1  feature2
0         1         7
5         6        12
y_test:
 0    0
5    1
dtype: int64
TRAIN: [0 3 4 5] TEST: [1 2]
X_train:
    feature1  feature2
0         1         7
3         4        10
4         5        11
5         6        12
y_train:
 0    0
3    1
4    0
5    1
dtype: int64
X_test:
    feature1  feature2
1         2         8
2         3         9
y_test:
 1    1
2    0
dtype: int64
TRAIN: [0 1 2 5] TEST: [3 4]
X_train:
    feature1  feature2
0         1         7
1         2         8
2         3         9
5         6        12
y_train:
 0    0
1    1
2    0
5    1
dtype: int64
X_test:
    feature1  feature2
3         4        10
4         5        11
y_test:
 3    1
4    0
dtype: int64
