In [1]:

import numpy as np

# Toy dataset shared by the splitting demos below: eight samples with two
# integer features each. Row i is [10*i + 1, 10*i + 2], i.e. [1, 2] ... [71, 72].
X = np.array([[10 * row + 1, 10 * row + 2] for row in range(8)])
# Binary labels: the first two samples are class 0, the remaining six are class 1.
y = np.array([0] * 2 + [1] * 6)

In [2]:
def test_train_test_split():
    """Demonstrate ``train_test_split`` with and without stratification.

    Splits the notebook-level ``X`` and ``y`` twice with ``test_size=0.3`` and
    ``random_state=0`` -- first as a plain split, then with ``stratify=y`` --
    and prints the four arrays produced by each split.
    """
    from sklearn.model_selection import train_test_split

    # Options shared by both splits: 30% of the samples go to the test set,
    # and the seed is fixed.
    split_options = dict(test_size=0.3, random_state=0)

    # Plain split.
    plain_labels = ("X_train=", "X_test=", "y_train=", "y_test=")
    plain_parts = train_test_split(X, y, **split_options)
    for label, part in zip(plain_labels, plain_parts):
        print(label, part)
    print("==================================================")

    # Stratified split: same test size, with y passed as the stratification labels.
    stratified_labels = ("Stratify:X_train=", "Stratify:X_test=",
                         "Stratify:y_train=", "Stratify:y_test=")
    stratified_parts = train_test_split(X, y, stratify=y, **split_options)
    for label, part in zip(stratified_labels, stratified_parts):
        print(label, part)
    
test_train_test_split()

X_train= [[71 72]
 [31 32]
 [ 1  2]
 [51 52]
 [41 42]]
X_test= [[61 62]
 [21 22]
 [11 12]]
y_train= [1 1 0 1 1]
y_test= [1 1 0]
Stratify:X_train= [[61 62]
 [31 32]
 [51 52]
 [11 12]
 [41 42]]
Stratify:X_test= [[21 22]
 [71 72]
 [ 1  2]]
Stratify:y_train= [1 1 1 0 1]
Stratify:y_test= [1 1 0]


In [3]:
def test_KFold():
    """Print the train/test indices of a 3-fold split made without shuffling.

    Splits the notebook-level ``X`` and ``y`` with ``KFold(n_splits=3)`` and
    prints the index arrays of every fold.
    """
    from sklearn.model_selection import KFold
    # Do not shuffle the dataset before splitting.
    # random_state is intentionally omitted: it only has an effect when
    # shuffle=True, and current scikit-learn raises ValueError if it is set
    # while shuffle=False.
    folder = KFold(n_splits=3, shuffle=False)
    for train_index, test_index in folder.split(X, y):
        print("Train Index:", train_index)
        print("Test Index:", test_index)
        print("------------------------\n")
        
test_KFold()

Train Index: [3 4 5 6 7]
Test Index: [0 1 2]
------------------------

Train Index: [0 1 2 6 7]
Test Index: [3 4 5]
------------------------

Train Index: [0 1 2 3 4 5]
Test Index: [6 7]
------------------------



In [4]:
def test_KFold_shuffle():
    """Print the train/test indices of a 3-fold split made with shuffling.

    Splits the notebook-level ``X`` and ``y`` with a shuffling ``KFold``
    seeded with ``random_state=0`` and prints the index arrays of every fold.
    """
    from sklearn.model_selection import KFold

    # Shuffle the dataset before splitting.
    splitter = KFold(n_splits=3, shuffle=True, random_state=0)
    for fold in splitter.split(X, y):
        train_idx, test_idx = fold
        print("Shuffled Train Index:", train_idx)
        print("Shuffled Test Index:", test_idx)
        print("--------------------------\n")
        
test_KFold_shuffle()

Shuffled Train Index: [0 3 4 5 7]
Shuffled Test Index: [1 2 6]
--------------------------

Shuffled Train Index: [1 2 4 5 6]
Shuffled Test Index: [0 3 7]
--------------------------

Shuffled Train Index: [0 1 2 3 6 7]
Shuffled Test Index: [4 5]
--------------------------



In [5]:
def test_StratifiedKFold():
    """Print the indices and labels of a 3-fold stratified split (no shuffling).

    Splits the notebook-level ``X`` and ``y`` with ``StratifiedKFold`` and, for
    every fold, prints the train/test index arrays next to the labels they
    select from ``y``.
    """
    from sklearn.model_selection import StratifiedKFold

    # random_state is intentionally omitted: it only has an effect when
    # shuffle=True, and current scikit-learn raises ValueError if it is set
    # while shuffle=False.
    stratified_folder = StratifiedKFold(n_splits=3, shuffle=False)
    for train_index, test_index in stratified_folder.split(X, y):
        print("X train Index:", train_index, ", y_train:", y[train_index])
        print("X Test Index:", test_index, ",  y_test:", y[test_index])
        print("------------------------\n")
        
test_StratifiedKFold()

X train Index: [1 4 5 6 7] , y_train: [0 1 1 1 1]
X Test Index: [0 2 3] ,  y_test: [0 1 1]
------------------------

X train Index: [0 2 3 6 7] , y_train: [0 1 1 1 1]
X Test Index: [1 4 5] ,  y_test: [0 1 1]
------------------------

X train Index: [0 1 2 3 4 5] , y_train: [0 0 1 1 1 1]
X Test Index: [6 7] ,  y_test: [1 1]
------------------------





In [6]:
def test_LeaveOneOut():
    """Print the held-out index and sample of every leave-one-out split.

    Builds a local 4x4 integer array (shadowing the notebook-level ``X``) and
    iterates over ``LeaveOneOut().split``, printing the test index and the
    corresponding row for each split.
    """
    from sklearn.model_selection import LeaveOneOut
    X = np.array([[1, 2, 3, 4],
                  [11, 12, 13, 14],
                  [21, 22, 23, 24],
                  [31, 32, 33, 34]])

    loo = LeaveOneOut()
    # Only the test side is printed, so the train indices are deliberately ignored.
    for _train_index, test_index in loo.split(X):
        print("Test Index:", test_index)
        print("X_test:", X[test_index])
        print("----------------\n")
        
test_LeaveOneOut()

Test Index: [0]
X_test: [[1 2 3 4]]
----------------

Test Index: [1]
X_test: [[11 12 13 14]]
----------------

Test Index: [2]
X_test: [[21 22 23 24]]
----------------

Test Index: [3]
X_test: [[31 32 33 34]]
----------------



In [7]:
def test_cross_val_score():
    """Print the 5-fold cross-validation scores of ``LinearSVC`` on the digits data.

    Loads the dataset returned by ``load_digits`` into local ``X`` / ``y``
    (shadowing the notebook-level arrays), evaluates a ``LinearSVC`` with
    ``cross_val_score(cv=5)`` and prints the resulting score array.
    """
    from sklearn.datasets import load_digits
    from sklearn.svm import LinearSVC
    from sklearn.model_selection import cross_val_score

    digits = load_digits()  # load a dataset meant for classification problems
    X = digits.data
    y = digits.target

    # Use LinearSVC as the classifier. The seed is fixed, matching the
    # random_state=0 used by the other demos in this notebook, so that repeated
    # runs print the same scores.
    classifier = LinearSVC(random_state=0)
    result = cross_val_score(classifier, X, y, cv=5)
    print("Cross Val Score is:", result)

test_cross_val_score()

Cross Val Score is: [0.92032967 0.87845304 0.92479109 0.95518207 0.88169014]
