# SVM Cross Validation (Pure For-Loops) + GridSearchCV
### With Best-Parameter Tables per Method and Overall Summaries

- Model: **SVM (SVC)** only
- Datasets: **Iris**, **Wine**, **Breast Cancer**
- Splitters: **KFold**, **StratifiedKFold**, **GroupKFold**
- Implementation: **No helper functions**, manual `for` loops only
- Hyperparameter search: **GridSearchCV**
- Scaling: `StandardScaler` fit on train of each outer fold only


In [1]:
# ============================================
# Imports
# ============================================
import numpy as np
import pandas as pd
import json
from sklearn.datasets import load_iris, load_wine, load_breast_cancer
from sklearn.model_selection import KFold, StratifiedKFold, GroupKFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score
import warnings
warnings.filterwarnings('ignore')

RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)
results_all = []  # will store per-fold outcomes across all datasets & splitters


In [2]:
# ============================================
# GridSearch parameters for SVC
# ============================================
param_grid = {
    'kernel': ['linear', 'rbf'],
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.01, 0.1, 1.0]
}


## Dataset: Iris
โหลดและเตรียมข้อมูล พร้อมสร้างกลุ่มสุ่ม 5 กลุ่มสำหรับสาธิต GroupKFold


In [3]:
# ============================================
# Load Iris
# ============================================
iris = load_iris()
feature_names = iris.feature_names
X_raw = iris.data
y_raw = iris.target
X = pd.DataFrame(X_raw, columns=feature_names)
y = pd.Series(y_raw, name='target')

# Random groups for GroupKFold demo (0..4)
groups = np.random.randint(0, 5, size=len(y))
print('Dataset:', 'Iris', '| X shape:', X.shape, '| y shape:', y.shape)


Dataset: Iris | X shape: (150, 4) | y shape: (150,)


In [4]:
# ============================================
# 1) K-Fold
# ============================================
print('\n===== K-Fold Cross Validation =====')
kf = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
fold = 0
for train_index, test_index in kf.split(X):
    fold += 1
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled  = scaler.transform(X_test)

    grid = GridSearchCV(SVC(), param_grid, cv=3, n_jobs=-1)
    grid.fit(X_train_scaled, y_train)
    y_pred = grid.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    f1m = f1_score(y_test, y_pred, average='macro')
    print(f"Fold {fold} - Best Params: {grid.best_params_} - Acc: {acc:.4f} - F1(macro): {f1m:.4f}")
    results_all.append({'dataset': 'Iris', 'splitter': 'KFold', 'fold': fold,
                        'accuracy': acc, 'f1_macro': f1m, 'best_params': grid.best_params_})



===== K-Fold Cross Validation =====
Fold 1 - Best Params: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'} - Acc: 0.9667 - F1(macro): 0.9659
Fold 2 - Best Params: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'} - Acc: 0.9667 - F1(macro): 0.9585
Fold 3 - Best Params: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'} - Acc: 0.9667 - F1(macro): 0.9628
Fold 4 - Best Params: {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'} - Acc: 0.9333 - F1(macro): 0.9389
Fold 5 - Best Params: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'} - Acc: 1.0000 - F1(macro): 1.0000


In [5]:
# ============================================
# 2) Stratified K-Fold
# ============================================
print('\n===== Stratified K-Fold Cross Validation =====')
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
fold = 0
for train_index, test_index in skf.split(X, y):
    fold += 1
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled  = scaler.transform(X_test)

    grid = GridSearchCV(SVC(), param_grid, cv=3, n_jobs=-1)
    grid.fit(X_train_scaled, y_train)
    y_pred = grid.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    f1m = f1_score(y_test, y_pred, average='macro')
    print(f"Fold {fold} - Best Params: {grid.best_params_} - Acc: {acc:.4f} - F1(macro): {f1m:.4f}")
    results_all.append({'dataset': 'Iris', 'splitter': 'StratifiedKFold', 'fold': fold,
                        'accuracy': acc, 'f1_macro': f1m, 'best_params': grid.best_params_})



===== Stratified K-Fold Cross Validation =====
Fold 1 - Best Params: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'} - Acc: 1.0000 - F1(macro): 1.0000
Fold 2 - Best Params: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'} - Acc: 0.9667 - F1(macro): 0.9666
Fold 3 - Best Params: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'} - Acc: 0.8667 - F1(macro): 0.8653
Fold 4 - Best Params: {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'} - Acc: 1.0000 - F1(macro): 1.0000
Fold 5 - Best Params: {'C': 100, 'gamma': 0.01, 'kernel': 'rbf'} - Acc: 0.9667 - F1(macro): 0.9666


In [6]:
# ============================================
# 3) Group K-Fold
# ============================================
print('\n===== Group K-Fold Cross Validation =====')
gkf = GroupKFold(n_splits=5)
fold = 0
for train_index, test_index in gkf.split(X, y, groups=groups):
    fold += 1
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled  = scaler.transform(X_test)

    grid = GridSearchCV(SVC(), param_grid, cv=3, n_jobs=-1)
    grid.fit(X_train_scaled, y_train)
    y_pred = grid.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    f1m = f1_score(y_test, y_pred, average='macro')
    print(f"Fold {fold} - Groups Test: {set(groups[test_index])} - Best Params: {grid.best_params_} - Acc: {acc:.4f} - F1(macro): {f1m:.4f}")
    results_all.append({'dataset': 'Iris', 'splitter': 'GroupKFold', 'fold': fold,
                        'accuracy': acc, 'f1_macro': f1m, 'best_params': grid.best_params_})



===== Group K-Fold Cross Validation =====
Fold 1 - Groups Test: {np.int32(3)} - Best Params: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'} - Acc: 0.9500 - F1(macro): 0.9500
Fold 2 - Groups Test: {np.int32(4)} - Best Params: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'} - Acc: 0.9000 - F1(macro): 0.8977
Fold 3 - Groups Test: {np.int32(1)} - Best Params: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'} - Acc: 1.0000 - F1(macro): 1.0000
Fold 4 - Groups Test: {np.int32(2)} - Best Params: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'} - Acc: 0.9630 - F1(macro): 0.9585
Fold 5 - Groups Test: {np.int32(0)} - Best Params: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'} - Acc: 0.9600 - F1(macro): 0.9619


## Dataset: Wine
โหลดและเตรียมข้อมูล พร้อมสร้างกลุ่มสุ่ม 5 กลุ่มสำหรับสาธิต GroupKFold


In [7]:
# ============================================
# Load Wine
# ============================================
wine = load_wine()
feature_names = wine.feature_names
X_raw = wine.data
y_raw = wine.target
X = pd.DataFrame(X_raw, columns=feature_names)
y = pd.Series(y_raw, name='target')

# Random groups for GroupKFold demo (0..4)
groups = np.random.randint(0, 5, size=len(y))
print('Dataset:', 'Wine', '| X shape:', X.shape, '| y shape:', y.shape)


Dataset: Wine | X shape: (178, 13) | y shape: (178,)


In [8]:
# ============================================
# 1) K-Fold
# ============================================
print('\n===== K-Fold Cross Validation =====')
kf = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
fold = 0
for train_index, test_index in kf.split(X):
    fold += 1
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled  = scaler.transform(X_test)

    grid = GridSearchCV(SVC(), param_grid, cv=3, n_jobs=-1)
    grid.fit(X_train_scaled, y_train)
    y_pred = grid.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    f1m = f1_score(y_test, y_pred, average='macro')
    print(f"Fold {fold} - Best Params: {grid.best_params_} - Acc: {acc:.4f} - F1(macro): {f1m:.4f}")
    results_all.append({'dataset': 'Wine', 'splitter': 'KFold', 'fold': fold,
                        'accuracy': acc, 'f1_macro': f1m, 'best_params': grid.best_params_})



===== K-Fold Cross Validation =====
Fold 1 - Best Params: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'} - Acc: 1.0000 - F1(macro): 1.0000
Fold 2 - Best Params: {'C': 1, 'gamma': 0.01, 'kernel': 'rbf'} - Acc: 1.0000 - F1(macro): 1.0000
Fold 3 - Best Params: {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'} - Acc: 0.9722 - F1(macro): 0.9729
Fold 4 - Best Params: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'} - Acc: 0.9714 - F1(macro): 0.9708
Fold 5 - Best Params: {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'} - Acc: 0.9714 - F1(macro): 0.9710


In [9]:
# ============================================
# 2) Stratified K-Fold
# ============================================
print('\n===== Stratified K-Fold Cross Validation =====')
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
fold = 0
for train_index, test_index in skf.split(X, y):
    fold += 1
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled  = scaler.transform(X_test)

    grid = GridSearchCV(SVC(), param_grid, cv=3, n_jobs=-1)
    grid.fit(X_train_scaled, y_train)
    y_pred = grid.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    f1m = f1_score(y_test, y_pred, average='macro')
    print(f"Fold {fold} - Best Params: {grid.best_params_} - Acc: {acc:.4f} - F1(macro): {f1m:.4f}")
    results_all.append({'dataset': 'Wine', 'splitter': 'StratifiedKFold', 'fold': fold,
                        'accuracy': acc, 'f1_macro': f1m, 'best_params': grid.best_params_})



===== Stratified K-Fold Cross Validation =====
Fold 1 - Best Params: {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'} - Acc: 0.9444 - F1(macro): 0.9458
Fold 2 - Best Params: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'} - Acc: 1.0000 - F1(macro): 1.0000
Fold 3 - Best Params: {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'} - Acc: 0.9444 - F1(macro): 0.9407
Fold 4 - Best Params: {'C': 1, 'gamma': 0.01, 'kernel': 'rbf'} - Acc: 0.9714 - F1(macro): 0.9701
Fold 5 - Best Params: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'} - Acc: 1.0000 - F1(macro): 1.0000


In [10]:
# ============================================
# 3) Group K-Fold
# ============================================
print('\n===== Group K-Fold Cross Validation =====')
gkf = GroupKFold(n_splits=5)
fold = 0
for train_index, test_index in gkf.split(X, y, groups=groups):
    fold += 1
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled  = scaler.transform(X_test)

    grid = GridSearchCV(SVC(), param_grid, cv=3, n_jobs=-1)
    grid.fit(X_train_scaled, y_train)
    y_pred = grid.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    f1m = f1_score(y_test, y_pred, average='macro')
    print(f"Fold {fold} - Groups Test: {set(groups[test_index])} - Best Params: {grid.best_params_} - Acc: {acc:.4f} - F1(macro): {f1m:.4f}")
    results_all.append({'dataset': 'Wine', 'splitter': 'GroupKFold', 'fold': fold,
                        'accuracy': acc, 'f1_macro': f1m, 'best_params': grid.best_params_})



===== Group K-Fold Cross Validation =====
Fold 1 - Groups Test: {np.int32(0)} - Best Params: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'} - Acc: 1.0000 - F1(macro): 1.0000
Fold 2 - Groups Test: {np.int32(2)} - Best Params: {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'} - Acc: 0.9500 - F1(macro): 0.9488
Fold 3 - Groups Test: {np.int32(3)} - Best Params: {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'} - Acc: 1.0000 - F1(macro): 1.0000
Fold 4 - Groups Test: {np.int32(1)} - Best Params: {'C': 10, 'gamma': 0.1, 'kernel': 'rbf'} - Acc: 1.0000 - F1(macro): 1.0000
Fold 5 - Groups Test: {np.int32(4)} - Best Params: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'} - Acc: 1.0000 - F1(macro): 1.0000


## Dataset: Breast Cancer
โหลดและเตรียมข้อมูล พร้อมสร้างกลุ่มสุ่ม 5 กลุ่มสำหรับสาธิต GroupKFold


In [11]:
# ============================================
# Load Breast Cancer
# ============================================
bc = load_breast_cancer()
feature_names = bc.feature_names
X_raw = bc.data
y_raw = bc.target
X = pd.DataFrame(X_raw, columns=feature_names)
y = pd.Series(y_raw, name='target')

# Random groups for GroupKFold demo (0..4)
groups = np.random.randint(0, 5, size=len(y))
print('Dataset:', 'Breast Cancer', '| X shape:', X.shape, '| y shape:', y.shape)


Dataset: Breast Cancer | X shape: (569, 30) | y shape: (569,)


In [12]:
# ============================================
# 1) K-Fold
# ============================================
print('\n===== K-Fold Cross Validation =====')
kf = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
fold = 0
for train_index, test_index in kf.split(X):
    fold += 1
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled  = scaler.transform(X_test)

    grid = GridSearchCV(SVC(), param_grid, cv=3, n_jobs=-1)
    grid.fit(X_train_scaled, y_train)
    y_pred = grid.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    f1m = f1_score(y_test, y_pred, average='macro')
    print(f"Fold {fold} - Best Params: {grid.best_params_} - Acc: {acc:.4f} - F1(macro): {f1m:.4f}")
    results_all.append({'dataset': 'Breast Cancer', 'splitter': 'KFold', 'fold': fold,
                        'accuracy': acc, 'f1_macro': f1m, 'best_params': grid.best_params_})



===== K-Fold Cross Validation =====
Fold 1 - Best Params: {'C': 100, 'gamma': 'scale', 'kernel': 'rbf'} - Acc: 0.9386 - F1(macro): 0.9355
Fold 2 - Best Params: {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'} - Acc: 0.9912 - F1(macro): 0.9901
Fold 3 - Best Params: {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'} - Acc: 0.9649 - F1(macro): 0.9619
Fold 4 - Best Params: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'} - Acc: 0.9825 - F1(macro): 0.9812
Fold 5 - Best Params: {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'} - Acc: 0.9823 - F1(macro): 0.9815


In [13]:
# ============================================
# 2) Stratified K-Fold
# ============================================
print('\n===== Stratified K-Fold Cross Validation =====')
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
fold = 0
for train_index, test_index in skf.split(X, y):
    fold += 1
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled  = scaler.transform(X_test)

    grid = GridSearchCV(SVC(), param_grid, cv=3, n_jobs=-1)
    grid.fit(X_train_scaled, y_train)
    y_pred = grid.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    f1m = f1_score(y_test, y_pred, average='macro')
    print(f"Fold {fold} - Best Params: {grid.best_params_} - Acc: {acc:.4f} - F1(macro): {f1m:.4f}")
    results_all.append({'dataset': 'Breast Cancer', 'splitter': 'StratifiedKFold', 'fold': fold,
                        'accuracy': acc, 'f1_macro': f1m, 'best_params': grid.best_params_})



===== Stratified K-Fold Cross Validation =====
Fold 1 - Best Params: {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'} - Acc: 1.0000 - F1(macro): 1.0000
Fold 2 - Best Params: {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'} - Acc: 0.9474 - F1(macro): 0.9435
Fold 3 - Best Params: {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'} - Acc: 0.9649 - F1(macro): 0.9615
Fold 4 - Best Params: {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'} - Acc: 0.9912 - F1(macro): 0.9906
Fold 5 - Best Params: {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'} - Acc: 0.9823 - F1(macro): 0.9809


In [14]:
# ============================================
# 3) Group K-Fold
# ============================================
print('\n===== Group K-Fold Cross Validation =====')
gkf = GroupKFold(n_splits=5)
fold = 0
for train_index, test_index in gkf.split(X, y, groups=groups):
    fold += 1
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled  = scaler.transform(X_test)

    grid = GridSearchCV(SVC(), param_grid, cv=3, n_jobs=-1)
    grid.fit(X_train_scaled, y_train)
    y_pred = grid.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    f1m = f1_score(y_test, y_pred, average='macro')
    print(f"Fold {fold} - Groups Test: {set(groups[test_index])} - Best Params: {grid.best_params_} - Acc: {acc:.4f} - F1(macro): {f1m:.4f}")
    results_all.append({'dataset': 'Breast Cancer', 'splitter': 'GroupKFold', 'fold': fold,
                        'accuracy': acc, 'f1_macro': f1m, 'best_params': grid.best_params_})



===== Group K-Fold Cross Validation =====
Fold 1 - Groups Test: {np.int32(4)} - Best Params: {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'} - Acc: 0.9764 - F1(macro): 0.9741
Fold 2 - Groups Test: {np.int32(0)} - Best Params: {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'} - Acc: 0.9569 - F1(macro): 0.9536
Fold 3 - Groups Test: {np.int32(1)} - Best Params: {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'} - Acc: 0.9741 - F1(macro): 0.9724
Fold 4 - Groups Test: {np.int32(3)} - Best Params: {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'} - Acc: 0.9545 - F1(macro): 0.9506
Fold 5 - Groups Test: {np.int32(2)} - Best Params: {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'} - Acc: 0.9900 - F1(macro): 0.9894


## Best Parameters per Method (per dataset & splitter)
เลือกพารามิเตอร์ที่ดีที่สุดต่อ **dataset+splitter** จากค่าที่ถูกเลือกเป็น `best_params` ในแต่ละ fold แล้วคำนวณค่าเฉลี่ยคะแนนตามพารามิเตอร์นั้นๆ

In [16]:
df_all = pd.DataFrame(results_all).copy()
# แปลง dict -> string เพื่อให้ groupby ได้
df_all['best_params_str'] = df_all['best_params'].apply(lambda d: json.dumps(d, sort_keys=True))

agg = (df_all.groupby(['dataset','splitter','best_params_str'])
       .agg(mean_accuracy=('accuracy','mean'),
            mean_f1_macro=('f1_macro','mean'),
            times_selected=('best_params_str','size'))
       .reset_index())

# เลือกแถวที่ accuracy เฉลี่ยดีที่สุดในแต่ละ dataset+splitter
idx = agg.groupby(['dataset','splitter'])['mean_accuracy'].idxmax()
best_params_per_method = agg.loc[idx].reset_index(drop=True)
best_params_per_method['best_params'] = best_params_per_method['best_params_str'].apply(lambda s: json.loads(s))
best_params_per_method = best_params_per_method.drop(columns=['best_params_str'])
best_params_per_method = best_params_per_method.sort_values(['dataset','splitter']).reset_index(drop=True)
best_params_per_method


Unnamed: 0,dataset,splitter,mean_accuracy,mean_f1_macro,times_selected,best_params
0,Breast Cancer,GroupKFold,0.973765,0.971634,4,"{'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}"
1,Breast Cancer,KFold,0.991228,0.990065,1,"{'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}"
2,Breast Cancer,StratifiedKFold,0.982301,0.98086,1,"{'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}"
3,Iris,GroupKFold,0.954593,0.953618,5,"{'C': 1, 'gamma': 'scale', 'kernel': 'linear'}"
4,Iris,KFold,0.975,0.971808,4,"{'C': 1, 'gamma': 'scale', 'kernel': 'linear'}"
5,Iris,StratifiedKFold,1.0,1.0,1,"{'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}"
6,Wine,GroupKFold,1.0,1.0,1,"{'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}"
7,Wine,KFold,1.0,1.0,1,"{'C': 1, 'gamma': 0.01, 'kernel': 'rbf'}"
8,Wine,StratifiedKFold,1.0,1.0,2,"{'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}"


## Overall Summaries with Model Parameters

In [17]:
# Best splitter (and its params) per dataset by highest mean accuracy
overall_best_by_dataset = (best_params_per_method
                          .sort_values(['dataset','mean_accuracy'], ascending=[True, False])
                          .groupby('dataset', as_index=False)
                          .head(1)
                          .reset_index(drop=True))
overall_best_by_dataset


Unnamed: 0,dataset,splitter,mean_accuracy,mean_f1_macro,times_selected,best_params
0,Breast Cancer,KFold,0.991228,0.990065,1,"{'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}"
1,Iris,StratifiedKFold,1.0,1.0,1,"{'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}"
2,Wine,GroupKFold,1.0,1.0,1,"{'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}"
