In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load a sample dataset
data = load_iris()
X = data.data
y = data.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the hyperparameters and the range of values to test
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1],
    'kernel': ['linear', 'rbf', 'poly']
}

# 1. GridSearchCV Approach
grid_search = GridSearchCV(estimator=SVC(), param_grid=param_grid, cv=5, n_jobs=-1, scoring='accuracy', return_train_score=True)
grid_search.fit(X_train, y_train)

# Best params and score from GridSearchCV
best_params_gs = grid_search.best_params_
best_score_gs = grid_search.best_score_
print(f"GridSearchCV Best Parameters: {best_params_gs}")
print(f"GridSearchCV Best Cross-Validation Score: {best_score_gs:.4f}")

# 2. Manual KFold Cross-Validation Approach
kf = KFold(n_splits=5, shuffle=True, random_state=42)
best_score_kf = -np.inf
best_params_kf = None

# Iterate over all combinations of parameters
for C in param_grid['C']:
    for gamma in param_grid['gamma']:
        for kernel in param_grid['kernel']:
            scores = []
            for train_idx, val_idx in kf.split(X_train):
                X_train_fold, X_val_fold = X_train[train_idx], X_train[val_idx]
                y_train_fold, y_val_fold = y_train[train_idx], y_train[val_idx]

                model = SVC(C=C, gamma=gamma, kernel=kernel)
                model.fit(X_train_fold, y_train_fold)
                score = model.score(X_val_fold, y_val_fold)
                scores.append(score)

            mean_score = np.mean(scores)
            if mean_score > best_score_kf:
                best_score_kf = mean_score
                best_params_kf = {'C': C, 'gamma': gamma, 'kernel': kernel}

print(f"KFold Cross-Validation Best Parameters: {best_params_kf}")
print(f"KFold Cross-Validation Best Score: {best_score_kf:.4f}")

# Compare the best models on the test set
best_model_gs = grid_search.best_estimator_
best_model_kf = SVC(**best_params_kf).fit(X_train, y_train)

y_pred_gs = best_model_gs.predict(X_test)
y_pred_kf = best_model_kf.predict(X_test)

test_score_gs = accuracy_score(y_test, y_pred_gs)
test_score_kf = accuracy_score(y_test, y_pred_kf)

print(f"GridSearchCV Test Set Accuracy: {test_score_gs:.4f}")
print(f"KFold Cross-Validation Test Set Accuracy: {test_score_kf:.4f}")


GridSearchCV Best Parameters: {'C': 0.1, 'gamma': 0.1, 'kernel': 'poly'}
GridSearchCV Best Cross-Validation Score: 0.9583
KFold Cross-Validation Best Parameters: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
KFold Cross-Validation Best Score: 0.9750
GridSearchCV Test Set Accuracy: 1.0000
KFold Cross-Validation Test Set Accuracy: 1.0000


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load a sample dataset
data = load_iris()
X = data.data
y = data.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the hyperparameters and the range of values to test
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1],
    'kernel': ['linear', 'rbf', 'poly']
}

# 1. GridSearchCV Approach
grid_search = GridSearchCV(estimator=SVC(), param_grid=param_grid, cv=5, n_jobs=-1, scoring='accuracy', return_train_score=True)
grid_search.fit(X_train, y_train)

# Best params and score from GridSearchCV
best_params_gs = grid_search.best_params_
best_score_gs = grid_search.best_score_
print(f"GridSearchCV Best Parameters: {best_params_gs}")
print(f"GridSearchCV Best Cross-Validation Score: {best_score_gs:.4f}")

# 2. Manual KFold Cross-Validation Approach
kf = KFold(n_splits=5, shuffle=True, random_state=42)
best_score_kf = -np.inf
best_params_kf = None

# Iterate over all combinations of parameters
for C in param_grid['C']:
    for gamma in param_grid['gamma']:
        for kernel in param_grid['kernel']:
            scores = []
            for train_idx, val_idx in kf.split(X_train):
                X_train_fold, X_val_fold = X_train[train_idx], X_train[val_idx]
                y_train_fold, y_val_fold = y_train[train_idx], y_train[val_idx]

                model = SVC(C=C, gamma=gamma, kernel=kernel, random_state=42)
                model.fit(X_train_fold, y_train_fold)
                score = model.score(X_val_fold, y_val_fold)
                scores.append(score)

            mean_score = np.mean(scores)
            if mean_score > best_score_kf:
                best_score_kf = mean_score
                best_params_kf = {'C': C, 'gamma': gamma, 'kernel': kernel}

print(f"KFold Cross-Validation Best Parameters: {best_params_kf}")
print(f"KFold Cross-Validation Best Score: {best_score_kf:.4f}")

# Compare the best models on the test set
best_model_gs = grid_search.best_estimator_
best_model_kf = SVC(**best_params_kf, random_state=42).fit(X_train, y_train)

y_pred_gs = best_model_gs.predict(X_test)
y_pred_kf = best_model_kf.predict(X_test)

test_score_gs = accuracy_score(y_test, y_pred_gs)
test_score_kf = accuracy_score(y_test, y_pred_kf)

print(f"GridSearchCV Test Set Accuracy: {test_score_gs:.4f}")
print(f"KFold Cross-Validation Test Set Accuracy: {test_score_kf:.4f}")


GridSearchCV Best Parameters: {'C': 0.1, 'gamma': 0.1, 'kernel': 'poly'}
GridSearchCV Best Cross-Validation Score: 0.9583
KFold Cross-Validation Best Parameters: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
KFold Cross-Validation Best Score: 0.9750
GridSearchCV Test Set Accuracy: 1.0000
KFold Cross-Validation Test Set Accuracy: 1.0000
