In [1]:
from sklearn import svm
from time import time
from pandas import DataFrame
from pathlib import Path

from sklearn.model_selection import GridSearchCV

from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingGridSearchCV

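# Make sure that the result directory exists before running any of the functions below.
# NOTE(review): none of the functions visible here read from or write to a directory, so this
# requirement may be stale - confirm before relying on it.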

def tune_with_grid_search(x_train, y_train, param_grid, cv=5):
    """Tune an ``svm.SVC`` with an exhaustive ``GridSearchCV`` and time the search.

    Parameters
    ----------
    x_train, y_train
        Training features and labels, passed unchanged to ``GridSearchCV.fit``.
    param_grid
        Parameter grid handed to ``GridSearchCV``.
    cv
        Cross-validation setting forwarded to ``GridSearchCV``. Defaults to 5.

    Returns
    -------
    (results, duration)
        ``results`` is a DataFrame with the columns ``rank_test_score``,
        ``mean_test_score`` (multiplied by 100) and ``params``, sorted by
        ascending rank. ``duration`` is the wall-clock time in seconds, measured
        with ``time()``, spent building and fitting the search.
    """
    svc = svm.SVC()

    start = time()
    search = GridSearchCV(svc, param_grid, cv=cv).fit(x_train, y_train)
    duration = time() - start

    # Keep only the most relevant columns (for readability) and express the score as a percentage.
    # The sort is stable so that candidates sharing a rank keep their cv_results_ order; callers
    # that read the first row as "the best" therefore always get the same candidate.
    results = (
        DataFrame(search.cv_results_)
        .loc[:, ['rank_test_score', 'mean_test_score', 'params']]
        .assign(mean_test_score=lambda frame: frame['mean_test_score'] * 100)
        .sort_values(by='rank_test_score', ascending=True, kind='mergesort')
    )

    return results, duration


def tune_with_halving_grid_search(
    x_train,
    y_train,
    param_grid,
    cv=5,
    factor=3,
    min_resources='exhaust',
    random_state=None,
):
    """Tune an ``svm.SVC`` with ``HalvingGridSearchCV`` and time the search.

    Parameters
    ----------
    x_train, y_train
        Training features and labels, passed unchanged to ``HalvingGridSearchCV.fit``.
    param_grid
        Parameter grid handed to ``HalvingGridSearchCV``.
    cv, factor, min_resources
        Forwarded to ``HalvingGridSearchCV``. The defaults (5, 3, ``'exhaust'``)
        are the values this function previously hard-coded.
    random_state
        Forwarded to ``HalvingGridSearchCV``. Defaults to ``None``, which matches
        the previous behaviour; pass an int to make the search repeatable.

    Returns
    -------
    (results, duration)
        ``results`` is a DataFrame with the columns ``iter``, ``rank_test_score``,
        ``mean_test_score`` (multiplied by 100) and ``params``, sorted by
        descending ``iter`` and then ascending rank. ``duration`` is the
        wall-clock time in seconds, measured with ``time()``, spent building and
        fitting the search.
    """
    svc = svm.SVC()

    start = time()
    search = HalvingGridSearchCV(
        svc,
        param_grid,
        cv=cv,
        factor=factor,
        min_resources=min_resources,
        random_state=random_state,
    ).fit(x_train, y_train)
    duration = time() - start

    # Keep only the most relevant columns (for readability) and express the score as a percentage.
    # Sort on the iter column first (descending) so the models with the most training data behind
    # them come first, then by rank within each iteration.
    results = (
        DataFrame(search.cv_results_)
        .loc[:, ['iter', 'rank_test_score', 'mean_test_score', 'params']]
        .assign(mean_test_score=lambda frame: frame['mean_test_score'] * 100)
        .sort_values(by=['iter', 'rank_test_score'], ascending=[False, True])
    )

    return results, duration

In [3]:
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score

# Seed passed as random_state to both make_classification and train_test_split in main().
RANDOM_STATE = 35090


def main():
    """Compare GridSearchCV and HalvingGridSearchCV on a synthetic classification task.

    Generates a data set, tunes an SVC with both search strategies, prints the head of
    each result table, refits an SVC with the first-row parameters of each table on the
    training split, and reports the search score, search duration and test accuracy.
    """
    # the shared seed keeps both the generated data and the split repeatable
    features, labels = make_classification(n_samples=3000, random_state=RANDOM_STATE)
    x_train, x_test, y_train, y_test = train_test_split(features, labels, random_state=RANDOM_STATE)

    svc_params = dict(
        C=[0.1, 0.5, 1, 2, 5, 10],
        kernel=['linear', 'rbf', 'sigmoid', 'poly'],
        tol=[1e-3, 1e-2],
    )

    def top_entry(table, column):
        # both result tables arrive sorted, so the first row is the preferred candidate
        return table[column].iloc[0]

    def holdout_accuracy(params):
        # refit on the training split with the given parameters, then score on the test split
        model = svm.SVC(**params)
        model.fit(x_train, y_train)
        return accuracy_score(y_test, model.predict(x_test))

    gs_results, gs_duration = tune_with_grid_search(x_train, y_train, svc_params)
    halving_results, halving_duration = tune_with_halving_grid_search(x_train, y_train, svc_params)

    for table in (gs_results, halving_results):
        print(table.head())

    score1, params1 = top_entry(gs_results, 'mean_test_score'), top_entry(gs_results, 'params')
    score2, params2 = top_entry(halving_results, 'mean_test_score'), top_entry(halving_results, 'params')

    accuracy1 = holdout_accuracy(params1)
    accuracy2 = holdout_accuracy(params2)

    print(f'Best score for GridSearchCv is {score1:.3f}, took {gs_duration:.2f} seconds')
    print(f'Params: {params1}')
    print(f'Corresponding test accuracy: {accuracy1 * 100:.2f}%\n')

    print(f'Best score for HalvingGridSearchCv is {score2:.3f}, took {halving_duration:.2f} seconds')
    print(f'Params: {params2}')
    print(f'Corresponding test accuracy: {accuracy2 * 100:.2f}%')


# Run the comparison only when this code is executed directly, not when it is imported.
if __name__ == '__main__':
    main()

   rank_test_score  mean_test_score  \
4                1        86.444444   
2                2        86.400000   
3                2        86.400000   
5                4        86.355556   
6                4        86.355556   

                                          params  
4  {'C': 0.1, 'kernel': 'sigmoid', 'tol': 0.001}  
2      {'C': 0.1, 'kernel': 'rbf', 'tol': 0.001}  
3       {'C': 0.1, 'kernel': 'rbf', 'tol': 0.01}  
5   {'C': 0.1, 'kernel': 'sigmoid', 'tol': 0.01}  
6     {'C': 0.1, 'kernel': 'poly', 'tol': 0.001}  
    iter  rank_test_score  mean_test_score  \
70     3                3        85.937500   
71     3                3        85.937500   
68     2                1        86.308725   
69     2                1        86.308725   
64     2                5        85.637584   

                                         params  
70    {'C': 0.5, 'kernel': 'rbf', 'tol': 0.001}  
71     {'C': 0.5, 'kernel': 'rbf', 'tol': 0.01}  
68    {'C': 0.5, 'kernel': 'rbf'