# RandomizedSearchCV

In [1]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV


### Setting up


In [2]:
# Load the breast-cancer dataset and pull out the feature matrix / labels
dataObj = load_breast_cancer()
X, y = dataObj.data, dataObj.target

# Hold out 30% of the samples for testing; stratify on y so both splits
# keep the same class proportions
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, stratify=y, random_state=1
)

# Standardize features with statistics learned from the training split only,
# then apply the same transform to the test split.
# NOTE(review): the scaler is fit on the whole training split, so the 5-fold CV
# run later sees data scaled with statistics from all folds; wrapping the scaler
# and SVC in a Pipeline would avoid that -- confirm whether this matters here.
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

# Base classifier that the hyper-parameter search will tune
svc = SVC(random_state=1)

In [3]:
# Tabulate the classifier's current hyper-parameters, one parameter per row
svc_params = pd.DataFrame(svc.get_params(), index=["param"])
svc_params.transpose()

Unnamed: 0,param
C,1.0
break_ties,False
cache_size,200
class_weight,
coef0,0.0
decision_function_shape,ovr
degree,3
gamma,scale
kernel,rbf
max_iter,-1


### Define parameter sets


In [4]:
# Candidate values shared by C and gamma: powers of ten from 1e-4 to 1e3
param_range = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]

# Linear kernel: only the regularization strength C is tuned
set1 = dict(C=param_range, kernel=["linear"])

# RBF kernel: tune both C and the kernel width gamma
set2 = dict(C=param_range, gamma=param_range, kernel=["rbf"])

# The search samples candidates from either parameter set
param_grid = [set1, set2]

### RandomizedSearchCV classifier


In [5]:
# Randomized hyper-parameter search over the two SVC parameter sets.
# n_iter candidate settings are sampled from param_grid and each one is
# scored by 5-fold cross-validated accuracy.
# random_state pins the sampling so that re-running the notebook evaluates
# the same candidates (consistent with the random_state=1 used for the
# train/test split and the classifier).
rs = RandomizedSearchCV(estimator=svc,
                        param_distributions=param_grid,
                        n_iter=10,
                        scoring='accuracy',
                        cv=5,
                        random_state=1,
                        n_jobs=-1)

In [6]:
# Get parameter names
for k, v in rs.get_params().items():
    print(f"{k:35.35s}: {str(v)}")

cv                                 : 5
error_score                        : nan
estimator__C                       : 1.0
estimator__break_ties              : False
estimator__cache_size              : 200
estimator__class_weight            : None
estimator__coef0                   : 0.0
estimator__decision_function_shape : ovr
estimator__degree                  : 3
estimator__gamma                   : scale
estimator__kernel                  : rbf
estimator__max_iter                : -1
estimator__probability             : False
estimator__random_state            : 1
estimator__shrinking               : True
estimator__tol                     : 0.001
estimator__verbose                 : False
estimator                          : SVC(random_state=1)
n_iter                             : 10
n_jobs                             : -1
param_distributions                : [{'C': [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0], 'kernel': ['linear']}, {'C': [0.0001, 0.001, 0.01, 0.1, 1.0, 10

### Training


In [7]:
# Run the search on the standardized training split
rs.fit(X_train_std, y=y_train)

In [8]:
# Collect the per-candidate cross-validation results into a DataFrame
df = pd.DataFrame(data=rs.cv_results_)

# Report (rows, columns), then preview the first five candidates
n_rows, n_cols = df.shape
print((n_rows, n_cols))
display(df.iloc[:5])

(10, 16)


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_gamma,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.008013,0.001262,0.004807,0.000513,rbf,1.0,10.0,"{'kernel': 'rbf', 'gamma': 1.0, 'C': 10.0}",0.625,0.625,0.625,0.64557,0.632911,0.630696,0.008043,4
1,0.008812,0.001079,0.005221,0.001024,rbf,10.0,1.0,"{'kernel': 'rbf', 'gamma': 10.0, 'C': 1.0}",0.625,0.625,0.625,0.632911,0.632911,0.628165,0.003876,6
2,0.002002,0.000633,0.0006,0.00049,linear,,1.0,"{'kernel': 'linear', 'C': 1.0}",0.9625,0.9875,0.95,0.974684,0.987342,0.972405,0.014534,2
3,0.0016,0.00049,0.000401,0.000491,linear,,0.1,"{'kernel': 'linear', 'C': 0.1}",0.975,1.0,0.9625,0.974684,1.0,0.982437,0.015032,1
4,0.006658,0.000931,0.005304,0.000602,rbf,10.0,0.001,"{'kernel': 'rbf', 'gamma': 10.0, 'C': 0.001}",0.625,0.625,0.625,0.632911,0.632911,0.628165,0.003876,6


In [9]:
# Order candidates from best to worst rank and preview the top five
df = df.sort_values("rank_test_score")
display(df.iloc[:5])

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_gamma,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
3,0.0016,0.00049,0.000401,0.000491,linear,,0.1,"{'kernel': 'linear', 'C': 0.1}",0.975,1.0,0.9625,0.974684,1.0,0.982437,0.015032,1
2,0.002002,0.000633,0.0006,0.00049,linear,,1.0,"{'kernel': 'linear', 'C': 1.0}",0.9625,0.9875,0.95,0.974684,0.987342,0.972405,0.014534,2
9,0.005608,0.002941,0.0006,0.00049,linear,,1000.0,"{'kernel': 'linear', 'C': 1000.0}",0.95,0.95,0.9,0.962025,0.949367,0.942278,0.021665,3
0,0.008013,0.001262,0.004807,0.000513,rbf,1.0,10.0,"{'kernel': 'rbf', 'gamma': 1.0, 'C': 10.0}",0.625,0.625,0.625,0.64557,0.632911,0.630696,0.008043,4
6,0.007512,0.002169,0.004708,0.000513,rbf,1.0,1000.0,"{'kernel': 'rbf', 'gamma': 1.0, 'C': 1000.0}",0.625,0.625,0.625,0.64557,0.632911,0.630696,0.008043,4


### Refitting

- Note that the randomized search has already refit the estimator on the entire training set using the best parameters found. You can confirm this by checking the `refit` setting.


In [10]:
# Display the search object's `refit` setting
rs.refit

True

In [11]:
# Best mean cross-validated score, followed by the parameters that achieved it
print(rs.best_score_, rs.best_params_, sep="\n")

0.9824367088607595
{'kernel': 'linear', 'C': 0.1}


### Test result


In [12]:
# Evaluate the refit best model on the held-out test split.
# The search was fitted on standardized features (X_train_std), so both
# prediction and scoring must use the standardized test matrix; passing the
# raw X_test would feed the model features on a different scale.
y_pred = rs.predict(X_test_std)
testing_accuracy = rs.score(X_test_std, y_test)
print(f"Testing accuracy: {testing_accuracy:6.3f}")

Testing accuracy:  0.971


In [13]:
# To do this manually: build a fresh SVC from the base classifier's settings
# overridden with the best parameters found by the search, then fit it on the
# full (standardized) training split.
# A new estimator is created on purpose: re-fitting rs.best_estimator_ would
# overwrite the model the search has already refit instead of demonstrating
# an independent manual fit.
clf = SVC(**{**svc.get_params(), **rs.best_params_})
clf.fit(X_train_std, y_train)
testing_accuracy = clf.score(X_test_std, y_test)
print(f"Testing accuracy: {testing_accuracy:6.3f}")

Testing accuracy:  0.971
