In [80]:
import pandas as pd
import numpy as np
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV

In [81]:
# Load the bundled Iris dataset. The cells below read its `data`, `target`,
# `feature_names` and `target_names` attributes.
iris = datasets.load_iris()

In [82]:
# Wrap the raw feature matrix in a DataFrame, labelling each column with the
# dataset's own feature names, then preview the first rows.
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df.head(5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [83]:
# Attach the species name for every sample. `iris.target` holds the integer
# class code of each row, so indexing `iris.target_names` with it translates
# all rows in one vectorised step (no per-row lambda, and no attribute-style
# column assignment, which only works when the column already exists).
df['flower'] = iris.target_names[iris.target]
# Rows 45-54 straddle the boundary between the first two classes.
df[45:55]

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),flower
45,4.8,3.0,1.4,0.3,setosa
46,5.1,3.8,1.6,0.2,setosa
47,4.6,3.2,1.4,0.2,setosa
48,5.3,3.7,1.5,0.2,setosa
49,5.0,3.3,1.4,0.2,setosa
50,7.0,3.2,4.7,1.4,versicolor
51,6.4,3.2,4.5,1.5,versicolor
52,6.9,3.1,4.9,1.5,versicolor
53,5.5,2.3,4.0,1.3,versicolor
54,6.5,2.8,4.6,1.5,versicolor


In [84]:
# Hold out 30% of the samples for testing. A fixed random_state makes the split
# (and therefore the hold-out score computed from it) reproducible across
# kernel restarts; without it every run shuffles differently.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.3, random_state=42
)

In [85]:
# Train an RBF-kernel SVC (C=30) on the training split; `fit` returns the
# estimator itself, so construction and fitting are chained.
svm_model = svm.SVC(C=30, kernel='rbf', gamma='auto').fit(X_train, y_train)
# Score on the held-out split.
svm_model.score(X_test, y_test)

0.9333333333333333

In [86]:
# 5-fold cross-validation scores for an RBF-kernel SVC with C=10.
cross_val_score(
    estimator=svm.SVC(C=10, kernel='rbf', gamma='auto'),
    X=iris.data,
    y=iris.target,
    cv=5,
)

array([0.96666667, 1.        , 0.96666667, 0.96666667, 1.        ])

In [87]:
# 5-fold cross-validation scores for an RBF-kernel SVC with C=20.
cross_val_score(
    estimator=svm.SVC(C=20, kernel='rbf', gamma='auto'),
    X=iris.data,
    y=iris.target,
    cv=5,
)

array([0.96666667, 1.        , 0.9       , 0.96666667, 1.        ])

In [88]:
# 5-fold cross-validation scores for an RBF-kernel SVC with C=30.
cross_val_score(
    estimator=svm.SVC(C=30, kernel='rbf', gamma='auto'),
    X=iris.data,
    y=iris.target,
    cv=5,
)

array([0.96666667, 1.        , 0.9       , 0.93333333, 1.        ])

In [89]:
# Same comparison as the individual cross_val_score cells, but looping over
# every kernel / C combination and recording the mean score of each.

kernels = ['rbf','linear']
c = [10,20,30]
avg_scores = {}

for kval in kernels:
    for cval in c:
        # cv=5 is spelled out so the fold count matches the other cells and
        # does not depend on the library's default, which has differed
        # between scikit-learn releases.
        cross_val = cross_val_score(
            svm.SVC(kernel=kval, C=cval, gamma='auto'),
            iris.data,
            iris.target,
            cv=5,
        )
        # Key format is "<kernel>_<C>", e.g. "rbf_10".
        avg_scores[f"{kval}_{cval}"] = np.average(cross_val)

print(avg_scores)

{'rbf_10': 0.9800000000000001, 'rbf_20': 0.9666666666666668, 'rbf_30': 0.96, 'linear_10': 0.9733333333333334, 'linear_20': 0.9666666666666666, 'linear_30': 0.96}


In [90]:
# Exhaustive 5-fold search over every (C, kernel) pair for an SVC.
clf = GridSearchCV(
    estimator=svm.SVC(gamma='auto'),
    param_grid={'C': [10, 20, 30], 'kernel': ['rbf', 'linear']},
    cv=5,
    return_train_score=False,
)

# Run the search on the full dataset, then show the raw per-candidate results.
clf.fit(iris.data, iris.target)
clf.cv_results_

{'mean_fit_time': array([0.0025887 , 0.00099936, 0.00180311, 0.00099907, 0.00159945,
        0.00119972]),
 'std_fit_time': array([1.01091329e-03, 2.43140197e-07, 7.54128366e-04, 4.42200589e-07,
        4.90096228e-04, 4.00900965e-04]),
 'mean_score_time': array([0.00181651, 0.00039968, 0.00039968, 0.00059972, 0.00059934,
        0.00059981]),
 'std_score_time': array([0.00115951, 0.00048951, 0.00048951, 0.00048967, 0.00048936,
        0.00048975]),
 'param_C': masked_array(data=[10, 10, 20, 20, 30, 30],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 10, 'kernel': 'rbf'},
  {'C': 10, 'kernel': 'linear'},
  {'C': 20, 'kernel': 'rbf'},
  {'C': 20, 'kernel': 'linear'},
  {'C': 30, 'kernel': 'rbf'},
  {'C'

In [91]:
# Tabulate the search results, keeping only the tried parameters and their
# mean test score.
# NOTE: this rebinds `df`, which until this cell held the iris feature table.
df = pd.DataFrame(clf.cv_results_)
df.loc[:, ['param_C', 'param_kernel', 'mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,10,rbf,0.98
1,10,linear,0.973333
2,20,rbf,0.966667
3,20,linear,0.966667
4,30,rbf,0.96
5,30,linear,0.96


In [92]:
# List the attributes and methods exposed by the fitted search object
# (exploratory; shows where best_score_, best_params_, etc. live).
dir(clf)

['__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_check_is_fitted',
 '_check_n_features',
 '_check_refit_for_multimetric',
 '_estimator_type',
 '_format_results',
 '_get_param_names',
 '_get_tags',
 '_more_tags',
 '_pairwise',
 '_repr_html_',
 '_repr_html_inner',
 '_repr_mimebundle_',
 '_required_parameters',
 '_run_search',
 '_validate_data',
 'best_estimator_',
 'best_index_',
 'best_params_',
 'best_score_',
 'classes_',
 'cv',
 'cv_results_',
 'decision_function',
 'error_score',
 'estimator',
 'fit',
 'get_params',
 'inverse_transform',
 'multimetric_',
 'n_features_in_',
 'n_jobs',
 'n_splits

In [93]:
# Highest mean cross-validated score among the candidates tried by the search.
clf.best_score_

0.9800000000000001

In [94]:
# Parameter combination that achieved the best mean cross-validated score.
clf.best_params_

{'C': 10, 'kernel': 'rbf'}

In [95]:
# Randomized search: instead of trying all 6 (C, kernel) combinations, sample
# only n_iter=2 of them and cross-validate those with 5 folds.
# random_state pins which candidates are drawn, so the table below is the same
# on every run; without it each execution samples different combinations.
clf = RandomizedSearchCV(svm.SVC(gamma='auto'), {
    'C': [10,20,30],
    'kernel': ['linear','rbf']
}, cv=5, return_train_score=False, n_iter=2, random_state=42)

clf.fit(iris.data, iris.target)
# Show only the sampled parameters and their mean test score.
pd.DataFrame(clf.cv_results_)[['param_kernel','param_C','mean_test_score']]

Unnamed: 0,param_kernel,param_C,mean_test_score
0,linear,20,0.966667
1,rbf,30,0.96
