https://github.com/codebasics/py/blob/master/ML/15_gridsearch/15_grid_search.ipynb
    
    https://www.youtube.com/watch?v=HdlDYng8g9s&list=PLeo1K3hjS3us_ELKYSj_Fth2tIEkdKXvV&index=55

Finding best model and hyper parameter tunning using GridSearchCV

For iris flower dataset in sklearn library, we are going to find out best model and best hyper parameters using GridSearchCV



In [1]:
import pandas as pd

In [2]:
from sklearn import svm, datasets
iris = datasets.load_iris()

In [3]:
import pandas as pd
df = pd.DataFrame(iris.data,columns=iris.feature_names)
df['flower'] = iris.target
df['flower'] = df['flower'].apply(lambda x: iris.target_names[x])
df[47:150]

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),flower
47,4.6,3.2,1.4,0.2,setosa
48,5.3,3.7,1.5,0.2,setosa
49,5.0,3.3,1.4,0.2,setosa
50,7.0,3.2,4.7,1.4,versicolor
51,6.4,3.2,4.5,1.5,versicolor
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


Approach 1: Use train_test_split and manually tune parameters by trial and error

    
    

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.3)


In [5]:
model = svm.SVC(kernel='rbf',C=30,gamma='auto')
model.fit(X_train,y_train)
model.score(X_test, y_test)

0.9777777777777777

Approach 2: Use K Fold Cross validation

    

Manually try suppling models with different parameters to cross_val_score function with 5 fold cross validation



In [6]:
from sklearn.model_selection import cross_val_score


In [7]:
cross_val_score(svm.SVC(kernel='linear',C=10,gamma='auto'),iris.data, iris.target, cv=5)


array([1.        , 1.        , 0.9       , 0.96666667, 1.        ])

In [8]:
cross_val_score(svm.SVC(kernel='rbf',C=10,gamma='auto'),iris.data, iris.target, cv=5)


array([0.96666667, 1.        , 0.96666667, 0.96666667, 1.        ])

In [9]:
cross_val_score(svm.SVC(kernel='rbf',C=20,gamma='auto'),iris.data, iris.target, cv=5)


array([0.96666667, 1.        , 0.9       , 0.96666667, 1.        ])

Above approach is tiresome and very manual. We can use for loop as an alternative



In [10]:
kernels = ['rbf', 'linear']
C = [1,10,20]
avg_scores = {}
for kval in kernels:
    for cval in C:
        cv_scores = cross_val_score(svm.SVC(kernel=kval,C=cval,gamma='auto'),iris.data, iris.target, cv=5)
        avg_scores[kval + '_' + str(cval)] = cv_scores.mean()

avg_scores

{'rbf_1': 0.9800000000000001,
 'rbf_10': 0.9800000000000001,
 'rbf_20': 0.9666666666666668,
 'linear_1': 0.9800000000000001,
 'linear_10': 0.9733333333333334,
 'linear_20': 0.9666666666666666}

From above results we can say that rbf with C=1 or 10 or linear with C=1 will give best performance



Approach 3: Use GridSearchCV
#GridSearchCV does exactly same thing as for loop above but in a single line of code








In [11]:
from sklearn.model_selection import GridSearchCV
clf = GridSearchCV(svm.SVC(gamma='auto'), {
    'C': [1,10,20],
    'kernel': ['rbf','linear']
}, cv=5, return_train_score=False)
clf.fit(iris.data, iris.target)
clf.cv_results_

{'mean_fit_time': array([0.00109854, 0.00055075, 0.00064569, 0.00052261, 0.00060172,
        0.00052094]),
 'std_fit_time': array([4.37167276e-04, 2.35959725e-05, 3.47991171e-05, 3.14032658e-05,
        2.03880090e-05, 1.32342715e-05]),
 'mean_score_time': array([0.00043621, 0.00029998, 0.00030541, 0.0002728 , 0.00029039,
        0.00026126]),
 'std_score_time': array([1.41368394e-04, 3.87977873e-05, 7.08388101e-06, 1.16395159e-05,
        6.81560283e-06, 3.91877762e-06]),
 'param_C': masked_array(data=[1, 1, 10, 10, 20, 20],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 1, 'kernel': 'rbf'},
  {'C': 1, 'kernel': 'linear'},
  {'C': 10, 'kernel': 'rbf'},
  {'C': 10, 'kernel': 'linear'},
  {'C': 20, 'ker

In [12]:
df = pd.DataFrame(clf.cv_results_)
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001099,0.000437,0.000436,0.000141,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
1,0.000551,2.4e-05,0.0003,3.9e-05,1,linear,"{'C': 1, 'kernel': 'linear'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
2,0.000646,3.5e-05,0.000305,7e-06,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
3,0.000523,3.1e-05,0.000273,1.2e-05,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,1.0,0.9,0.966667,1.0,0.973333,0.038873,4
4,0.000602,2e-05,0.00029,7e-06,20,rbf,"{'C': 20, 'kernel': 'rbf'}",0.966667,1.0,0.9,0.966667,1.0,0.966667,0.036515,5
5,0.000521,1.3e-05,0.000261,4e-06,20,linear,"{'C': 20, 'kernel': 'linear'}",1.0,1.0,0.9,0.933333,1.0,0.966667,0.042164,6


In [13]:
df[['param_C','param_kernel','mean_test_score']]


Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.98
1,1,linear,0.98
2,10,rbf,0.98
3,10,linear,0.973333
4,20,rbf,0.966667
5,20,linear,0.966667


In [14]:
clf.best_params_


{'C': 1, 'kernel': 'rbf'}

In [15]:
clf.best_score_


0.9800000000000001

In [16]:
dir(clf)


['__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_check_feature_names',
 '_check_n_features',
 '_check_refit_for_multimetric',
 '_estimator_type',
 '_format_results',
 '_get_param_names',
 '_get_tags',
 '_more_tags',
 '_pairwise',
 '_repr_html_',
 '_repr_html_inner',
 '_repr_mimebundle_',
 '_required_parameters',
 '_run_search',
 '_select_best_index',
 '_validate_data',
 'best_estimator_',
 'best_index_',
 'best_params_',
 'best_score_',
 'classes_',
 'cv',
 'cv_results_',
 'decision_function',
 'error_score',
 'estimator',
 'fit',
 'get_params',
 'inverse_transform',
 'multimetric_',
 'n_features_

Use RandomizedSearchCV to reduce number of iterations and with random combination of parameters.
This is useful when you have too many parameters to try and your training time is longer.
It helps reduce the cost of computation

In [17]:
from sklearn.model_selection import RandomizedSearchCV
rs = RandomizedSearchCV(svm.SVC(gamma='auto'), {
        'C': [1,10,20],
        'kernel': ['rbf','linear']
    }, 
    cv=5, 
    return_train_score=False, 
    n_iter=2
)
rs.fit(iris.data, iris.target)
pd.DataFrame(rs.cv_results_)[['param_C','param_kernel','mean_test_score']]



Unnamed: 0,param_C,param_kernel,mean_test_score
0,10,rbf,0.98
1,1,rbf,0.98


How about different models with different hyperparameters?



In [18]:

from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto'),
        'params' : {
            'C': [1,10,20],
            'kernel': ['rbf','linear']
        }  
    },
    'random_forest': {
        'model': RandomForestClassifier(),
        'params' : {
            'n_estimators': [1,5,10]
        }
    },
    'logistic_regression' : {
        'model': LogisticRegression(solver='liblinear',multi_class='auto'),
        'params': {
            'C': [1,5,10]
        }
    }
}


In [19]:

scores = []

for model_name, mp in model_params.items():
    clf =  GridSearchCV(mp['model'], mp['params'], cv=5, return_train_score=False)
    clf.fit(iris.data, iris.target)
    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })
    
df = pd.DataFrame(scores,columns=['model','best_score','best_params'])
df

Unnamed: 0,model,best_score,best_params
0,svm,0.98,"{'C': 1, 'kernel': 'rbf'}"
1,random_forest,0.953333,{'n_estimators': 1}
2,logistic_regression,0.966667,{'C': 5}


Based on above, I can conclude that SVM with C=1 and kernel='rbf' is the best model
for solving my problem of iris flower classification

In [20]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto'),
        'params' : {                            # here we can increse the no. of parameter according to our choices
           'C':[0.1,1,100,1000],
               'kernel':['rbf','poly','sigmoid','linear'],
               'degree':[1,2,3,4,5,6]
        }  
    },
    'random_forest': {
        'model': RandomForestClassifier(),
        'params' : {
#             'n_estimators': [1,5,10],
#             'max_depth':[3,5,7,10,15],
#           'min_samples_leaf':[3,5,10,15,20],
#           'min_samples_split':[8,10,12,18,20,16],         
#           'criterion':['gini','entropy','log_loss'],
#           'splitter':['best','random'],
#           'max_features':['auto', 'sqrt', 'log2']
#             'criterion':['gini','entropy','log_loss'],
#   'splitter':['best','random'],
#   'max_depth':[1,2,3,4,5],
#   'max_features':['auto', 'sqrt', 'log2']
            'max_depth':[3,5,7,10,15],
          'min_samples_leaf':[3,5,10,15,20],
          'min_samples_split':[8,10,12,18,20,16],
          'criterion':['gini','entropy']
        }
    },
    'logistic_regression' : {
        'model': LogisticRegression(solver='liblinear',multi_class='auto'),
        'params': {
            'C': [1,5,10]
        }
    }
}


In [21]:
## here basically we run a loop and in final we get about gridserach cv result of every model parameters
scores = [] 
for model_name, mp in model_params.items():
    clf =  GridSearchCV(mp['model'], mp['params'],n_jobs=-1,cv=5, return_train_score=False,verbose=2)
    clf.fit(iris.data, iris.target)
    score1=cross_val_score(mp['model'], iris.data, iris.target,cv=5)
    score1.mean()
    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_,
        'cross_val_Score': score1.mean()
    })

 #the whole inforamtion we get here is just becos of using verbose=2

Fitting 5 folds for each of 96 candidates, totalling 480 fits
Fitting 5 folds for each of 300 candidates, totalling 1500 fits
[CV] END ........................C=0.1, degree=1, kernel=rbf; total time=   0.0s
[CV] END ....................C=0.1, degree=2, kernel=sigmoid; total time=   0.0s
[CV] END .....................C=0.1, degree=2, kernel=linear; total time=   0.0s
[CV] END .....................C=0.1, degree=2, kernel=linear; total time=   0.0s
[CV] END .......................C=0.1, degree=3, kernel=poly; total time=   0.0s
[CV] END .......................C=0.1, degree=3, kernel=poly; total time=   0.0s
[CV] END ....................C=0.1, degree=3, kernel=sigmoid; total time=   0.0s
[CV] END ....................C=0.1, degree=3, kernel=sigmoid; total time=   0.0s
[CV] END .....................C=0.1, degree=3, kernel=linear; total time=   0.0s
[CV] END .....................C=0.1, degree=3, kernel=linear; total time=   0.0s
[CV] END ........................C=0.1, degree=5, kernel=rbf; to

[CV] END ........................C=0.1, degree=1, kernel=rbf; total time=   0.0s
[CV] END .......................C=0.1, degree=1, kernel=poly; total time=   0.0s
[CV] END .......................C=0.1, degree=1, kernel=poly; total time=   0.0s
[CV] END ....................C=0.1, degree=1, kernel=sigmoid; total time=   0.0s
[CV] END ....................C=0.1, degree=1, kernel=sigmoid; total time=   0.0s
[CV] END ....................C=0.1, degree=1, kernel=sigmoid; total time=   0.0s
[CV] END .....................C=0.1, degree=1, kernel=linear; total time=   0.0s
[CV] END .....................C=0.1, degree=1, kernel=linear; total time=   0.0s
[CV] END .....................C=0.1, degree=1, kernel=linear; total time=   0.0s
[CV] END ........................C=0.1, degree=2, kernel=rbf; total time=   0.0s
[CV] END ........................C=0.1, degree=2, kernel=rbf; total time=   0.0s
[CV] END .......................C=0.1, degree=2, kernel=poly; total time=   0.0s
[CV] END ...................

[CV] END .......................C=0.1, degree=1, kernel=poly; total time=   0.0s
[CV] END ..........................C=1, degree=2, kernel=rbf; total time=   0.0s
[CV] END .........................C=1, degree=2, kernel=poly; total time=   0.0s
[CV] END .........................C=1, degree=2, kernel=poly; total time=   0.0s
[CV] END .........................C=1, degree=2, kernel=poly; total time=   0.0s
[CV] END .........................C=1, degree=2, kernel=poly; total time=   0.0s
[CV] END .........................C=1, degree=2, kernel=poly; total time=   0.0s
[CV] END ......................C=1, degree=2, kernel=sigmoid; total time=   0.0s
[CV] END ......................C=1, degree=2, kernel=sigmoid; total time=   0.0s
[CV] END ..........................C=1, degree=4, kernel=rbf; total time=   0.0s
[CV] END .........................C=1, degree=4, kernel=poly; total time=   0.0s
[CV] END .........................C=1, degree=4, kernel=poly; total time=   0.0s
[CV] END ...................

[CV] END ........................C=0.1, degree=1, kernel=rbf; total time=   0.0s
[CV] END ........................C=0.1, degree=4, kernel=rbf; total time=   0.0s
[CV] END ........................C=0.1, degree=4, kernel=rbf; total time=   0.0s
[CV] END ....................C=0.1, degree=4, kernel=sigmoid; total time=   0.0s
[CV] END ....................C=0.1, degree=4, kernel=sigmoid; total time=   0.0s
[CV] END .......................C=0.1, degree=5, kernel=poly; total time=   0.0s
[CV] END .......................C=0.1, degree=5, kernel=poly; total time=   0.1s
[CV] END ....................C=0.1, degree=5, kernel=sigmoid; total time=   0.0s
[CV] END ....................C=0.1, degree=5, kernel=sigmoid; total time=   0.0s
[CV] END .........................C=1, degree=5, kernel=poly; total time=   0.0s
[CV] END .........................C=1, degree=5, kernel=poly; total time=   0.1s
[CV] END ......................C=1, degree=5, kernel=sigmoid; total time=   0.0s
[CV] END ...................

[CV] END .......................C=0.1, degree=1, kernel=poly; total time=   0.0s
[CV] END ......................C=1, degree=2, kernel=sigmoid; total time=   0.0s
[CV] END ......................C=1, degree=2, kernel=sigmoid; total time=   0.0s
[CV] END ......................C=1, degree=2, kernel=sigmoid; total time=   0.0s
[CV] END .......................C=1, degree=2, kernel=linear; total time=   0.0s
[CV] END .......................C=1, degree=2, kernel=linear; total time=   0.0s
[CV] END .......................C=1, degree=2, kernel=linear; total time=   0.0s
[CV] END .......................C=1, degree=2, kernel=linear; total time=   0.0s
[CV] END .......................C=1, degree=2, kernel=linear; total time=   0.0s
[CV] END ......................C=1, degree=4, kernel=sigmoid; total time=   0.0s
[CV] END ......................C=1, degree=4, kernel=sigmoid; total time=   0.0s
[CV] END ......................C=1, degree=4, kernel=sigmoid; total time=   0.0s
[CV] END ...................

[CV] END criterion=gini, max_depth=3, min_samples_leaf=20, min_samples_split=10; total time=   0.2s
[CV] END criterion=gini, max_depth=3, min_samples_leaf=20, min_samples_split=12; total time=   0.2s
[CV] END criterion=gini, max_depth=3, min_samples_leaf=20, min_samples_split=20; total time=   0.2s
[CV] END criterion=gini, max_depth=5, min_samples_leaf=3, min_samples_split=8; total time=   0.2s
[CV] END criterion=gini, max_depth=5, min_samples_leaf=3, min_samples_split=10; total time=   0.2s
[CV] END criterion=gini, max_depth=5, min_samples_leaf=3, min_samples_split=18; total time=   0.2s
[CV] END criterion=gini, max_depth=5, min_samples_leaf=3, min_samples_split=20; total time=   0.2s
[CV] END criterion=gini, max_depth=5, min_samples_leaf=5, min_samples_split=8; total time=   0.2s
[CV] END criterion=gini, max_depth=5, min_samples_leaf=5, min_samples_split=12; total time=   0.2s
[CV] END criterion=gini, max_depth=5, min_samples_leaf=5, min_samples_split=18; total time=   0.2s
[CV] END 

[CV] END criterion=gini, max_depth=5, min_samples_leaf=10, min_samples_split=18; total time=   0.2s
[CV] END criterion=gini, max_depth=5, min_samples_leaf=10, min_samples_split=20; total time=   0.2s
[CV] END criterion=gini, max_depth=5, min_samples_leaf=15, min_samples_split=8; total time=   0.2s
[CV] END criterion=gini, max_depth=5, min_samples_leaf=15, min_samples_split=12; total time=   0.2s
[CV] END criterion=gini, max_depth=5, min_samples_leaf=15, min_samples_split=18; total time=   0.3s
[CV] END criterion=gini, max_depth=5, min_samples_leaf=15, min_samples_split=16; total time=   0.2s
[CV] END criterion=gini, max_depth=5, min_samples_leaf=20, min_samples_split=10; total time=   0.2s
[CV] END criterion=gini, max_depth=5, min_samples_leaf=20, min_samples_split=12; total time=   0.2s
[CV] END criterion=gini, max_depth=5, min_samples_leaf=20, min_samples_split=20; total time=   0.2s
[CV] END criterion=gini, max_depth=5, min_samples_leaf=20, min_samples_split=16; total time=   0.2s
[

[CV] END criterion=gini, max_depth=7, min_samples_leaf=5, min_samples_split=16; total time=   0.2s
[CV] END criterion=gini, max_depth=7, min_samples_leaf=10, min_samples_split=10; total time=   0.2s
[CV] END criterion=gini, max_depth=7, min_samples_leaf=10, min_samples_split=12; total time=   0.2s
[CV] END criterion=gini, max_depth=7, min_samples_leaf=10, min_samples_split=20; total time=   0.2s
[CV] END criterion=gini, max_depth=7, min_samples_leaf=15, min_samples_split=8; total time=   0.2s
[CV] END criterion=gini, max_depth=7, min_samples_leaf=15, min_samples_split=10; total time=   0.2s
[CV] END criterion=gini, max_depth=7, min_samples_leaf=15, min_samples_split=18; total time=   0.2s
[CV] END criterion=gini, max_depth=7, min_samples_leaf=15, min_samples_split=20; total time=   0.2s
[CV] END criterion=gini, max_depth=7, min_samples_leaf=20, min_samples_split=8; total time=   0.2s
[CV] END criterion=gini, max_depth=7, min_samples_leaf=20, min_samples_split=10; total time=   0.2s
[CV


[CV] END criterion=gini, max_depth=10, min_samples_leaf=10, min_samples_split=10; total time=   0.2s
[CV] END criterion=gini, max_depth=10, min_samples_leaf=10, min_samples_split=12; total time=   0.2s
[CV] END criterion=gini, max_depth=10, min_samples_leaf=10, min_samples_split=20; total time=   0.2s
[CV] END criterion=gini, max_depth=10, min_samples_leaf=10, min_samples_split=16; total time=   0.2s
[CV] END criterion=gini, max_depth=10, min_samples_leaf=15, min_samples_split=10; total time=   0.2s
[CV] END criterion=gini, max_depth=10, min_samples_leaf=15, min_samples_split=12; total time=   0.2s
[CV] END criterion=gini, max_depth=10, min_samples_leaf=15, min_samples_split=20; total time=   0.2s
[CV] END criterion=gini, max_depth=10, min_samples_leaf=20, min_samples_split=8; total time=   0.2s
[CV] END criterion=gini, max_depth=10, min_samples_leaf=20, min_samples_split=10; total time=   0.2s
[CV] END criterion=gini, max_depth=10, min_samples_leaf=20, min_samples_split=18; total tim

[CV] END criterion=entropy, max_depth=10, min_samples_leaf=15, min_samples_split=12; total time=   0.2s[CV] END criterion=entropy, max_depth=3, min_samples_leaf=5, min_samples_split=10; total time=   0.2s
[CV] END criterion=entropy, max_depth=3, min_samples_leaf=5, min_samples_split=12; total time=   0.2s
[CV] END criterion=entropy, max_depth=3, min_samples_leaf=5, min_samples_split=20; total time=   0.2s
[CV] END criterion=entropy, max_depth=3, min_samples_leaf=10, min_samples_split=8; total time=   0.2s
[CV] END criterion=entropy, max_depth=3, min_samples_leaf=10, min_samples_split=10; total time=   0.2s
[CV] END criterion=entropy, max_depth=3, min_samples_leaf=10, min_samples_split=18; total time=   0.2s
[CV] END criterion=entropy, max_depth=3, min_samples_leaf=10, min_samples_split=20; total time=   0.2s
[CV] END criterion=entropy, max_depth=3, min_samples_leaf=15, min_samples_split=8; total time=   0.2s
[CV] END criterion=entropy, max_depth=3, min_samples_leaf=15, min_samples_spli

[CV] END criterion=entropy, max_depth=3, min_samples_leaf=5, min_samples_split=16; total time=   0.2s
[CV] END criterion=entropy, max_depth=3, min_samples_leaf=10, min_samples_split=8; total time=   0.2s
[CV] END criterion=entropy, max_depth=3, min_samples_leaf=10, min_samples_split=12; total time=   0.2s
[CV] END criterion=entropy, max_depth=3, min_samples_leaf=10, min_samples_split=20; total time=   0.2s
[CV] END criterion=entropy, max_depth=3, min_samples_leaf=10, min_samples_split=16; total time=   0.2s
[CV] END criterion=entropy, max_depth=3, min_samples_leaf=15, min_samples_split=10; total time=   0.3s
[CV] END criterion=entropy, max_depth=3, min_samples_leaf=15, min_samples_split=18; total time=   0.2s
[CV] END criterion=entropy, max_depth=3, min_samples_leaf=15, min_samples_split=20; total time=   0.2s
[CV] END criterion=entropy, max_depth=3, min_samples_leaf=20, min_samples_split=8; total time=   0.2s
[CV] END criterion=entropy, max_depth=3, min_samples_leaf=20, min_samples_sp

In [22]:
   
df = pd.DataFrame(scores,columns=['model','best_score','best_params','score1'])
df


Unnamed: 0,model,best_score,best_params,score1
0,svm,0.986667,"{'C': 0.1, 'degree': 2, 'kernel': 'poly'}",
1,random_forest,0.966667,"{'criterion': 'gini', 'max_depth': 7, 'min_sam...",
2,logistic_regression,0.966667,{'C': 5},


In [23]:
from sklearn.svm import SVC
svm = SVC(gamma='auto')


In [24]:
scores1=cross_val_score(SVC(gamma='auto',kernel='poly',degree=2,C=0.1), iris.data, iris.target,cv=7)
scores1.mean()

0.9802102659245516

In [25]:
## here basically we run a loop and in final we get about gridserach cv result of every model parameters
scores = [] 
for model_name, mp in model_params.items():
    rs =  RandomizedSearchCV(mp['model'], mp['params'],n_jobs=-1,cv=5, return_train_score=False,verbose=2)
    rs.fit(iris.data, iris.target)
    score1=cross_val_score(mp['model'], iris.data, iris.target,cv=5)
    score1.mean()
    scores.append({
        'model': model_name,
        'best_score': rs.best_score_,
        'best_params': rs.best_params_,
        'cross_val_Score': score1.mean()
    })

 #the whole inforamtion we get here is just becos of using verbose=2

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END criterion=entropy, max_depth=3, min_samples_leaf=20, min_samples_split=10; total time=   0.2s
[CV] END criterion=entropy, max_depth=3, min_samples_leaf=20, min_samples_split=18; total time=   0.3s
[CV] END criterion=entropy, max_depth=3, min_samples_leaf=20, min_samples_split=20; total time=   0.2s
[CV] END criterion=entropy, max_depth=5, min_samples_leaf=3, min_samples_split=8; total time=   0.2s
[CV] END criterion=entropy, max_depth=5, min_samples_leaf=3, min_samples_split=10; total time=   0.2s
[CV] END criterion=entropy, max_depth=5, min_samples_leaf=3, min_samples_split=18; total time=   0.2s
[CV] END criterion=entropy, max_depth=5, min_samples_leaf=3, min_samples_split=16; total time=   0.2s
[CV] END criterion=entropy, max_depth=5, min_samples_leaf=5, min_samples_split=8; total time=   0.2s
[CV] END criterion=entropy, max_depth=5, min_samples_leaf=5, min_samples_split=12; total time=   0.2s
[CV] END criterion=e

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END criterion=entropy, max_depth=5, min_samples_leaf=5, min_samples_split=10; total time=   0.2s
[CV] END criterion=entropy, max_depth=5, min_samples_leaf=5, min_samples_split=18; total time=   0.2s
[CV] END criterion=entropy, max_depth=5, min_samples_leaf=5, min_samples_split=20; total time=   0.2s
[CV] END criterion=entropy, max_depth=5, min_samples_leaf=10, min_samples_split=8; total time=   0.2s
[CV] END criterion=entropy, max_depth=5, min_samples_leaf=10, min_samples_split=10; total time=   0.2s
[CV] END criterion=entropy, max_depth=5, min_samples_leaf=10, min_samples_split=18; total time=   0.2s
[CV] END criterion=entropy, max_depth=5, min_samples_leaf=10, min_samples_split=20; total time=   0.2s
[CV] END criterion=entropy, max_depth=5, min_samples_leaf=15, min_samples_split=8; total time=   0.2s
[CV] END criterion=entropy, max_depth=5, min_samples_leaf=15, min_samples_split=12; total time=   0.2s
[CV] END criterio

Fitting 5 folds for each of 3 candidates, totalling 15 fits




In [26]:
RS_df = pd.DataFrame(scores,columns=['model','best_score','best_params','score1'])
RS_df


Unnamed: 0,model,best_score,best_params,score1
0,svm,0.986667,"{'kernel': 'poly', 'degree': 2, 'C': 0.1}",
1,random_forest,0.966667,"{'min_samples_split': 12, 'min_samples_leaf': ...",
2,logistic_regression,0.966667,{'C': 5},
