In [1]:
# GridSearachCV PRACTICE FOR HYPER TUNING OF PARAMETERS OF SAME MODEL AND TO FIND BEST MODEL

In [2]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import warnings
from sklearn.neighbors import KNeighborsClassifier

In [3]:
warnings.filterwarnings('ignore')

In [4]:
iris = load_iris()

In [5]:
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [6]:
df['target'] = iris.target
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [7]:
df['flower_name'] = df.target.apply(lambda x : iris.target_names[x])
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target,flower_name
0,5.1,3.5,1.4,0.2,0,setosa
1,4.9,3.0,1.4,0.2,0,setosa
2,4.7,3.2,1.3,0.2,0,setosa
3,4.6,3.1,1.5,0.2,0,setosa
4,5.0,3.6,1.4,0.2,0,setosa


In [8]:
X = df.drop(['target', 'flower_name'], axis='columns')

# TYPICAL WAY FOR HYPER PARAMETER TUNING

In [9]:
cross_val_score(SVC(C=1, gamma='auto', kernel='rbf'), X, df.target)

array([0.96666667, 1.        , 0.96666667, 0.96666667, 1.        ])

In [10]:
cross_val_score(SVC(C=1, gamma='auto', kernel='linear'), X, df.target)

array([0.96666667, 1.        , 0.96666667, 0.96666667, 1.        ])

In [11]:
cross_val_score(SVC(C=1, gamma=10, kernel='linear'), X, df.target)

array([0.96666667, 1.        , 0.96666667, 0.96666667, 1.        ])

In [12]:
cross_val_score(SVC(C=10, kernel='linear'), X, df.target)

array([1.        , 1.        , 0.9       , 0.96666667, 1.        ])

In [24]:
cross_val_score(SVC(C=10, gamma=10, kernel='rbf'), X, df.target)

array([1.        , 0.9       , 0.86666667, 0.93333333, 0.96666667])

In [40]:
cross_val_score(KNeighborsClassifier(n_neighbors=10), X, df.target)

array([0.96666667, 1.        , 1.        , 0.93333333, 1.        ])

# SECOND WAY FOR HYPER PARAMETER TUNING

In [14]:
C = [10, 20, 30]
kernel = ['rbf', 'linear']
scores = {}

for i in C:
    for j in kernel:
        model = cross_val_score(SVC(C=i, kernel=j), X, df.target)
        scores[j + '_' + str(i)] = {
            np.average(model)
        }

scores

{'rbf_10': {0.9800000000000001},
 'linear_10': {0.9733333333333334},
 'rbf_20': {0.9800000000000001},
 'linear_20': {0.9666666666666666},
 'rbf_30': {0.9733333333333334},
 'linear_30': {0.96}}

# THIRD WAY FOR HYPER PARAMETER TUNING

In [15]:
from sklearn.model_selection import GridSearchCV
model = GridSearchCV(SVC(), {
    'C' : [10, 20, 30],
    'kernel' : ['rbf', 'linear']
}, cv=5, return_train_score=False)
model.fit(X, df.target)

In [16]:
pd.DataFrame(model.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.002976,0.00165,0.002047,0.001149,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
1,0.001415,0.000793,0.0006,0.00049,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,1.0,0.9,0.966667,1.0,0.973333,0.038873,3
2,0.003126,0.006252,0.0,0.0,20,rbf,"{'C': 20, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
3,0.003126,0.006251,0.0,0.0,20,linear,"{'C': 20, 'kernel': 'linear'}",1.0,1.0,0.9,0.933333,1.0,0.966667,0.042164,5
4,0.0,0.0,0.003125,0.00625,30,rbf,"{'C': 30, 'kernel': 'rbf'}",0.966667,1.0,0.933333,0.966667,1.0,0.973333,0.024944,3
5,0.0,0.0,0.003125,0.00625,30,linear,"{'C': 30, 'kernel': 'linear'}",1.0,1.0,0.9,0.9,1.0,0.96,0.04899,6


In [17]:
model.best_params_

{'C': 10, 'kernel': 'rbf'}

# BEST AND OPTIMAL PROCEDURE FOR HYPER PARAMETER TUNING

In [18]:
from sklearn.model_selection import RandomizedSearchCV
model = RandomizedSearchCV(SVC(), {
    'C' : [10, 20, 30],
    'kernel' : ['rbf', 'linear']
}, cv=5, return_train_score=False, n_iter=4)
model.fit(X, df.target)

In [19]:
pd.DataFrame(model.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001599,0.001496,0.00416,0.006351,linear,30,"{'kernel': 'linear', 'C': 30}",1.0,1.0,0.9,0.9,1.0,0.96,0.04899,4
1,0.000473,0.00059,0.000822,0.001007,rbf,10,"{'kernel': 'rbf', 'C': 10}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
2,0.001918,0.000164,0.001007,1.4e-05,rbf,30,"{'kernel': 'rbf', 'C': 30}",0.966667,1.0,0.933333,0.966667,1.0,0.973333,0.024944,2
3,0.002015,3e-05,0.001,0.000632,linear,20,"{'kernel': 'linear', 'C': 20}",1.0,1.0,0.9,0.933333,1.0,0.966667,0.042164,3


In [44]:
model.best_params_

{'n_neighbors': 10}

In [71]:
model = RandomizedSearchCV(KNeighborsClassifier(), {
    'n_neighbors' : [2,4,6,8,10,12,14,16,18,20],
}, cv=3, return_train_score=False, n_iter=4)
model.fit(X, df.target)

In [72]:
pd.DataFrame(model.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_neighbors,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,0.000729,0.001031,0.004935,0.003547,12,{'n_neighbors': 12},0.96,1.0,0.94,0.966667,0.024944,2
1,0.001694,0.001275,0.008484,0.005651,6,{'n_neighbors': 6},0.98,0.98,0.96,0.973333,0.009428,1
2,0.000999,2e-06,0.004202,0.000289,18,{'n_neighbors': 18},0.96,0.96,0.94,0.953333,0.009428,4
3,0.000669,0.000946,0.005227,0.007392,8,{'n_neighbors': 8},0.98,0.98,0.94,0.966667,0.018856,2


# NOW THE WAY FOR BEST MODEL SELECTION

In [73]:
model_params = {
    'svm' : {
        'model' : SVC(),
        'params' : {
            'C' : [10, 20, 30],
            'kernel' : ['rbf', 'linear']
        }
    },
    'log_res' : {
        'model' : LogisticRegression(),
        'params' : {
            'C' : [10, 20, 30],
        }
    },
    'tree' : {
        'model' : DecisionTreeClassifier(),
        'params' : {}
    },
    'forest' : {
        'model' : RandomForestClassifier(),
        'params' : {
            'n_estimators' : [50, 100, 150, 200]
        }
    },
    'knn' : {
        'model' : KNeighborsClassifier(),
        'params' : {
            'n_neighbors' : [5, 10, 15]
        }
    }
}

In [74]:
scores = []

for models, params in model_params.items():
    model = RandomizedSearchCV(params['model'], params['params'], return_train_score=False, cv=5, n_iter=5)
    model.fit(X, df.target)
    scores.append({
        'model' : params['model'],
        'best_score' : model.best_score_,
        'best_params' : model.best_params_
    })

In [75]:
pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])

Unnamed: 0,model,best_score,best_params
0,SVC(),0.98,"{'kernel': 'rbf', 'C': 10}"
1,LogisticRegression(),0.98,{'C': 10}
2,DecisionTreeClassifier(),0.966667,{}
3,RandomForestClassifier(),0.966667,{'n_estimators': 50}
4,KNeighborsClassifier(),0.98,{'n_neighbors': 10}
