In [1]:
# GridSearachCV EXAMPLE FOR HYPER TUNING OF PARAMETERS OF SAME MODEL AND TO FIND BEST MODEL

In [25]:
from sklearn.datasets import load_iris
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestClassifier

In [3]:
iris = load_iris()

In [4]:
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [5]:
df['target'] = iris.target
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [6]:
df['flower_name'] = df.target.apply(lambda x : iris.target_names[x])
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target,flower_name
0,5.1,3.5,1.4,0.2,0,setosa
1,4.9,3.0,1.4,0.2,0,setosa
2,4.7,3.2,1.3,0.2,0,setosa
3,4.6,3.1,1.5,0.2,0,setosa
4,5.0,3.6,1.4,0.2,0,setosa


In [7]:
X = df.drop(['target', 'flower_name'], axis='columns')

# FIRST WAY TO DETECT HYPER PARAMETER TUNING

In [8]:
cross_val_score(SVC(), X, df.target, cv=3)

array([0.96, 0.98, 0.94])

In [9]:
cross_val_score(SVC(C=10, gamma=10), X, df.target, cv=3)

array([1.  , 0.9 , 0.94])

In [10]:
cross_val_score(SVC(C=20, gamma=30), X, df.target, cv=3)

array([0.8 , 0.76, 0.88])

In [11]:
cross_val_score(SVC(C=20, kernel='rbf'), X, df.target, cv=3)

array([0.98, 0.98, 0.98])

In [12]:
cross_val_score(SVC(C=20, kernel='linear'), X, df.target, cv=3)

array([1.  , 0.94, 0.96])

# SECOND WAY TO DETECT HYPER PARAMETER TUNING

In [13]:
c = [10, 20, 30]
gamma = [10, 20]
kernal = ['rbf', 'linear']
avg_score = {}

for i in kernal:
    for j in gamma:
        for k in c:
            scores = cross_val_score(SVC(kernel=i, gamma=j, C=k), X, df.target, cv=10)
            avg_score[i + '_' + str(j) + '_' + str(k)] = np.average(scores)

In [14]:
avg_score

{'rbf_10_10': 0.9400000000000001,
 'rbf_10_20': 0.9400000000000001,
 'rbf_10_30': 0.9400000000000001,
 'rbf_20_10': 0.8933333333333333,
 'rbf_20_20': 0.8933333333333333,
 'rbf_20_30': 0.8933333333333333,
 'linear_10_10': 0.9800000000000001,
 'linear_10_20': 0.9666666666666668,
 'linear_10_30': 0.9666666666666668,
 'linear_20_10': 0.9800000000000001,
 'linear_20_20': 0.9666666666666668,
 'linear_20_30': 0.9666666666666668}

# THIRD WAY TO DETECT HYPER TUNING PARAMETERS

In [15]:
from sklearn.model_selection import GridSearchCV
model = GridSearchCV(SVC(), {
    'C' : [10, 20],
    'kernel' : ['rbf', 'linear'],
}, cv=5, return_train_score=False)
model.fit(iris.data, iris.target)

In [16]:
pd.DataFrame(model.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.002307,0.002414,0.000801,0.0004,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
1,0.001013,2e-05,0.000966,6.7e-05,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,1.0,0.9,0.966667,1.0,0.973333,0.038873,3
2,0.001102,0.000205,0.0008,0.0004,20,rbf,"{'C': 20, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
3,0.000615,0.000503,0.0,0.0,20,linear,"{'C': 20, 'kernel': 'linear'}",1.0,1.0,0.9,0.933333,1.0,0.966667,0.042164,4


In [17]:
model.best_params_

{'C': 10, 'kernel': 'rbf'}

# FORTH OPTIMAL WAY TO FIND HYPER PARAMETERS TURING

In [18]:
from sklearn.model_selection import RandomizedSearchCV
model = RandomizedSearchCV(SVC(), {
    'C' : [10, 20],
    'kernel' : ['rbf', 'linear'],
}, cv=5, n_iter= 2, return_train_score=False)
model.fit(iris.data, iris.target)

In [49]:
pd.DataFrame(model.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.115588,0.012846,0.012923,0.006478,100,{'n_estimators': 100},0.966667,0.966667,0.933333,0.933333,1.0,0.96,0.024944,3
1,0.222822,0.012339,0.019915,0.008635,200,{'n_estimators': 200},0.966667,0.966667,0.933333,0.966667,1.0,0.966667,0.021082,1
2,0.332789,0.024795,0.025378,0.007981,300,{'n_estimators': 300},0.966667,0.966667,0.933333,0.966667,1.0,0.966667,0.021082,1


# NOW, FINDING THE OPTIMAL MODEL

In [60]:
model_params = {
    'svm' : {
        'model' : SVC(),
        'params' : {
            'C' : [10, 20, 30],
            'kernel' : ['rbf', 'linear'],
            'gamma' : [10, 20, 30]
        }   
    },
    'linear_reg' : {
        'model' : LinearRegression(),
        'params' : {}
    },
    'logistic_reg' : {
        'model' : LogisticRegression(),
        'params' : {
            'C' : [10, 20]
        }
    },
    'rand_forest' : {
        'model' : RandomForestClassifier(),
        'params' : {
            'n_estimators' : [100, 200, 300]
        }
    }
}

In [62]:
score = []

for models, mp in model_params.items():
    model = RandomizedSearchCV(mp['model'], mp['params'], cv=5, return_train_score=False, n_iter=5)
    model.fit(iris.data, iris.target)
    score.append({
        'model' : models,
        'best_score' : model.best_score_,
        'best_params' : model.best_params_
    })

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [63]:
df1 = pd.DataFrame(score, columns=['model', 'best_score', 'best_params'])
df1

Unnamed: 0,model,best_score,best_params
0,svm,0.96,"{'kernel': 'linear', 'gamma': 30, 'C': 30}"
1,linear_reg,0.322561,{}
2,logistic_reg,0.98,{'C': 10}
3,rand_forest,0.966667,{'n_estimators': 300}
