In [46]:
#### choosing the best model based on problem scope/evaluation metrics from other models
#### optimizing hyperparameters to maximize model performance

# Eg) classification problem on the iris dataset:
# possible choices:
# ---> Multiclass Logistic Regression
# ---> Decision Tree
# ---> Random Forest
# ---> SVM
# ---> Naive Bayes

# assume we choose the SVM ----> hyperparameters ----> which kernel to choose: rbf, linear
#                                                ----> which C and gamma to choose

# Hyperparameter tuning ---> the process of choosing the optimal hyperparameter values.

import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import KFold, cross_val_score, GridSearchCV

In [6]:
# Load the iris dataset that ships with scikit-learn.
iris = load_iris()
# List the attributes exposed by the returned object (data, target, feature_names, ...).
dir(iris)

['DESCR',
 'data',
 'data_module',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

In [14]:
# Build a table of the measurements, one column per feature name.
df = pd.DataFrame(iris.data, columns=iris.feature_names)
# Translate the integer class codes into species names with a single vectorized
# NumPy lookup (target_names indexed by the target array) instead of assigning
# the codes first and then mapping them row by row with apply/lambda.
df['flower'] = iris.target_names[iris.target]
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),flower
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [31]:
# Separate the species label from the feature columns.
y = df['flower']
X = df.drop(columns=['flower'])

# Hold out 20% of the rows for testing. No random_state is given, so the
# partition is different on every execution.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [36]:
# train_test_split is called without a random_state, so each pass draws a
# different train/test partition and the held-out accuracy fluctuates.
# Every pass is an independent re-split and refit of a fresh SVC; the "epoch"
# label in the printout is just the pass counter.
for trial in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    model = SVC(kernel='rbf', C=1.0, gamma='auto').fit(X_train, y_train)
    accuracy = model.score(X_test, y_test)
    print("epoch {} score: ".format(trial), accuracy)

epoch 0 score:  0.9666666666666667
epoch 1 score:  0.9666666666666667
epoch 2 score:  1.0
epoch 3 score:  1.0
epoch 4 score:  0.9333333333333333
epoch 5 score:  0.9333333333333333
epoch 6 score:  1.0
epoch 7 score:  0.9333333333333333
epoch 8 score:  1.0
epoch 9 score:  0.9333333333333333


In [38]:
# k-fold cross validation: score a linear-kernel SVC on 5 folds of the full dataset.
# (gamma is a coefficient of the rbf/poly/sigmoid kernels, so it plays no role here.)
cross_val_score(
    SVC(kernel='linear', C=3.0, gamma='auto'),
    X=iris.data,
    y=iris.target,
    cv=5,
)

array([0.96666667, 1.        , 0.93333333, 0.96666667, 1.        ])

In [39]:
# Same 5-fold evaluation, now with the rbf kernel at C=3.0.
cross_val_score(
    SVC(kernel='rbf', C=3.0, gamma='auto'),
    X=iris.data,
    y=iris.target,
    cv=5,
)

array([0.96666667, 1.        , 0.93333333, 0.96666667, 1.        ])

In [41]:
# rbf kernel again, with the regularization strength lowered to C=1.0.
cross_val_score(
    SVC(kernel='rbf', C=1.0, gamma='auto'),
    X=iris.data,
    y=iris.target,
    cv=5,
)

array([0.96666667, 1.        , 0.96666667, 0.96666667, 1.        ])

In [45]:
# Manual grid search: evaluate every (kernel, C) pair with 5-fold cross
# validation and record the mean fold accuracy under a readable key.

kernels = ['linear', 'rbf']
C = range(1, 20)

# Kernels are the outer iteration and C values the inner one, so the dict is
# ordered kernel by kernel with C ascending inside each kernel.
avg_scores = {
    kernel_name + '--> C = ' + str(c_value): np.average(
        cross_val_score(
            SVC(kernel=kernel_name, C=c_value, gamma='auto'),
            iris.data,
            iris.target,
            cv=5,
        )
    )
    for kernel_name in kernels
    for c_value in C
}

avg_scores

{'linear--> C = 1': np.float64(0.9800000000000001),
 'linear--> C = 2': np.float64(0.9800000000000001),
 'linear--> C = 3': np.float64(0.9733333333333334),
 'linear--> C = 4': np.float64(0.9733333333333334),
 'linear--> C = 5': np.float64(0.9800000000000001),
 'linear--> C = 6': np.float64(0.9733333333333334),
 'linear--> C = 7': np.float64(0.9733333333333334),
 'linear--> C = 8': np.float64(0.9733333333333334),
 'linear--> C = 9': np.float64(0.9733333333333334),
 'linear--> C = 10': np.float64(0.9733333333333334),
 'linear--> C = 11': np.float64(0.9733333333333334),
 'linear--> C = 12': np.float64(0.9733333333333334),
 'linear--> C = 13': np.float64(0.9733333333333334),
 'linear--> C = 14': np.float64(0.9666666666666666),
 'linear--> C = 15': np.float64(0.9666666666666666),
 'linear--> C = 16': np.float64(0.9666666666666666),
 'linear--> C = 17': np.float64(0.9666666666666666),
 'linear--> C = 18': np.float64(0.9666666666666666),
 'linear--> C = 19': np.float64(0.9666666666666666),
 '

In [50]:
### Hand-written nested loops become unwieldy as more hyperparameters are added.
### sklearn's GridSearchCV runs the same exhaustive search over a parameter grid.
### Every combination in the grid is cross-validated, so large grids are costly.

param_grid = {
    'C': range(1, 20),
    'kernel': ['linear', 'rbf'],
}
clf = GridSearchCV(SVC(gamma='auto'), param_grid, cv=5, return_train_score=False)

clf.fit(iris.data, iris.target)

0,1,2
,estimator,SVC(gamma='auto')
,param_grid,"{'C': range(1, 20), 'kernel': ['linear', 'rbf']}"
,scoring,
,n_jobs,
,refit,True
,cv,5
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,C,4
,kernel,'rbf'
,degree,3
,gamma,'auto'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [51]:
# Raw grid-search results: a dict of arrays (fit times, scores, ...) holding one
# entry per parameter combination that was evaluated.
clf.cv_results_

{'mean_fit_time': array([0.00047655, 0.00055423, 0.00046172, 0.00050774, 0.00044065,
        0.00150838, 0.00220466, 0.00241075, 0.00145478, 0.00159559,
        0.00139475, 0.00160155, 0.00138893, 0.00155497, 0.00144248,
        0.00152397, 0.00194726, 0.00255232, 0.002424  , 0.00253162,
        0.00313025, 0.00272946, 0.00241981, 0.0026504 , 0.00275712,
        0.00273256, 0.00186629, 0.00254531, 0.0022471 , 0.00241261,
        0.00238762, 0.00251565, 0.00234909, 0.00244694, 0.00229268,
        0.00253215, 0.002285  , 0.00252819]),
 'std_fit_time': array([1.09243320e-04, 3.20172629e-05, 6.39812771e-05, 6.58293687e-05,
        2.42453760e-05, 1.29890685e-03, 2.09598274e-04, 3.30143281e-04,
        1.33155306e-04, 1.28479399e-04, 7.40350154e-05, 1.75891041e-04,
        7.91586450e-05, 3.03880356e-05, 9.95854056e-05, 9.28895429e-05,
        7.40498793e-04, 1.82963132e-04, 2.10863516e-04, 1.00889390e-04,
        5.95900220e-04, 3.76929124e-04, 2.32667491e-04, 1.43881818e-04,
        4.512

In [55]:
# Tabulate the grid-search results.
# NOTE: this rebinds `df`, which earlier held the iris feature table.
df = pd.DataFrame(clf.cv_results_)
score_columns = ['param_C', 'param_kernel', 'mean_test_score']

# The best setting is C=4 with the rbf kernel (mean_test_score 0.986667);
# the first four rows tie at 0.98 and are not the optimum.
df[score_columns]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,linear,0.98
1,1,rbf,0.98
2,2,linear,0.98
3,2,rbf,0.98
4,3,linear,0.973333
5,3,rbf,0.973333
6,4,linear,0.973333
7,4,rbf,0.986667
8,5,linear,0.98
9,5,rbf,0.98


In [56]:
# Best mean cross-validated test score over all parameter combinations in the grid.
clf.best_score_

np.float64(0.9866666666666667)

In [57]:
# Parameter combination that achieved the best mean cross-validated score.
clf.best_params_

{'C': 4, 'kernel': 'rbf'}