In [1]:
import pandas as pd
from sklearn.datasets import load_iris

In [2]:
# Load the iris dataset that ships with scikit-learn.
iris = load_iris()
# List the attributes exposed by the returned object (data, target, feature_names, ...).
dir(iris)

['DESCR',
 'data',
 'data_module',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

In [3]:
# Names of the measured features; reused below as DataFrame column labels.
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [4]:
# Wrap the raw feature matrix in a DataFrame, labelling columns with the feature names.
df = pd.DataFrame(iris.data, columns=iris.feature_names)
# Preview the first rows to sanity-check the load.
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [5]:
# Per-column summary statistics (count, mean, std, min, quartiles, max).
df.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [8]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

In [10]:
# Baseline: 5-fold cross-validated scores for one hand-picked SVC configuration.
linear_svc = SVC(kernel='linear', C=10, gamma='auto')
cross_val_score(linear_svc, iris.data, iris.target, cv=5)

array([1.        , 1.        , 0.9       , 0.96666667, 1.        ])

In [11]:
import numpy as np

In [14]:
# Hand-rolled grid search: mean 5-fold CV score for every (kernel, C) pair,
# stored under the key "<kernel>_<C>".
kernels = ['linear', 'poly', 'rbf']
C = [1, 10, 100]

avg_scores = {
    f'{kernel_name}_{c_value}': np.average(
        cross_val_score(
            SVC(kernel=kernel_name, C=c_value, gamma='auto'),
            iris.data,
            iris.target,
            cv=5,
        )
    )
    for kernel_name in kernels   # outer loop: kernel
    for c_value in C             # inner loop: regularisation strength
}
avg_scores

{'linear_1': 0.9800000000000001,
 'linear_10': 0.9733333333333334,
 'linear_100': 0.9666666666666666,
 'poly_1': 0.9666666666666666,
 'poly_10': 0.9666666666666666,
 'poly_100': 0.9466666666666667,
 'rbf_1': 0.9800000000000001,
 'rbf_10': 0.9800000000000001,
 'rbf_100': 0.96}

In [15]:
from sklearn.model_selection import GridSearchCV

In [18]:
# Same search as the manual loop, delegated to GridSearchCV:
# every kernel/C combination is evaluated with 5-fold cross-validation.
svc_param_grid = {
    'kernel': ['linear', 'poly', 'rbf'],
    'C': [1, 10, 20],
}
clf = GridSearchCV(estimator=SVC(gamma='auto'), param_grid=svc_param_grid, cv=5)
clf.fit(iris.data, iris.target)

# Raw per-combination results (timings, per-split scores, ranks) as a dict of arrays.
clf.cv_results_

{'mean_fit_time': array([0.00162005, 0.00259404, 0.00163651, 0.00159702, 0.00422668,
        0.00139899, 0.00099726, 0.0067975 , 0.00120139]),
 'std_fit_time': array([4.78740969e-04, 1.01601961e-03, 5.20648818e-04, 4.86026635e-04,
        2.47471351e-03, 4.87135996e-04, 3.34942042e-06, 5.60009448e-03,
        3.97709747e-04]),
 'mean_score_time': array([0.00143485, 0.00080533, 0.00127025, 0.00100293, 0.001085  ,
        0.00099826, 0.0007988 , 0.00100803, 0.00140376]),
 'std_score_time': array([4.64744228e-04, 4.02749869e-04, 5.40529011e-04, 6.37703261e-04,
        1.58809425e-04, 2.47312593e-06, 3.99433330e-04, 1.15656359e-05,
        7.96829796e-04]),
 'param_C': masked_array(data=[1, 1, 1, 10, 10, 10, 20, 20, 20],
              mask=[False, False, False, False, False, False, False, False,
                    False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['linear', 'poly', 'rbf', 'linear', 'poly', 'rbf',
                    'linear', '

In [19]:
# Tabulate the grid-search results: one row per parameter combination,
# one column per entry of cv_results_.
df2 = pd.DataFrame(clf.cv_results_)
df2.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.00162,0.000479,0.001435,0.000465,1,linear,"{'C': 1, 'kernel': 'linear'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
1,0.002594,0.001016,0.000805,0.000403,1,poly,"{'C': 1, 'kernel': 'poly'}",1.0,1.0,0.9,0.933333,1.0,0.966667,0.042164,6
2,0.001637,0.000521,0.00127,0.000541,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
3,0.001597,0.000486,0.001003,0.000638,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,1.0,0.9,0.966667,1.0,0.973333,0.038873,4
4,0.004227,0.002475,0.001085,0.000159,10,poly,"{'C': 10, 'kernel': 'poly'}",1.0,1.0,0.9,0.933333,1.0,0.966667,0.042164,6


In [21]:
# Keep only the searched parameters and the mean CV score to compare configurations.
df2[['param_C', 'param_kernel', 'mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,linear,0.98
1,1,poly,0.966667
2,1,rbf,0.98
3,10,linear,0.973333
4,10,poly,0.966667
5,10,rbf,0.98
6,20,linear,0.966667
7,20,poly,0.953333
8,20,rbf,0.966667


In [23]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression

In [27]:
# Candidate estimators and the hyper-parameter grid to search for each one.
# Each entry maps a display name to:
#   'model'  - an unfitted estimator instance handed to GridSearchCV
#   'params' - the parameter grid searched for that estimator
model_params = {
    'SVM' : {
        'model' : SVC(gamma='auto'),
        'params' : {
            'kernel' : ['linear', 'poly', 'rbf'],
            'C' : [1, 10, 20]
        }
    },
    'Random_Forest' : {
        # Fixed seed: a random forest is stochastic, so without it the selected
        # n_estimators and the best score can change from one run to the next.
        'model' : RandomForestClassifier(random_state=42),
        'params' : {
            'n_estimators' : [1, 10, 20]
        }
    },
    'Logistic_Regression' : {
        # `multi_class` is left at its default, which behaves like the previous
        # explicit 'auto'; passing it explicitly triggers a FutureWarning on
        # recent scikit-learn releases.
        # NOTE(review): recent releases also deprecate liblinear for multiclass
        # targets - confirm against the installed version before upgrading.
        'model' : LogisticRegression(solver='liblinear'),
        'params' : {
            'C' : [1, 10, 20]
        }
    }
}

In [28]:
# Run a 5-fold grid search for every candidate in `model_params` and collect
# each model's best parameter set and best mean CV score into one summary table.
scores = []

for model_name, mp in model_params.items():
    # Deliberately not named `clf`: an earlier cell keeps the fitted SVC grid
    # search in `clf` and builds `df2` from it. Rebinding `clf` here would make
    # re-running that cell silently tabulate a different model's results.
    search = GridSearchCV(mp['model'], mp['params'], cv=5)
    search.fit(iris.data, iris.target)
    scores.append({
        'model' : model_name,
        'best_params' : search.best_params_,
        'best_score' : search.best_score_
    })

# One row per model, built once from the list of records.
df3 = pd.DataFrame(scores)
df3

Unnamed: 0,model,best_params,best_score
0,SVM,"{'C': 1, 'kernel': 'linear'}",0.98
1,Random_Forest,{'n_estimators': 20},0.966667
2,Logistic_Regression,{'C': 10},0.966667
