In [92]:
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import datasets
digits = datasets.load_digits()

In [93]:
dir(digits)

['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']

In [94]:
df = pd.DataFrame(digits.data,columns = digits.feature_names)

In [95]:
df['target'] = digits.target

In [96]:
X = df.drop('target', axis = 1)
y = df.target

In [97]:
from sklearn.model_selection import train_test_split

In [98]:
X_train, X_test, y_train, y_test =  train_test_split(X,y,train_size=0.3)

In [99]:
model_svm = svm.SVC(kernel = 'rbf', C = 10,gamma = 'auto').fit(X_train,y_train)
model_svm.score(X_test,y_test)

0.20349761526232116

In [100]:
model_svm = svm.SVC(kernel = 'linear', C = 10,gamma = 'auto').fit(X_train,y_train)
model_svm.score(X_test,y_test)

0.9697933227344993

In [101]:
model_svm = svm.SVC(kernel = 'poly', C = 10,gamma = 'auto').fit(X_train,y_train)
model_svm.score(X_test,y_test)

0.9825119236883942

In [102]:
from sklearn.model_selection import cross_val_score

In [103]:
cross_val_score(svm.SVC(kernel = 'rbf',C = 10, gamma = 'auto'), X, y, cv = 5)

array([0.45277778, 0.46944444, 0.47910864, 0.47910864, 0.50139276])

In [104]:
cross_val_score(svm.SVC(kernel = 'linear',C = 10, gamma = 'auto'), X, y, cv = 5)

array([0.96388889, 0.91944444, 0.96657382, 0.9637883 , 0.92479109])

In [105]:
cross_val_score(svm.SVC(kernel = 'poly',C = 10, gamma = 'auto'), X, y, cv = 5)

array([0.98333333, 0.94444444, 0.98050139, 0.98885794, 0.94707521])

In [106]:
kernel = ['rbf','linear','poly']
C = [10,25,50]
avg_scores={}
for kval in kernel:
    for cval in C:
        cval_scores = cross_val_score(svm.SVC(kernel = kval, C = cval), X, y, cv = 5)
        avg_scores[kval + '-' + str(cval)] = np.average(cval_scores)
        
avg_scores

{'rbf-10': 0.9738502011761063,
 'rbf-25': 0.9738502011761063,
 'rbf-50': 0.9738502011761063,
 'linear-10': 0.9476973073351903,
 'linear-25': 0.9476973073351903,
 'linear-50': 0.9476973073351903,
 'poly-10': 0.96884246363355,
 'poly-25': 0.96884246363355,
 'poly-50': 0.96884246363355}

In [107]:
from sklearn.model_selection import GridSearchCV

In [108]:
gsc = GridSearchCV(svm.SVC(gamma = 'auto'),{
    'kernel' : ['rbf','linear','poly'],
    'C' : [10,25,50]
},cv = 5, return_train_score = False)

In [109]:
gsc.fit(X,y)

In [110]:
dir(gsc)

['__abstractmethods__',
 '__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__sklearn_clone__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_build_request_for_signature',
 '_check_feature_names',
 '_check_n_features',
 '_check_refit_for_multimetric',
 '_estimator_type',
 '_format_results',
 '_get_default_requests',
 '_get_metadata_request',
 '_get_param_names',
 '_get_tags',
 '_more_tags',
 '_parameter_constraints',
 '_repr_html_',
 '_repr_html_inner',
 '_repr_mimebundle_',
 '_required_parameters',
 '_run_search',
 '_select_best_index',
 '_validate_data',
 '_validate_params',
 'best_estimator_',
 'best_index_',
 'best_params_',
 'best_score_',
 '

In [111]:
gsc.best_params_

{'C': 10, 'kernel': 'poly'}

In [112]:
gsc.best_score_

0.96884246363355

In [113]:
gsc.cv_results_

{'mean_fit_time': array([0.15699406, 0.01802168, 0.02513666, 0.15485363, 0.01775331,
        0.01998429, 0.15347562, 0.01902809, 0.02214007]),
 'std_fit_time': array([0.00587257, 0.00063079, 0.00258893, 0.00896457, 0.00062351,
        0.00060616, 0.01123553, 0.00212643, 0.00338503]),
 'mean_score_time': array([0.05024767, 0.00652246, 0.00793681, 0.04938087, 0.00590463,
        0.00716991, 0.04853559, 0.00583472, 0.00694041]),
 'std_score_time': array([0.00159247, 0.00042019, 0.00105517, 0.00057151, 0.00021052,
        0.00075009, 0.0006476 , 0.00029118, 0.00098125]),
 'param_C': masked_array(data=[10, 10, 10, 25, 25, 25, 50, 50, 50],
              mask=[False, False, False, False, False, False, False, False,
                    False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['rbf', 'linear', 'poly', 'rbf', 'linear', 'poly',
                    'rbf', 'linear', 'poly'],
              mask=[False, False, False, False, False, False, False, F

In [114]:
df2 = pd.DataFrame(gsc.cv_results_)
df2

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.156994,0.005873,0.050248,0.001592,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.452778,0.469444,0.479109,0.479109,0.501393,0.476366,0.015784,7
1,0.018022,0.000631,0.006522,0.00042,10,linear,"{'C': 10, 'kernel': 'linear'}",0.963889,0.919444,0.966574,0.963788,0.924791,0.947697,0.020978,4
2,0.025137,0.002589,0.007937,0.001055,10,poly,"{'C': 10, 'kernel': 'poly'}",0.983333,0.944444,0.980501,0.988858,0.947075,0.968842,0.019056,1
3,0.154854,0.008965,0.049381,0.000572,25,rbf,"{'C': 25, 'kernel': 'rbf'}",0.452778,0.469444,0.479109,0.479109,0.501393,0.476366,0.015784,7
4,0.017753,0.000624,0.005905,0.000211,25,linear,"{'C': 25, 'kernel': 'linear'}",0.963889,0.919444,0.966574,0.963788,0.924791,0.947697,0.020978,4
5,0.019984,0.000606,0.00717,0.00075,25,poly,"{'C': 25, 'kernel': 'poly'}",0.983333,0.944444,0.980501,0.988858,0.947075,0.968842,0.019056,1
6,0.153476,0.011236,0.048536,0.000648,50,rbf,"{'C': 50, 'kernel': 'rbf'}",0.452778,0.469444,0.479109,0.479109,0.501393,0.476366,0.015784,7
7,0.019028,0.002126,0.005835,0.000291,50,linear,"{'C': 50, 'kernel': 'linear'}",0.963889,0.919444,0.966574,0.963788,0.924791,0.947697,0.020978,4
8,0.02214,0.003385,0.00694,0.000981,50,poly,"{'C': 50, 'kernel': 'poly'}",0.983333,0.944444,0.980501,0.988858,0.947075,0.968842,0.019056,1


In [115]:
df2[['param_C','param_kernel','mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,10,rbf,0.476366
1,10,linear,0.947697
2,10,poly,0.968842
3,25,rbf,0.476366
4,25,linear,0.947697
5,25,poly,0.968842
6,50,rbf,0.476366
7,50,linear,0.947697
8,50,poly,0.968842


In [116]:
from sklearn.model_selection import RandomizedSearchCV

In [117]:
rsc = RandomizedSearchCV(svm.SVC(gamma = 'auto'),{
    'kernel' : ['rbf','linear','poly'],
    'C' : [10,25,50]
}, n_iter = 6, cv = 5, return_train_score = False)

In [118]:
rsc.fit(X,y)

In [119]:
rsc.best_params_

{'kernel': 'poly', 'C': 10}

In [120]:
rsc.best_score_

0.96884246363355

In [121]:
rsc.cv_results_

{'mean_fit_time': array([0.02284975, 0.02223153, 0.15517068, 0.02041931, 0.01816907,
        0.0174962 ]),
 'std_fit_time': array([0.00180731, 0.00161953, 0.00638514, 0.00165708, 0.0017695 ,
        0.00078395]),
 'mean_score_time': array([0.00717711, 0.00886683, 0.05042672, 0.00679197, 0.00580597,
        0.00504723]),
 'std_score_time': array([9.61492417e-04, 2.21649623e-03, 1.19657703e-03, 6.91376481e-04,
        1.14286610e-03, 7.81759361e-05]),
 'param_kernel': masked_array(data=['poly', 'poly', 'rbf', 'poly', 'linear', 'linear'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_C': masked_array(data=[10, 50, 10, 25, 10, 25],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'kernel': 'poly', 'C': 10},
  {'kernel': 'poly', 'C': 50},
  {'kernel': 'rbf', 'C': 10},
  {'kernel': 'poly', 'C': 25},
  {'kernel': 'linear', 'C': 10},
  {'ke

In [122]:
df3 = pd.DataFrame(rsc.cv_results_)
df3

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.02285,0.001807,0.007177,0.000961,poly,10,"{'kernel': 'poly', 'C': 10}",0.983333,0.944444,0.980501,0.988858,0.947075,0.968842,0.019056,1
1,0.022232,0.00162,0.008867,0.002216,poly,50,"{'kernel': 'poly', 'C': 50}",0.983333,0.944444,0.980501,0.988858,0.947075,0.968842,0.019056,1
2,0.155171,0.006385,0.050427,0.001197,rbf,10,"{'kernel': 'rbf', 'C': 10}",0.452778,0.469444,0.479109,0.479109,0.501393,0.476366,0.015784,6
3,0.020419,0.001657,0.006792,0.000691,poly,25,"{'kernel': 'poly', 'C': 25}",0.983333,0.944444,0.980501,0.988858,0.947075,0.968842,0.019056,1
4,0.018169,0.00177,0.005806,0.001143,linear,10,"{'kernel': 'linear', 'C': 10}",0.963889,0.919444,0.966574,0.963788,0.924791,0.947697,0.020978,4
5,0.017496,0.000784,0.005047,7.8e-05,linear,25,"{'kernel': 'linear', 'C': 25}",0.963889,0.919444,0.966574,0.963788,0.924791,0.947697,0.020978,4


In [123]:
df3[['param_kernel','param_C','mean_test_score']]

Unnamed: 0,param_kernel,param_C,mean_test_score
0,poly,10,0.968842
1,poly,50,0.968842
2,rbf,10,0.476366
3,poly,25,0.968842
4,linear,10,0.947697
5,linear,25,0.947697


In [145]:
model_params = {
    'SVM': {
        'model': svm.SVC(gamma='auto'),
        'params' : {
            'kernel' : ['rbf','linear','poly'],
            'C' : [10,25,50]
        }
    },
    'RandomForestClassifer' : {
        'model' : RandomForestClassifier(),
        'params' : {
            'n_estimators' : [10,20,30]
        }
    },
    'LogisticRegression' : {
        'model': LogisticRegression(),
        'params' : {
            'C' : [1,5,10]
        }
    },
    'DecisionTreeClassifier' : {
        'model' : DecisionTreeClassifier(),
        'params' : {
            'criterion' : ['gini','entropy']
        }
    },
    'MultiNomialNB' : {
        'model' : MultinomialNB(),
        'params' : {
            'alpha' :[1,2,3]
        }
    },
    'GaussianNB' : {
        'model' : GaussianNB(),
        'params' : {}
    }
}

In [146]:
scores = []
for model_name,mn in model_params.items():
    gsc = GridSearchCV(mn['model'],mn['params'], cv = 5, return_train_score=False)
    gsc.fit(X,y)
    scores.append({
        'model' : model_name,
        'best_score' : gsc.best_score_,
        'best_params' : gsc.best_params_
    })
scores

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

[{'model': 'SVM',
  'best_score': 0.96884246363355,
  'best_params': {'C': 10, 'kernel': 'poly'}},
 {'model': 'RandomForestClassifer',
  'best_score': 0.933800680903745,
  'best_params': {'n_estimators': 30}},
 {'model': 'LogisticRegression',
  'best_score': 0.9137650882079852,
  'best_params': {'C': 1}},
 {'model': 'DecisionTreeClassifier',
  'best_score': 0.8019158155369854,
  'best_params': {'criterion': 'entropy'}},
 {'model': 'MultiNomialNB',
  'best_score': 0.8720210461157537,
  'best_params': {'alpha': 3}},
 {'model': 'GaussianNB', 'best_score': 0.8069281956050759, 'best_params': {}}]

In [148]:
df4 = pd.DataFrame(scores)
df4

Unnamed: 0,model,best_score,best_params
0,SVM,0.968842,"{'C': 10, 'kernel': 'poly'}"
1,RandomForestClassifer,0.933801,{'n_estimators': 30}
2,LogisticRegression,0.913765,{'C': 1}
3,DecisionTreeClassifier,0.801916,{'criterion': 'entropy'}
4,MultiNomialNB,0.872021,{'alpha': 3}
5,GaussianNB,0.806928,{}
