Comparing multiple models over different hyperparameter grids to find the best-performing model, along with its best hyperparameters, for the digits dataset from scikit-learn

In [3]:
import pandas as pd
from sklearn import datasets

# Load scikit-learn's digits dataset; later cells read `digits.data`
# (features) and `digits.target` (labels).
digits = datasets.load_digits()

In [4]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

In [5]:
# Seed for the estimators that draw random numbers while fitting, so that
# re-running the notebook from a fresh kernel reproduces the same scores.
RANDOM_STATE = 42

# Candidate estimators and the hyperparameter grid to search for each one.
# Each entry maps a display name to:
#   'model'  - an unfitted estimator instance
#   'params' - the grid handed to GridSearchCV; an empty dict means the
#              estimator is evaluated once with its constructor settings
models = {
    'svm' : {
        # gamma is only relevant to the 'rbf' kernel in the grid below.
        'model' : svm.SVC(gamma = 'auto'),
        'params' : {
            'C' : [1,10,20],
            'kernel' : ['linear', 'rbf']
        }
    },

    'random_forest' : {
        # Bootstrap sampling and feature selection are random: pin the seed.
        'model' : RandomForestClassifier(random_state=RANDOM_STATE),
        'params' : {
            'n_estimators' : [10, 50, 100],
        }
    },

    'Logistic_Regression' : {
        # NOTE(review): `multi_class` is deprecated in recent scikit-learn
        # releases (1.5+) - confirm against the installed version.
        'model' : LogisticRegression(solver='liblinear',multi_class='auto'),
        'params' : {
            'C' : [1,5,10]
        }
    },

    'Gaussian_NB' : {
        'model' : GaussianNB(),
        'params' : {}
    },

    'Multinomial_NB' : {
        'model' : MultinomialNB(),
        'params' : {}
    },

    'Decision_Tree' : {
        # Split selection can break ties randomly: pin the seed.
        'model' : DecisionTreeClassifier(random_state=RANDOM_STATE),
        'params' : {
            'criterion' : ['gini', 'entropy'],
        }
    }
}

In [9]:
# Run a 5-fold cross-validated grid search for every candidate in `models`
# and keep one summary record (name, best score, best parameters) per model.
scores = []
for name in models:
    spec = models[name]
    search = GridSearchCV(
        estimator=spec['model'],
        param_grid=spec['params'],
        cv=5,
        return_train_score=False,
    )
    search.fit(digits.data, digits.target)
    record = {
        'model' : name,
        'best_score' : search.best_score_,
        'best_params' : search.best_params_,
    }
    scores.append(record)

# One row per model, columns in a fixed order for display.
df = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])



In [10]:
# Show the summary table: one row per model with its best CV score and parameters.
df

Unnamed: 0,model,best_score,best_params
0,svm,0.947697,"{'C': 1, 'kernel': 'linear'}"
1,random_forest,0.936031,{'n_estimators': 50}
2,Logistic_Regression,0.922114,{'C': 1}
3,Gaussian_NB,0.806928,{}
4,Multinomial_NB,0.87035,{}
5,Decision_Tree,0.805822,{'criterion': 'entropy'}


From this we can conclude that, for the digits dataset, SVM works best among the models tried (mean 5-fold cross-validation score ≈ 0.948), with hyperparameters C = 1 and kernel = linear.