In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.cross_validation import cross_val_score
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, ExtraTreesClassifier
from sklearn.naive_bayes import GaussianNB


from sklearn.grid_search import GridSearchCV



In [None]:
# Hyperparameter grids handed to GridSearchCV, one dictionary per classifier.
# Every key must map to a NON-EMPTY list of candidate values: an empty list
# makes the grid search raise a ValueError before any model is fitted.
paramsRandomForest = {
    # None is the estimator's default (trees grow until the leaves are pure).
    # TODO: add explicit depth limits here if depth should be tuned.
    'max_depth': [None],
    'min_samples_split': [2, 3, 5, 6],
    'min_samples_leaf': [2, 4, 5],
    'n_estimators': [1500, 1800],
}

# TODO: finish parameter values. An empty grid means the classifier is
# evaluated with its default hyperparameters only.
paramsDecisionTrees = {}
paramsExtraTrees = {}
paramsKNN = {}
paramsAdaboost = {}
paramsNeuralNet = {}
paramsSVM = {}

In [2]:
# Parameter grid for every classifier.
#
# Keys are the classifier names used in df['classifier name'] and in the model
# list built by gridSearch, and the entries are listed in that same order, so a
# grid can be matched to its estimator either by name or by position.
# An empty grid ({}) means "evaluate with the default hyperparameters".

algs = {
    'KNN': paramsKNN,
    'Decision Tree': paramsDecisionTrees,
    'Naive Bayes': {},
    'SVM': paramsSVM,
    'Gaussian Process': {},
    'Random Forest': paramsRandomForest,
    'Neural Net': paramsNeuralNet,
    'AdaBoost': paramsAdaboost,
    'Extra Trees Classifier': paramsExtraTrees,
}


# Results table: one row per classifier, labelled in the 'classifier name'
# column. run_dataset adds one score column per dataset run, filled
# positionally, so the row order here is significant.
df = pd.DataFrame({
    'classifier name': [
        'KNN',
        'Decision Tree',
        'Naive Bayes',
        'SVM',
        'Gaussian Process',
        'Random Forest',
        'Neural Net',
        'AdaBoost',
        'Extra Trees Classifier',
    ],
})

In [3]:
def gridSearch(dataset_name, X, y, num_iterations):
    """Run the classifier comparison `num_iterations` times on one dataset.

    Every pass builds a fresh list of unfitted classifiers and hands it,
    together with the module-level `algs` parameter grids, to `run_dataset`.
    Passes are labelled dataset_name + "1", dataset_name + "2", ... up to and
    including `num_iterations`.

    Parameters
    ----------
    dataset_name : str
        Prefix of the label given to each pass.
    X, y
        Features and labels, forwarded unchanged to `run_dataset`.
    num_iterations : int
        Number of passes to run. Zero or a negative value runs nothing.

    Returns
    -------
    The module-level `df` results frame.
    """
    # Passes are numbered from 1, so the upper bound is num_iterations + 1;
    # range(1, num_iterations) would run one pass fewer than requested.
    for i in range(1, num_iterations + 1):
        name = dataset_name + str(i)

        # New estimator instances on every pass so no fitted state carries
        # over. The order matches df['classifier name'] because the scores
        # are written into df positionally.
        models = [
            ('KNN', KNeighborsClassifier()),
            ('Decision Tree', DecisionTreeClassifier()),
            ('Naive Bayes', GaussianNB()),
            ('SVM', SVC()),
            ('Gaussian Process', GaussianProcessClassifier()),
            ('Random Forest', RandomForestClassifier()),
            ('Neural Net', MLPClassifier()),
            ('AdaBoost', AdaBoostClassifier()),
            ('Extra Trees Classifier', ExtraTreesClassifier()),
        ]

        run_dataset(name, X, y, models, algs)

    return df

In [4]:
def run_dataset(dataset_name, X, y, models, algs):
    """Grid-search every classifier on one dataset and record its ROC-AUC.

    For each (name, estimator) pair a 10-fold GridSearchCV scored with
    'roc_auc' is fitted over that estimator's parameter grid, and its best
    score and best parameters are printed. The mean 10-fold `cross_val_score`
    of the search object is then collected. The collected means are written to
    the module-level `df` as a new column named `dataset_name`, one value per
    model in iteration order.

    Parameters
    ----------
    dataset_name
        Label of the column added to `df`.
    X, y
        Features and labels passed to `fit` and `cross_val_score`.
        NOTE(review): 'roc_auc' scoring presumably requires binary labels;
        confirm against the datasets used.
    models
        Either a list of (name, estimator) pairs or a dict mapping
        name -> estimator.
    algs
        Mapping of classifier name -> parameter grid. A classifier whose name
        has no entry is searched with an empty grid (default hyperparameters)
        and a warning is issued.

    Returns
    -------
    None. The result is stored in `df`.
    """
    import warnings

    # Accept both shapes of `models`; calling .items() on a list of tuples
    # raises AttributeError.
    named_models = list(models.items()) if hasattr(models, 'items') else list(models)

    accuracy_list = []
    for name, model in named_models:
        # Grids are looked up by classifier name. Zipping two containers by
        # position silently mis-pairs them whenever their orders differ.
        if name in algs:
            params = algs[name]
        else:
            warnings.warn(
                "no parameter grid found for %r; using default hyperparameters" % (name,),
                stacklevel=2,
            )
            params = {}

        clf = GridSearchCV(model, params, cv=10, scoring='roc_auc')
        clf.fit(X, y)

        # Best cross-validated score and the parameter combination behind it.
        print(clf.best_score_)
        print(clf.best_params_)

        # clf is the GridSearchCV object itself, so each of the 10 outer folds
        # re-runs the full grid search (nested cross-validation). Expensive.
        accuracy_list.append(cross_val_score(clf, X, y, cv=10, scoring='roc_auc').mean())

    # Positional assignment: one score per row of df, in model order.
    df[dataset_name] = pd.Series(accuracy_list).values