In [10]:
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

from sklearn.grid_search import GridSearchCV

In [11]:
bioresponce = pd.read_csv('bioresponse.csv', header=0, sep=',')

y = bioresponce.Activity.values
x = bioresponce.iloc[:, 1:]

# GridSearchCV

## KNeighborsClassifier

In [12]:
%%time
tuned_parameters = [{'algorithm': ['auto', 'ball_tree','kd_tree','brute'], 'n_neighbors': [1,2,3,4,5]}]
gs = GridSearchCV(KNeighborsClassifier(), tuned_parameters, cv=5, n_jobs=-1)
gs.fit(x,y)
print(gs.best_params_)
print(gs.best_score_)

{'algorithm': 'auto', 'n_neighbors': 3}
0.7494001599573447
Wall time: 5min 21s


## DecisionTreeClassifier

In [13]:
%%time
tuned_parameters = [{'criterion': ['entropy','gini'], 'max_depth': [1,2,3,4,5], 
                     'min_samples_leaf': [1,2,3,4,5], 'random_state': [64]}]
gs = GridSearchCV(DecisionTreeClassifier(), tuned_parameters, cv=5, n_jobs=-1)
gs.fit(x,y)
print(gs.best_params_)
print(gs.best_score_)

{'criterion': 'gini', 'max_depth': 5, 'min_samples_leaf': 2, 'random_state': 64}
0.761663556384964
Wall time: 1min 1s


## LinearSVC

In [14]:
%%time
tuned_parameters = [{'C': [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0], 'tol': [1e-4, 1e-3, 1e-2, 1e-1]}]
gs = GridSearchCV(LinearSVC(), tuned_parameters, cv=5, n_jobs=-1)
gs.fit(x,y)
print(gs.best_params_)
print(gs.best_score_)

{'C': 1.0, 'tol': 0.1}
0.7400693148493735
Wall time: 5min 31s


## RandomForestClassifier

In [15]:
from scipy.stats import randint as sp_randint

In [18]:
%%time
tuned_parameters = [{'max_depth': [3, None], 
                    'max_features': [1,2,3,4,5,6],
                    'min_samples_split': [2,3,4,5,6], 
                    'min_samples_leaf': [1,2,3,4,5,6],
                    'bootstrap': [True, False],
                    'criterion': ['gini', 'entropy']}]
gs = GridSearchCV(RandomForestClassifier(n_estimators=20), tuned_parameters, cv=5, n_jobs=-1)
gs.fit(x,y)
print(gs.best_params_)
print(gs.best_score_)

{'bootstrap': False, 'criterion': 'entropy', 'max_depth': None, 'max_features': 6, 'min_samples_leaf': 1, 'min_samples_split': 5}
0.782991202346041
Wall time: 16min 42s


## GradientBoostingClassifier

In [20]:
%%time
tuned_parameters = [{'n_estimators': [10, 20, 30], 'learning_rate': [0.05,0.2,0.15],
                     'max_depth':[1,2,3,4,5]}]
gs = GridSearchCV(GradientBoostingClassifier(), tuned_parameters, cv=5, n_jobs=-1)
gs.fit(x,y)
print(gs.best_params_)
print(gs.best_score_)

{'learning_rate': 0.2, 'max_depth': 4, 'n_estimators': 30}
0.7885897094108237
Wall time: 9min 5s


# RandomizedSearchCV

In [21]:
from sklearn.model_selection import RandomizedSearchCV

In [23]:
%%time
tuned_parameters = {"max_depth": [3, None], 
                    "max_features": sp_randint(1, 6),
                    "min_samples_split": sp_randint(2, 6), 
                    "min_samples_leaf": sp_randint(1, 6),
                    "bootstrap": [True, False],
                    "criterion": ["gini", "entropy"]}
n_iter_search = 20
rs = RandomizedSearchCV(RandomForestClassifier(n_estimators=20), 
                                   param_distributions=tuned_parameters, n_iter=n_iter_search)
rs.fit(x, y)
print(rs.best_params_)
print(rs.best_score_)

{'bootstrap': True, 'criterion': 'entropy', 'max_depth': None, 'max_features': 4, 'min_samples_leaf': 3, 'min_samples_split': 3}
0.736870167955
Wall time: 11.5 s


In [None]:
#функция RandomizedSearchCV позволяет перебрать заданное число случайных наборов параметров (n_iter_search)
#из заданного множества набора параметров (tuned_parameters)