In [1]:
# models
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier

## Models list:
- LogisticRegression(),
- SGDClassifier(),
- DecisionTreeClassifier(),
- RandomForestClassifier(),
- GradientBoostingClassifier(),
- ExtraTreesClassifier(),
- AdaBoostClassifier(),
- SVC(),
- GaussianNB(),
- MLPClassifier()

In [8]:
grid_search_dict = {}

### LogisticRegression

In [14]:
logreg = LogisticRegression()

print(logreg.get_params())

logreg_params = {
    'C':[1.0, 0.8, 0.6],
    'penalty': ['l2','l1'],
    'random_state': [42, 1453],
    'solver': ['saga','liblinear']
}

grid_search_dict[type(logreg).__name__] = logreg_params
grid_search_dict

{'LogisticRegression': {'C': [1.0, 0.8, 0.6],
  'penalty': ['l2', 'l1'],
  'random_state': [42, 1453],
  'solver': ['saga', 'liblinear']}}

### SGDClassifier

## GradientBoostingClassifier

In [None]:
parameters = {
    "learning_rate": [0.01, 0.05, 0.1, 0.2],
    "min_samples_split": [0.1, 0.2, 0.3],
    "min_samples_leaf": [0.1, 0.3],
    "max_depth":[3,5, 7],
    "subsample":[0.5],
    }


gbc = GradientBoostingClassifier()

In [None]:
%%time
grid_search_gbc = GridSearchCV(
    estimator=gbc,
    param_grid=parameters,
    scoring = 'accuracy',
    n_jobs = 5,
    cv = 5,
    verbose=10
)

grid_search_gbc.fit(train, target)

Fitting 5 folds for each of 72 candidates, totalling 360 fits
CPU times: user 3.52 s, sys: 314 ms, total: 3.83 s
Wall time: 1min 42s


GridSearchCV(cv=5, estimator=GradientBoostingClassifier(), n_jobs=5,
             param_grid={'learning_rate': [0.01, 0.05, 0.1, 0.2],
                         'max_depth': [3, 5, 7], 'min_samples_leaf': [0.1, 0.3],
                         'min_samples_split': [0.1, 0.2, 0.3],
                         'subsample': [0.5]},
             scoring='accuracy', verbose=10)

In [None]:
acc_gbc = round(grid_search_gbc.score(train, target) * 100, 2)
print('train:', acc_gbc)

acc_test_gbc = round(grid_search_gbc.score(test, target_test) * 100, 2)
print('test', acc_test_gbc)


models_results['GradientBoostingClassifier_gridsearch'] = (acc_gbc, acc_test_gbc)

train: 83.09
test 83.0


## AdaBoostClassifier

In [None]:
grid = {}
grid['n_estimators'] = [10, 50, 100]
grid['learning_rate'] = [0.0001, 0.01, 0.1]


ada = AdaBoostClassifier()

In [None]:
grid_search_ada = GridSearchCV(
    estimator=ada,
    param_grid=grid,
    scoring = 'accuracy',
    n_jobs = 5,
    cv = 5,
    verbose=10
)

In [None]:
%%time
grid_search_ada.fit(train, target)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
CPU times: user 3.14 s, sys: 186 ms, total: 3.33 s
Wall time: 21.6 s


GridSearchCV(cv=5, estimator=AdaBoostClassifier(), n_jobs=5,
             param_grid={'learning_rate': [0.0001, 0.01, 0.1],
                         'n_estimators': [10, 50, 100]},
             scoring='accuracy', verbose=10)

In [None]:
acc_ada = round(grid_search_ada.score(train, target) * 100, 2)
print('train:', acc_ada)

acc_test_ada = round(grid_search_ada.score(test, target_test) * 100, 2)
print('test', acc_test_ada)


models_results['AdaBoostClassifier_gridsearch'] = (acc_ada, acc_test_ada)

train: 85.43
test 85.24


In [None]:
df_results = pd.DataFrame().from_dict(models_results).T.reset_index()
df_results = df_results.rename(columns={'index':'Model', 0:'Train', 1:'Test'})
df_results['Round'] = 1
df_results = df_results.sort_values(by='Test', ascending=False)
df_results

Unnamed: 0,Model,Train,Test,Round
12,XGBClassifier_gridsearch,89.49,87.16,1
8,XGBClassifier,90.63,86.87,1
9,GradientBoostingClassifier,86.98,86.75,1
11,AdaBoostClassifier,86.11,86.06,1
7,RandomForestClassifier,100.0,85.67,1
14,AdaBoostClassifier_gridsearch,85.43,85.24,1
10,ExtraTreesClassifier,100.0,83.51,1
13,GradientBoostingClassifier_gridsearch,83.09,83.0,1
6,Decision Tree,100.0,81.47,1
2,Linear SVC,79.61,80.24,1
