In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Lift pandas' column display limit so wide DataFrames are shown without truncation.
pd.set_option('display.max_columns', None)

In [2]:
# Training split: feature table and its label table, read from separate CSVs.
tr_features, tr_labels = (
    pd.read_csv('data/train_features.csv'),
    pd.read_csv('data/train_labels.csv'),
)

# Held-out test split, stored in the same two-file layout.
te_features, te_labels = (
    pd.read_csv('data/test_features.csv'),
    pd.read_csv('data/test_labels.csv'),
)

In [3]:
def print_results(results):
    """Print the best parameters, then every searched candidate, best first.

    Parameters
    ----------
    results : fitted search object
        Must expose ``best_params_`` and a ``cv_results_`` mapping with the
        keys ``'mean_test_score'``, ``'std_test_score'`` and ``'params'``
        (for example a fitted ``GridSearchCV``).

    Output
    ------
    One ``BEST PARAMS`` line, a blank line, then one line per candidate in the
    form ``<mean> (+/- <2 * std>) for <params>``, ordered by descending mean
    score. Candidates whose mean is NaN are listed last. Nothing is returned.
    """
    print(f'BEST PARAMS: {results.best_params_}\n')

    cv_results = results.cv_results_
    rows = zip(
        cv_results['mean_test_score'],
        cv_results['std_test_score'],
        cv_results['params'],
    )

    def _rank(row):
        # Order by descending mean; NaN never compares, so push it to the end.
        mean = row[0]
        is_nan = mean != mean
        return (is_nan, 0.0 if is_nan else -mean)

    # Sort whole rows: sorting the means by themselves would pair each score
    # with the std and params of a different candidate.
    for mean, std, params in sorted(rows, key=_rank):
        print(f'{round(mean,3)} (+/- {round(std * 2, 3)}) for {params}')

In [7]:
# Baseline gradient-boosting model with library-default hyperparameters.
# A fixed seed keeps the CV scores and the fitted model reproducible across runs.
gb = GradientBoostingClassifier(random_state=42)

# 5-fold cross-validation on the training split; labels are flattened to 1-D.
gbscores = cross_val_score(gb, tr_features, tr_labels.values.ravel(), cv=5, n_jobs=16)

print(gbscores)

# cross_val_score fits clones only, so fit `gb` itself on the full training
# split to make it usable for prediction afterwards.
gb.fit(tr_features, tr_labels.values.ravel())

[0.38593725 0.40422513 0.38546429 0.39681539 0.39098219]


In [5]:
# Hyperparameter grid for the gradient-boosting search (2 * 2 * 2 * 3 = 24 candidates).
gbparams = {
    # 'deviance' is the former name of 'log_loss' (deprecated in scikit-learn 1.1,
    # removed in 1.3), so listing both either repeats the same model or fails.
    # NOTE(review): 'exponential' only supports binary targets - confirm the
    # labels are binary, otherwise drop it to avoid failed fits.
    'loss': ['log_loss', 'exponential'],
    'n_estimators': [10, 50],
    'criterion': ['friedman_mse', 'squared_error'],
    'max_features': ['sqrt', 'log2', None],
    # warm_start is intentionally not searched: GridSearchCV fits a fresh clone
    # per candidate, so the flag cannot change the result and would only double
    # the number of fits.
}

# Exhaustive 5-fold search over the grid; each candidate is fitted on a clone of `gb`.
cv = GridSearchCV(gb, gbparams, cv=5, n_jobs=16)
cv.fit(tr_features, tr_labels.values.ravel())

print_results(cv)

KeyboardInterrupt: 

In [None]:
# Score each fitted model on the held-out test split and print one summary line per model.
for mdl in [gb]:
    y_pred = mdl.predict(te_features)

    # Plain accuracy, rounded to 8 decimal places.
    accuracy = round(accuracy_score(te_labels, y_pred), 8)

    # Precision, recall and F1 all use the 'weighted' average and the same rounding.
    precision, recall, f1 = (
        round(metric(te_labels, y_pred, average='weighted'), 8)
        for metric in (precision_score, recall_score, f1_score)
    )

    print(f'MAX DEPTH: {mdl.max_depth} / MAX LEAF NODES: {mdl.max_leaf_nodes} / A: {accuracy} / P: {precision} / R: {recall} / F1: {f1}')

MAX DEPTH: 3 / MAX LEAF NODES: None / A: 0.39765418 / P: 0.38738848 / R: 0.39765418 / F1: 0.38454585
