In [1]:
import numpy as np
import pandas as pd
import pickle

from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.utils import resample
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.externals import joblib

In [2]:
# Read in data
mod_data = pd.read_csv('appointments_clean.csv')

In [3]:
# Split into X and y
X = mod_data.drop(
    [
        'PatientId', 'AppointmentID', 'ScheduledDay', 'AppointmentDay', 
        'Neighbourhood', 'No-show', 'NoShow'
    ], 
    axis = 1
)
y = mod_data.NoShow

# Test/train split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.3, random_state = 314
)

In [4]:
# Grid search - gradient boosted
param_gb = {
    'loss': ['deviance'],
    'learning_rate': [0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2],
    'min_samples_split': np.linspace(0.1, 0.5, 9),
    'min_samples_leaf': np.linspace(0.1, 0.5, 9),
    'max_depth': [3, 5, 8],
    'max_features': ['log2', 'sqrt'],
    'criterion': ['friedman_mse'],
    'subsample': np.linspace(0.5, 1.0, 5),
    'n_estimators': [10]
} 

grid_gb01 = RandomizedSearchCV(
    GradientBoostingClassifier(),
    param_gb,
    n_iter = 50,
    refit = True,
    verbose = 3
)

grid_gb02 = RandomizedSearchCV(
    GradientBoostingClassifier(),
    param_gb,
    n_iter = 50,
    scoring = 'brier_score_loss',
    refit = True,
    verbose = 3
)

# Grid search - neural net
param_mlp = {
    'solver': ['lbfgs', 'sgd'], 
    'max_iter': np.arange(10, 21) * 100, 
    'alpha': 10.0 ** -np.arange(1, 10), 
    'hidden_layer_sizes': np.arange(10, 16)
}

grid_mlp01 = RandomizedSearchCV(
    MLPClassifier(),
    param_mlp,
    n_iter = 50,
    refit = True,
    verbose = 3
)

grid_mlp02 = RandomizedSearchCV(
    MLPClassifier(),
    param_mlp,
    n_iter = 50,
    scoring = 'brier_score_loss',
    refit = True,
    verbose = 3
)

In [5]:
gb01 = grid_gb01.fit(X_train, y_train)
gb02 = grid_gb02.fit(X_train, y_train)
mlp01 = grid_mlp01.fit(X_train, y_train)
mlp02 = grid_mlp02.fit(X_train, y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s


Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] subsample=0.5, n_estimators=10, min_samples_split=0.4, min_samples_leaf=0.35, max_features=log2, max_depth=5, loss=deviance, learning_rate=0.1, criterion=friedman_mse 
[CV]  subsample=0.5, n_estimators=10, min_samples_split=0.4, min_samples_leaf=0.35, max_features=log2, max_depth=5, loss=deviance, learning_rate=0.1, criterion=friedman_mse, score=0.7991857309034509, total=   0.1s
[CV] subsample=0.5, n_estimators=10, min_samples_split=0.4, min_samples_leaf=0.35, max_features=log2, max_depth=5, loss=deviance, learning_rate=0.1, criterion=friedman_mse 
[CV]  subsample=0.5, n_estimators=10, min_samples_split=0.4, min_samples_leaf=0.35, max_features=log2, max_depth=5, loss=deviance, learning_rate=0.1, criterion=friedman_mse, score=0.7991779440846872, total=   0.1s
[CV] subsample=0.5, n_estimators=10, min_samples_split=0.4, min_samples_leaf=0.35, max_features=log2, max_depth=5, loss=deviance, learning_rate=0.1, criterion=fried

[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.3s remaining:    0.0s


[CV]  subsample=0.875, n_estimators=10, min_samples_split=0.15000000000000002, min_samples_leaf=0.45000000000000007, max_features=log2, max_depth=3, loss=deviance, learning_rate=0.05, criterion=friedman_mse, score=0.7991857309034509, total=   0.1s
[CV] subsample=0.875, n_estimators=10, min_samples_split=0.15000000000000002, min_samples_leaf=0.45000000000000007, max_features=log2, max_depth=3, loss=deviance, learning_rate=0.05, criterion=friedman_mse 
[CV]  subsample=0.875, n_estimators=10, min_samples_split=0.15000000000000002, min_samples_leaf=0.45000000000000007, max_features=log2, max_depth=3, loss=deviance, learning_rate=0.05, criterion=friedman_mse, score=0.7991779440846872, total=   0.1s
[CV] subsample=0.875, n_estimators=10, min_samples_split=0.15000000000000002, min_samples_leaf=0.45000000000000007, max_features=log2, max_depth=3, loss=deviance, learning_rate=0.05, criterion=friedman_mse 
[CV]  subsample=0.875, n_estimators=10, min_samples_split=0.15000000000000002, min_samples

[CV]  subsample=0.75, n_estimators=10, min_samples_split=0.1, min_samples_leaf=0.15000000000000002, max_features=sqrt, max_depth=3, loss=deviance, learning_rate=0.075, criterion=friedman_mse, score=0.7991779440846872, total=   0.2s
[CV] subsample=0.75, n_estimators=10, min_samples_split=0.1, min_samples_leaf=0.15000000000000002, max_features=sqrt, max_depth=3, loss=deviance, learning_rate=0.075, criterion=friedman_mse 
[CV]  subsample=0.75, n_estimators=10, min_samples_split=0.1, min_samples_leaf=0.15000000000000002, max_features=sqrt, max_depth=3, loss=deviance, learning_rate=0.075, criterion=friedman_mse, score=0.7991779440846872, total=   0.2s
[CV] subsample=0.5, n_estimators=10, min_samples_split=0.15000000000000002, min_samples_leaf=0.2, max_features=log2, max_depth=8, loss=deviance, learning_rate=0.05, criterion=friedman_mse 
[CV]  subsample=0.5, n_estimators=10, min_samples_split=0.15000000000000002, min_samples_leaf=0.2, max_features=log2, max_depth=8, loss=deviance, learning_r

[CV]  subsample=0.75, n_estimators=10, min_samples_split=0.4, min_samples_leaf=0.35, max_features=sqrt, max_depth=3, loss=deviance, learning_rate=0.075, criterion=friedman_mse, score=0.7991779440846872, total=   0.1s
[CV] subsample=0.75, n_estimators=10, min_samples_split=0.4, min_samples_leaf=0.35, max_features=sqrt, max_depth=3, loss=deviance, learning_rate=0.075, criterion=friedman_mse 
[CV]  subsample=0.75, n_estimators=10, min_samples_split=0.4, min_samples_leaf=0.35, max_features=sqrt, max_depth=3, loss=deviance, learning_rate=0.075, criterion=friedman_mse, score=0.7991779440846872, total=   0.1s
[CV] subsample=0.5, n_estimators=10, min_samples_split=0.2, min_samples_leaf=0.4, max_features=log2, max_depth=8, loss=deviance, learning_rate=0.075, criterion=friedman_mse 
[CV]  subsample=0.5, n_estimators=10, min_samples_split=0.2, min_samples_leaf=0.4, max_features=log2, max_depth=8, loss=deviance, learning_rate=0.075, criterion=friedman_mse, score=0.7991857309034509, total=   0.1s
[

[CV]  subsample=0.75, n_estimators=10, min_samples_split=0.2, min_samples_leaf=0.30000000000000004, max_features=sqrt, max_depth=5, loss=deviance, learning_rate=0.2, criterion=friedman_mse, score=0.7991779440846872, total=   0.1s
[CV] subsample=0.75, n_estimators=10, min_samples_split=0.2, min_samples_leaf=0.30000000000000004, max_features=sqrt, max_depth=5, loss=deviance, learning_rate=0.2, criterion=friedman_mse 
[CV]  subsample=0.75, n_estimators=10, min_samples_split=0.2, min_samples_leaf=0.30000000000000004, max_features=sqrt, max_depth=5, loss=deviance, learning_rate=0.2, criterion=friedman_mse, score=0.7991779440846872, total=   0.1s
[CV] subsample=0.875, n_estimators=10, min_samples_split=0.35, min_samples_leaf=0.30000000000000004, max_features=log2, max_depth=3, loss=deviance, learning_rate=0.15, criterion=friedman_mse 
[CV]  subsample=0.875, n_estimators=10, min_samples_split=0.35, min_samples_leaf=0.30000000000000004, max_features=log2, max_depth=3, loss=deviance, learning_r

[CV]  subsample=0.75, n_estimators=10, min_samples_split=0.5, min_samples_leaf=0.1, max_features=log2, max_depth=8, loss=deviance, learning_rate=0.1, criterion=friedman_mse, score=0.7991779440846872, total=   0.1s
[CV] subsample=0.75, n_estimators=10, min_samples_split=0.5, min_samples_leaf=0.1, max_features=log2, max_depth=8, loss=deviance, learning_rate=0.1, criterion=friedman_mse 
[CV]  subsample=0.75, n_estimators=10, min_samples_split=0.5, min_samples_leaf=0.1, max_features=log2, max_depth=8, loss=deviance, learning_rate=0.1, criterion=friedman_mse, score=0.7991779440846872, total=   0.1s
[CV] subsample=0.625, n_estimators=10, min_samples_split=0.45000000000000007, min_samples_leaf=0.4, max_features=log2, max_depth=5, loss=deviance, learning_rate=0.01, criterion=friedman_mse 
[CV]  subsample=0.625, n_estimators=10, min_samples_split=0.45000000000000007, min_samples_leaf=0.4, max_features=log2, max_depth=5, loss=deviance, learning_rate=0.01, criterion=friedman_mse, score=0.79918573

[CV]  subsample=0.75, n_estimators=10, min_samples_split=0.15000000000000002, min_samples_leaf=0.1, max_features=log2, max_depth=3, loss=deviance, learning_rate=0.075, criterion=friedman_mse, score=0.7991857309034509, total=   0.2s
[CV] subsample=0.75, n_estimators=10, min_samples_split=0.15000000000000002, min_samples_leaf=0.1, max_features=log2, max_depth=3, loss=deviance, learning_rate=0.075, criterion=friedman_mse 
[CV]  subsample=0.75, n_estimators=10, min_samples_split=0.15000000000000002, min_samples_leaf=0.1, max_features=log2, max_depth=3, loss=deviance, learning_rate=0.075, criterion=friedman_mse, score=0.7991779440846872, total=   0.2s
[CV] subsample=0.75, n_estimators=10, min_samples_split=0.15000000000000002, min_samples_leaf=0.1, max_features=log2, max_depth=3, loss=deviance, learning_rate=0.075, criterion=friedman_mse 
[CV]  subsample=0.75, n_estimators=10, min_samples_split=0.15000000000000002, min_samples_leaf=0.1, max_features=log2, max_depth=3, loss=deviance, learnin

[CV]  subsample=0.875, n_estimators=10, min_samples_split=0.25, min_samples_leaf=0.30000000000000004, max_features=sqrt, max_depth=5, loss=deviance, learning_rate=0.15, criterion=friedman_mse, score=0.7991857309034509, total=   0.1s
[CV] subsample=0.875, n_estimators=10, min_samples_split=0.25, min_samples_leaf=0.30000000000000004, max_features=sqrt, max_depth=5, loss=deviance, learning_rate=0.15, criterion=friedman_mse 
[CV]  subsample=0.875, n_estimators=10, min_samples_split=0.25, min_samples_leaf=0.30000000000000004, max_features=sqrt, max_depth=5, loss=deviance, learning_rate=0.15, criterion=friedman_mse, score=0.7991779440846872, total=   0.1s
[CV] subsample=0.875, n_estimators=10, min_samples_split=0.25, min_samples_leaf=0.30000000000000004, max_features=sqrt, max_depth=5, loss=deviance, learning_rate=0.15, criterion=friedman_mse 
[CV]  subsample=0.875, n_estimators=10, min_samples_split=0.25, min_samples_leaf=0.30000000000000004, max_features=sqrt, max_depth=5, loss=deviance, l

[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:   20.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] subsample=0.75, n_estimators=10, min_samples_split=0.45000000000000007, min_samples_leaf=0.30000000000000004, max_features=sqrt, max_depth=3, loss=deviance, learning_rate=0.075, criterion=friedman_mse 
[CV]  subsample=0.75, n_estimators=10, min_samples_split=0.45000000000000007, min_samples_leaf=0.30000000000000004, max_features=sqrt, max_depth=3, loss=deviance, learning_rate=0.075, criterion=friedman_mse, score=-0.15740547363199456, total=   0.1s
[CV] subsample=0.75, n_estimators=10, min_samples_split=0.45000000000000007, min_samples_leaf=0.30000000000000004, max_features=sqrt, max_depth=3, loss=deviance, learning_rate=0.075, criterion=friedman_mse 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.3s remaining:    0.0s


[CV]  subsample=0.75, n_estimators=10, min_samples_split=0.45000000000000007, min_samples_leaf=0.30000000000000004, max_features=sqrt, max_depth=3, loss=deviance, learning_rate=0.075, criterion=friedman_mse, score=-0.15566969693837224, total=   0.1s
[CV] subsample=0.75, n_estimators=10, min_samples_split=0.45000000000000007, min_samples_leaf=0.30000000000000004, max_features=sqrt, max_depth=3, loss=deviance, learning_rate=0.075, criterion=friedman_mse 
[CV]  subsample=0.75, n_estimators=10, min_samples_split=0.45000000000000007, min_samples_leaf=0.30000000000000004, max_features=sqrt, max_depth=3, loss=deviance, learning_rate=0.075, criterion=friedman_mse, score=-0.15332520213633413, total=   0.1s
[CV] subsample=0.5, n_estimators=10, min_samples_split=0.15000000000000002, min_samples_leaf=0.1, max_features=sqrt, max_depth=3, loss=deviance, learning_rate=0.15, criterion=friedman_mse 
[CV]  subsample=0.5, n_estimators=10, min_samples_split=0.15000000000000002, min_samples_leaf=0.1, max_f

[CV]  subsample=0.875, n_estimators=10, min_samples_split=0.45000000000000007, min_samples_leaf=0.35, max_features=log2, max_depth=8, loss=deviance, learning_rate=0.075, criterion=friedman_mse, score=-0.15395321706299242, total=   0.1s
[CV] subsample=0.875, n_estimators=10, min_samples_split=0.45000000000000007, min_samples_leaf=0.35, max_features=log2, max_depth=8, loss=deviance, learning_rate=0.075, criterion=friedman_mse 
[CV]  subsample=0.875, n_estimators=10, min_samples_split=0.45000000000000007, min_samples_leaf=0.35, max_features=log2, max_depth=8, loss=deviance, learning_rate=0.075, criterion=friedman_mse, score=-0.15454461280594167, total=   0.1s
[CV] subsample=0.625, n_estimators=10, min_samples_split=0.4, min_samples_leaf=0.35, max_features=sqrt, max_depth=8, loss=deviance, learning_rate=0.2, criterion=friedman_mse 
[CV]  subsample=0.625, n_estimators=10, min_samples_split=0.4, min_samples_leaf=0.35, max_features=sqrt, max_depth=8, loss=deviance, learning_rate=0.2, criterio

[CV]  subsample=0.5, n_estimators=10, min_samples_split=0.4, min_samples_leaf=0.35, max_features=sqrt, max_depth=8, loss=deviance, learning_rate=0.1, criterion=friedman_mse, score=-0.1604879889994358, total=   0.1s
[CV] subsample=0.5, n_estimators=10, min_samples_split=0.4, min_samples_leaf=0.35, max_features=sqrt, max_depth=8, loss=deviance, learning_rate=0.1, criterion=friedman_mse 
[CV]  subsample=0.5, n_estimators=10, min_samples_split=0.4, min_samples_leaf=0.35, max_features=sqrt, max_depth=8, loss=deviance, learning_rate=0.1, criterion=friedman_mse, score=-0.16049257289408925, total=   0.1s
[CV] subsample=0.5, n_estimators=10, min_samples_split=0.4, min_samples_leaf=0.35, max_features=sqrt, max_depth=8, loss=deviance, learning_rate=0.1, criterion=friedman_mse 
[CV]  subsample=0.5, n_estimators=10, min_samples_split=0.4, min_samples_leaf=0.35, max_features=sqrt, max_depth=8, loss=deviance, learning_rate=0.1, criterion=friedman_mse, score=-0.16049271348063857, total=   0.1s
[CV] su

[CV]  subsample=0.75, n_estimators=10, min_samples_split=0.5, min_samples_leaf=0.30000000000000004, max_features=log2, max_depth=5, loss=deviance, learning_rate=0.075, criterion=friedman_mse, score=-0.15670225785052128, total=   0.1s
[CV] subsample=0.75, n_estimators=10, min_samples_split=0.5, min_samples_leaf=0.30000000000000004, max_features=log2, max_depth=5, loss=deviance, learning_rate=0.075, criterion=friedman_mse 
[CV]  subsample=0.75, n_estimators=10, min_samples_split=0.5, min_samples_leaf=0.30000000000000004, max_features=log2, max_depth=5, loss=deviance, learning_rate=0.075, criterion=friedman_mse, score=-0.15533614305236235, total=   0.1s
[CV] subsample=0.875, n_estimators=10, min_samples_split=0.4, min_samples_leaf=0.30000000000000004, max_features=log2, max_depth=8, loss=deviance, learning_rate=0.01, criterion=friedman_mse 
[CV]  subsample=0.875, n_estimators=10, min_samples_split=0.4, min_samples_leaf=0.30000000000000004, max_features=log2, max_depth=8, loss=deviance, le

[CV]  subsample=0.5, n_estimators=10, min_samples_split=0.15000000000000002, min_samples_leaf=0.5, max_features=log2, max_depth=8, loss=deviance, learning_rate=0.025, criterion=friedman_mse, score=-0.1604879007765402, total=   0.1s
[CV] subsample=0.5, n_estimators=10, min_samples_split=0.15000000000000002, min_samples_leaf=0.5, max_features=log2, max_depth=8, loss=deviance, learning_rate=0.025, criterion=friedman_mse 
[CV]  subsample=0.5, n_estimators=10, min_samples_split=0.15000000000000002, min_samples_leaf=0.5, max_features=log2, max_depth=8, loss=deviance, learning_rate=0.025, criterion=friedman_mse, score=-0.16049257106040288, total=   0.1s
[CV] subsample=0.5, n_estimators=10, min_samples_split=0.15000000000000002, min_samples_leaf=0.5, max_features=log2, max_depth=8, loss=deviance, learning_rate=0.025, criterion=friedman_mse 
[CV]  subsample=0.5, n_estimators=10, min_samples_split=0.15000000000000002, min_samples_leaf=0.5, max_features=log2, max_depth=8, loss=deviance, learning_

[CV]  subsample=0.5, n_estimators=10, min_samples_split=0.4, min_samples_leaf=0.45000000000000007, max_features=sqrt, max_depth=8, loss=deviance, learning_rate=0.075, criterion=friedman_mse, score=-0.1604927363552292, total=   0.1s
[CV] subsample=0.5, n_estimators=10, min_samples_split=0.4, min_samples_leaf=0.45000000000000007, max_features=sqrt, max_depth=8, loss=deviance, learning_rate=0.075, criterion=friedman_mse 
[CV]  subsample=0.5, n_estimators=10, min_samples_split=0.4, min_samples_leaf=0.45000000000000007, max_features=sqrt, max_depth=8, loss=deviance, learning_rate=0.075, criterion=friedman_mse, score=-0.16049255801680298, total=   0.1s
[CV] subsample=0.875, n_estimators=10, min_samples_split=0.25, min_samples_leaf=0.5, max_features=log2, max_depth=8, loss=deviance, learning_rate=0.075, criterion=friedman_mse 
[CV]  subsample=0.875, n_estimators=10, min_samples_split=0.25, min_samples_leaf=0.5, max_features=log2, max_depth=8, loss=deviance, learning_rate=0.075, criterion=frie

[CV]  subsample=1.0, n_estimators=10, min_samples_split=0.25, min_samples_leaf=0.1, max_features=sqrt, max_depth=3, loss=deviance, learning_rate=0.1, criterion=friedman_mse, score=-0.08125843228376052, total=   0.1s
[CV] subsample=1.0, n_estimators=10, min_samples_split=0.25, min_samples_leaf=0.1, max_features=sqrt, max_depth=3, loss=deviance, learning_rate=0.1, criterion=friedman_mse 
[CV]  subsample=1.0, n_estimators=10, min_samples_split=0.25, min_samples_leaf=0.1, max_features=sqrt, max_depth=3, loss=deviance, learning_rate=0.1, criterion=friedman_mse, score=-0.08319171575406008, total=   0.1s
[CV] subsample=0.5, n_estimators=10, min_samples_split=0.35, min_samples_leaf=0.45000000000000007, max_features=sqrt, max_depth=3, loss=deviance, learning_rate=0.1, criterion=friedman_mse 
[CV]  subsample=0.5, n_estimators=10, min_samples_split=0.35, min_samples_leaf=0.45000000000000007, max_features=sqrt, max_depth=3, loss=deviance, learning_rate=0.1, criterion=friedman_mse, score=-0.1604880

[CV]  subsample=0.625, n_estimators=10, min_samples_split=0.25, min_samples_leaf=0.25, max_features=sqrt, max_depth=8, loss=deviance, learning_rate=0.025, criterion=friedman_mse, score=-0.15892969856971784, total=   0.1s


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:   20.2s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] solver=lbfgs, max_iter=2000, hidden_layer_sizes=12, alpha=0.001 .
[CV]  solver=lbfgs, max_iter=2000, hidden_layer_sizes=12, alpha=0.001, score=0.9130670802636681, total=  21.8s
[CV] solver=lbfgs, max_iter=2000, hidden_layer_sizes=12, alpha=0.001 .


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   21.8s remaining:    0.0s


[CV]  solver=lbfgs, max_iter=2000, hidden_layer_sizes=12, alpha=0.001, score=0.8875489549808058, total=   1.0s
[CV] solver=lbfgs, max_iter=2000, hidden_layer_sizes=12, alpha=0.001 .


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   22.8s remaining:    0.0s


[CV]  solver=lbfgs, max_iter=2000, hidden_layer_sizes=12, alpha=0.001, score=0.9271394780720462, total=  19.3s
[CV] solver=lbfgs, max_iter=1100, hidden_layer_sizes=12, alpha=1e-06 .
[CV]  solver=lbfgs, max_iter=1100, hidden_layer_sizes=12, alpha=1e-06, score=0.9144241954245832, total=  13.4s
[CV] solver=lbfgs, max_iter=1100, hidden_layer_sizes=12, alpha=1e-06 .
[CV]  solver=lbfgs, max_iter=1100, hidden_layer_sizes=12, alpha=1e-06, score=0.9253557718407073, total=  13.3s
[CV] solver=lbfgs, max_iter=1100, hidden_layer_sizes=12, alpha=1e-06 .
[CV]  solver=lbfgs, max_iter=1100, hidden_layer_sizes=12, alpha=1e-06, score=0.9273333591841483, total=   8.1s
[CV] solver=lbfgs, max_iter=1700, hidden_layer_sizes=11, alpha=0.001 .
[CV]  solver=lbfgs, max_iter=1700, hidden_layer_sizes=11, alpha=0.001, score=0.9231872818922063, total=  15.4s
[CV] solver=lbfgs, max_iter=1700, hidden_layer_sizes=11, alpha=0.001 .
[CV]  solver=lbfgs, max_iter=1700, hidden_layer_sizes=11, alpha=0.001, score=0.92543332428

[CV]  solver=lbfgs, max_iter=1600, hidden_layer_sizes=12, alpha=1e-08, score=0.9230321830166731, total=  15.6s
[CV] solver=lbfgs, max_iter=1600, hidden_layer_sizes=12, alpha=1e-08 .
[CV]  solver=lbfgs, max_iter=1600, hidden_layer_sizes=12, alpha=1e-08, score=0.9253945480631277, total=  10.7s
[CV] solver=lbfgs, max_iter=1600, hidden_layer_sizes=12, alpha=1e-08 .
[CV]  solver=lbfgs, max_iter=1600, hidden_layer_sizes=12, alpha=1e-08, score=0.9271782542944667, total=   7.9s
[CV] solver=lbfgs, max_iter=1500, hidden_layer_sizes=11, alpha=1e-08 .
[CV]  solver=lbfgs, max_iter=1500, hidden_layer_sizes=11, alpha=1e-08, score=0.923962776269872, total=  14.2s
[CV] solver=lbfgs, max_iter=1500, hidden_layer_sizes=11, alpha=1e-08 .
[CV]  solver=lbfgs, max_iter=1500, hidden_layer_sizes=11, alpha=1e-08, score=0.9253169956182868, total=   7.5s
[CV] solver=lbfgs, max_iter=1500, hidden_layer_sizes=11, alpha=1e-08 .
[CV]  solver=lbfgs, max_iter=1500, hidden_layer_sizes=11, alpha=1e-08, score=0.925859862732

[CV]  solver=sgd, max_iter=2000, hidden_layer_sizes=10, alpha=0.1, score=0.9234945131645275, total=   8.7s
[CV] solver=sgd, max_iter=2000, hidden_layer_sizes=10, alpha=0.1 .....
[CV]  solver=sgd, max_iter=2000, hidden_layer_sizes=10, alpha=0.1, score=0.9188801426964985, total=   9.7s
[CV] solver=lbfgs, max_iter=2000, hidden_layer_sizes=14, alpha=1e-05 .
[CV]  solver=lbfgs, max_iter=2000, hidden_layer_sizes=14, alpha=1e-05, score=0.9238076773943389, total=  21.4s
[CV] solver=lbfgs, max_iter=2000, hidden_layer_sizes=14, alpha=1e-05 .
[CV]  solver=lbfgs, max_iter=2000, hidden_layer_sizes=14, alpha=1e-05, score=0.9257435340649114, total=  14.7s
[CV] solver=lbfgs, max_iter=2000, hidden_layer_sizes=14, alpha=1e-05 .
[CV]  solver=lbfgs, max_iter=2000, hidden_layer_sizes=14, alpha=1e-05, score=0.9273333591841483, total=  17.6s
[CV] solver=lbfgs, max_iter=1700, hidden_layer_sizes=15, alpha=1e-05 .
[CV]  solver=lbfgs, max_iter=1700, hidden_layer_sizes=15, alpha=1e-05, score=0.923962776269872, to

[CV]  solver=lbfgs, max_iter=1500, hidden_layer_sizes=12, alpha=1e-06, score=0.9272558067393074, total=  16.0s
[CV] solver=sgd, max_iter=2000, hidden_layer_sizes=14, alpha=0.1 .....
[CV]  solver=sgd, max_iter=2000, hidden_layer_sizes=14, alpha=0.1, score=0.9227995347033734, total=  11.9s
[CV] solver=sgd, max_iter=2000, hidden_layer_sizes=14, alpha=0.1 .....
[CV]  solver=sgd, max_iter=2000, hidden_layer_sizes=14, alpha=0.1, score=0.9212454922641437, total=  11.1s
[CV] solver=sgd, max_iter=2000, hidden_layer_sizes=14, alpha=0.1 .....
[CV]  solver=sgd, max_iter=2000, hidden_layer_sizes=14, alpha=0.1, score=0.9259374151770134, total=   7.9s
[CV] solver=sgd, max_iter=1500, hidden_layer_sizes=12, alpha=0.01 ....
[CV]  solver=sgd, max_iter=1500, hidden_layer_sizes=12, alpha=0.01, score=0.9224893369523071, total=   9.4s
[CV] solver=sgd, max_iter=1500, hidden_layer_sizes=12, alpha=0.01 ....
[CV]  solver=sgd, max_iter=1500, hidden_layer_sizes=12, alpha=0.01, score=0.9229128698282213, total=   9.

[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed: 28.7min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] solver=lbfgs, max_iter=1900, hidden_layer_sizes=13, alpha=0.0001 
[CV]  solver=lbfgs, max_iter=1900, hidden_layer_sizes=13, alpha=0.0001, score=-0.055158605634179556, total=  12.6s
[CV] solver=lbfgs, max_iter=1900, hidden_layer_sizes=13, alpha=0.0001 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   12.6s remaining:    0.0s


[CV]  solver=lbfgs, max_iter=1900, hidden_layer_sizes=13, alpha=0.0001, score=-0.054295373496824, total=  18.4s
[CV] solver=lbfgs, max_iter=1900, hidden_layer_sizes=13, alpha=0.0001 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   31.0s remaining:    0.0s


[CV]  solver=lbfgs, max_iter=1900, hidden_layer_sizes=13, alpha=0.0001, score=-0.05297948658753129, total=  20.2s
[CV] solver=lbfgs, max_iter=1600, hidden_layer_sizes=15, alpha=1e-06 .
[CV]  solver=lbfgs, max_iter=1600, hidden_layer_sizes=15, alpha=1e-06, score=-0.05550235014380075, total=  15.4s
[CV] solver=lbfgs, max_iter=1600, hidden_layer_sizes=15, alpha=1e-06 .
[CV]  solver=lbfgs, max_iter=1600, hidden_layer_sizes=15, alpha=1e-06, score=-0.05433242057282108, total=  12.1s
[CV] solver=lbfgs, max_iter=1600, hidden_layer_sizes=15, alpha=1e-06 .
[CV]  solver=lbfgs, max_iter=1600, hidden_layer_sizes=15, alpha=1e-06, score=-0.05304207242204104, total=  19.6s
[CV] solver=lbfgs, max_iter=1900, hidden_layer_sizes=12, alpha=1e-06 .
[CV]  solver=lbfgs, max_iter=1900, hidden_layer_sizes=12, alpha=1e-06, score=-0.055149558194005355, total=   7.6s
[CV] solver=lbfgs, max_iter=1900, hidden_layer_sizes=12, alpha=1e-06 .
[CV]  solver=lbfgs, max_iter=1900, hidden_layer_sizes=12, alpha=1e-06, score=-

[CV]  solver=lbfgs, max_iter=2000, hidden_layer_sizes=13, alpha=0.0001, score=-0.05321111453366941, total=  23.1s
[CV] solver=sgd, max_iter=1300, hidden_layer_sizes=14, alpha=1e-08 ...
[CV]  solver=sgd, max_iter=1300, hidden_layer_sizes=14, alpha=1e-08, score=-0.05603827245973521, total=  11.8s
[CV] solver=sgd, max_iter=1300, hidden_layer_sizes=14, alpha=1e-08 ...
[CV]  solver=sgd, max_iter=1300, hidden_layer_sizes=14, alpha=1e-08, score=-0.05730197893598838, total=   7.2s
[CV] solver=sgd, max_iter=1300, hidden_layer_sizes=14, alpha=1e-08 ...
[CV]  solver=sgd, max_iter=1300, hidden_layer_sizes=14, alpha=1e-08, score=-0.05716445263467539, total=  10.7s
[CV] solver=sgd, max_iter=1000, hidden_layer_sizes=10, alpha=0.001 ...
[CV]  solver=sgd, max_iter=1000, hidden_layer_sizes=10, alpha=0.001, score=-0.055875789379471005, total=   5.4s
[CV] solver=sgd, max_iter=1000, hidden_layer_sizes=10, alpha=0.001 ...
[CV]  solver=sgd, max_iter=1000, hidden_layer_sizes=10, alpha=0.001, score=-0.05502733

[CV]  solver=sgd, max_iter=1400, hidden_layer_sizes=13, alpha=0.01, score=-0.05493843084628222, total=   7.5s
[CV] solver=sgd, max_iter=1800, hidden_layer_sizes=15, alpha=1e-05 ...
[CV]  solver=sgd, max_iter=1800, hidden_layer_sizes=15, alpha=1e-05, score=-0.056210780970635026, total=   8.4s
[CV] solver=sgd, max_iter=1800, hidden_layer_sizes=15, alpha=1e-05 ...
[CV]  solver=sgd, max_iter=1800, hidden_layer_sizes=15, alpha=1e-05, score=-0.05500413469434989, total=   8.5s
[CV] solver=sgd, max_iter=1800, hidden_layer_sizes=15, alpha=1e-05 ...
[CV]  solver=sgd, max_iter=1800, hidden_layer_sizes=15, alpha=1e-05, score=-0.053704439968650625, total=  10.7s
[CV] solver=sgd, max_iter=2000, hidden_layer_sizes=12, alpha=0.1 .....
[CV]  solver=sgd, max_iter=2000, hidden_layer_sizes=12, alpha=0.1, score=-0.05597694517047195, total=  11.5s
[CV] solver=sgd, max_iter=2000, hidden_layer_sizes=12, alpha=0.1 .....
[CV]  solver=sgd, max_iter=2000, hidden_layer_sizes=12, alpha=0.1, score=-0.055545191978325

[CV]  solver=sgd, max_iter=1400, hidden_layer_sizes=14, alpha=0.001, score=-0.05408358497150171, total=  11.1s
[CV] solver=sgd, max_iter=1400, hidden_layer_sizes=14, alpha=0.0001 ..
[CV]  solver=sgd, max_iter=1400, hidden_layer_sizes=14, alpha=0.0001, score=-0.05566205552133878, total=  10.9s
[CV] solver=sgd, max_iter=1400, hidden_layer_sizes=14, alpha=0.0001 ..
[CV]  solver=sgd, max_iter=1400, hidden_layer_sizes=14, alpha=0.0001, score=-0.06032944671179578, total=   5.2s
[CV] solver=sgd, max_iter=1400, hidden_layer_sizes=14, alpha=0.0001 ..
[CV]  solver=sgd, max_iter=1400, hidden_layer_sizes=14, alpha=0.0001, score=-0.054414688059185465, total=   6.5s
[CV] solver=lbfgs, max_iter=1700, hidden_layer_sizes=15, alpha=0.001 .
[CV]  solver=lbfgs, max_iter=1700, hidden_layer_sizes=15, alpha=0.001, score=-0.055486807275129627, total=  16.8s
[CV] solver=lbfgs, max_iter=1700, hidden_layer_sizes=15, alpha=0.001 .
[CV]  solver=lbfgs, max_iter=1700, hidden_layer_sizes=15, alpha=0.001, score=-0.054

[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed: 27.7min finished


In [6]:
def prob_summary(clf, data, actual):
    
    # Generate predicted probabilities
    probs = clf.predict_proba(data)
    probs = pd.Series([i[1] for i in probs])
    
    # Add probabilities to actuals in dataframe
    probs_df = pd.concat([probs, actual.reset_index()], axis = 1).reset_index()
    
    # Clean dataframe
    probs_df.columns = ['level_0', 'Prob', 'index', 'Actual']
    probs_df         = probs_df.drop(['level_0', 'index'], axis = 1)
    
    # Round probabilities to nearest tenth
    probs_df['RoundedProb'] = (probs_df['Prob']).round(1)
    
    # Summarize
    summary = probs_df.groupby(
        'RoundedProb'
    ).agg(
        {
            'Actual':['count', 'sum']
        }
    )
    
    # Fix column names
    summary.columns = ["_".join(i) for i in summary.columns.ravel()]
    
    # Calculate predicted percentages
    summary['Actual_percentage'] = (summary['Actual_sum'] / summary['Actual_count']).round(2)
    return(summary)

In [7]:
print('Gradient Boosting')
print(prob_summary(gb01, X_test, y_test), '\n')

print('Gradient Boosting - Brier Score Loss')
print(prob_summary(gb01, X_test, y_test), '\n')

print('Neural Network')
print(prob_summary(mlp01, X_test, y_test), '\n')

print('Neural Network - Brier Score Loss')
print(prob_summary(mlp01, X_test, y_test), '\n')

Gradient Boosting
             Actual_count  Actual_sum  Actual_percentage
RoundedProb                                             
0.2                 33159        6782                0.2 

Gradient Boosting - Brier Score Loss
             Actual_count  Actual_sum  Actual_percentage
RoundedProb                                             
0.2                 33159        6782                0.2 

Neural Network
             Actual_count  Actual_sum  Actual_percentage
RoundedProb                                             
0.0                 23091           0               0.00
0.1                    41          13               0.32
0.2                   791         159               0.20
0.3                   610         172               0.28
0.4                   266         106               0.40
0.5                    87          44               0.51
0.6                   946         621               0.66
0.7                  2873        1992               0.69
0.8           

In [8]:
with open("Schedule_gb01.pkl", 'wb') as file:  
    pickle.dump(gb01, file)
    
with open("Schedule_gb02.pkl", 'wb') as file:  
    pickle.dump(gb02, file)
    
with open("Schedule_mlp01.pkl", 'wb') as file:  
    pickle.dump(mlp01, file)
    
with open("Schedule_mlp02.pkl", 'wb') as file:  
    pickle.dump(mlp02, file)