In [2]:
import numpy as np
import pandas as pd

from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.utils import resample
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier

In [None]:
# Load the cleaned appointments table from the working directory
mod_data = pd.read_csv(filepath_or_buffer = 'appointments_clean.csv')

In [None]:
# Separate predictors from the target: the columns listed below are
# excluded from the feature matrix, and NoShow becomes the label.
X = mod_data.drop(
    columns = [
        'PatientId', 'AppointmentID', 'ScheduledDay', 'AppointmentDay',
        'Neighbourhood', 'No-show', 'NoShow',
    ]
)
y = mod_data['NoShow']

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state = 314, test_size = 0.3
)

In [None]:
# Min-max scale the features. The scaler learns its ranges from the training
# split only and is then reused, unchanged, on the test split.
# Note: X_train and X_test are rebound to the scaled arrays here.
scaler = MinMaxScaler()

X_train = scaler.fit_transform(X_train)
X_test  = scaler.transform(X_test)

In [None]:
# Grid search - gradient boosted
# The grid below spans 34,020 parameter combinations and each one is refit on
# every cross-validation fold, so expect this cell to run for a long time.
param_grid = {
    # 'loss' is left at the estimator default: the single value previously
    # listed here ('deviance') was renamed 'log_loss' and is rejected by
    # current scikit-learn, while the default selects the same loss.
    'learning_rate': [0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2],
    'min_samples_split': np.linspace(0.1, 0.5, 9),
    'min_samples_leaf': np.linspace(0.1, 0.5, 9),
    'max_depth': [3, 5, 8],
    'max_features': ['log2', 'sqrt'],
    # 'mae' is no longer an accepted split criterion for gradient boosting;
    # 'squared_error' is the supported alternative to 'friedman_mse'.
    'criterion': ['friedman_mse', 'squared_error'],
    'subsample': np.linspace(0.5, 1.0, 5),
    'n_estimators': [10]
}

grid = GridSearchCV(
    # subsample < 1 and max_features make fitting stochastic, so seed the
    # estimator (same seed as the train/test split) for repeatable results.
    GradientBoostingClassifier(random_state = 314),
    param_grid,
    refit = True,
    verbose = 3
)

# X_train / X_test already hold the min-max scaled features: the scaling cell
# rebinds those names, and no separate *_scaled variables exist.
grid.fit(X_train, y_train)

# refit = True means predict() uses the best estimator retrained on all of X_train
grid_predictions = grid.predict(X_test)

# Only the last expression of a cell is echoed, so report the results explicitly
print(grid.best_params_)
print(grid.best_estimator_)

In [None]:
# Grid search - neural net
# Candidate MLP settings: two solvers, iteration caps from 1000 to 2000 in
# steps of 100, alpha from 1e-1 down to 1e-9, and hidden sizes 10 through 15.
parameters = dict(
    solver = ['lbfgs', 'sgd'],
    max_iter = np.arange(1000, 2001, 100),
    alpha = np.power(10.0, -np.arange(1, 10)),
    hidden_layer_sizes = np.arange(10, 16),
)


In [None]:
# TODO: Model assessment - score grid_predictions against y_test (accuracy, F1, classification report)

In [None]:
# TODO: Save final model

In [4]:
# Scratch check: np.arange excludes the stop value, so this yields 10 through 14
np.arange(10, 15)

array([10, 11, 12, 13, 14])

In [15]:
# Scratch check: np.linspace includes both endpoints - five evenly spaced values from 0.5 to 1.0
np.linspace(0.5, 1.0, 5)

array([0.5  , 0.625, 0.75 , 0.875, 1.   ])