## NN code snippets

In [None]:
# if packages not installed yet:

# import sys
# !conda install --yes --prefix {sys.prefix} numpy
# !conda install --yes --prefix {sys.prefix} pandas
# !conda install --yes --prefix {sys.prefix} tensorflow
# !conda install --yes --prefix {sys.prefix} scikit-learn
# !conda install --yes --prefix {sys.prefix} keras

### Hyperparameter tuning using random search

In [None]:
# for model timing: time.ctime() returns the current local time as a
# human-readable string. As the last expression of the cell it is echoed as
# the cell output, giving a start timestamp to compare with the one produced
# at the end of the tuning cell.
time.ctime()

In [None]:
# Wrap the Keras model builder in a scikit-learn compatible classifier so it
# can be driven by RandomizedSearchCV.
model = KerasClassifier(build_fn=create_model, verbose=0)

# Hyperparameter search space for the network. These lists must be defined,
# because the `grid` dictionary below is built from them.
hiddenLayerOne = [0, 5, 18]
learnRate = [1e-2, 1e-3, 1e-4]
batchSize = [5, 10, 20]
epochs = [10, 30, 80]

# NOTE(review): this dictionary holds tree-ensemble hyperparameter names and
# looks like a leftover from another experiment. It is kept for reference but
# is NOT passed to the search below, since the wrapped Keras model is tuned
# through `grid` instead.
model_params = {
    # integers drawn uniformly from [4, 200) -- the upper bound is exclusive
    'n_estimators': randint(4, 200),
    # truncated normal with loc=0.25, scale=0.1; a and b are expressed in
    # units of `scale` relative to `loc`, so samples fall in [0.25, 0.35]
    'max_features': truncnorm(a=0, b=1, loc=0.25, scale=0.1),
    # uniform on [loc, loc + scale] = [0.01, 0.209]
    'min_samples_split': uniform(0.01, 0.199)
}

# Search space actually used for the Keras model. hiddenLayerOne / learnRate
# are keyword arguments of create_model; batch_size / epochs are presumably
# forwarded to fit by the wrapper -- confirm against the wrapper in use.
grid = dict(
    hiddenLayerOne=hiddenLayerOne,
    learnRate=learnRate,
    batch_size=batchSize,
    epochs=epochs
)

# create 10-fold cross validation generator
cv = KFold(n_splits=10)

# Create the random searcher with 10-fold cv and start the tuning process.
# n_jobs=1 runs the candidate fits sequentially; n_jobs=-1 would use all
# available cores instead.
searcher = RandomizedSearchCV(
    estimator=model,
    n_jobs=1,
    cv=cv,
    param_distributions=grid,
    scoring='accuracy')
searchResults = searcher.fit(train_features, train_targets)

# summarise random search info
bestScore = searchResults.best_score_
bestParams = searchResults.best_params_
print("[INFO] best score is {:.2f} using {}".format(bestScore,bestParams))

# for model timing: end timestamp, echoed as the cell output
time.ctime()

### Baseline model:

In [None]:
# Baseline: build the network by calling create_model with no arguments.
model = create_model()

# Train on the training split (100 epochs, mini-batches of 10).
model.fit(train_features, train_targets, batch_size=10, epochs=100)

# evaluate() is unpacked into two values; the first is discarded and the
# second is reported as a percentage. Note this prints accuracy, not AUC.
_, accuracy = model.evaluate(X, y, verbose=0)
print('Accuracy: %.2f' % (100 * accuracy))

## Trying tensorboard

In [None]:
# Load the TensorBoard notebook extension
%load_ext tensorboard
# Also reload it, so that re-running this cell in a kernel where the
# extension was already loaded picks it up again.
%reload_ext tensorboard

In [None]:
# run tensorboard inline in the notebook, reading event files from the
# logs/fit directory (runs must write their logs there to show up)
%tensorboard --logdir logs/fit

In [None]:
# 10-fold splitter; defined here but not used by the statements in this cell.
cv = KFold(n_splits=10)

# Build the network by calling create_model with no arguments.
model = create_model()

# Train on the training split (300 epochs, mini-batches of 128).
model.fit(train_features, train_targets, batch_size=128, epochs=300)

# evaluate() is unpacked into two values; the first is discarded and the
# second is reported as a percentage. Note this prints accuracy, not AUC.
_, accuracy = model.evaluate(X, y, verbose=0)
print('Accuracy: %.2f' % (100 * accuracy))

In [None]:
# Build the final model with explicit hyperparameters.
fin_model = create_model(hiddenLayerOne=10, learnRate=0.1)

# Create a timestamped log directory under logs/fit (the directory the
# `%tensorboard --logdir logs/fit` cell reads) so each run gets its own
# sub-directory. The callback must exist BEFORE fitting and be passed to
# fit(), otherwise no logs are written.
log_dir = 'logs/fit/' + time.strftime('%Y%m%d-%H%M%S')
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Fit the newly created model (not the earlier `model`) on the dataset,
# logging to TensorBoard while training.
fin_model.fit(
    train_features,
    train_targets,
    epochs=800,
    batch_size=128,
    callbacks=[tensorboard_callback],
)


# # evaluate model, print accuracy
# _, accuracy = fin_model.evaluate(X, y, verbose=0)
# print('Accuracy: %.2f' %(accuracy*100))

## Notes

In [None]:
# CODE FROM BOOSTED TREES
# Evaluates a fitted classifier on the test split: accuracy, confusion
# matrix, classification report, ROC curve, AUC and log-loss.
# NOTE(review): assumes `model` is a fitted estimator exposing both predict
# (class labels) and predict_proba (per-class probabilities) -- confirm
# before reusing this with a raw Keras model.
predictions = model.predict(test_features)

# Accuracy of the hard label predictions
accuracy = metrics.accuracy_score(test_targets, predictions)
print("Accuracy: " + str(np.round(accuracy, 3)))

print("Confusion Matrix:")
print(confusion_matrix(test_targets, predictions))

print("Classification Report")
print(classification_report(test_targets, predictions))

# Beginning the plotting of ROC-curve: use the same estimator that produced
# `predictions`, and take column 1 as the positive-class probability.
pred_prob = model.predict_proba(test_features)
fpr, tpr, thresh = roc_curve(test_targets, pred_prob[:,1], pos_label=1)

# Plot roc curves
plt.plot(fpr, tpr, linestyle='--',color='orange', label='SVM')

# title
plt.title('ROC curve')
# x label
plt.xlabel('False Positive Rate')
# y label
plt.ylabel('True Positive rate')

plt.legend(loc='best')
plt.savefig('ROC',dpi=300)
plt.show();

# AUC Score
auc_score = roc_auc_score(test_targets, pred_prob[:,1])
print("AUC Score: " + str(np.round(auc_score , 3)))

# Log-loss is defined on predicted probabilities, not on hard labels, so it
# is computed from pred_prob rather than from predictions.
print("Log-Loss: " + str(np.round(log_loss(test_targets, pred_prob),3)))