# Setup

In [1]:
import numpy as np
import os
import sklearn.metrics

import models
import util

# Seed NumPy's global RNG so np.random-based steps repeat across runs.
SEED = 2021
np.random.seed(SEED)
# TODO might need to set more seeds (tensorflow, ...)
# NOTE(review): only NumPy is seeded here; TensorFlow-side randomness
# (presumably used by the NN helpers in `models`) is not fixed by this call.


def in_src_dir(path):
    """Return True when the last component of `path` is exactly ``src``.

    A substring test such as ``'/src' in path`` also matches paths that merely
    pass through or resemble a ``src`` directory (``/home/u/src/project``,
    ``/proj/src_old``) and never matches Windows back-slash paths, so the final
    path component is compared instead.

    Args:
        path: Directory path to inspect (a trailing separator is tolerated).

    Returns:
        bool: True if the directory itself is named ``src``.
    """
    return os.path.basename(os.path.normpath(path)) == 'src'


# Stay in top-level directory for consistency
if in_src_dir(os.getcwd()):
    os.chdir('..')

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


In [2]:
# Pull every split from the project loader in a single call; the tuple is
# unpacked as (x, y) pairs for train, test and validation, in that order.
(xtrain, ytrain,
 xtest, ytest,
 xval, yval) = util.load_preg_data(sim=True, onehots=True)

# For each split, expand the outcome labels into the three binary targets
# that the rest of the notebook refers to as early / late / preterm.
(ytrain_early,
 ytrain_late,
 ytrain_preterm) = util.preg_outcome_to_binaries(ytrain)
(ytest_early,
 ytest_late,
 ytest_preterm) = util.preg_outcome_to_binaries(ytest)
(yval_early,
 yval_late,
 yval_preterm) = util.preg_outcome_to_binaries(yval)

# Logistic Regression

## Train

In [3]:
# Example with the preterm outcome
# Build a logistic-regression model via the project helper, fit it on the
# training split against the binary preterm label, then pickle it to disk.
# NOTE(review): the recorded run of this cell ended with the solver warning
# "TOTAL NO. of ITERATIONS REACHED LIMIT", i.e. the fit stopped before
# converging. Presumably max_iter and/or feature scaling must be adjusted
# inside models.build_logreg / models.fit_logreg -- confirm there.
lr_preterm = models.build_logreg()
models.fit_logreg(lr_preterm, xtrain, ytrain_preterm)
# NOTE(review): assumes the 'models/' directory already exists relative to the
# working directory -- verify whether save_pickle creates it.
models.save_pickle(lr_preterm, 'models/lr_preterm')

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [4]:
# Round-trip check: drop the in-memory estimator and restore it from the
# pickle so the figures below describe the saved artifact.
del lr_preterm
lr_preterm = models.load_pickle('models/lr_preterm')

# Single-number score on the test split.
lr_test_score = lr_preterm.score(xtest, ytest_preterm)
print(lr_test_score)

# Per-class precision / recall / F1 on the same split, to four decimals.
lr_test_pred = lr_preterm.predict(xtest)
lr_test_report = sklearn.metrics.classification_report(
    ytest_preterm,
    lr_test_pred,
    digits=4,
)
print(lr_test_report)

0.9695571955719557
              precision    recall  f1-score   support

       False     0.9646    0.9737    0.9691       532
        True     0.9744    0.9656    0.9700       552

    accuracy                         0.9696      1084
   macro avg     0.9695    0.9696    0.9696      1084
weighted avg     0.9696    0.9696    0.9696      1084



# Gradient Boosting

In [5]:
# Example with the preterm outcome
# Build a gradient-boosted model via the project helper, fit it on the
# training split while passing the validation split to the fit helper, then
# pickle the fitted model to disk.
# NOTE(review): in the recorded log the validation AUC is unchanged from
# iteration 100 through 1400 while binary_logloss is still falling steadily
# at iteration 1500. Presumably the learning rate is very small or training
# stops before the loss flattens -- check the settings in models.build_gbdt /
# models.fit_gbdt.
gb_preterm = models.build_gbdt()
models.fit_gbdt(gb_preterm, xtrain, ytrain_preterm, xval, yval_preterm)
models.save_pickle(gb_preterm, 'models/gb_preterm')

[100]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.610603
[200]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.542087
[300]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.484359
[400]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.435218
[500]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.39304
[600]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.356594
[700]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.324925
[800]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.297276
[900]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.273042
[1000]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.251728
[1100]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.232927
[1200]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.216301
[1300]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.201565
[1400]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.188479
[1500]	valid_0's auc: 0.998312	valid_0's binary_logloss: 0.175566
[1600]	valid_0's auc

In [6]:
# Round-trip check: drop the in-memory booster and restore it from the
# pickle so the figures below describe the saved artifact.
del gb_preterm
gb_preterm = models.load_pickle('models/gb_preterm')

# Single-number score on the test split.
gb_test_score = gb_preterm.score(xtest, ytest_preterm)
print(gb_test_score)

# Per-class precision / recall / F1 on the same split, to four decimals.
gb_test_pred = gb_preterm.predict(xtest)
gb_test_report = sklearn.metrics.classification_report(
    ytest_preterm,
    gb_test_pred,
    digits=4,
)
print(gb_test_report)

0.9704797047970479
              precision    recall  f1-score   support

       False     0.9480    0.9944    0.9706       532
        True     0.9943    0.9475    0.9703       552

    accuracy                         0.9705      1084
   macro avg     0.9712    0.9709    0.9705      1084
weighted avg     0.9716    0.9705    0.9705      1084



# Neural Network Experiments

In [3]:
# Let's work on NN2 first. Once we have that, it will be easy to simplify it back to NN1.

# Example with the preterm outcome
# Size the network from the data it is trained on rather than from the test
# split, and fail early if the splits disagree on the number of columns.
n_features = xtrain.shape[1]  # Assuming xtrain is (batch, cols)
assert xval.shape[1] == n_features and xtest.shape[1] == n_features, (
    "train/val/test splits must have the same number of feature columns"
)

# Build the SELU network, fit it with the validation split passed to the fit
# helper, and save the trained model under models/selu_preterm.
selu_preterm = models.build_NN_selu(input_len=n_features)
# `history` is kept for later inspection; presumably a Keras History object -- confirm.
history = models.fit_NN_selu(selu_preterm, xtrain, ytrain_preterm, xval, yval_preterm)
models.save_NN(selu_preterm, 'models/selu_preterm')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: models/selu_preterm/assets


In [4]:
# Example loading model to evaluate
# Round-trip check: drop the in-memory network and restore it from disk so
# the figures below describe the saved artifact.
del selu_preterm
selu_preterm = models.load_NN('models/selu_preterm')

# Independent cross-check, mirroring the report printed for the other models.
# NOTE(review): the recorded evaluate() output shows accuracy 0.0 next to an
# AUC of ~0.99, so the accuracy metric compiled into the model cannot be
# trusted. Presumably it is an exact-match accuracy comparing float outputs to
# the labels; a thresholded binary accuracy is likely what was intended --
# fix in models.build_NN_selu.
# Assumes the network emits one probability per row and that 0.5 is the
# decision threshold -- TODO confirm against the model definition.
selu_test_prob = np.asarray(selu_preterm.predict(xtest, verbose=0)).reshape(-1)
selu_test_pred = selu_test_prob > 0.5
print(sklearn.metrics.classification_report(ytest_preterm, selu_test_pred, digits=4))

# Kept as the last expression so the metric list remains the cell's output.
selu_preterm.evaluate(xtest, ytest_preterm)  # Output is [loss, accuracy, auc]



[0.12338992953300476, 0.0, 0.9925220608711243]