# Setup

In [7]:
import numpy as np
import os
import sklearn.metrics

import models
import util

# Fixed seed for NumPy's global RNG so NumPy-based randomness is repeatable.
# NOTE(review): only NumPy is seeded here; the neural-net cells log TensorFlow
# messages, and no TensorFlow (or stdlib `random`) seed is set in this notebook —
# confirm whether the `models` helpers seed those themselves.
SEED = 2021
np.random.seed(SEED)

# Stay in top-level directory for consistency: later cells use paths such as
# 'models/lr_preterm' that are relative to the working directory.
# Compare the final path component rather than searching for the substring '/src':
# the substring test also fires when the repo merely lives under a directory like
# ~/src/ (moving one level further up on every re-run of this cell), matches
# names such as 'src_old', and never matches Windows-style '\src' paths.
if os.path.basename(os.getcwd()) == 'src':
    os.chdir('..')

In [8]:
# Load the train / test / validation inputs and outcomes from the project loader
xtrain, ytrain, xtest, ytest, xval, yval = util.load_preg_data(
    sim=True,
    onehots=True,
)
# Create binary labels (early, late, preterm) for each split; the helper is
# applied to the train, test and validation outcomes in that order
(
    (ytrain_early, ytrain_late, ytrain_preterm),
    (ytest_early, ytest_late, ytest_preterm),
    (yval_early, yval_late, yval_preterm),
) = (util.preg_outcome_to_binaries(outcome) for outcome in (ytrain, ytest, yval))

# Logistic Regression

## Train

In [9]:
# Build a logistic-regression model with the project helper, fit it on the
# training split against the binary preterm labels, and pickle it under the
# relative path 'models/lr_preterm' (relative to the working directory chosen
# in the Setup cell).
# NOTE(review): the recorded output of this cell contains scikit-learn's
# "TOTAL NO. of ITERATIONS REACHED LIMIT" message, i.e. the solver stopped
# before converging. Raising max_iter or scaling the features would address
# it — presumably inside models.build_logreg / models.fit_logreg; confirm.
lr_preterm = models.build_logreg()
models.fit_logreg(lr_preterm, xtrain, ytrain_preterm)
models.save_pickle(lr_preterm, 'models/lr_preterm')

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [10]:
# Round-trip example: drop the in-memory model, restore it from the pickle
# written by the training cell, then report its performance on the test split
del lr_preterm
lr_preterm = models.load_pickle('models/lr_preterm')

# Overall score first, then the per-class report at four decimal places
lr_test_score = lr_preterm.score(xtest, ytest_preterm)
print(lr_test_score)
lr_test_pred = lr_preterm.predict(xtest)
lr_test_report = sklearn.metrics.classification_report(
    ytest_preterm,
    lr_test_pred,
    digits=4,
)
print(lr_test_report)

0.9695571955719557
              precision    recall  f1-score   support

           0     0.9646    0.9737    0.9691       532
           1     0.9744    0.9656    0.9700       552

    accuracy                         0.9696      1084
   macro avg     0.9695    0.9696    0.9696      1084
weighted avg     0.9696    0.9696    0.9696      1084



# Gradient Boosting

In [11]:
# Build the gradient-boosting model with the project helper, fit it on the
# training split while also passing the validation split (the recorded log
# reports valid_0 auc / binary_logloss), then pickle it under 'models/gb_preterm'.
# NOTE(review): in the recorded log the validation AUC stays at 0.995474 through
# iteration 1400 while the log-loss falls only slowly — this looks like a very
# small learning rate; confirm the settings in models.build_gbdt / models.fit_gbdt.
gb_preterm = models.build_gbdt()
models.fit_gbdt(gb_preterm, xtrain, ytrain_preterm, xval, yval_preterm)
models.save_pickle(gb_preterm, 'models/gb_preterm')

[100]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.610603
[200]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.542087
[300]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.484359
[400]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.435218
[500]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.39304
[600]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.356594
[700]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.324925
[800]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.297276
[900]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.273042
[1000]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.251728
[1100]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.232927
[1200]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.216301
[1300]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.201565
[1400]	valid_0's auc: 0.995474	valid_0's binary_logloss: 0.188479
[1500]	valid_0's auc: 0.998312	valid_0's binary_logloss: 0.175566
[1600]	valid_0's auc

# Neural Nets

In [12]:
# Build the network returned by models.build_NN_selu, train it on the training
# split with the validation split passed alongside, and save it under
# 'models/selu_preterm'.
selu_preterm = models.build_NN_selu(input_len=xtrain.shape[1])  # shape[1] is used as the feature count, i.e. xtrain is assumed to be (n_samples, n_features) — TODO confirm
models.fit_NN_selu(selu_preterm, xtrain, ytrain_preterm, xval, yval_preterm)
models.save_NN(selu_preterm, 'models/selu_preterm')

Training model with class weights, {0: 1.0176706827309236, 1: 0.9829325058184639}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: models/selu_preterm/assets


In [13]:
# Example load and evaluate: drop the in-memory network, reload it from
# 'models/selu_preterm', and evaluate it on the test split.
del selu_preterm
selu_preterm = models.load_NN('models/selu_preterm')
# NOTE(review): the recorded result is [0.109..., 0.0, 0.9936...]. An accuracy of
# exactly 0.0 next to an AUC of ~0.99 is contradictory, so the accuracy metric is
# probably misconfigured for these labels/outputs wherever the model is compiled
# (not visible in this notebook) — verify before trusting the middle value.
selu_preterm.evaluate(xtest, ytest_preterm)  # Output is [loss, accuracy, auc]



[0.10901693999767303, 0.0, 0.9936066269874573]