# Setup

In [None]:
import gc
import numpy as np
import os
import sklearn.metrics
import tensorflow as tf

import models
import util

# Fix the NumPy and TensorFlow global seeds so stochastic steps are repeatable.
SEED = 2021
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Stay in top-level directory for consistency.
# Only step up when the working directory itself is `src`. A substring test on
# the full path would also fire when an ancestor directory merely contains
# "/src" (e.g. /home/me/src/project) and would never match Windows-style paths.
# The relative 'models/...' save paths used later depend on this location.
if os.path.basename(os.getcwd()) == 'src':
    os.chdir('..')

In [None]:
# Load data
# NOTE(review): the order of the six returned splits is taken on trust from
# util.load_preg_data_final -- confirm against its definition.
xtrain, ytrain, xtest, ytest, xval, yval = util.load_preg_data_final(datafile='stillbirth')

# Convert class labels to binary labels for one target outcome.
# Choose between early stillbirth, late stillbirth, and preterm.
# Defined once so the three splits can never be binarised against different outcomes.
# (Model variable names and save paths further down are still written for the
# early-stillbirth case.)
OUTCOME = 'early stillbirth'
ytrain = util.outcome_to_binary(ytrain, outcome=OUTCOME)
ytest = util.outcome_to_binary(ytest, outcome=OUTCOME)
yval = util.outcome_to_binary(yval, outcome=OUTCOME)

# xtest / ytest are intentionally kept in memory: every evaluation cell in this
# notebook scores its model on them, so deleting them here raises NameError later.

In [None]:
# Sanity check on the training features: show per-column summary statistics
# so normalization, minimum and maximum values can be inspected.
for column_name in xtrain.columns:
    column_summary = xtrain[column_name].describe()
    display(column_summary)


# Logistic Regression

In [None]:
# Build the model object via the project helper (presumably a logistic-regression
# classifier, going by the helper name -- confirm in models.py).
lr_early = models.build_logreg()
# Fit on the training split only; no validation data is passed to this helper.
models.fit_logreg(lr_early, xtrain, ytrain)
# Persist the fitted model. The path is relative, so it resolves against the
# current working directory set in the setup cell.
models.save_pickle(lr_early, 'models/lr_early')

In [None]:
# Score the fitted model on the held-out test split (requires xtest / ytest to
# still be in scope). `score` is presumably mean accuracy if lr_early is a
# scikit-learn classifier -- confirm in models.py.
lr_test_score = lr_early.score(xtest, ytest)
print(lr_test_score)

# Per-class precision / recall / F1 report, printed to four decimal places.
lr_test_pred = lr_early.predict(xtest)
print(sklearn.metrics.classification_report(ytest, lr_test_pred, digits=4))

# Release the model before the next one is trained. Note this makes the cell
# single-shot: re-running it requires re-running the training cell first.
del lr_early, lr_test_pred
gc.collect()

# Gradient Boosting

In [None]:
# Build, fit and persist the gradient-boosting model.
# `models` is used exactly as imported in the setup cell: re-importing it
# mid-notebook is redundant on a fresh kernel and, in an interactive session,
# can leave earlier models built from a different version of the module than
# later ones. For live editing, enable `%autoreload 2` in the setup cell instead.
gb_early = models.build_gbdt()
# Unlike the logistic-regression fit, the validation split is passed as well
# (presumably for early stopping or monitoring -- confirm in models.py).
models.fit_gbdt(gb_early, xtrain, ytrain, xval, yval)
# The relative path resolves against the working directory set in the setup cell.
models.save_pickle(gb_early, 'models/gb_early')

In [None]:
# Score the fitted model on the held-out test split (requires xtest / ytest to
# still be in scope). `score` is presumably mean accuracy if gb_early follows
# the scikit-learn estimator API -- confirm in models.py.
gb_test_score = gb_early.score(xtest, ytest)
print(gb_test_score)

# Per-class precision / recall / F1 report, printed to four decimal places.
gb_test_pred = gb_early.predict(xtest)
print(sklearn.metrics.classification_report(ytest, gb_test_pred, digits=4))

# Release the model before the next one is trained. Note this makes the cell
# single-shot: re-running it requires re-running the training cell first.
del gb_early, gb_test_pred
gc.collect()

# Neural nets

In [None]:
# Size the network input from the second axis of the training features.
# Assumes xtrain is laid out as (n_samples, n_features).
n_features = xtrain.shape[1]
selu_early = models.build_NN_selu(input_len=n_features)
selu_early.summary()

# Fit with the validation split passed alongside the training data, then
# persist the model; the relative path resolves against the working directory
# set in the setup cell.
models.fit_NN_selu(selu_early, xtrain, ytrain, xval, yval)
models.save_NN(selu_early, 'models/selu_early')

In [None]:
# Evaluate on the held-out test split (requires xtest / ytest to still be in
# scope). The returned metrics are captured and printed explicitly: evaluate()
# is not the last expression of the cell, so its return value would otherwise
# be discarded.
selu_test_metrics = selu_early.evaluate(xtest, ytest)
print(selu_test_metrics)  # Output is [loss, accuracy, auc] -- order depends on how models compiles the network

# Release the model before the next one is trained. Note this makes the cell
# single-shot: re-running it requires re-running the training cell first.
del selu_early
gc.collect()

In [None]:
# Size the network input from the second axis of the training features.
# Assumes xtrain is laid out as (n_samples, n_features).
n_features = xtrain.shape[1]
lrelu_early = models.build_NN_lrelu(input_len=n_features)
lrelu_early.summary()

# Fit with the validation split passed alongside the training data, then
# persist the model; the relative path resolves against the working directory
# set in the setup cell.
models.fit_NN_lrelu(lrelu_early, xtrain, ytrain, xval, yval)
models.save_NN(lrelu_early, 'models/lrelu_early')

In [None]:
# Evaluate on the held-out test split (requires xtest / ytest to still be in
# scope). The returned metrics are captured and printed explicitly: evaluate()
# is not the last expression of the cell, so its return value would otherwise
# be discarded.
lrelu_test_metrics = lrelu_early.evaluate(xtest, ytest)
print(lrelu_test_metrics)  # Output is [loss, accuracy, auc] -- order depends on how models compiles the network

# Release the model once it has been scored. Note this makes the cell
# single-shot: re-running it requires re-running the training cell first.
del lrelu_early
gc.collect()