In [125]:
import tensorflow as tf
from tensorflow import keras

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

import sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [128]:
test_data = pd.read_csv('../DATA/test_normalized_4labels.csv', index_col=0)
training_data = pd.read_csv(
    '../DATA/train_normalized_4labels.csv', index_col=0)


  mask |= (ar1 == a)


In [129]:
labels = {'CERTIFIED': 0, 'CERTIFIED-WITHDRAWN': 1,
          'DENIED': 1, 'WITHDRAWN': 1}
test_data['CASE_STATUS'] = test_data['CASE_STATUS'].apply(lambda x: labels[x])
training_data['CASE_STATUS'] = training_data['CASE_STATUS'].apply(
    lambda x: labels[x])


In [130]:
train_data, val_data = train_test_split(training_data, test_size=0.2)


In [131]:
train_labels = np.array(train_data.pop('CASE_STATUS'))
val_labels = np.array(val_data.pop('CASE_STATUS'))
test_labels = np.array(test_data.pop('CASE_STATUS'))

train_data.pop('EMPLOYER_NAME')
test_data.pop('EMPLOYER_NAME')
val_data.pop('EMPLOYER_NAME')

train_features = np.array(train_data)
val_features = np.array(val_data)
test_features = np.array(test_data)


In [132]:
print(train_features.shape)
print(test_features.shape)
print(val_features.shape)
print(train_labels.shape)
print(test_labels.shape)
print(val_labels.shape)

(1363656, 420)
(730531, 420)
(340915, 420)
(1363656,)
(730531,)
(340915,)


In [133]:
METRICS = [
    keras.metrics.TruePositives(name='tp'),
    keras.metrics.FalsePositives(name='fp'),
    keras.metrics.TrueNegatives(name='tn'),
    keras.metrics.FalseNegatives(name='fn'),
    keras.metrics.BinaryAccuracy(name='accuracy'),
    keras.metrics.Precision(name='precision'),
    keras.metrics.Recall(name='recall'),
    keras.metrics.AUC(name='auc'),
    keras.metrics.AUC(name='prc', curve='PR'),  # precision-recall curve
]

def make_model(metrics=METRICS, output_bias=None):
  if output_bias is not None:
    output_bias = tf.keras.initializers.Constant(output_bias)
  model = keras.Sequential([
      keras.layers.Dense(
          256, activation='relu',
          input_shape=(train_features.shape[-1],)),
      keras.layers.Dense(
          64, activation='relu'),
      keras.layers.Dropout(0.5),
      keras.layers.Dense(1, activation='sigmoid',
                         bias_initializer=output_bias),
  ])

  model.compile(
      optimizer=keras.optimizers.Adam(learning_rate=1e-3),
      loss=keras.losses.BinaryCrossentropy(),
      metrics=metrics)

  return model


In [140]:
EPOCHS = 100
BATCH_SIZE = 2048
filename = '../saved_models/nn_2labels_model.sav'
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=filename,
                                                 save_weights_only=True,
                                                 verbose=1)
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_auc',
    verbose=1,
    patience=10,
    mode='max',
    restore_best_weights=True)
model = make_model()
model.summary()


Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_51 (Dense)             (None, 256)               107776    
_________________________________________________________________
dense_52 (Dense)             (None, 64)                16448     
_________________________________________________________________
dropout_17 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_53 (Dense)             (None, 1)                 65        
Total params: 124,289
Trainable params: 124,289
Non-trainable params: 0
_________________________________________________________________


In [141]:
model.fit(
    train_features,
    train_labels,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(val_features, val_labels),
    callbacks=[early_stopping, cp_callback],
    verbose=1)


Epoch 1/100

Epoch 00001: saving model to ../saved_models/nn_2labels_model.sav
Epoch 2/100

Epoch 00002: saving model to ../saved_models/nn_2labels_model.sav
Epoch 3/100

Epoch 00003: saving model to ../saved_models/nn_2labels_model.sav
Epoch 4/100

Epoch 00004: saving model to ../saved_models/nn_2labels_model.sav
Epoch 5/100

Epoch 00005: saving model to ../saved_models/nn_2labels_model.sav
Epoch 6/100

Epoch 00006: saving model to ../saved_models/nn_2labels_model.sav
Epoch 7/100

Epoch 00007: saving model to ../saved_models/nn_2labels_model.sav
Epoch 8/100

Epoch 00008: saving model to ../saved_models/nn_2labels_model.sav
Epoch 9/100

Epoch 00009: saving model to ../saved_models/nn_2labels_model.sav
Epoch 10/100

Epoch 00010: saving model to ../saved_models/nn_2labels_model.sav
Epoch 11/100

Epoch 00011: saving model to ../saved_models/nn_2labels_model.sav
Epoch 12/100

Epoch 00012: saving model to ../saved_models/nn_2labels_model.sav
Epoch 13/100

Epoch 00013: saving model to ../sav

<keras.callbacks.History at 0x7fa474f899a0>

In [138]:
model.evaluate(test_features, test_labels)



[0.06662300229072571,
 74403.0,
 1199.0,
 642816.0,
 12113.0,
 0.9817776679992676,
 0.9841406345367432,
 0.859991192817688,
 0.9812482595443726,
 0.9512678384780884]