## Import statements

In [0]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import pandas as pd
import numpy as np
import keras

from keras.layers import Dense
from keras.models import Sequential
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

## Function definitions

In [0]:
def data_processing(datafile):
    """Load a CSV file and split it into model inputs and one-hot labels.

    Parameters
    ----------
    datafile : path or file-like object accepted by ``pd.read_csv``.

    Returns
    -------
    n_cols : int
        Number of predictor columns that remain after the drops below.
    predictors : pandas.DataFrame
        The input frame without the label column and without the first
        remaining column.
    target : array
        Result of ``to_categorical`` applied to the label column.
    """
    label_column = 'action_taken_name'                                          # 'action_taken_name': loan approval (1), loan denial (0)
    frame = pd.read_csv(datafile)

    # One-hot encode the label column.
    target = to_categorical(frame[label_column])

    # Remove the label first, then whichever column is now in position 0
    # (presumably a saved row index -- TODO confirm against the CSV).
    without_label = frame.drop([label_column], axis=1)
    leading_column = without_label.columns[0]
    predictors = without_label.drop(leading_column, axis=1)

    return predictors.shape[1], predictors, target

In [0]:
def auc(y_true, y_pred):
    """Keras-compatible metric function built on ``tf.metrics.auc``.

    ``tf.metrics.auc`` returns a pair; this function hands back the second
    element (the update op, per the TensorFlow 1.x documentation). Before
    returning, TensorFlow's local variables are initialised in the session
    used by the Keras backend.

    Note: tf.metrics.auc may produce a slightly different value compared to
    sklearn.metrics.roc_auc_score.
    """
    _, update_op = tf.metrics.auc(y_true, y_pred)
    # Must run after the metric has been created so its local variables exist.
    session = keras.backend.get_session()
    session.run(tf.local_variables_initializer())
    return update_op

In [0]:
def train_model(datafile='nc_sc_ga_va_clean_v0.csv', epochs=10, batch_size=512, optimizer='adam', loss='categorical_crossentropy', test_size=0.2, hidden_units=(32, 2, 20), random_state=0, verbose=1):
    """Train a dense softmax classifier on a CSV file and report test metrics.

    The data is loaded with ``data_processing``, split into train and test
    parts with ``train_test_split``, fitted, and then evaluated on the
    held-out part. Loss, AUC and accuracy on the test part are printed.

    Parameters
    ----------
    datafile : str
        CSV file passed to ``data_processing``.
    epochs : int
        Number of training epochs passed to ``model.fit``.
    batch_size : int
        Batch size used for both ``model.fit`` and ``model.evaluate``.
    optimizer : str or optimizer object
        Passed to ``model.compile``.
    loss : str or callable
        Passed to ``model.compile``.
    test_size : float or int
        Passed to ``train_test_split``.
    hidden_units : sequence of int
        Width of each ReLU hidden layer, in order. The default reproduces the
        original 32 -> 2 -> 20 stack. An empty sequence yields a model that
        consists of the softmax layer only.
    random_state : int or None
        Seed passed to ``train_test_split``.
    verbose : int
        Verbosity passed to ``model.fit``.

    Returns
    -------
    The fitted Keras ``Sequential`` model.
    """
    # Process input datafile
    n_cols, predictors, target = data_processing(datafile)
    x_train, x_test, y_train, y_test = train_test_split(
        predictors, target, test_size=test_size, random_state=random_state)

    # The softmax layer must be as wide as the one-hot labels; derive the width
    # from the data instead of hard-coding 2 (identical for two-class labels).
    n_classes = target.shape[1]

    # Define layers: ReLU hidden layers followed by the softmax output layer
    layer_specs = [(units, tf.nn.relu) for units in hidden_units]
    layer_specs.append((n_classes, tf.nn.softmax))

    model = keras.models.Sequential()
    for position, (units, activation) in enumerate(layer_specs):
        # Only the first layer declares the input shape.
        shape_kwargs = {'input_shape': (n_cols,)} if position == 0 else {}
        model.add(keras.layers.Dense(units, activation=activation, **shape_kwargs))

    # Train model
    print('\n# Begin training')
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy', auc])
    model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, verbose=verbose)

    # Evaluate model
    print('\n# Evaluate on test data')
    results = model.evaluate(x_test, y_test, batch_size=batch_size)

    # results follows the compile order: [loss, accuracy, auc]
    test_loss, test_accuracy, test_auc = results[0], results[1], results[2]
    print('\n# Test results')
    print('Test loss: {:.4f} \nTest AUC: {:.4f} \nTest accuracy: {:.2f}%'.format(test_loss, test_auc, test_accuracy*100))

    return model

## Train model

In [30]:
# Train and evaluate with the default arguments declared in train_model's
# signature; the fitted model is kept in `model` for later cells.
model = train_model()


# Begin training
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

# Evaluate on test data

# Test results
Test loss: 0.3926 
Test AUC: 0.9009 
Test accuracy: 83.09%
