Credit Data Default Preditions - NN Edition
Sean Vucinich

This notebook tackles the problem of predicting whether an individual will default on their credit line using a neural net as opposed to other forms of machine learning.

Imports and Function definitions:

In [164]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf

Defining mini-batch function:

In [165]:
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [166]:
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

Data Preprocessing

Importing the dataset

In [167]:
input_file = "https://raw.githubusercontent.com/akatzuka/CST-463-Project-1/master/default_cc_train.csv"
df = pd.read_csv(input_file)

Setting the data and labels into variables

In [168]:
df.rename(columns={'PAY_0':'PAY_1'}, inplace=True)
PAY_columns = ['PAY_1', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6']
df[PAY_columns] += 1

X = df.iloc[:,0:24]
y = np.asarray(df["default.payment.next.month"])

Scaling the data

In [169]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


Splitting the data into training and test sets

In [170]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled,
y, test_size=0.25)

Setting up the nerual net framework

In [171]:
n_inputs = 24  # Credit
n_hidden1 = 300
n_hidden2 = 200
n_hidden3 = 100
n_hidden4 = 50
n_outputs = 10

reset_graph()

In [172]:
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

In [173]:
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1", activation=tf.nn.elu)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name="hidden2", activation=tf.nn.elu)
    hidden3 = tf.layers.dense(hidden2, n_hidden3, name="hidden3", activation=tf.nn.elu)
    hidden4 = tf.layers.dense(hidden3, n_hidden4, name="hidden4", activation=tf.nn.elu)
    logits = tf.layers.dense(hidden4, n_outputs, name="outputs")
    y_proba = tf.nn.softmax(logits)

In [174]:
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

In [175]:
learning_rate = 0.01

In [176]:
with tf.name_scope("train"):
    optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,momentum=0.9, use_nesterov=True)
    training_op = optimizer.minimize(loss)

In [177]:
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [178]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

Model - Execution Phase

In [179]:
n_epochs = 20
n_batches = 50
batch_size = n_batches

In [180]:
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_valid = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print(epoch, "Batch accuracy:", acc_batch, "Validation accuracy:", acc_valid)

    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Batch accuracy: 0.8 Validation accuracy: 0.78752
1 Batch accuracy: 0.64 Validation accuracy: 0.81568
2 Batch accuracy: 0.82 Validation accuracy: 0.8168
3 Batch accuracy: 0.82 Validation accuracy: 0.81664
4 Batch accuracy: 0.76 Validation accuracy: 0.81616
5 Batch accuracy: 0.84 Validation accuracy: 0.8168
6 Batch accuracy: 0.84 Validation accuracy: 0.81328
7 Batch accuracy: 0.7 Validation accuracy: 0.81344
8 Batch accuracy: 0.88 Validation accuracy: 0.81104
9 Batch accuracy: 0.72 Validation accuracy: 0.81488
10 Batch accuracy: 0.82 Validation accuracy: 0.81536
11 Batch accuracy: 0.86 Validation accuracy: 0.8176
12 Batch accuracy: 0.8 Validation accuracy: 0.81168
13 Batch accuracy: 0.76 Validation accuracy: 0.81712
14 Batch accuracy: 0.88 Validation accuracy: 0.81344
15 Batch accuracy: 0.8 Validation accuracy: 0.81792
16 Batch accuracy: 0.86 Validation accuracy: 0.81648
17 Batch accuracy: 0.88 Validation accuracy: 0.816
18 Batch accuracy: 0.82 Validation accuracy: 0.81568
19 Batch acc