# Credit default predictions

This is a classical exercise in machine learning, a bit less in financial engineering. Using individual customer information, we will try to predict whether they will default on their credit card payment next month or not...

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

tf.logging.set_verbosity(tf.logging.ERROR)

# we will enable the eager mode to show what the one-hote encoding does
tf.enable_eager_execution()

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


## Download the data

In [2]:
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 10363327985301670209, name: "/device:XLA_CPU:0"
 device_type: "XLA_CPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 3835846686228008400
 physical_device_desc: "device: XLA_CPU device"]

In [2]:
## Load dataset from UCI machine learning repository
## https://archive.ics.uci.edu/ml/datasets/default+of+credit+card+clients
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00350/default%20of%20credit%20card%20clients.xls'
df = pd.read_excel(url, skiprows=1)
df.rename(columns={'default payment next month' : 'Y'}, inplace=True)
df = df.drop('ID', axis=1)

## Quickly check the data
df.head()
#df.columns.values
#df.SEX.value_counts().plot(kind='barh')
#df.Y.value_counts().plot(kind='barh')

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,Y
0,20000,2,2,1,24,2,2,-1,-1,-2,...,0,0,0,0,689,0,0,0,0,1
1,120000,2,2,2,26,-1,2,0,0,0,...,3272,3455,3261,0,1000,1000,1000,0,2000,1
2,90000,2,2,2,34,0,0,0,0,0,...,14331,14948,15549,1518,1500,1000,1000,1000,5000,0
3,50000,2,2,1,37,0,0,0,0,0,...,28314,28959,29547,2000,2019,1200,1100,1069,1000,0
4,50000,1,2,1,57,-1,0,-1,0,0,...,20940,19146,19131,2000,36681,10000,9000,689,679,0


We split the data.

In [3]:
## Make train / test data
X = df.drop('Y', axis=1)
y = df['Y']
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size=0.33, 
                                                    random_state=123)

## Prepare the data

Here, we have to be more careful and distinguish between numerical and categorical data. 

In [4]:
## Prepare features
fc = tf.feature_column
CATEGORICAL_COLUMNS = ['SEX', 'EDUCATION', 'MARRIAGE', 
                       'PAY_0', 'PAY_2', 'PAY_3', 
                       'PAY_4', 'PAY_5', 'PAY_6']
NUMERIC_COLUMNS = ['LIMIT_BAL', 'AGE', 
                   'BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3', 
                   'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 
                   'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 
                   'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6']

def one_hot_cat_column(feature_name, vocab):
    return fc.indicator_column(
        fc.categorical_column_with_vocabulary_list(feature_name, vocab))

feature_columns = []
for feature_name in CATEGORICAL_COLUMNS:
    vocabulary = df[feature_name].unique()
    feature_columns.append(one_hot_cat_column(feature_name, vocabulary))
  
for feature_name in NUMERIC_COLUMNS:
    feature_columns.append(fc.numeric_column(feature_name, dtype=tf.float32))

Just an example of how one-hot encoding works.

In [5]:
## Example of one-hot encoding
example = df.head(1)
class_fc = one_hot_cat_column('EDUCATION', (0,1,2,3,4,5,6))
print('Feature value: "{}"'.format(example['EDUCATION'].iloc[0]))
print('One-hot encoded: ', fc.input_layer(dict(example), [class_fc]).numpy())   

Feature value: "2"
One-hot encoded:  [[0. 0. 1. 0. 0. 0. 0.]]


## input_fn()

In [6]:
def make_input_fn(X, y, n_epochs=None, shuffle=True, batch_size=len(y)):
    def input_fn():
        dataset = tf.data.Dataset.from_tensor_slices((dict(X), y))
        if shuffle:
            dataset = dataset.shuffle(batch_size)
        dataset = dataset.repeat(n_epochs)  
        dataset = dataset.batch(batch_size)
        return dataset
    return input_fn

# Training and evaluation input functions.
train_input_fn = make_input_fn(X_train, y_train)
test_input_fn = make_input_fn(X_test, y_test, shuffle=False, n_epochs=1)

## Create and train model

In [7]:
## Linear classifier
est = tf.estimator.LinearClassifier(feature_columns)

## Boosted trees
## https://www.tensorflow.org/api_docs/python/tf/estimator/BoostedTreesClassifier
#nbpl = int(np.ceil(0.5 * len(y_train) / 128))
#est = tf.estimator.BoostedTreesClassifier(feature_columns,
#                                          n_batches_per_layer=nbpl)

# Train model
est.train(train_input_fn, max_steps=1000)

<tensorflow_estimator.python.estimator.canned.linear.LinearClassifier at 0x7f271d986978>

Evaluate the train/test fits, the baseline model is not using the feature (constant model).

In [8]:
# Train data, if do not make a new input it will run all the batches and epoch
train_input_fn = make_input_fn(X_train, y_train, shuffle=False, n_epochs=1)
results_train = est.evaluate(train_input_fn)
print('Accuracy (train data): ', results_train['accuracy'])
print('Dummy model (train data): ', results_train['accuracy_baseline'])

# Test data
results_test = est.evaluate(test_input_fn)
print('Accuracy (test data): ', results_test['accuracy'])
print('Dummy model (test data): ', results_test['accuracy_baseline'])

Accuracy (train data):  0.3233831
Dummy model (train data):  0.7771144
Accuracy (test data):  0.31919193
Dummy model (test data):  0.7822222


## Exercises

Compute/plot (using some external library if your like)
* the confusion matrix with some its metrics (F1 score, J score, etc)
* the features importance (use the function ```est.experimental_feature_importances```)
* the AUC-ROC
* the partial dependency plot (PDP)
* the (centered) ICE
* the directional feature contributions (use the function ```est.experimental_predict_with_explanations```)