# Predict credit default

- Dataset: https://archive.ics.uci.edu/ml/datasets/default+of+credit+card+clients
- Inspiration for model: https://www.kaggle.com/code/mahyar511/payment-default-prediction-neural-network

# 1. Setup

In [2]:
# ! conda install keras -y
# ! conda install tensorflow -y

In [3]:
# ! conda install xlrd -y

In [4]:
# imports
import pandas as pd
import numpy as np

from sklearn.utils import class_weight
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics

from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
from keras.utils import to_categorical


In [5]:
# get the dataset from UCI ML Repository
# ! curl -o default.xls https://archive.ics.uci.edu/ml/machine-learning-databases/00350/default%20of%20credit%20card%20clients.xls

In [6]:
# load the dataset
df = pd.read_excel('data/default.xls', header=1)
df.shape

(30000, 25)

In [7]:
# examine
df.head(2)

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default payment next month
0,1,20000,2,2,1,24,2,2,-1,-1,...,0,0,0,0,689,0,0,0,0,1
1,2,120000,2,2,2,26,-1,2,0,0,...,3272,3455,3261,0,1000,1000,1000,0,2000,1


In [10]:
# distribution of the target (credit default)
print(df['default payment next month'].dtypes)
df['default payment next month'].value_counts()

int64


0    23364
1     6636
Name: default payment next month, dtype: int64

In [13]:
# Calculating default Ratio
non_default = len(df[df['default payment next month']==0])
default = len(df[df['default payment next month']==1])
ratio = float(default/(non_default+default))
print('Default Ratio: ', ratio)

Default Ratio:  0.2212


## Feature engineering

In [14]:
# split into input (X) and output (y) variables
X = df.drop(['ID', 'default payment next month'], axis=1)
print(X.shape[1])

23


In [16]:
# y = to_categorical(df['default payment next month'])
y = np.array(df['default payment next month'])
y[:5]

array([1, 1, 0, 0, 0])

In [17]:
print(X.shape, y.shape)

(30000, 23) (30000,)


## Train test split

In [19]:
# split into 67% for train and 33% for test

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [None]:
# Standardize the predictors
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [20]:
# how many features are there? (this will be the input dimensions)
print(X_train.shape)
print(y_train.shape)

(20100, 23)
(20100,)


In [21]:
# how many in each category now?
print('y_train')
unique, counts = np.unique(y_train, return_counts=True)
for x in [0,1]:
    print(unique[x],counts[x])
print('y_test')
unique, counts = np.unique(y_test, return_counts=True)
for x in [0,1]:
    print(unique[x],counts[x])


y_train
0 15622
1 4478
y_test
0 7742
1 2158


# 2. Define Keras Model

In [36]:
# parameters
n_cols = X_train.shape[1]
early_stopping_monitor = EarlyStopping(patience=2)
class_weight = {0:ratio, 1:1-ratio}
print(n_cols)

23


In [37]:
# keras has two APIs: Sequential and Functional.
model = Sequential()

In [38]:
# input layer and first hidden layer
# relu = rectified linear unit
model.add(Dense(12, activation='relu', 
                input_shape = (n_cols,))
         )

In [39]:
# second hidden layer
model.add(Dense(12, activation='relu'))

In [40]:
# output layer
model.add(Dense(2, activation='softmax'))

# 3. Compile Keras Model

In [41]:
# compile the keras model
model.compile(loss='categorical_crossentropy', 
              optimizer='adam', 
              metrics=['accuracy']
             )

# 4. Fit the model

In [42]:
# fit the keras model on the dataset

model.fit(X_train, 
          y_train, 
          validation_data=(X_test,y_test), 
          epochs=10, 
          batch_size=10,
          class_weight=class_weight,
          callbacks = [early_stopping_monitor]
         )

Epoch 1/10


ValueError: in user code:

    /home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
        return step_function(self, iterator)
    /home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/tensorflow/python/keras/engine/training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    /home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/tensorflow/python/keras/engine/training.py:788 run_step  **
        outputs = model.train_step(data)
    /home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/tensorflow/python/keras/engine/training.py:755 train_step
        loss = self.compiled_loss(
    /home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/tensorflow/python/keras/engine/compile_utils.py:203 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    /home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/tensorflow/python/keras/losses.py:152 __call__
        losses = call_fn(y_true, y_pred)
    /home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/tensorflow/python/keras/losses.py:256 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    /home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/tensorflow/python/keras/losses.py:1537 categorical_crossentropy
        return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
    /home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/tensorflow/python/keras/backend.py:4833 categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)
    /home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/tensorflow/python/framework/tensor_shape.py:1134 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (10, 1) and (10, 2) are incompatible


# 5. Evaluate

In [29]:
# evaluate the keras model
_, accuracy = model.evaluate(X_test, y_test)
print('Accuracy: %.2f' % (accuracy*100))

Accuracy: 21.80


In [30]:
# make probability predictions with the model (they come in pairs)
y_probs = model.predict(X_test)
y_probs[:10]

array([[1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.]], dtype=float32)

In [31]:
# express these as booleans
y_probs > 0.7

array([[ True],
       [ True],
       [ True],
       ...,
       [ True],
       [ True],
       [ True]])

In [32]:
# make class predictions with the model
y_preds = (y_probs > 0.5).astype(int)
y_preds[:10]

array([[1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1]])

In [33]:
# check out the first ten
print(y_preds[:10].tolist())
print(y_test[:10].tolist())

[[1], [1], [1], [1], [1], [1], [1], [1], [1], [1]]
[0, 0, 0, 0, 1, 0, 1, 0, 0, 0]


In [34]:

# Evaluate the model
print(metrics.classification_report(y_test, y_preds))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      7742
           1       0.22      1.00      0.36      2158

    accuracy                           0.22      9900
   macro avg       0.11      0.50      0.18      9900
weighted avg       0.05      0.22      0.08      9900



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# 6. Make Predictions on new data


In [35]:
new_data = X_test[[0]]
model.predict(new_data)

array([[1.]], dtype=float32)

# 7. Save the model
https://machinelearningmastery.com/save-load-keras-deep-learning-models/

In [28]:
model.save("credit-model-1.h5")

In [29]:
# load model
from keras.models import load_model
model2 = load_model('credit-model-1.h5')

In [30]:
# make class predictions with the model
predictions = (model.predict(X) > 0.5).astype(int)
predictions[:10]

array([[0, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [1, 0]])