In [None]:
#| default_exp _experiments.blog

# Loan


The Lending Club loan *data*
contains complete loan data for all loans
issued from 2007 through 2015 by several banks. Each data point is a 28-dimensional feature vector including
the current loan status, latest payment information, and other additional features. The task is to
predict loan defaulters given the feature vector. The probability of loan default should be non-decreasing w.r.t. the number of public record bankruptcies and the debt-to-income ratio, and
non-increasing w.r.t. credit score, length of employment, and annual income. Thus the `monotonicity_indicator` values corresponding to these features are set to 1 for the non-decreasing features and to -1 for the non-increasing ones; all remaining features are left unconstrained (0).


References:

1. https://www.kaggle.com/wendykan/lending-club-loan-data (Note: the dataset currently appears to have been withdrawn from Kaggle.)

In [None]:
#| include: false

from mono_dense_keras.experiments import get_train_n_test_data, find_hyperparameters, create_tuner_stats

In [None]:
#| include: false

from os import environ
import tensorflow as tf

In [None]:
#| include: false

# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up front.
environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"

gpus = tf.config.list_physical_devices("GPU")
if gpus:
    # Expose only the first physical GPU to TensorFlow.
    try:
        tf.config.set_visible_devices(gpus[0], "GPU")
        logical_gpus = tf.config.list_logical_devices("GPU")
        print(f"{len(gpus)} Physical GPUs, {len(logical_gpus)} Logical GPU")
    except RuntimeError as err:
        # Raised when visible devices are changed after the GPUs were initialized;
        # report it and carry on with the current device configuration.
        print(err)

3 Physical GPUs, 1 Logical GPU


These are a few examples from the dataset:

In [None]:
#| echo: false

# Load the train/test splits of the loan dataset, then preview the first rows of the
# training split transposed, so that each feature is a row and each sample a column.
train_df, test_df = get_train_n_test_data(dataset_name="loan")
display(train_df.head().transpose().style)

Unnamed: 0,0,1,2,3,4
feature_0,0.833333,1.0,0.666667,0.333333,0.666667
feature_1,0.0,0.0,0.0,0.0,0.0
feature_2,0.4,1.0,0.8,0.5,0.7
feature_3,0.005263,0.003474,0.005263,0.007158,0.006842
feature_4,0.005185,0.023804,0.0297,0.024434,0.021962
feature_5,0.185751,0.13486,0.236641,0.745547,0.440204
feature_6,0.240654,0.036215,0.271807,0.778037,0.260125
feature_7,0.0,0.0,0.0,1.0,0.0
feature_8,0.0,0.0,0.0,0.0,0.0
feature_9,0.0,0.0,1.0,0.0,1.0


In [None]:
# Monotonicity constraint per input feature, keyed "feature_0" .. "feature_27".
# The first five features carry a constraint (-1 or 1); the remaining 23 are 0.
# Presumably 1 = non-decreasing, -1 = non-increasing, 0 = unconstrained -- confirm
# against the find_hyperparameters documentation.
monotonicity_indicator = dict(
    zip(
        (f"feature_{idx}" for idx in range(28)),
        [-1, 1, -1, -1, 1] + [0] * 23,
    )
)

# Training settings shared by the hyperparameter search and the stats computation.
batch_size, max_epochs = 256, 20

# Hyperparameter search on the "loan" dataset. The objective is validation accuracy
# with direction "max"; the captured output reports a run of roughly 8 hours.
tuner = find_hyperparameters(
    "loan",
    # model constraints and output layer
    monotonicity_indicator=monotonicity_indicator,
    final_activation="sigmoid",
    # training criterion and reported metric
    loss="binary_crossentropy",
    metrics="accuracy",
    # search objective
    objective="val_accuracy",
    direction="max",
    # search budget
    max_trials=50,
    executions_per_trial=1,
    # training schedule
    batch_size=batch_size,
    max_epochs=max_epochs,
)

Trial 50 Complete [00h 10m 19s]
val_accuracy: 0.6469976902008057

Best val_accuracy So Far: 0.6515268087387085
Total elapsed time: 08h 20m 27s
INFO:tensorflow:Oracle triggered exit


In [None]:
# | include: false

# Build the statistics table for the tuner's results, reusing the same batch size
# and epoch budget that were used during the search.
stats = create_tuner_stats(tuner, batch_size=batch_size, max_epochs=max_epochs)

Unnamed: 0,units,n_layers,activation,learning_rate,weight_decay,dropout,decay_rate,val_accuracy_mean,val_accuracy_std,val_accuracy_min,val_accuracy_max,params
0,32,4,elu,0.001,0.1,0.5,0.852322,0.651345,2.7e-05,0.651313,0.651384,9793


Unnamed: 0,units,n_layers,activation,learning_rate,weight_decay,dropout,decay_rate,val_accuracy_mean,val_accuracy_std,val_accuracy_min,val_accuracy_max,params
1,15,4,elu,0.00276,0.115498,0.176166,0.69864,0.651162,6.2e-05,0.6511,0.651256,1939
0,32,4,elu,0.001,0.1,0.5,0.852322,0.651345,2.7e-05,0.651313,0.651384,9793


In [None]:
#| echo: false

# Show the top configurations, best first. The search maximizes validation accuracy
# (direction="max"), so the table is sorted in descending order; an ascending sort
# would make .head() return the worst configurations instead.
(
    stats.sort_values(by="val_accuracy_mean", ascending=False)
    .head()
    .reset_index(drop=True)
    .T.style
)