In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import tensorflow as tf

# Import our input dataset
loans_df = pd.read_csv('loan_status.csv')
loans_df.head()

Unnamed: 0,Loan_Status,Current_Loan_Amount,Term,Credit_Score,Annual_Income,Years_in_current_job,Home_Ownership,Purpose,Monthly_Debt,Years_of_Credit_History,Months_since_last_delinquent,Number_of_Open_Accounts,Number_of_Credit_Problems,Current_Credit_Balance,Maximum_Open_Credit,Bankruptcies,Tax_Liens
0,Fully_Paid,99999999,Short_Term,741.0,2231892.0,8_years,Own_Home,Debt_Consolidation,29200.53,14.9,29.0,18,1,297996,750090.0,0.0,0.0
1,Fully_Paid,217646,Short_Term,730.0,1184194.0,<_1_year,Home_Mortgage,Debt_Consolidation,10855.08,19.6,10.0,13,1,122170,272052.0,1.0,0.0
2,Fully_Paid,548746,Short_Term,678.0,2559110.0,2_years,Rent,Debt_Consolidation,18660.28,22.6,33.0,4,0,437171,555038.0,0.0,0.0
3,Fully_Paid,99999999,Short_Term,728.0,714628.0,3_years,Rent,Debt_Consolidation,11851.06,16.0,76.0,16,0,203965,289784.0,0.0,0.0
4,Fully_Paid,99999999,Short_Term,740.0,776188.0,<_1_year,Own_Home,Debt_Consolidation,11578.22,8.5,25.0,6,0,134083,220220.0,0.0,0.0


In [2]:
# Generate our categorical variable list
loans_cat = loans_df.dtypes[loans_df.dtypes == "object"].index.tolist()

# Check the number of unique values in each column
loans_df[loans_cat].nunique()

Loan_Status              2
Term                     2
Years_in_current_job    11
Home_Ownership           4
Purpose                  7
dtype: int64

In [3]:
# Check the unique value counts to see if binning is required
loans_df.Years_in_current_job.value_counts()

10+_years    13149
2_years       3225
3_years       2997
<_1_year      2699
5_years       2487
4_years       2286
1_year        2247
6_years       2109
7_years       2082
8_years       1675
9_years       1467
Name: Years_in_current_job, dtype: int64

In [4]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(loans_df[loans_cat]))

# Add the encoded variable names to the DataFrame
encode_df.columns = enc.get_feature_names(loans_cat)
encode_df.head()

Unnamed: 0,Loan_Status_Fully_Paid,Loan_Status_Not_Paid,Term_Long_Term,Term_Short_Term,Years_in_current_job_10+_years,Years_in_current_job_1_year,Years_in_current_job_2_years,Years_in_current_job_3_years,Years_in_current_job_4_years,Years_in_current_job_5_years,...,Home_Ownership_Home_Mortgage,Home_Ownership_Own_Home,Home_Ownership_Rent,Purpose_Business_Loan,Purpose_Buy_House,Purpose_Buy_a_Car,Purpose_Debt_Consolidation,Purpose_Home_Improvements,Purpose_Medical_Bills,Purpose_Other
0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [5]:
# Merge one-hot encoded features and drop the originals
loans_df = loans_df.merge(encode_df,left_index=True, right_index=True)
loans_df = loans_df.drop(loans_cat,1)
loans_df.head()

Unnamed: 0,Current_Loan_Amount,Credit_Score,Annual_Income,Monthly_Debt,Years_of_Credit_History,Months_since_last_delinquent,Number_of_Open_Accounts,Number_of_Credit_Problems,Current_Credit_Balance,Maximum_Open_Credit,...,Home_Ownership_Home_Mortgage,Home_Ownership_Own_Home,Home_Ownership_Rent,Purpose_Business_Loan,Purpose_Buy_House,Purpose_Buy_a_Car,Purpose_Debt_Consolidation,Purpose_Home_Improvements,Purpose_Medical_Bills,Purpose_Other
0,99999999,741.0,2231892.0,29200.53,14.9,29.0,18,1,297996,750090.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,217646,730.0,1184194.0,10855.08,19.6,10.0,13,1,122170,272052.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,548746,678.0,2559110.0,18660.28,22.6,33.0,4,0,437171,555038.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,99999999,728.0,714628.0,11851.06,16.0,76.0,16,0,203965,289784.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,99999999,740.0,776188.0,11578.22,8.5,25.0,6,0,134083,220220.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [6]:
# Remove loan status target from features data
y = loans_df.Loan_Status_Fully_Paid
X = loans_df.drop(columns=["Loan_Status_Fully_Paid","Loan_Status_Not_Paid"])

# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [7]:
# Create a random forest classifier.
rf_model = RandomForestClassifier(n_estimators=128, random_state=78)

# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

# Evaluate the model
y_pred = rf_model.predict(X_test_scaled)
print(f" Random forest predictive accuracy: {accuracy_score(y_test,y_pred):.3f}")

 Random forest predictive accuracy: 0.849


In [8]:
# Define the model - deep neural net
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 =  24
hidden_nodes_layer2 = 12

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))


# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=50)

# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
285/285 - 0s - loss: 0.3917 - accuracy: 0.8474
Loss: 0.39169231057167053, Accuracy: 0.8473533987998962


In [9]:
nn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 24)                888       
_________________________________________________________________
dense_1 (Dense)              (None, 12)                300       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 13        
Total params: 1,201
Trainable params: 1,201
Non-trainable params: 0
_________________________________________________________________


In [50]:
# Import checkpoint dependencies
import os
from tensorflow.keras.callbacks import ModelCheckpoint

# Define the checkpoint path and filenames
os.makedirs("checkpoints/",exist_ok=True)
checkpoint_path = "checkpoints/weights.{epoch:02d}.hdf5"

In [51]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Create a callback that saves the model's weights every 5 epochs
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq=100)

# Train the model
fit_model = nn.fit(X_train_scaled,y_train,epochs=100,callbacks=[cp_callback])

# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/100
 82/854 [=>............................] - ETA: 0s - loss: 0.3584 - accuracy: 0.8559
Epoch 00001: saving model to checkpoints\weights.01.hdf5
194/854 [=====>........................] - ETA: 0s - loss: 0.3535 - accuracy: 0.8579
Epoch 00001: saving model to checkpoints\weights.01.hdf5
Epoch 00001: saving model to checkpoints\weights.01.hdf5
Epoch 00001: saving model to checkpoints\weights.01.hdf5
Epoch 00001: saving model to checkpoints\weights.01.hdf5
Epoch 00001: saving model to checkpoints\weights.01.hdf5
Epoch 00001: saving model to checkpoints\weights.01.hdf5
Epoch 00001: saving model to checkpoints\weights.01.hdf5
Epoch 2/100
 41/854 [>.............................] - ETA: 3s - loss: 0.3587 - accuracy: 0.8482
Epoch 00002: saving model to checkpoints\weights.02.hdf5
142/854 [===>..........................] - ETA: 2s - loss: 0.3488 - accuracy: 0.8554
Epoch 00002: saving model to checkpoints\weights.02.hdf5
Epoch 00002: saving model to checkpoints\weights.02.hdf5
Epoch 000

Epoch 00013: saving model to checkpoints\weights.13.hdf5
Epoch 00013: saving model to checkpoints\weights.13.hdf5
Epoch 00013: saving model to checkpoints\weights.13.hdf5
Epoch 00013: saving model to checkpoints\weights.13.hdf5
Epoch 00013: saving model to checkpoints\weights.13.hdf5
Epoch 00013: saving model to checkpoints\weights.13.hdf5
Epoch 14/100
 79/854 [=>............................] - ETA: 2s - loss: 0.3696 - accuracy: 0.8453
Epoch 00014: saving model to checkpoints\weights.14.hdf5
198/854 [=====>........................] - ETA: 1s - loss: 0.3570 - accuracy: 0.8524
Epoch 00014: saving model to checkpoints\weights.14.hdf5
Epoch 00014: saving model to checkpoints\weights.14.hdf5
Epoch 00014: saving model to checkpoints\weights.14.hdf5
Epoch 00014: saving model to checkpoints\weights.14.hdf5
Epoch 00014: saving model to checkpoints\weights.14.hdf5
Epoch 00014: saving model to checkpoints\weights.14.hdf5
Epoch 00014: saving model to checkpoints\weights.14.hdf5
Epoch 15/100
 28/85

Epoch 00025: saving model to checkpoints\weights.25.hdf5
Epoch 00025: saving model to checkpoints\weights.25.hdf5
Epoch 26/100
 37/854 [>.............................] - ETA: 2s - loss: 0.3398 - accuracy: 0.8670
Epoch 00026: saving model to checkpoints\weights.26.hdf5
141/854 [===>..........................] - ETA: 2s - loss: 0.3423 - accuracy: 0.8624
Epoch 00026: saving model to checkpoints\weights.26.hdf5
Epoch 00026: saving model to checkpoints\weights.26.hdf5
Epoch 00026: saving model to checkpoints\weights.26.hdf5
Epoch 00026: saving model to checkpoints\weights.26.hdf5
Epoch 00026: saving model to checkpoints\weights.26.hdf5
Epoch 00026: saving model to checkpoints\weights.26.hdf5
Epoch 00026: saving model to checkpoints\weights.26.hdf5
Epoch 00026: saving model to checkpoints\weights.26.hdf5
Epoch 27/100
 81/854 [=>............................] - ETA: 1s - loss: 0.3451 - accuracy: 0.8619
Epoch 00027: saving model to checkpoints\weights.27.hdf5
195/854 [=====>....................

Epoch 00038: saving model to checkpoints\weights.38.hdf5
190/854 [=====>........................] - ETA: 2s - loss: 0.3466 - accuracy: 0.8550
Epoch 00038: saving model to checkpoints\weights.38.hdf5
Epoch 00038: saving model to checkpoints\weights.38.hdf5
Epoch 00038: saving model to checkpoints\weights.38.hdf5
Epoch 00038: saving model to checkpoints\weights.38.hdf5
Epoch 00038: saving model to checkpoints\weights.38.hdf5
Epoch 00038: saving model to checkpoints\weights.38.hdf5
Epoch 00038: saving model to checkpoints\weights.38.hdf5
Epoch 39/100
 44/854 [>.............................] - ETA: 5s - loss: 0.3720 - accuracy: 0.8405
Epoch 00039: saving model to checkpoints\weights.39.hdf5
144/854 [====>.........................] - ETA: 3s - loss: 0.3524 - accuracy: 0.8528
Epoch 00039: saving model to checkpoints\weights.39.hdf5
Epoch 00039: saving model to checkpoints\weights.39.hdf5
Epoch 00039: saving model to checkpoints\weights.39.hdf5
Epoch 00039: saving model to checkpoints\weights

Epoch 00050: saving model to checkpoints\weights.50.hdf5
Epoch 00050: saving model to checkpoints\weights.50.hdf5
Epoch 00050: saving model to checkpoints\weights.50.hdf5
Epoch 00050: saving model to checkpoints\weights.50.hdf5
Epoch 00050: saving model to checkpoints\weights.50.hdf5
Epoch 51/100
 93/854 [==>...........................] - ETA: 2s - loss: 0.3509 - accuracy: 0.8565
Epoch 00051: saving model to checkpoints\weights.51.hdf5
190/854 [=====>........................] - ETA: 1s - loss: 0.3432 - accuracy: 0.8613
Epoch 00051: saving model to checkpoints\weights.51.hdf5
Epoch 00051: saving model to checkpoints\weights.51.hdf5
Epoch 00051: saving model to checkpoints\weights.51.hdf5
Epoch 00051: saving model to checkpoints\weights.51.hdf5
Epoch 00051: saving model to checkpoints\weights.51.hdf5
Epoch 00051: saving model to checkpoints\weights.51.hdf5
Epoch 00051: saving model to checkpoints\weights.51.hdf5
Epoch 52/100
 41/854 [>.............................] - ETA: 2s - loss: 0.32

Epoch 00062: saving model to checkpoints\weights.62.hdf5
Epoch 63/100
 38/854 [>.............................] - ETA: 2s - loss: 0.3115 - accuracy: 0.8760
Epoch 00063: saving model to checkpoints\weights.63.hdf5
130/854 [===>..........................] - ETA: 1s - loss: 0.3194 - accuracy: 0.8713
Epoch 00063: saving model to checkpoints\weights.63.hdf5
Epoch 00063: saving model to checkpoints\weights.63.hdf5
Epoch 00063: saving model to checkpoints\weights.63.hdf5
Epoch 00063: saving model to checkpoints\weights.63.hdf5
Epoch 00063: saving model to checkpoints\weights.63.hdf5
Epoch 00063: saving model to checkpoints\weights.63.hdf5
Epoch 00063: saving model to checkpoints\weights.63.hdf5
Epoch 00063: saving model to checkpoints\weights.63.hdf5
Epoch 64/100
 81/854 [=>............................] - ETA: 1s - loss: 0.3445 - accuracy: 0.8568
Epoch 00064: saving model to checkpoints\weights.64.hdf5
180/854 [=====>........................] - ETA: 1s - loss: 0.3439 - accuracy: 0.8563
Epoch 0

Epoch 00075: saving model to checkpoints\weights.75.hdf5
Epoch 00075: saving model to checkpoints\weights.75.hdf5
Epoch 00075: saving model to checkpoints\weights.75.hdf5
Epoch 00075: saving model to checkpoints\weights.75.hdf5
Epoch 00075: saving model to checkpoints\weights.75.hdf5
Epoch 00075: saving model to checkpoints\weights.75.hdf5
Epoch 76/100
 27/854 [..............................] - ETA: 1s - loss: 0.3394 - accuracy: 0.8640
Epoch 00076: saving model to checkpoints\weights.76.hdf5
135/854 [===>..........................] - ETA: 1s - loss: 0.3512 - accuracy: 0.8580
Epoch 00076: saving model to checkpoints\weights.76.hdf5
Epoch 00076: saving model to checkpoints\weights.76.hdf5
Epoch 00076: saving model to checkpoints\weights.76.hdf5
Epoch 00076: saving model to checkpoints\weights.76.hdf5
Epoch 00076: saving model to checkpoints\weights.76.hdf5
Epoch 00076: saving model to checkpoints\weights.76.hdf5
Epoch 00076: saving model to checkpoints\weights.76.hdf5
Epoch 00076: saving

Epoch 00087: saving model to checkpoints\weights.87.hdf5
Epoch 00087: saving model to checkpoints\weights.87.hdf5
Epoch 88/100
  1/854 [..............................] - ETA: 1s - loss: 0.2615 - accuracy: 0.8750
Epoch 00088: saving model to checkpoints\weights.88.hdf5
 86/854 [==>...........................] - ETA: 2s - loss: 0.3262 - accuracy: 0.8639
Epoch 00088: saving model to checkpoints\weights.88.hdf5
195/854 [=====>........................] - ETA: 2s - loss: 0.3328 - accuracy: 0.8614
Epoch 00088: saving model to checkpoints\weights.88.hdf5
Epoch 00088: saving model to checkpoints\weights.88.hdf5
Epoch 00088: saving model to checkpoints\weights.88.hdf5
Epoch 00088: saving model to checkpoints\weights.88.hdf5
Epoch 00088: saving model to checkpoints\weights.88.hdf5
Epoch 00088: saving model to checkpoints\weights.88.hdf5
Epoch 00088: saving model to checkpoints\weights.88.hdf5
Epoch 89/100
 35/854 [>.............................] - ETA: 2s - loss: 0.3084 - accuracy: 0.8857
Epoch 0

Epoch 00100: saving model to checkpoints\weights.100.hdf5
144/854 [====>.........................] - ETA: 1s - loss: 0.3448 - accuracy: 0.8610
Epoch 00100: saving model to checkpoints\weights.100.hdf5
Epoch 00100: saving model to checkpoints\weights.100.hdf5
Epoch 00100: saving model to checkpoints\weights.100.hdf5
Epoch 00100: saving model to checkpoints\weights.100.hdf5
Epoch 00100: saving model to checkpoints\weights.100.hdf5
Epoch 00100: saving model to checkpoints\weights.100.hdf5
Epoch 00100: saving model to checkpoints\weights.100.hdf5
Epoch 00100: saving model to checkpoints\weights.100.hdf5
285/285 - 0s - loss: 0.4157 - accuracy: 0.8447
Loss: 0.4156859517097473, Accuracy: 0.8447177410125732


In [58]:
# Define the model - deep neural net
number_input_features = X_train.shape[1]
hidden_nodes_layer1 =  24
hidden_nodes_layer2 = 12

nn_new = tf.keras.models.Sequential()

# First hidden layer
nn_new.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn_new.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn_new.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the model
nn_new.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Restore the model weights
nn_new.load_weights("checkpoints/weights.100.hdf5")

# Evaluate the model using the test data
model_loss, model_accuracy = nn_new.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

285/285 - 0s - loss: 0.4157 - accuracy: 0.8447
Loss: 0.4156859517097473, Accuracy: 0.8447177410125732
