# Wide and Deep Networks for Credit Score Classification

By: Joe Sellett, Haiyan Cai, and Cole Wagner

In [26]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import FeatureSpace


In [17]:
# Load the credit score CSV; the relative path is resolved against the
# notebook's current working directory.
data_path = "credit_score_cleaned.csv"
credit_df = pd.read_csv(data_path)

## Data Preparation

### Drop Unnecessary Columns

In [18]:
# Columns this notebook treats as unnecessary for modeling.
columns_to_drop = ["customer_id", "name", "ssn", "type_of_loan"]
# DataFrame.drop returns a new frame, so rebind the name to keep the result.
credit_df = credit_df.drop(columns=columns_to_drop)

### Preprocess Remaining Columns

In [23]:
def create_dataset_from_dataframe(
    df_input: pd.DataFrame, batch_size: int
) -> tf.data.Dataset:
    """Convert a pandas dataframe to a batched TensorFlow Dataset.

    The ``credit_score`` column is split off as the label; every other
    column becomes an entry in the feature dictionary.

    Parameters
    ----------
    df_input : pd.DataFrame
        The input pandas dataframe containing the data. It must contain
        a ``credit_score`` column. The dataframe itself is not modified.
    batch_size : int
        The number of rows per batch in the TensorFlow Dataset.

    Returns
    -------
    tf.data.Dataset
        A dataset yielding ``(features, labels)`` batches, where
        ``features`` maps each non-label column name to a tensor with a
        trailing axis of length 1 (one value per row).

    Raises
    ------
    KeyError
        If ``df_input`` has no ``credit_score`` column.

    """
    labels = df_input["credit_score"]
    # ``drop`` returns a new frame, so the caller's dataframe is untouched.
    features_df = df_input.drop(columns=["credit_score"])

    # Build the feature dict from the label-free frame so the target
    # column cannot leak into the model inputs. Each column gets a
    # trailing axis so every feature has shape (n_rows, 1).
    feature_dict = {
        column: values.to_numpy()[:, np.newaxis]
        for column, values in features_df.items()
    }

    tf_ds = tf.data.Dataset.from_tensor_slices((feature_dict, labels))
    tf_ds = tf_ds.batch(batch_size)
    # The prefetch buffer is measured in batches (not rows) at this point;
    # let tf.data pick the size instead of buffering ``batch_size`` batches.
    return tf_ds.prefetch(tf.data.AUTOTUNE)


In [None]:
# Input schema, split by the preprocessing each column receives.
# Columns encoded with FeatureSpace.string_categorical:
categorical_features = [
    "month",
    "occupation",
    "credit_mix",
    "payment_of_min_amount",
    "payment_behaviour",
]
# Columns preprocessed with FeatureSpace.float_normalized:
numeric_features = [
    "age",
    "annual_income",
    "monthly_inhand_salary",
    "credit_history_age",
    "total_emi_per_month",
    "num_bank_accounts",
    "num_credit_card",
    "interest_rate",
    "num_of_loan",
    "delay_from_due_date",
    "num_of_delayed_payment",
    "changed_credit_limit",
    "num_credit_inquiries",
    "outstanding_debt",
    "credit_utilization_ratio",
    "amount_invested_monthly",
    "monthly_balance",
]

# One independent feature config per column (a fresh object for each name).
# NOTE(review): num_oov_indices=0 reserves no out-of-vocabulary index, so
# categories not seen while adapting are presumably not accepted later --
# confirm this is intended.
categorical_configs = {
    column: FeatureSpace.string_categorical(num_oov_indices=0)
    for column in categorical_features
}
numeric_configs = {
    column: FeatureSpace.float_normalized() for column in numeric_features
}

# Categorical entries first, then numeric ones, in list order.
feature_configs = dict(categorical_configs)
feature_configs.update(numeric_configs)

# Pairs of categorical columns to combine into crossed features.
feature_crosses = [
    ("occupation", "credit_mix"),
    ("payment_of_min_amount", "payment_behaviour"),
]

# "concat" output mode: all processed features are joined into one tensor.
feature_space = FeatureSpace(
    features=feature_configs,
    crosses=feature_crosses,
    output_mode="concat",
)