In [13]:
### Required libraries ###

import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from keras.models import Model, Sequential
from keras.layers import Dropout, Dense, Input, BatchNormalization, Activation, Add, LSTM, Softmax, Bidirectional, Conv1D
from keras.optimizers import Adam
from keras.utils import to_categorical, normalize
from keras import backend as K

%matplotlib inline

In [14]:
### Dataset ###

# reading dataset
# TODO: absolute, machine-specific path; point this at your local copy of the CSV.
DATASET_PATH = r"D:\Data\Dinesh\Work\DLTraining\UCI Credit Card - Demographics Model\dataset\UCI_Credit_Card.csv"
dataset = pd.read_csv(DATASET_PATH, index_col=0)

# Bucket AGE into bins; each bin is labelled with its upper edge (15, 25, ..., 80).
age_bin_edges = [0, 15, 25, 35, 45, 55, 80]
dataset["AGE_CAT"] = pd.cut(dataset["AGE"], age_bin_edges, labels=age_bin_edges[1:])

# The PAY_* status codes can be negative. Shift each column so its smallest
# code becomes 0, which lets the codes be one-hot encoded later.
# Only shift when the minimum really is negative: adding abs(min) unconditionally
# would push an already non-negative column upwards for no reason.
neg_num_categorical_columns = ["PAY_0", "PAY_2", "PAY_3", "PAY_4", "PAY_5", "PAY_6"]
for column in neg_num_categorical_columns:
    min_value = dataset[column].min()
    if min_value < 0:
        dataset[column] -= min_value

# Keep only the modelling columns, ordered oldest month (6) to newest (PAY_0 / *1)
# as (PAY, BILL_AMT, PAY_AMT) triples, followed by the demographic and default labels.
dataset = dataset[[
    'PAY_6', 'BILL_AMT6', 'PAY_AMT6', 'PAY_5', 'BILL_AMT5', 'PAY_AMT5', 'PAY_4', 'BILL_AMT4', 'PAY_AMT4',
    'PAY_3', 'BILL_AMT3', 'PAY_AMT3', 'PAY_2', 'BILL_AMT2', 'PAY_AMT2', 'PAY_0', 'BILL_AMT1', 'PAY_AMT1',
    "EDUCATION", "MARRIAGE", "SEX", "AGE_CAT", "default.payment.next.month"
]]

In [16]:
### Data Preprocessing ###

# From here on `dataset` is a plain 2-D array addressed by column position.
dataset = np.array(dataset)

# Columns 0-17 are six (PAY, BILL_AMT, PAY_AMT) triples: one-hot encode the
# PAY status code and keep the two amount columns as they are.
feature_parts = []
for status_col in range(0, 18, 3):
    feature_parts.append(to_categorical(dataset[:, status_col]))
    feature_parts.append(dataset[:, [status_col + 1, status_col + 2]])
X = np.concatenate(feature_parts, axis=1)

# Columns 18-22 are the targets, one-hot encoded one by one.
Y_education, Y_marriage, Y_sex, Y_age_cat, Y_default = (
    to_categorical(dataset[:, label_col]) for label_col in range(18, 23)
)

# Scale every row by its L1 norm.
X = normalize(X, axis=-1, order=1)

# Sanity check; the third shape is the one-hot encoding of column 15 (PAY_0).
pay_0_one_hot = feature_parts[-2]
print(X.shape, Y_education.shape, pay_0_one_hot.shape)

(30000, 78) (30000, 7) (30000, 11)


In [17]:
### Train and Test set preparation ###

# For each demographic label: hold out 10% as the test set, then hold out 10% of
# the remainder as the dev set. All demographic labels could also be kept as a single Y.
# Every call uses the same X and the same random_state, so the X_* splits are
# expected to contain the same rows for every demographic.
SPLIT_FRACTION = 0.10
SPLIT_SEED = 5

X_education_temp, X_education_test, Y_education_temp, Y_education_test = train_test_split(
    X, Y_education, test_size=SPLIT_FRACTION, random_state=SPLIT_SEED)
X_education_train, X_education_dev, Y_education_train, Y_education_dev = train_test_split(
    X_education_temp, Y_education_temp, test_size=SPLIT_FRACTION, random_state=SPLIT_SEED)
# Backward-compatible alias for the original, misspelled variable name.
X__educationdev = X_education_dev

X_sex_temp, X_sex_test, Y_sex_temp, Y_sex_test = train_test_split(
    X, Y_sex, test_size=SPLIT_FRACTION, random_state=SPLIT_SEED)
X_sex_train, X_sex_dev, Y_sex_train, Y_sex_dev = train_test_split(
    X_sex_temp, Y_sex_temp, test_size=SPLIT_FRACTION, random_state=SPLIT_SEED)

X_marriage_temp, X_marriage_test, Y_marriage_temp, Y_marriage_test = train_test_split(
    X, Y_marriage, test_size=SPLIT_FRACTION, random_state=SPLIT_SEED)
X_marriage_train, X_marriage_dev, Y_marriage_train, Y_marriage_dev = train_test_split(
    X_marriage_temp, Y_marriage_temp, test_size=SPLIT_FRACTION, random_state=SPLIT_SEED)

X_age_cat_temp, X_age_cat_test, Y_age_cat_temp, Y_age_cat_test = train_test_split(
    X, Y_age_cat, test_size=SPLIT_FRACTION, random_state=SPLIT_SEED)
X_age_cat_train, X_age_cat_dev, Y_age_cat_train, Y_age_cat_dev = train_test_split(
    X_age_cat_temp, Y_age_cat_temp, test_size=SPLIT_FRACTION, random_state=SPLIT_SEED)

# Final datasets for a demographic <d> in {education, sex, marriage, age_cat}:
#   X_<d>_train, Y_<d>_train, X_<d>_dev, Y_<d>_dev, X_<d>_test, Y_<d>_test

In [20]:
def build_sequence_model(n_X, n_Y, n_X_dim, output_activation, loss_function, do_conv = False, stride = 1, kernel_size = 1):
    """Build and compile a small bidirectional-LSTM classifier.

    Architecture: Input(n_X, n_X_dim) -> [optional single-filter Conv1D]
    -> Bidirectional(LSTM(3)) -> Dense(16, relu) -> Dense(n_Y, output_activation).

    Args:
        n_X: number of timesteps in each input sequence.
        n_Y: number of output units.
        n_X_dim: number of features per timestep.
        output_activation: activation of the output layer (e.g. "softmax").
        loss_function: loss passed to ``model.compile``.
        do_conv: when True, apply a single-filter Conv1D before the LSTM.
        stride: stride of the optional convolution.
        kernel_size: kernel size of the optional convolution.

    Returns:
        The compiled Keras ``Model`` (Adam optimizer, accuracy metric).
    """
    X_input = Input(shape=(n_X, n_X_dim))
    A = X_input
    if do_conv:
        # Conv1D's keyword is `strides` (plural); passing `stride=` is rejected
        # as an unknown argument, so the conv branch could never be built.
        A = Conv1D(filters=1, kernel_size=kernel_size, strides=stride)(A)
    A = Bidirectional(LSTM(3))(A)
    A = Dense(16, activation="relu")(A)
    Y_layer = Dense(n_Y, activation=output_activation)(A)

    model = Model(inputs=X_input, outputs=Y_layer)
    opt = Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.002)
    model.compile(loss=loss_function, optimizer=opt, metrics=['accuracy'])

    return model

In [22]:
# Demographic target modelled in this run: education.
X_train, Y_train = X_education_train, Y_education_train
X_test, Y_test = X_education_test, Y_education_test

# m: training samples, n_X: timesteps per sample,
# n_X_dim: features per timestep, n_Y: output classes.
m, n_X = X_train.shape[:2]
n_X_dim = 1
n_Y = Y_train.shape[1]

# Reset the Keras backend state before building a new model.
K.clear_session()

# Softmax classifier trained with categorical cross-entropy.
model = build_sequence_model(
    n_X,
    n_Y,
    n_X_dim,
    output_activation="softmax",
    loss_function="categorical_crossentropy",
)

# Each feature row is fed to the model as a sequence of n_X steps of width n_X_dim.
train_sequences = X_train.reshape(m, n_X, n_X_dim)
test_sequences = X_test.reshape(X_test.shape[0], n_X, n_X_dim)

# Train briefly, then report [loss, accuracy] on the held-out test set.
model.fit(train_sequences, Y_train, epochs=2, batch_size=64)
model.evaluate(test_sequences, Y_test)

Epoch 1/2
Epoch 2/2


[1.0801251598993937, 0.4716666666666667]