In [1]:
### Required libraries ###

import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from keras.models import Model, Sequential
from keras.layers import Dropout, Dense, Input, BatchNormalization, Activation, Add, LSTM, Softmax, Bidirectional, Conv1D
from keras.optimizers import Adam
from keras.utils import to_categorical, normalize
from keras import backend as K

%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
### Dataset ###

# reading dataset
dataset = pd.read_csv(r"D:\Data\Dinesh\Work\DLTraining\UCI Credit Card - Demographics Model\dataset\UCI_Credit_Card.csv", index_col=0)
dataset["AGE_CAT"] = pd.cut(dataset["AGE"], [0, 15, 25, 35, 45, 55, 80], labels=[15, 25, 35, 45, 55, 80])

neg_num_categorical_columns = ["PAY_0", "PAY_2", "PAY_3", "PAY_4", "PAY_5", "PAY_6"]
for column in neg_num_categorical_columns:
    min_value = abs(dataset[column].min())
    dataset[column] += abs(min_value)

In [3]:
### Data Preprocessing ###

# convert to numpy array
dataset = np.array(dataset, dtype=np.float32)

m = dataset.shape[0] # no. of training examples
# preparing X, adding one_hot encoding wherever needed
X = np.concatenate((
    dataset[:, 0].reshape(m, -1), # limit
    to_categorical(dataset[:, 5]), # pay0
    to_categorical(dataset[:, 6]), # pay2
    to_categorical(dataset[:, 7]), # pay3
    to_categorical(dataset[:, 8]), # pay4
    to_categorical(dataset[:, 9]), # pay5
    dataset[:, 10:23], # bill amount and previous month payment
    to_categorical(dataset[:, 23]) # default payment flag  
), axis=1)

Y_sex = to_categorical(dataset[:, 1])
Y_education = to_categorical(dataset[:, 2])
Y_marriage = to_categorical(dataset[:, 3])
Y_age = dataset[:, 4].reshape(m, -1)
Y_age_cat = to_categorical(dataset[:, 24])
print(X.shape, Y_sex.shape, Y_education.shape, Y_marriage.shape, Y_age.shape, Y_age_cat.shape)

# print(X[0])
X = normalize(X, axis= -1, order=2)
# Y_age = normalize(Y_age, axis= -1, order=2)
# print(X[0])

(30000, 71) (30000, 3) (30000, 7) (30000, 4) (30000, 1) (30000, 81)


In [4]:
### Train and Test set preparation ###

# Splitting into test, train and dev set for each of the demographic label. All demographic labels can also be kept as single Y
X_education_temp, X_education_test, Y_education_temp, Y_education_test = train_test_split(X, Y_education, test_size=0.10, random_state=5)
X_education_train, X__educationdev, Y_education_train, Y_education_dev = train_test_split(X_education_temp, Y_education_temp, test_size=0.10, random_state=5)
X_sex_temp, X_sex_test, Y_sex_temp, Y_sex_test = train_test_split(X, Y_sex, test_size=0.10, random_state=5)
X_sex_train, X_sex_dev, Y_sex_train, Y_sex_dev = train_test_split(X_sex_temp, Y_sex_temp, test_size=0.10, random_state=5)
X_marriage_temp, X_marriage_test, Y_marriage_temp, Y_marriage_test = train_test_split(X, Y_marriage, test_size=0.10, random_state=5)
X_marriage_train, X_marriage_dev, Y_marriage_train, Y_marriage_dev = train_test_split(X_marriage_temp, Y_marriage_temp, test_size=0.10, random_state=5)
X_age_temp, X_age_test, Y_age_temp, Y_age_test = train_test_split(X, Y_age, test_size=0.10, random_state=5)
X_age_train, _ageX_dev, Y_age_train, Y_age_dev = train_test_split(X_age_temp, Y_age_temp, test_size=0.10, random_state=5)
X_age_cat_temp, X_age_cat_test, Y_age_cat_temp, Y_age_cat_test = train_test_split(X, Y_age_cat, test_size=0.10, random_state=5)
X_age_cat_train, X_age_cat_dev, Y_age_cat_train, Y_age_cat_dev = train_test_split(X_age_cat_temp, Y_age_cat_temp, test_size=0.10, random_state=5)
'''
Note: Final datasets for 'education'(modify above two lines for any other demography) 
            - X_train, Y_education_train, X_dev, Y_education_dev, X_test, Y_education_test
'''
None

In [5]:
### Model ###

def build_model(layers_dims, n_X, n_Y):
    
    # input layer
    X_input = Input(shape = (n_X,))
    A = X_input

    # hidden layer
    # list having no. of activations in each layer
    add_this_a = None
    fix_add_this_a = False
#     layers_dims = [8, 12, 16, 16, 12, 8]
    for i, dim in enumerate(layers_dims):

        layer_no = i + 1

#         skip_add = True if (layer_no%3 == 0 and layer_no/3 > 1) else False # for skip connections, every 3 layers
#         add_this_A = True if (layer_no + 2)%3 == 0 else False # for skip connections

        A = Dense(dim, activation = "linear")(A) # to change activation check here, https://keras.io/activations/

#         if skip_add == True: # for skip connections
#             Add()([A, to_add_A])

        A = Activation("relu")(A)

#         if add_this_A == True: # for skip connections
#             to_add_A = A

        A = BatchNormalization()(A)
    #     A = Dropout(0.7)(A) # add dropout, if needed


    Y_layer = Dense(n_Y, activation="softmax")(A)
    # Y_layer = Dense(1, activation="linear")(A)

    model = Model(inputs = [X_input], outputs = [Y_layer])

    # adam optimizer - try different learning rates here
    opt = Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.002)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    # model.compile(loss='mean_squared_error', optimizer=opt, metrics=['accuracy'])
    
    return model

# prints model graph
# model.summary()

In [7]:
# X and Y for the current run, and their dimensions
X_train = X_education_train
Y_train = Y_education_train
X_test = X_education_test
Y_test = Y_education_test
m = X_train.shape[0] # no. of training examples
n_X = X_train.shape[1] # no. of features
n_Y = Y_train.shape[1] # no. of output classes

# clearing session
K.clear_session()

# build model with a set of dimensions
model = build_model([], n_X, n_Y)
# print out the model layers
# model.summary()

# training model
model.fit(X_train, Y_train, epochs=5, batch_size=64)
model.evaluate(X_test, Y_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[1.0460895927747091, 0.5226666668256124]

In [8]:
# X and Y for the current run, and their dimensions
X_train = X_sex_train
Y_train = Y_sex_train
X_test = X_sex_test
Y_test = Y_sex_test
m = X_train.shape[0] # no. of training examples
n_X = X_train.shape[1] # no. of features
n_Y = Y_train.shape[1] # no. of output classes

# clearing session
K.clear_session()

# build model with a set of dimensions
model = build_model([], n_X, n_Y)
# print out the model layers
# model.summary()

# training model
model.fit(X_train, Y_train, epochs=5, batch_size=64)
model.evaluate(X_test, Y_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[0.6709421194394429, 0.6033333333333334]

In [9]:
# X and Y for the current run, and their dimensions
X_train = X_marriage_train
Y_train = Y_marriage_train
X_test = X_marriage_test
Y_test = Y_marriage_test
m = X_train.shape[0] # no. of training examples
n_X = X_train.shape[1] # no. of features
n_Y = Y_train.shape[1] # no. of output classes

# clearing session
K.clear_session()

# build model with a set of dimensions
model = build_model([], n_X, n_Y)
# print out the model layers
# model.summary()

# training model
model.fit(X_train, Y_train, epochs=5, batch_size=64)
model.evaluate(X_test, Y_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[0.7396647079785665, 0.5573333331743876]

In [10]:
# X and Y for the current run, and their dimensions
X_train = X_age_cat_train
Y_train = Y_age_cat_train
X_test = X_age_cat_test
Y_test = Y_age_cat_test
m = X_train.shape[0] # no. of training examples
n_X = X_train.shape[1] # no. of features
n_Y = Y_train.shape[1] # no. of output classes

# clearing session
K.clear_session()

# build model with a set of dimensions
model = build_model([], n_X, n_Y)
# print out the model layers
# model.summary()

# training model
model.fit(X_train, Y_train, epochs=5, batch_size=64)
model.evaluate(X_test, Y_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[1.362297682126363, 0.4196666667461395]