Setup

In [94]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
import numpy as np
from sklearn.decomposition import PCA
from tensorflow.keras import metrics
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.initializers import HeNormal
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import TensorBoard

# TensorBoard logging callback: event files go under ./logs, and
# histogram_freq=1 asks for weight histograms after every epoch.
tensorboard = TensorBoard(
    log_dir='./logs',
    histogram_freq=1,
)

Read and transform the data

In [136]:
# load dataset
CCData = pd.read_csv("Credit_card.csv")
CCData2 = pd.read_csv("Credit_card_label.csv")
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of CCData (which triggers pandas' SettingWithCopyWarning).
X = CCData[["Ind_ID","GENDER","Car_Owner","Propert_Owner","CHILDREN","Annual_income","Type_Income","EDUCATION",
        "Marital_status","Housing_type","Birthday_count","Employed_days","Mobile_phone","Work_Phone","Phone",
        "EMAIL_ID","Type_Occupation","Family_Members"]].copy()
Y = CCData2[["label"]]
# NOTE(review): features and labels are paired purely by row position;
# presumably both CSVs share the same row order -- confirm.
# NOTE(review): Ind_ID looks like a record identifier rather than a predictive
# feature -- consider dropping it from X.

# Find and replace all NaN data
# Columns with string data get "N/A"
# Columns with numbered data get 0
listNan = X.loc[:, X.isna().any()]
print('Columns with NaN data:')
print(list(listNan))
X['GENDER'] = X['GENDER'].fillna('N/A')
X['Type_Occupation'] = X['Type_Occupation'].fillna('N/A')
X['Annual_income'] = X['Annual_income'].fillna(0)
X['Birthday_count'] = X['Birthday_count'].fillna(0)

# Encode the categorical data: each string column is replaced by integer codes.
# The encoder is refitted per column, so codes are only meaningful within a column.
le = LabelEncoder()
for column in ['GENDER', 'Car_Owner', 'Propert_Owner', 'Type_Income', 'EDUCATION',
               'Marital_status', 'Housing_type', 'Type_Occupation']:
    X[column] = le.fit_transform(X[column].values)

# Encode the labels once, before splitting, so that the training labels (Y)
# and the test labels (y_test) share the same mapping and are both 1-D arrays.
encoder = LabelEncoder()
Y = encoder.fit_transform(np.ravel(Y))

# Split the dataset into training and test sets.
# This happens BEFORE any scaling / PCA so that no statistic of the test rows
# leaks into the fitted transformers.
X, X_test, Y, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Standardize the features: fit on the training rows only, apply to both sets
scaler = StandardScaler().fit(X)
X = scaler.transform(X)
X_test = scaler.transform(X_test)

# Project onto the leading principal components (fitted on the training rows
# only) and use the projection as the model input. Previously the PCA output
# was computed and then discarded, so the model saw all raw features.
pca = PCA(n_components=5)
principalComponents = pca.fit_transform(X)
X = principalComponents
X_test = pca.transform(X_test)

Columns with NaN data:
['GENDER', 'Annual_income', 'Birthday_count', 'Type_Occupation']


Create and train the model

In [139]:
# Train the same architecture from scratch several times and report the mean
# test accuracy, since a single run depends on the random weight initialisation.
N_RUNS = 10
run_accuracies = []
for i in range(N_RUNS):
  #Create the model
  model = Sequential()
  model.add(Dense(16, input_dim=X.shape[1], activation='sigmoid'))
  model.add(Dense(16, activation='sigmoid'))
  model.add(Dense(4, activation='sigmoid'))
  #model.add(Dense(2, activation='sigmoid'))  # Output layer for 'sparse_categorical_crossentropy'
  model.add(Dense(1, activation='sigmoid'))  # Output layer for all of the others
  # Use the lowercase 'accuracy' alias: Keras maps it to the accuracy variant
  # that fits the output layer (thresholded binary accuracy for one sigmoid unit).
  # The capitalised 'Accuracy' is looked up as the keras.metrics.Accuracy
  # class, which counts exact equality between prediction and label.
  model.compile(loss='mse',
                optimizer=Adam(learning_rate=0.01),
                metrics=['accuracy'])
  # One callback and log directory per run, so the event files of the
  # independent models are not mixed together under a single ./logs folder.
  run_logger = TensorBoard(log_dir=f'./logs/run_{i}', histogram_freq=1)
  #Training the model
  history = model.fit(X, Y, epochs=10, batch_size=32, validation_data=(X_test, y_test), callbacks=[run_logger])
  # evaluate() returns [loss, accuracy]; keep the accuracy of this run
  run_accuracies.append(model.evaluate(X_test, y_test)[1])
avg_accuracy = float(np.mean(run_accuracies))
print(avg_accuracy)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
E

**1: 2 components with learning rate = 0.01**
1.   loss='sparse_categorical_crossentropy', Accuracy = 0.903
2.   loss='mse', Accuracy = 0.903
3.   loss='msle', Accuracy = 0.903
4.   loss = 'poisson', Accuracy = 0.903
5.   loss='binary_crossentropy', Accuracy = 0.903

**2: 3 components with learning rate = 0.01**
1.   loss='mse', Accuracy = 0.8829




