In [None]:
from Bayesian_Neural_Network import bayesian_neural_network
from BNN_Learning_Rate_Tuning import bnn_learning_rate_tuning

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# fix NumPy's global random seed so that NumPy-based random draws repeat across runs
np.random.seed(10120024)

# Data Preparation

In [None]:
# load the first 2500 rows of the dataset, keeping only the columns used below
df = pd.read_csv(
    'fraud.csv',
    usecols=['type', 'amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest', 'isFraud'],
    nrows=2500,
)

# perform one-hot encoding on the categorical column 'type'
dummies_df = pd.get_dummies(df['type'], dtype=int)

# concat the original data with the one-hot-encoded data and drop the raw 'type' column
data = pd.concat((df, dummies_df), axis=1).drop(columns=['type'])

# perform min-max normalization: every column (the label included) is rescaled to [0, 1]
for col in data.columns:
    col_min = data[col].min()
    col_range = data[col].max() - col_min
    # a constant column has zero range; dividing by it would turn the whole column into NaN,
    # so such a column is left at 0 instead
    data[col] = (data[col] - col_min) / (col_range if col_range != 0 else 1)

# relabel the non-fraud class from 0 to -1.
# The result is assigned back to the column: calling replace(..., inplace=True) on the
# selection data['isFraud'] is a chained assignment and does not modify `data` when
# pandas Copy-on-Write is active.
data['isFraud'] = data['isFraud'].replace(0, -1)

In [None]:
# share of each label (fraud / non-fraud) in the data, expressed in percent
label_counts = data['isFraud'].value_counts()
(label_counts / label_counts.sum() * 100).to_frame('percentage')

In [None]:
# the label column is the target; every other column is a model input
target_columns = ['isFraud']
feature_columns = data.columns.drop(target_columns)

# DataFrame views of the inputs and the target
feature_data = data[feature_columns]
target_data = data[target_columns]

In [None]:
# preview the first rows of the feature table
feature_data.head()

In [None]:
# preview the first rows of the target table
target_data.head()

In [None]:
# partition the rows by label: 1 marks fraud, -1 marks non-fraud
fraud_data = data.loc[data['isFraud'] == 1, :]
non_fraud_data = data.loc[data['isFraud'] == -1, :]

# fraud-only arrays (all fraud rows)
fraud_feature_data = fraud_data[feature_columns].to_numpy()
fraud_target_data = fraud_data[target_columns].to_numpy()

# non-fraud-only arrays, limited to the first 100 non-fraud rows
non_fraud_first_100 = non_fraud_data.head(100)
non_fraud_feature_data = non_fraud_first_100[feature_columns].to_numpy()
non_fraud_target_data = non_fraud_first_100[target_columns].to_numpy()

# mixed arrays: every fraud row followed by the first 25 non-fraud rows.
# From here on feature_data / target_data are NumPy arrays, no longer DataFrames.
fraud_and_non_fraud = pd.concat((fraud_data, non_fraud_data.head(25)))
feature_data = fraud_and_non_fraud[feature_columns].to_numpy()
target_data = fraud_and_non_fraud[target_columns].to_numpy()

# Model Learning Rate Tuning

In [None]:
# network layout handed to the tuner: the input entry is the number of feature columns;
# presumably the hidden/output lists are layer widths — TODO confirm against the BNN module
input_layer = [len(feature_columns)]
hidden_layers = [32, 16, 8, 4, 2]
output_layer = [1]

# learning-rate sweep settings; how a "power" maps to an actual learning rate
# (base, sign) is defined inside bnn_learning_rate_tuning — TODO confirm
initial_lr_power = 20
end_lr_power = 24
total_iters = 4
tuning_epochs = 10

model_purpose = 'binary'

In [None]:
# build the learning-rate tuner on the mixed fraud / non-fraud arrays
# using the layout and sweep settings from the previous cell
bnn_lr_tuning = bnn_learning_rate_tuning(input_layer, 
                                         hidden_layers, 
                                         output_layer, 
                                         feature_data, 
                                         target_data,
                                         model_purpose=model_purpose, 
                                         initial_lr_power=initial_lr_power, 
                                         end_lr_power=end_lr_power, 
                                         total_iters=total_iters, 
                                         tuning_epochs=tuning_epochs)

In [None]:
# run the learning-rate sweep configured above
bnn_lr_tuning.perform_learning_rate_tuning()

In [None]:
# plot the outcome of the sweep (presumably error per tried learning rate — TODO confirm)
bnn_lr_tuning.visualize_learning_rate_tuning()

# Model Performance on Predicting Fraud Data (Labeled as 1)

In [None]:
# network layout (same values as in the tuning section)
input_layer = [len(feature_columns)]
hidden_layers = [32, 16, 8, 4, 2]
output_layer = [1]

# training settings passed to the model constructor in the next cell
total_epochs = 10
initial_lr = 1
end_lr = 5e-2
validation_percentage = 0.1

model_purpose = 'binary'
learning_rate_decay_type = 'exponential'

In [None]:
# Build the model for the fraud-only experiment.
# This section is about rows labeled 1, so the model receives the fraud-only arrays
# (fraud_feature_data / fraud_target_data) rather than the mixed feature_data / target_data,
# which are used by the later "Fraud and Non-Fraud" section.
# model_purpose comes from the config cell instead of a duplicated string literal.
bnn = bayesian_neural_network(
    input_layer,
    hidden_layers,
    output_layer,
    fraud_feature_data,
    fraud_target_data,
    validation_percentage=validation_percentage,
    model_purpose=model_purpose,
    initial_lr=initial_lr,
    end_lr=end_lr,
    learning_rate_decay_type=learning_rate_decay_type,
    total_epochs=total_epochs,
)

In [None]:
# forward pass with the untrained parameters; the first element of the result is displayed
bnn.bnn_fp.feed_forward_neural_network(
    bnn.m,
    bnn.v,
    bnn.feature_data,
    bnn.model_structure,
    model_purpose=bnn.model_purpose,
)[0]

In [None]:
# train the model; epochs and learning-rate decay were given to the constructor above
bnn.train_model()

In [None]:
# plot the performance recorded by the model object (exact metrics are defined in the BNN module)
bnn.visualize_model_performance()

In [None]:
# forward pass with the trained parameters; the first element of the result is displayed
bnn.bnn_fp.feed_forward_neural_network(
    bnn.m,
    bnn.v,
    bnn.feature_data,
    bnn.model_structure,
    model_purpose=bnn.model_purpose,
)[0]

# Model Performance on Predicting Non-Fraud Data (Labeled as -1)

In [None]:
# network layout (same values as in the previous sections)
input_layer = [len(feature_columns)]
hidden_layers = [32, 16, 8, 4, 2]
output_layer = [1]

# training settings passed to the model constructor in the next cell;
# compared with the fraud section: more epochs (25 vs 10) and a smaller final learning rate
total_epochs = 25
initial_lr = 1
end_lr = 1e-2
validation_percentage = 0.1

model_purpose = 'binary'
learning_rate_decay_type = 'exponential'

In [None]:
# Build the model for the non-fraud-only experiment.
# This section is about rows labeled -1, so the model receives the non-fraud-only arrays
# (non_fraud_feature_data / non_fraud_target_data, the first 100 non-fraud rows) rather
# than the mixed feature_data / target_data used by the later "Fraud and Non-Fraud" section.
# model_purpose comes from the config cell instead of a duplicated string literal.
bnn = bayesian_neural_network(
    input_layer,
    hidden_layers,
    output_layer,
    non_fraud_feature_data,
    non_fraud_target_data,
    validation_percentage=validation_percentage,
    model_purpose=model_purpose,
    initial_lr=initial_lr,
    end_lr=end_lr,
    learning_rate_decay_type=learning_rate_decay_type,
    total_epochs=total_epochs,
)

In [None]:
# forward pass with the untrained parameters; the first element of the result is displayed
bnn.bnn_fp.feed_forward_neural_network(
    bnn.m,
    bnn.v,
    bnn.feature_data,
    bnn.model_structure,
    model_purpose=bnn.model_purpose,
)[0]

In [None]:
# train the model; epochs and learning-rate decay were given to the constructor above
bnn.train_model()

In [None]:
# plot the performance recorded by the model object (exact metrics are defined in the BNN module)
bnn.visualize_model_performance()

In [None]:
# forward pass with the trained parameters; the first element of the result is displayed
bnn.bnn_fp.feed_forward_neural_network(
    bnn.m,
    bnn.v,
    bnn.feature_data,
    bnn.model_structure,
    model_purpose=bnn.model_purpose,
)[0]

# Model Performance on Fraud and Non-Fraud Data

In [None]:
# network layout (same values as in the previous sections)
input_layer = [len(feature_columns)]
hidden_layers = [32, 16, 8, 4, 2]
output_layer = [1]

# training settings for the mixed fraud / non-fraud run
total_epochs = 100
initial_lr = 1e-4
end_lr = 1e-8
# no validation split in this section: the constructor below is given validation_percentage=None
# validation_percentage = 0.1

model_purpose = 'binary'
learning_rate_decay_type = 'exponential'

In [None]:
# build the model on the mixed fraud / non-fraud arrays without a validation split
# NOTE(review): the earlier sections pass total_epochs / learning_rate_decay_type to the
# constructor and call train_model() with no arguments, while this cell runs the setup
# steps by hand and passes them to train_model() — confirm both call styles match the
# current bayesian_neural_network API
bnn = bayesian_neural_network(input_layer, 
                              hidden_layers, 
                              output_layer, 
                              feature_data, 
                              target_data, 
                              validation_percentage=None,
                              model_purpose=model_purpose, 
                              initial_lr=initial_lr,
                              end_lr=end_lr)
# manual setup, in this order: dataset standardization, then generation of m and v
# (presumably the model's mean and variance parameters — TODO confirm)
bnn.standardize_dataset()
# train/validation split is skipped because validation_percentage is None
# bnn.generate_validation_training_dataset()
bnn.generate_m()
bnn.generate_v()

In [None]:
# model performance on predicting fraudulent data before training
# NOTE(review): the row selection uses a boolean mask built from bnn.target_data; this only
# works if the mask's shape is compatible with bnn.feature_data — confirm the shapes the
# model object stores
bnn.bnn_fp.feed_forward_neural_network(bnn.m, bnn.v, bnn.feature_data[bnn.target_data == 1], bnn.model_structure, model_purpose=bnn.model_purpose)[0]

In [None]:
# model performance on predicting non-fraudulent data before training
# NOTE(review): the row selection uses a boolean mask built from bnn.target_data; this only
# works if the mask's shape is compatible with bnn.feature_data — confirm the shapes the
# model object stores
bnn.bnn_fp.feed_forward_neural_network(bnn.m, bnn.v, bnn.feature_data[bnn.target_data == -1], bnn.model_structure, model_purpose=bnn.model_purpose)[0]

In [None]:
# train the model, passing the epoch count and learning-rate decay type explicitly
bnn.train_model(total_epochs, learning_rate_decay_type)

In [None]:
# plot the performance recorded by the model object (exact metrics are defined in the BNN module)
bnn.visualize_model_performance()

In [None]:
# model performance on predicting fraudulent data after training
bnn.bnn_fp.feed_forward_neural_network(bnn.m, bnn.v, bnn.feature_data[bnn.target_data == 1], bnn.model_structure, model_purpose=bnn.model_purpose)[0]

In [None]:
# model performance on predicting non-fraudulent data after training
bnn.bnn_fp.feed_forward_neural_network(bnn.m, bnn.v, bnn.feature_data[bnn.target_data == -1], bnn.model_structure, model_purpose=bnn.model_purpose)[0]

# Model Performance on the Combined Data With a Validation Split

In [None]:
# network layout (same values as in the previous sections)
input_layer = [len(feature_columns)]
hidden_layers = [32, 16, 8, 4, 2]
output_layer = [1]

# training settings for the run with a 10% validation split
# NOTE(review): these learning rates (1e-30 .. 1e-32) are many orders of magnitude below
# those used in the other sections — confirm they are intended
total_epochs = 100
initial_lr = 1e-30
end_lr = 1e-32
validation_percentage = 0.1

# NOTE(review): this section configures error_type, whereas every other section
# configures model_purpose — confirm the constructor accepts this keyword
error_type = 'accuracy'
learning_rate_decay_type = 'exponential'

In [None]:
# build the model on the mixed fraud / non-fraud arrays with a validation split
# NOTE(review): error_type= is passed here instead of the model_purpose= keyword used by
# the other constructor calls in this notebook — confirm it is a valid parameter
bnn = bayesian_neural_network(input_layer, 
                              hidden_layers, 
                              output_layer, 
                              feature_data, 
                              target_data, 
                              validation_percentage=validation_percentage,
                              error_type=error_type, 
                              initial_lr=initial_lr,
                              end_lr=end_lr)
# manual setup, in this order: dataset standardization, train/validation split, then
# generation of m and v (presumably the model's mean and variance parameters — TODO confirm)
bnn.standardize_dataset()
bnn.generate_validation_training_dataset()
bnn.generate_m()
bnn.generate_v()

In [None]:
# train the model, passing the epoch count and learning-rate decay type explicitly
bnn.train_model(total_epochs, learning_rate_decay_type)

In [None]:
# plot the performance recorded by the model object (exact metrics are defined in the BNN module)
bnn.visualize_model_performance()