In [1]:
# numpy & pandas, berguna untuk manipulasi data
import numpy as np
import pandas as pd

# Tensorflow & Keras, berguna untuk pembuatan NN layer
from keras.layers import Dense
from keras import Input
# untuk pembuatan linear stack layer untuk NN
from keras.models import Sequential
from tensorflow import keras  # untuk pembuatan NN

# sklearn, berguna untuk penyiapan data model dan evaluasi model
import sklearn
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# plotly, berguna untuk menampilkan graph
import plotly
import plotly.graph_objects as go
import plotly.express as px

# mencetak versinya
print('numpy: %s' % np.__version__)
print('pandas: %s' % pd.__version__)
print('Tensorflow/Keras: %s' % keras.__version__)
print('sklearn: %s' % sklearn.__version__)
print('plotly: %s' % plotly.__version__)


numpy: 1.22.3
pandas: 1.4.1
Tensorflow/Keras: 2.10.0
sklearn: 1.1.1
plotly: 5.11.0


In [11]:
# Read file csv
df_audit_data = pd.read_csv('./data/audit_risk.csv', encoding='utf-8')

# Menghapus data kolom risk yang null
df_audit_data = df_audit_data[pd.isnull(df_audit_data['Risk']) == False]

# Mengisi kolom yang kosong dengan data rata - rata
df_audit_data = df_audit_data.fillna(df_audit_data.mean())

  df_audit_data = df_audit_data.fillna(df_audit_data.mean())


In [12]:
# Menampilkan kolom data
df_audit_data.dtypes

Sector_score      float64
LOCATION_ID        object
PARA_A            float64
Score_A           float64
Risk_A            float64
PARA_B            float64
Score_B           float64
Risk_B            float64
TOTAL             float64
numbers           float64
Score_B.1         float64
Risk_C            float64
Money_Value       float64
Score_MV          float64
Risk_D            float64
District_Loss       int64
PROB              float64
RiSk_E            float64
History             int64
Prob              float64
Risk_F            float64
Score             float64
Inherent_Risk     float64
CONTROL_RISK      float64
Detection_Risk    float64
Audit_Risk        float64
Risk                int64
dtype: object

In [46]:
# Menampilkan cuplikan data
df_audit_data

Unnamed: 0,Sector_score,LOCATION_ID,PARA_A,Score_A,Risk_A,PARA_B,Score_B,Risk_B,TOTAL,numbers,Score_B.1,Risk_C,Money_Value,Score_MV,Risk_D,District_Loss,PROB,RiSk_E,History,Prob,Risk_F,Score,Inherent_Risk,CONTROL_RISK,Detection_Risk,Audit_Risk,Risk
0,3.89,23,4.18,0.6,2.508,2.50,0.2,0.500,6.68,5.0,0.2,1.0,3.38,0.2,0.676,2,0.2,0.4,0,0.2,0.0,2.4,8.574,0.4,0.5,1.7148,1
1,3.89,6,0.00,0.2,0.000,4.83,0.2,0.966,4.83,5.0,0.2,1.0,0.94,0.2,0.188,2,0.2,0.4,0,0.2,0.0,2.0,2.554,0.4,0.5,0.5108,0
2,3.89,6,0.51,0.2,0.102,0.23,0.2,0.046,0.74,5.0,0.2,1.0,0.00,0.2,0.000,2,0.2,0.4,0,0.2,0.0,2.0,1.548,0.4,0.5,0.3096,0
3,3.89,6,0.00,0.2,0.000,10.80,0.6,6.480,10.80,6.0,0.6,3.6,11.75,0.6,7.050,2,0.2,0.4,0,0.2,0.0,4.4,17.530,0.4,0.5,3.5060,1
4,3.89,6,0.00,0.2,0.000,0.08,0.2,0.016,0.08,5.0,0.2,1.0,0.00,0.2,0.000,2,0.2,0.4,0,0.2,0.0,2.0,1.416,0.4,0.5,0.2832,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
771,55.57,9,0.49,0.2,0.098,0.40,0.2,0.080,0.89,5.0,0.2,1.0,0.00,0.2,0.000,2,0.2,0.4,0,0.2,0.0,2.0,1.578,0.4,0.5,0.3156,0
772,55.57,16,0.47,0.2,0.094,0.37,0.2,0.074,0.84,5.0,0.2,1.0,0.00,0.2,0.000,2,0.2,0.4,0,0.2,0.0,2.0,1.568,0.4,0.5,0.3136,0
773,55.57,14,0.24,0.2,0.048,0.04,0.2,0.008,0.28,5.0,0.2,1.0,0.00,0.2,0.000,2,0.2,0.4,0,0.2,0.0,2.0,1.456,0.4,0.5,0.2912,0
774,55.57,18,0.20,0.2,0.040,0.00,0.2,0.000,0.20,5.0,0.2,1.0,0.00,0.2,0.000,2,0.2,0.4,0,0.2,0.0,2.0,1.440,0.4,0.5,0.2880,0


In [102]:
pd.options.plotting.backend = "plotly"

fig = px.scatter_matrix(df_audit_data, dimensions=[ 'Money_Value', 'Inherent_Risk', 'Audit_Risk'], color="Score")
fig.show()

In [140]:
# Tahap 1 | pemilihan data untuk modelling
data_x = df_audit_data[['Inherent_Risk', 'Money_Value', 'Audit_Risk', 'Score']]
data_y = df_audit_data['Risk'].values


# Tahap 2 | pemisahan data sample training dan testing
train_x, test_x, train_y, test_y = train_test_split(
    data_x, data_y, test_size=0.2, random_state=0)


# Tahap 3 | penentuan struktur model NN
model_nn = Sequential(name="model-audit-data")
model_nn.add(Input(shape=(4,), name='Input-Layer'))  # Input Layer
model_nn.add(Dense(8, activation='sigmoid',
                   name='Hidden-Layer'))  # Hidden Layer
model_nn.add(Dense(1, activation='sigmoid',
             name='Output-Layer'))  # Output Layer


# Tahap 4 | compile model keras
model_nn.compile(optimizer='rmsprop',  # menggunakan algoritma rmsprop dalam backpropagation
                 loss='binary_crossentropy',
                 metrics=['Accuracy', 'Precision', 'Recall'],
                 loss_weights=None,
                 weighted_metrics=None,
                 run_eagerly=None,
                 steps_per_execution=None
                 )


# Tahap 5 | melakukan training dari model keras menggunakan dataset
model_nn.fit(train_x,  # input data
             train_y,  # target data
             batch_size=10,
             epochs=3,
             verbose='auto',
             callbacks=None,
             validation_split=0.2,
             shuffle=True,
             class_weight={0: 0.3, 1: 0.7},
             sample_weight=None,
             initial_epoch=0,
             steps_per_epoch=None,
             validation_steps=None,
             validation_batch_size=None,
             validation_freq=3,
             max_queue_size=10,
             workers=1,
             use_multiprocessing=False,
             )


# Tahap 6 | gunakan model untuk membuat prediksi
# Prediksi pada data training
pred_labels_tr = (model_nn.predict(train_x) > 0.5).astype(int)
# Prediksi pada data test
pred_labels_te = (model_nn.predict(test_x) > 0.5).astype(int)


# Tahap 7 | summary dari model
print("")
print('~~~~~~~~~~~~~~~~~~ Model Summary ~~~~~~~~~~~~~~~~~~')
model_nn.summary()
print("")
print('~~~~~~~~~~~~~~~~~~ Weights and Biases ~~~~~~~~~~~~~~~~~~')
for layer in model_nn.layers:
    print("Layer: ", layer.name)
    # kernels (weights)
    print("  --Kernels (Weights): ", layer.get_weights()[0])
    print("  --Biases: ", layer.get_weights()[1])  # biases

print("")
print('~~~~~~~~~~~~~~~~~~ Evaluation on Training Data ~~~~~~~~~~~~~~~~~~')
print(classification_report(train_y, pred_labels_tr))
print("")

print('~~~~~~~~~~~~~~~~~~ Evaluation on Test Data ~~~~~~~~~~~~~~~~~~')
print(classification_report(test_y, pred_labels_te))
print("")


Epoch 1/3
Epoch 2/3
Epoch 3/3

~~~~~~~~~~~~~~~~~~ Model Summary ~~~~~~~~~~~~~~~~~~
Model: "model-audit-data"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Hidden-Layer (Dense)        (None, 8)                 40        
                                                                 
 Output-Layer (Dense)        (None, 1)                 9         
                                                                 
Total params: 49
Trainable params: 49
Non-trainable params: 0
_________________________________________________________________

~~~~~~~~~~~~~~~~~~ Weights and Biases ~~~~~~~~~~~~~~~~~~
Layer:  Hidden-Layer
  --Kernels (Weights):  [[ 0.13774432  0.23615259 -0.5065298  -0.05720794  0.35264814 -0.40615064
  -0.45407805  0.4032358 ]
 [-0.35035503  0.11121289  0.11061306  0.03729173  0.3580314   0.04505859
   0.29091275 -0.12775044]
 [ 0.09592932  0.08095236  0.05281196  0.6957664  -0.7145529  