<a href="https://colab.research.google.com/github/apriandito/pertamina-2/blob/main/05_deep_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install tensorflow --quiet

In [4]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix, roc_curve
)

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

import plotly.graph_objects as go

In [5]:
# 1. Load training data
train_url = (
    "https://raw.githubusercontent.com/apriandito/pertamina-2/"
    "refs/heads/main/data/bbm_fraud_train.csv"
)
df_train = pd.read_csv(train_url)
df_train.head()

Unnamed: 0,volume_liters,total_amount,hour,is_weekend,loyalty_member,customer_transaction_count,days_since_last_transaction,same_day_transactions,volume_deviation,amount_deviation,...,is_night_transaction,bbm_type_encoded,payment_method_encoded,day_of_week_encoded,customer_type_encoded,spbu_category_encoded,spbu_province_encoded,spbu_city_encoded,is_fraud,fraud_type
0,578.37,8039343.0,7,True,False,1,0.0,1,0.0,0.0,...,False,3,1,2,1,0,5,13,0,
1,664.13,4516084.0,8,False,False,2,109.0,1,0.069022,0.280616,...,False,4,2,6,1,1,3,26,0,
2,163.24,2024176.0,12,False,False,3,37.0,1,0.651628,0.583492,...,False,2,2,0,1,1,2,7,0,
3,54.64,759455.7,4,False,False,4,54.0,1,0.85034,0.801955,...,True,3,4,4,1,1,8,24,1,multiple_cards
4,357.87,4437588.0,2,True,False,5,8.0,1,0.015894,0.121926,...,True,2,1,2,1,2,8,24,0,


In [6]:
# 2. Siapkan fitur & target
X = df_train.drop(columns=["is_fraud", "fraud_type"])
y = df_train["is_fraud"]

In [7]:
# 3. Split data (chronological, tanpa shuffle)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

In [8]:
# 4. Scaling fitur
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test  = scaler.transform(X_test)

In [9]:
# 5. Build & fit Neural Network
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy')

es = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=256,
    callbacks=[es],
    verbose=2
)

Epoch 1/50
157/157 - 8s - 53ms/step - loss: 0.1879 - val_loss: 0.1227
Epoch 2/50
157/157 - 1s - 6ms/step - loss: 0.1280 - val_loss: 0.1036
Epoch 3/50
157/157 - 1s - 7ms/step - loss: 0.1124 - val_loss: 0.0908
Epoch 4/50
157/157 - 1s - 6ms/step - loss: 0.1006 - val_loss: 0.0818
Epoch 5/50
157/157 - 1s - 9ms/step - loss: 0.0936 - val_loss: 0.0759
Epoch 6/50
157/157 - 1s - 8ms/step - loss: 0.0879 - val_loss: 0.0707
Epoch 7/50
157/157 - 1s - 5ms/step - loss: 0.0827 - val_loss: 0.0654
Epoch 8/50
157/157 - 1s - 7ms/step - loss: 0.0773 - val_loss: 0.0613
Epoch 9/50
157/157 - 1s - 4ms/step - loss: 0.0717 - val_loss: 0.0582
Epoch 10/50
157/157 - 1s - 4ms/step - loss: 0.0671 - val_loss: 0.0534
Epoch 11/50
157/157 - 0s - 3ms/step - loss: 0.0663 - val_loss: 0.0530
Epoch 12/50
157/157 - 0s - 3ms/step - loss: 0.0613 - val_loss: 0.0488
Epoch 13/50
157/157 - 1s - 4ms/step - loss: 0.0591 - val_loss: 0.0465
Epoch 14/50
157/157 - 1s - 4ms/step - loss: 0.0561 - val_loss: 0.0443
Epoch 15/50
157/157 - 1s - 7

<keras.src.callbacks.history.History at 0x79e5899e9690>

In [10]:
# 6. Predict & evaluasi
y_pred_proba = model.predict(X_test).ravel()
y_pred       = (y_pred_proba >= 0.5).astype(int)

cm = confusion_matrix(y_test, y_pred)
tn, fp, fn, tp = cm.ravel()

accuracy    = accuracy_score(y_test, y_pred)
precision   = precision_score(y_test, y_pred)
recall      = recall_score(y_test, y_pred)
specificity = tn / (tn + fp)
f1          = f1_score(y_test, y_pred)
roc_auc     = roc_auc_score(y_test, y_pred_proba)

print("=== Evaluation on Test Set ===")
print(f"Accuracy   : {accuracy:.4f}")
print(f"Precision  : {precision:.4f}")
print(f"Recall     : {recall:.4f}")
print(f"Specificity: {specificity:.4f}")
print(f"F1-score   : {f1:.4f}")
print(f"ROC AUC    : {roc_auc:.4f}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
=== Evaluation on Test Set ===
Accuracy   : 0.9908
Precision  : 0.9677
Recall     : 0.8434
Specificity: 0.9985
F1-score   : 0.9013
ROC AUC    : 0.9913


In [11]:
# 7. Plot ROC Curve dengan Plotly
fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
fig = go.Figure()

# ROC curve line
fig.add_trace(go.Scatter(
    x=fpr, y=tpr,
    mode='lines',
    name=f'Neural Net (AUC = {roc_auc:.3f})',
    line=dict(width=2)
))

# Diagonal random guess
fig.add_trace(go.Scatter(
    x=[0,1], y=[0,1],
    mode='lines',
    name='Random Guess',
    line=dict(dash='dash', width=1)
))

fig.update_layout(
    title='ROC Curve - Neural Network Fraud Detection',
    xaxis_title='False Positive Rate',
    yaxis_title='True Positive Rate',
    legend=dict(x=0.65, y=0.15),
    template='plotly_white',
    width=700,
    height=500
)
fig.show()

In [12]:
# 8. Real‑time prediction
rt_url = (
    "https://raw.githubusercontent.com/apriandito/pertamina-2/"
    "refs/heads/main/data/bbm_fraud_realtime.csv"
)
df_rt = pd.read_csv(rt_url)

X_rt = scaler.transform(df_rt[X.columns])
df_rt['predicted_is_fraud'] = (model.predict(X_rt).ravel() >= 0.5).astype(int)
df_rt['fraud_probability']  = model.predict(X_rt).ravel()

print("\n=== Real‑time Prediction Summary ===")
print(f"Total transaksi   : {len(df_rt)}")
print(f"Predicted fraud    : {df_rt['predicted_is_fraud'].sum()} "
      f"({df_rt['predicted_is_fraud'].mean()*100:.2f}%)")
print(f"Average fraud prob : {df_rt['fraud_probability'].mean():.4f}")

df_rt.head()


[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step

=== Real‑time Prediction Summary ===
Total transaksi   : 5000
Predicted fraud    : 449 (8.98%)
Average fraud prob : 0.0926


Unnamed: 0,volume_liters,total_amount,hour,is_weekend,loyalty_member,customer_transaction_count,days_since_last_transaction,same_day_transactions,volume_deviation,amount_deviation,...,is_night_transaction,bbm_type_encoded,payment_method_encoded,day_of_week_encoded,customer_type_encoded,spbu_category_encoded,spbu_province_encoded,spbu_city_encoded,predicted_is_fraud,fraud_probability
0,24.61,305164.0,3,False,False,1,0.0,1,0.0,0.0,...,True,2,2,1,2,1,2,5,0,5e-06
1,240.81,1637508.0,23,True,True,1,0.0,1,0.0,0.0,...,True,4,1,2,0,0,9,3,0,0.017227
2,78.48,784794.3,12,True,True,2,0.0,1,0.508409,0.352026,...,False,1,0,3,0,0,6,20,1,0.985743
3,112.02,1389048.0,21,True,True,3,0.0,2,0.220839,0.093351,...,False,2,1,3,0,0,9,28,0,0.005923
4,108.44,1084400.0,0,True,True,4,6.0,1,0.196369,0.114007,...,True,1,4,3,0,0,9,27,0,0.011566
