In [1]:
import csv
import numpy as np

# Get the real data from https://www.kaggle.com/mlg-ulb/creditcardfraud/
fname = '../../dados/creditcard.csv'

all_features = []
all_targets = []
with open(fname) as f:
    for i, line in enumerate(f):
        if i == 0:
            print("HEADER:", line.strip())
            continue  # Skip header
        fields = line.strip().split(",")
        all_features.append([float(v.replace('"', "")) for v in fields[:-1]])
        all_targets.append([int(fields[-1].replace('"', ""))])
        if i == 1:
            print("EXAMPLE FEATURES:", all_features[-1])

features = np.array(all_features, dtype="float32")
targets = np.array(all_targets, dtype="uint8")
print("features.shape:", features.shape)
print("targets.shape:", targets.shape)

HEADER: "Time","V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","V11","V12","V13","V14","V15","V16","V17","V18","V19","V20","V21","V22","V23","V24","V25","V26","V27","V28","Amount","Class"
EXAMPLE FEATURES: [0.0, -1.3598071336738, -0.0727811733098497, 2.53634673796914, 1.37815522427443, -0.338320769942518, 0.462387777762292, 0.239598554061257, 0.0986979012610507, 0.363786969611213, 0.0907941719789316, -0.551599533260813, -0.617800855762348, -0.991389847235408, -0.311169353699879, 1.46817697209427, -0.470400525259478, 0.207971241929242, 0.0257905801985591, 0.403992960255733, 0.251412098239705, -0.018306777944153, 0.277837575558899, -0.110473910188767, 0.0669280749146731, 0.128539358273528, -0.189114843888824, 0.133558376740387, -0.0210530534538215, 149.62]
features.shape: (284807, 30)
targets.shape: (284807, 1)


In [2]:
num_val_samples = int(len(features) * 0.2)
train_features = features[:-num_val_samples]
train_targets = targets[:-num_val_samples]
val_features = features[-num_val_samples:]
val_targets = targets[-num_val_samples:]

print("Number of training samples:", len(train_features))
print("Number of validation samples:", len(val_features))

Number of training samples: 227846
Number of validation samples: 56961


In [3]:
counts = np.bincount(train_targets[:, 0])
print(
    "Number of positive samples in training data: {} ({:.2f}% of total)".format(
        counts[1], 100 * float(counts[1]) / len(train_targets)
    )
)

weight_for_0 = 1.0 / counts[0]
weight_for_1 = 1.0 / counts[1]

Number of positive samples in training data: 417 (0.18% of total)


In [4]:
mean = np.mean(train_features, axis=0)
train_features -= mean
val_features -= mean
std = np.std(train_features, axis=0)
train_features /= std
val_features /= std

In [5]:
import keras

model = keras.Sequential(
    [
        keras.Input(shape=train_features.shape[1:]),
        keras.layers.Dense(256, activation="relu"),
        keras.layers.Dense(256, activation="relu"),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(256, activation="relu"),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(1, activation="sigmoid"),
    ]
)
model.summary()

2024-04-14 08:25:54.360306: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-14 08:25:54.362901: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-14 08:25:54.406184: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-04-14 08:25:55.636172: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-04-14 08:25:55.636659: W tensorflow/core/common_runtime/gpu/

In [8]:
callback = keras.callbacks.EarlyStopping(monitor='loss', patience=3)

In [15]:
metrics = [
    keras.metrics.FalseNegatives(name="fn"),
    keras.metrics.FalsePositives(name="fp"),
    keras.metrics.TrueNegatives(name="tn"),
    keras.metrics.TruePositives(name="tp"),
    keras.metrics.Precision(name="precision"),
    keras.metrics.Recall(name="recall"),
]

model.compile(
    optimizer=keras.optimizers.Adam(1e-2), loss="binary_crossentropy", metrics=metrics
)

path = '/home/rafael/MEGA/github/Machine_Learning/06-DeepLearning/01-Fraude-Cartao/fraud-best-model/best.weights.h5'

# check = [keras.callbacks.ModelCheckpoint("fraud_model_at_epoch_{epoch}.keras")]
check = keras.callbacks.ModelCheckpoint(
    filepath=path,
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)
early = keras.callbacks.EarlyStopping(monitor='loss', patience=50)

class_weight = {0: weight_for_0, 1: weight_for_1}

model.fit(
    train_features,
    train_targets,
    batch_size=128,
    epochs=3000,
    verbose=2,
    callbacks=[check,early],
    validation_data=(val_features, val_targets),
    class_weight=class_weight,
)

Epoch 1/3000
1781/1781 - 5s - 3ms/step - fn: 235.0000 - fp: 96717.0000 - loss: 6.1057e-06 - precision: 0.0019 - recall: 0.4365 - tn: 130712.0000 - tp: 182.0000 - val_fn: 0.0000e+00 - val_fp: 56886.0000 - val_loss: 0.7765 - val_precision: 0.0013 - val_recall: 1.0000 - val_tn: 0.0000e+00 - val_tp: 75.0000
Epoch 2/3000
1781/1781 - 3s - 2ms/step - fn: 138.0000 - fp: 144484.0000 - loss: 6.0892e-06 - precision: 0.0019 - recall: 0.6691 - tn: 82945.0000 - tp: 279.0000 - val_fn: 75.0000 - val_fp: 0.0000e+00 - val_loss: 0.6122 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - val_tn: 56886.0000 - val_tp: 0.0000e+00
Epoch 3/3000
1781/1781 - 3s - 2ms/step - fn: 267.0000 - fp: 84976.0000 - loss: 6.1029e-06 - precision: 0.0018 - recall: 0.3597 - tn: 142453.0000 - tp: 150.0000 - val_fn: 0.0000e+00 - val_fp: 56886.0000 - val_loss: 0.7361 - val_precision: 0.0013 - val_recall: 1.0000 - val_tn: 0.0000e+00 - val_tp: 75.0000
Epoch 4/3000
1781/1781 - 3s - 2ms/step - fn: 212.0000 - fp: 107446.0000 - los

<keras.src.callbacks.history.History at 0x73e1c47259d0>