In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import keras_tuner as kt
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
cred_data = pd.read_csv("datos/creditcard.csv")

In [3]:
cred_data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [4]:
cred_data.shape

(284807, 31)

In [5]:
X_train = cred_data[cred_data['Class'] == 0].drop(['Class'], axis=1).values
X_train, X_val = train_test_split(X_train, 
                                   test_size=0.25, 
                                   random_state=42)

X_test = cred_data[cred_data['Class'] == 1].drop(['Class'], axis=1).values

In [6]:
scaler = RobustScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [7]:
features = X_train.shape[1]

In [8]:
train_ds = tf.data.Dataset.from_tensor_slices((X_train, X_train)).batch(256).shuffle(1024).prefetch(tf.data.AUTOTUNE)
val_ds = tf.data.Dataset.from_tensor_slices((X_val, X_val)).batch(256).prefetch(tf.data.AUTOTUNE)
test_ds = tf.data.Dataset.from_tensor_slices((X_test, X_test)).batch(256).prefetch(tf.data.AUTOTUNE)

2026-01-27 01:59:36.532008: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M4
2026-01-27 01:59:36.532028: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2026-01-27 01:59:36.532034: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.92 GB
2026-01-27 01:59:36.532060: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2026-01-27 01:59:36.532070: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [9]:
def create_autoencoder(features, units_1, units_2):
    model = Sequential()
    model.add(Input(shape=(features,)))
    model.add(Dense(units_1, activation='tanh'))
    model.add(Dropout(0.1))
    model.add(Dense(units_2, activation='tanh'))
    model.add(Dense(2, activation='linear'))
    model.add(Dense(units_2, activation='tanh'))
    model.add(Dropout(0.1))
    model.add(Dense(units_1, activation='tanh'))
    model.add(Dense(features, activation='linear'))

    model.compile(
        optimizer=Adam(learning_rate=5e-3), 
        loss='huber_loss',
        metrics=['mse']
    )

    return model

In [10]:
def build_model(hp, features):
    units_1 = hp.Int("units_1", 16, 32, step=8)
    units_2 = hp.Int("units_2", 4, 8, step=2)

    model = create_autoencoder(features, units_1, units_2)

    return model
    

In [11]:
tuner = kt.Hyperband(
    hypermodel=lambda hp: build_model(hp, features),
    objective="val_loss",
    max_epochs=25,
    factor=5,
    directory="hyperband_dir",
    project_name="hyperband_project",
    overwrite=True,
    seed=42,
)

In [12]:
tuner.search(
    train_ds,
    validation_data=val_ds,
    epochs=100,
    callbacks=[EarlyStopping(monitor='val_loss', patience=5)],
    verbose=1,
)

Trial 9 Complete [00h 00m 05s]
val_loss: 0.31044334173202515

Best val_loss So Far: 0.3014334440231323
Total elapsed time: 00h 00m 45s


In [13]:
best_hps = tuner.get_best_hyperparameters(1)[0]
autoencoder = create_autoencoder(
    features,
    best_hps.get("units_1"),
    best_hps.get("units_2"),
)

In [14]:
autoencoder.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 24)                744       
                                                                 
 dropout_2 (Dropout)         (None, 24)                0         
                                                                 
 dense_7 (Dense)             (None, 8)                 200       
                                                                 
 dense_8 (Dense)             (None, 2)                 18        
                                                                 
 dense_9 (Dense)             (None, 8)                 24        
                                                                 
 dropout_3 (Dropout)         (None, 8)                 0         
                                                                 
 dense_10 (Dense)            (None, 24)               

In [15]:
autoencoder.fit(
    train_ds,
    validation_data=val_ds,
    epochs=100,
    callbacks=[EarlyStopping(monitor='val_loss', patience=5)],
    verbose=1,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100


<keras.src.callbacks.History at 0x141b661d0>

In [16]:
X_val_pred = autoencoder.predict(val_ds)
val_mae = np.mean(np.abs(X_val - X_val_pred), axis=1)



In [17]:
threshold = np.quantile(val_mae, 0.975)
threshold

1.3729537765382358

In [18]:
X_test_pred = autoencoder.predict(test_ds)
test_mae = np.mean(np.abs(X_test - X_test_pred), axis=1)



In [19]:
y_pred = (test_mae > threshold).astype(int)
print(accuracy_score(np.array([1 for _ in range(len(y_pred))]), y_pred))

0.8028455284552846
