In [1]:
import os
import sys 
os.chdir('..')
sys.path.insert(0, os.getcwd())
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras import backend as K 
import glob

from Data_loader import load_subjects_from_json, get_all_npy_paths_by_group, base_folders
from AE_pipeline import (
    convert_npy_to_tfrecord,
    create_tfrecord_dataset,
    write_sharded_tfrecord,
    make_monolithic_ds,
    build_lstm_autoencoder,
    train_autoencoder,
    evaluate_and_detect,
    extract_and_save_latents,
    n_timesteps,
    NUM_BIOMECHANICAL_VARIABLES,
    _parse_cycle,
    BATCH_SIZE
)

2025-06-16 10:31:11.285648: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-06-16 10:31:11.304294: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-06-16 10:31:11.308855: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-06-16 10:31:11.321453: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Mixed precision enabled


In [2]:
#Load subjects lists 
train_subjects = {
    "G01": load_subjects_from_json("G01_train_subjects.json"),
    "G03": load_subjects_from_json("G03_train_subjects.json")
}
val_subjects = {
    "G01": load_subjects_from_json("G01_validation_subjects.json"),
    "G03": load_subjects_from_json("G03_validation_subjects.json")
}
test_subjects = {
    "G01": load_subjects_from_json("G01_test_subjects.json"),
    "G03": load_subjects_from_json("G03_test_subjects.json")
}


In [3]:
# Generate routes .npy
train_npy = get_all_npy_paths_by_group(train_subjects, base_folders)
val_npy   = get_all_npy_paths_by_group(val_subjects,   base_folders)
test_npy  = get_all_npy_paths_by_group(test_subjects,  base_folders)

print(f"Train .npy: {len(train_npy)} files")
print(f" Val  .npy: {len(val_npy)} files")
print(f" Test .npy: {len(test_npy)} files")


Train .npy: 932 files
 Val  .npy: 196 files
 Test .npy: 169 files


In [4]:
#Generar SHARDS para TRAIN 
shards_dir = "train_shards"
if not os.path.isdir(shards_dir):
    write_sharded_tfrecord(
        npy_paths=train_npy,
        output_dir=shards_dir,
        shard_size=5_000
    )
    print(f"→ Shards generados en: {shards_dir}/")
else:
    print(f"→ Shards ya existen en: {shards_dir}/")

→ Shards ya existen en: train_shards/


In [5]:
#Convertir VAL y TEST a TFRecord monolítico 
for split, npy_list in [("val", val_npy), ("test", test_npy)]:
    tfp = f"{split}_cycles.tfrecord.gz"
    if not os.path.exists(tfp):
        convert_npy_to_tfrecord(npy_list, tfp)
        print(f"Converted → {tfp}")
    else:
        print(f"Skipping (already exists) → {tfp}")

Skipping (already exists) → val_cycles.tfrecord.gz
Skipping (already exists) → test_cycles.tfrecord.gz


In [6]:
#Create tf.data.Dataset
# 3a) Lista de archivos shard
shard_files = sorted(glob.glob(os.path.join(shards_dir, "*.tfrecord.gz")))

# 3b) Pipeline shard-aware
train_ds = (
   tf.data.Dataset
      .list_files(shard_files, shuffle=True)
      .interleave(
         lambda f: tf.data.TFRecordDataset(f, compression_type="GZIP"),
         cycle_length=4,
         num_parallel_calls=tf.data.AUTOTUNE
      )
      .map(_parse_cycle, num_parallel_calls=tf.data.AUTOTUNE)
      .shuffle(5_000, seed=42)
      .batch(BATCH_SIZE, drop_remainder=True)
      .prefetch(tf.data.AUTOTUNE)
)

# VAL y TEST: dataset monolítico


val_ds  = make_monolithic_ds("val_cycles.tfrecord.gz")
test_ds = make_monolithic_ds("test_cycles.tfrecord.gz")

print(f"→ train_ds: {train_ds}")
print(f"→ val_ds:   {val_ds}")
print(f"→ test_ds:  {test_ds}")


I0000 00:00:1750080679.836742  770394 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1750080679.876470  770394 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1750080679.876659  770394 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1750080679.877612  770394 cuda_executor.cc:1015] successful NUMA node read from SysFS ha

→ train_ds: <_PrefetchDataset element_spec=(TensorSpec(shape=(32, 100, 321), dtype=tf.float32, name=None), TensorSpec(shape=(32, 100, 321), dtype=tf.float32, name=None))>
→ val_ds:   <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 100, 321), dtype=tf.float32, name=None), TensorSpec(shape=(None, 100, 321), dtype=tf.float32, name=None))>
→ test_ds:  <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 100, 321), dtype=tf.float32, name=None), TensorSpec(shape=(None, 100, 321), dtype=tf.float32, name=None))>


In [None]:
# Optional from a batch validate if still NaN 
for x_batch, y_batch in train_ds.take(1):
      import tensorflow as tf
      print("Input  ◂ min:", tf.reduce_min(x_batch).numpy(),
            "max:", tf.reduce_max(x_batch).numpy(),
            "mean:", tf.reduce_mean(x_batch).numpy(),
            "std:", tf.math.reduce_std(x_batch).numpy())
      print("Target ◂", 
            tf.reduce_min(y_batch).numpy(), tf.reduce_max(y_batch).numpy())
      # Comprueba si hay NaN/Inf
      print("Any NaN in x?", tf.reduce_any(tf.math.is_nan(x_batch)).numpy())
      print("Any Inf in x?", tf.reduce_any(tf.math.is_inf(x_batch)).numpy())
      break


Input  ◂ min: -12.165039 max: 15.267122 mean: -0.009751429 std: 1.0221982
Target ◂ -12.165039 15.267122
Any NaN in x? False
Any Inf in x? False


In [None]:
#Build and train the Autoencoder
# Hiperparameters 
def r2(y_true, y_pred):
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    ss_total = K.sum(K.square(y_true - K.mean(y_true)))
    ss_residual = K.sum(K.square(y_true - y_pred))
    return 1 - (ss_residual / ss_total)

optimizers = {
    #"adam": tf.keras.optimizers.Adam(learning_rate=1e-4),
    "adamw": tf.keras.optimizers.AdamW(learning_rate=1e-5),
    #"sgd": tf.keras.optimizers.SGD(learning_rate=1e-4)
}


#run_id = "32_tanh_lr1e4_50ep"
n_timesteps = 100
n_vars = 321
latent_dim = 16
enc_activation = 'tanh'
dec_activation = 'tanh'
dense_activation = 'linear'
recurrent_activation = 'sigmoid'
dropout = 0.2
recurrent_dropout = 0.2
lr = 1e-5
epochs = 30

all_histories = {}


for opt_name, optimizer in optimizers.items():
    run_id = f"16_{opt_name}_lr1e5_50ep"
    print(f"---- Ejecutando experimento: {run_id} ----")
    model = build_lstm_autoencoder(
        n_timesteps=n_timesteps,
        n_vars=n_vars,
        latent_dim=latent_dim,
        enc_activation=enc_activation,
        dec_activation=dec_activation,
        dense_activation=dense_activation,
        recurrent_activation=recurrent_activation,
        dropout=dropout,
        recurrent_dropout=recurrent_dropout,
        lr=1e-4  
    )

    model.compile(
        optimizer=optimizer, 
        loss='mse', 
        metrics=[tf.keras.metrics.RootMeanSquaredError(), r2]  # Usamos la función personalizada r2 aquí
    )

    history = train_autoencoder(model,
        train_ds,
        val_ds,
        run_id,
        epochs
    )
    all_histories[run_id] = history.history
        

---- Ejecutando experimento: 16_adamw_lr1e5_50ep ----
Epoch 1/30
   1726/Unknown [1m364s[0m 206ms/step - loss: 1.9919 - r2: 0.0038 - root_mean_squared_error: 1.0806

In [8]:
model_path = 'saved_models/best_ae_32_tanh_lr1e4_50ep.keras'  
model = load_model(model_path)

In [9]:
#Evaluation
all_losses, threshold = evaluate_and_detect(model, test_ds)


2025-06-12 11:21:33.181680: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


Test reconstruction MSE: 0.583976
Detected 4667 anomalies out of 35115 (threshold=0.871904)


2025-06-12 11:22:27.171544: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [None]:
#Latent characteristics 
latents = extract_and_save_latents(model, test_ds, output_path="latent_features_test.npy")
