In [1]:
import os
import sys 
os.chdir('..')
sys.path.insert(0, os.getcwd())
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras import backend as K 
import glob

from Data_loader import load_subjects_from_json, get_all_npy_paths_by_group, base_folders
from AE_pipeline import (
    convert_npy_to_tfrecord,
    create_tfrecord_dataset,
    write_sharded_tfrecord,
    make_monolithic_ds,
    build_lstm_autoencoder,
    train_autoencoder,
    evaluate_and_detect,
    extract_and_save_latents,
    n_timesteps,
    NUM_BIOMECHANICAL_VARIABLES,
    _parse_cycle,
    BATCH_SIZE,
    reconstruct_and_evaluate
)

2025-06-17 14:10:55.934929: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-06-17 14:10:55.951469: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-06-17 14:10:55.956514: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-06-17 14:10:55.968530: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Mixed precision enabled


In [2]:
#Load subjects lists 
train_subjects = {
    "G01": load_subjects_from_json("G01_train_subjects.json"),
    "G03": load_subjects_from_json("G03_train_subjects.json")
}
val_subjects = {
    "G01": load_subjects_from_json("G01_validation_subjects.json"),
    "G03": load_subjects_from_json("G03_validation_subjects.json")
}
test_subjects = {
    "G01": load_subjects_from_json("G01_test_subjects.json"),
    "G03": load_subjects_from_json("G03_test_subjects.json")
}


In [3]:
# Generate routes .npy
train_npy = get_all_npy_paths_by_group(train_subjects, base_folders)
val_npy   = get_all_npy_paths_by_group(val_subjects,   base_folders)
test_npy  = get_all_npy_paths_by_group(test_subjects,  base_folders)

print(f"Train .npy: {len(train_npy)} files")
print(f" Val  .npy: {len(val_npy)} files")
print(f" Test .npy: {len(test_npy)} files")


Train .npy: 932 files
 Val  .npy: 196 files
 Test .npy: 169 files


In [4]:
#Generar SHARDS para TRAIN 
shards_dir = "train_shards"
if not os.path.isdir(shards_dir):
    write_sharded_tfrecord(
        npy_paths=train_npy,
        output_dir=shards_dir,
        shard_size=5_000
    )
    print(f"→ Shards generados en: {shards_dir}/")
else:
    print(f"→ Shards ya existen en: {shards_dir}/")

→ Shards ya existen en: train_shards/


In [5]:
#Convertir VAL y TEST a TFRecord monolítico 
for split, npy_list in [("val", val_npy), ("test", test_npy)]:
    tfp = f"{split}_cycles.tfrecord.gz"
    if not os.path.exists(tfp):
        convert_npy_to_tfrecord(npy_list, tfp)
        print(f"Converted → {tfp}")
    else:
        print(f"Skipping (already exists) → {tfp}")

Skipping (already exists) → val_cycles.tfrecord.gz
Skipping (already exists) → test_cycles.tfrecord.gz


In [6]:
#Create tf.data.Dataset
# 3a) Lista de archivos shard
shard_files = sorted(glob.glob(os.path.join(shards_dir, "*.tfrecord.gz")))

# 3b) Pipeline shard-aware
train_ds = (
   tf.data.Dataset
      .list_files(shard_files, shuffle=True)
      .interleave(
         lambda f: tf.data.TFRecordDataset(f, compression_type="GZIP"),
         cycle_length=4,
         num_parallel_calls=tf.data.AUTOTUNE
      )
      .map(_parse_cycle, num_parallel_calls=tf.data.AUTOTUNE)
      .shuffle(5_000, seed=42)
      .batch(BATCH_SIZE, drop_remainder=True)
      .prefetch(tf.data.AUTOTUNE)
)

# VAL y TEST: dataset monolítico


val_ds  = make_monolithic_ds("val_cycles.tfrecord.gz")
test_ds = make_monolithic_ds("test_cycles.tfrecord.gz")

print(f"→ train_ds: {train_ds}")
print(f"→ val_ds:   {val_ds}")
print(f"→ test_ds:  {test_ds}")


I0000 00:00:1750180271.254533   20397 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1750180271.293898   20397 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1750180271.294108   20397 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1750180271.295594   20397 cuda_executor.cc:1015] successful NUMA node read from SysFS ha

→ train_ds: <_PrefetchDataset element_spec=(TensorSpec(shape=(256, 100, 321), dtype=tf.float32, name=None), TensorSpec(shape=(256, 100, 321), dtype=tf.float32, name=None))>
→ val_ds:   <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 100, 321), dtype=tf.float32, name=None), TensorSpec(shape=(None, 100, 321), dtype=tf.float32, name=None))>
→ test_ds:  <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 100, 321), dtype=tf.float32, name=None), TensorSpec(shape=(None, 100, 321), dtype=tf.float32, name=None))>


In [None]:
# Optional from a batch validate if still NaN 
for x_batch, y_batch in train_ds.take(1):
      import tensorflow as tf
      print("Input  ◂ min:", tf.reduce_min(x_batch).numpy(),
            "max:", tf.reduce_max(x_batch).numpy(),
            "mean:", tf.reduce_mean(x_batch).numpy(),
            "std:", tf.math.reduce_std(x_batch).numpy())
      print("Target ◂", 
            tf.reduce_min(y_batch).numpy(), tf.reduce_max(y_batch).numpy())
      # Comprueba si hay NaN/Inf
      print("Any NaN in x?", tf.reduce_any(tf.math.is_nan(x_batch)).numpy())
      print("Any Inf in x?", tf.reduce_any(tf.math.is_inf(x_batch)).numpy())
      break


Input  ◂ min: -12.165039 max: 15.267122 mean: -0.009751429 std: 1.0221982
Target ◂ -12.165039 15.267122
Any NaN in x? False
Any Inf in x? False


In [None]:
#Optional Identify # cycles in train and validation 
import numpy as np, math
# 1) Total de ciclos en train/val
total_train_cycles = sum(np.load(p).shape[0] for p in train_npy)
total_val_cycles   = sum(np.load(p).shape[0] for p in val_npy)
print(total_train_cycles)
print(total_val_cycles)

760
165


In [None]:
#Optional steps_per_epoch & validation_steps
BATCH_SIZE = 256
steps_per_epoch    = total_train_cycles // BATCH_SIZE
validation_steps   = total_val_cycles   // BATCH_SIZE
print(steps_per_epoch)
print(validation_steps)

In [None]:
#Build and train the Autoencoder
# Hiperparameters 
def r2(y_true, y_pred):
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    ss_total = K.sum(K.square(y_true - K.mean(y_true)))
    ss_residual = K.sum(K.square(y_true - y_pred))
    return 1 - (ss_residual / ss_total)

run_id = "32_tanh_lr1e4_30ep_AdamW"
n_timesteps = 100
n_vars = 321
latent_dim = 32
epochs = 50
lr_initial   = 1e-4
lr_decay_rate = 0.98
lr_decay_steps = 5000
clipnorm     = 1.0
steps_per_epoch  = 760
validation_steps = 165

model = build_lstm_autoencoder(
    n_timesteps=n_timesteps,
    n_vars=n_vars,
    latent_dim=latent_dim,
    )

history = train_autoencoder(
    model=model,
    train_ds=train_ds,
    val_ds=val_ds,
    run_id=run_id,
    epochs=epochs,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps
    )

        

Epoch 1/50
[1m760/760[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m226s[0m 284ms/step - loss: 1.2599 - r2: 0.0092 - root_mean_squared_error: 1.0231 - val_loss: 1.0869 - val_r2: 0.0626 - val_root_mean_squared_error: 1.0079 - learning_rate: 1.0000e-04
Epoch 2/50


2025-06-17 14:23:34.139692: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-06-17 14:23:34.139775: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]


[1m760/760[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 37ms/step - loss: 0.0000e+00 - r2: 0.0000e+00 - root_mean_squared_error: 0.0000e+00 - val_loss: 1.0869 - val_r2: 0.0626 - val_root_mean_squared_error: 1.0079 - learning_rate: 1.0000e-04
Epoch 3/50
[1m760/760[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m211s[0m 276ms/step - loss: 0.8848 - r2: 0.1501 - root_mean_squared_error: 0.9046 - val_loss: 0.8821 - val_r2: 0.2496 - val_root_mean_squared_error: 0.9074 - learning_rate: 1.0000e-04
Epoch 4/50


2025-06-17 14:27:33.617079: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]


[1m760/760[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 37ms/step - loss: 0.0000e+00 - r2: 0.0000e+00 - root_mean_squared_error: 0.0000e+00 - val_loss: 0.8821 - val_r2: 0.2496 - val_root_mean_squared_error: 0.9074 - learning_rate: 1.0000e-04
Epoch 5/50
[1m760/760[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m215s[0m 281ms/step - loss: 0.7796 - r2: 0.2895 - root_mean_squared_error: 0.8495 - val_loss: 0.8321 - val_r2: 0.2973 - val_root_mean_squared_error: 0.8807 - learning_rate: 1.0000e-04
Epoch 6/50
[1m760/760[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 37ms/step - loss: 0.0000e+00 - r2: 0.0000e+00 - root_mean_squared_error: 0.0000e+00 - val_loss: 0.8321 - val_r2: 0.2973 - val_root_mean_squared_error: 0.8807 - learning_rate: 1.0000e-04
Epoch 7/50
[1m760/760[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m216s[0m 283ms/step - loss: 0.7678 - r2: 0.3356 - root_mean_squared_error: 0.8420 - val_loss: 0.8078 - val_r2: 0.3203 - val_root_mean_squared_error: 0.8689 - l

2025-06-17 14:35:40.544900: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]


[1m760/760[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 37ms/step - loss: 0.0000e+00 - r2: 0.0000e+00 - root_mean_squared_error: 0.0000e+00 - val_loss: 0.8078 - val_r2: 0.3203 - val_root_mean_squared_error: 0.8689 - learning_rate: 1.0000e-04
Epoch 9/50
[1m760/760[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m217s[0m 284ms/step - loss: 0.7350 - r2: 0.3576 - root_mean_squared_error: 0.8244 - val_loss: 0.7964 - val_r2: 0.3289 - val_root_mean_squared_error: 0.8638 - learning_rate: 1.0000e-04
Epoch 10/50
[1m760/760[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 37ms/step - loss: 0.0000e+00 - r2: 0.0000e+00 - root_mean_squared_error: 0.0000e+00 - val_loss: 0.7964 - val_r2: 0.3289 - val_root_mean_squared_error: 0.8638 - learning_rate: 1.0000e-04
Epoch 11/50
[1m 60/760[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m2:56[0m 253ms/step - loss: 0.6860 - r2: 0.3741 - root_mean_squared_error: 0.7971

In [None]:
model_path = 'saved_models/best_ae_16_adamw_50ep.keras'  
model = load_model(model_path)

In [9]:
#Evaluation
all_losses, threshold = evaluate_and_detect(model, test_ds)


2025-06-12 11:21:33.181680: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


Test reconstruction MSE: 0.583976
Detected 4667 anomalies out of 35115 (threshold=0.871904)


2025-06-12 11:22:27.171544: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [None]:
#Latent characteristics 
latents = extract_and_save_latents(model, test_ds, output_path="latent_features_test.npy")


In [None]:
#Reconstruct from latent and measure error 
import tensorflow as tf
import numpy as np
from reconstruct_module import reconstruct_and_evaluate  # ajusta al nombre del módulo

# 1) Usamos test_ds en lugar de cargar un archivo .npy
#  - Ya tienes test_ds previamente cargado, solo usas ese dataset

# Define los índices de las variables que te interesan
attrs = [0, 10, 50]

# Crear un Dataset iterador para obtener los datos de test en batches
# Usamos un batch size más pequeño para evaluar (si lo necesitas)
batch_size = 32

# Recorremos el dataset por lotes
all_metrics = []
all_recon = []

for batch_data, _ in test_ds.take(-1):  # Recorre todo el test_ds
    batch_data = batch_data.numpy()  # Convertir de Tensor a NumPy array si es necesario

    # Llama a la función de reconstrucción y evaluación
    metrics, recon_subset = reconstruct_and_evaluate(
        model_path="saved_models/ae_lstm_experiment42.keras",
        data=batch_data,  # Aquí pasamos cada batch del test_ds
        attr_idx=attrs,
        batch_size=batch_size
    )

    all_metrics.append(metrics)  # Guarda los resultados
    all_recon.append(recon_subset)  # Guarda las reconstrucciones

# Concatenar resultados de todas las épocas
all_metrics = {key: np.concatenate([m[key] for m in all_metrics], axis=0) for key in all_metrics[0].keys()}
all_recon = np.concatenate(all_recon, axis=0)

# 2) Guarda la reconstrucción y muestra los errores
np.save("reconstructed_attrs.npy", all_recon)

print("Reconstructions saved to reconstructed_attrs.npy\n")
print("Error por atributo:")
for i, idx in enumerate(attrs):
    print(f"  Atributo {idx}:  RMSE = {all_metrics['rmse'][i]:.4f},  MSE = {all_metrics['mse'][i]:.4f},  MAE = {all_metrics['mae'][i]:.4f}")








parser = argparse.ArgumentParser(description="Evaluate AE reconstruction error")
parser.add_argument("--model", required=True, help="Path to saved Keras model")
parser.add_argument("--data", required=True, help="Path to .npy array")
parser.add_argument("--attrs", nargs="+", type=int, required=True,
                        help="Indices of attributes to reconstruct")
parser.add_argument("--out", default="reconstructed_attrs.npy",
                        help="File to save reconstructed attributes")
args = parser.parse_args()

# Load data
data = np.load(args.data).astype(np.float32)
metrics, recon_subset = reconstruct_and_evaluate(args.model, data, args.attrs, batch_size=32)

np.save(args.out, recon_subset)

print("Reconstruction error per attribute:")
for i, idx in enumerate(args.attrs):
    print(f"  Attr {idx}: RMSE={metrics['rmse'][i]:.6f} MSE={metrics['mse'][i]:.6f}")
