In [1]:
# --- OLD WORLD (tf.keras) ---
import os, sys
from tensorflow import keras as tfk

from rtdl_num_embeddings_tf_old import (
    LinearEmbeddings,
    LinearReLUEmbeddings,
    PeriodicEmbeddings,
)

old_path = "../../models/model_size_investigation_optuna_09122025_wrongkeras"
old_name = "data_d1_bootstrap_b1_model_init1_train_size_0.1_neg.keras"
old = tfk.models.load_model(
    os.path.join(old_path, old_name),
    custom_objects={
        "LinearEmbeddings": LinearEmbeddings,
        "LinearReLUEmbeddings": LinearReLUEmbeddings,
        "PeriodicEmbeddings": PeriodicEmbeddings,
    },
)


2025-09-18 14:35:40.369627: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-09-18 14:35:40.369676: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-09-18 14:35:40.371232: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-09-18 14:35:40.386049: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-09-18 14:35:47.030345: I tensorflow/core

In [2]:
weights_path = "data_d1_bootstrap_b1_model_init1_train_size_0.1_neg.weights.h5"
old.save_weights(os.path.join(old_path, weights_path))   # writes HDF5 weights


In [3]:
# --- NEW WORLD (Keras 3 / keras-core) ---
import os
os.environ.pop("KERAS_BACKEND", None)  # or set to "jax"/"tensorflow"/"torch" explicitly
import keras_core as keras

# import the Keras-3 versions of the layers
from rtdl_num_embeddings_tf import (
    LinearEmbeddings as LinearEmbeddings,
    LinearReLUEmbeddings as LinearReLUEmbeddings,
    PeriodicEmbeddings as PeriodicEmbeddings,
)

def build_model(input_dim, n_layers, units, embedding_method, embed_dim=12, n_bins=48):
    print(f"Building model with embedding {embedding_method}, {n_layers} layers, {units} units per layer, embed_dim {embed_dim}, and n_bins {n_bins}")

    model = keras.Sequential([keras.Input(shape=(input_dim,), dtype="float32")])

    # Tabular embedding layer
    if embedding_method == "linear":
        model.add(LinearEmbeddings(input_dim, embed_dim))
        model.add(keras.layers.Flatten())
    elif embedding_method == "linear_relu":
        model.add(LinearReLUEmbeddings(input_dim, embed_dim))
        model.add(keras.layers.Flatten())
    elif embedding_method == "periodic":
        # Defaults: k=64, sigma=0.02, activation=True (you can change)
        model.add(PeriodicEmbeddings(input_dim, embed_dim))
        model.add(keras.layers.Flatten())
    else:
        # No embedding, use raw inputs
        pass

    # If you’re using SELU, pair with lecun_normal + AlphaDropout (recommended for SELU)
    for _ in range(n_layers):
        model.add(keras.layers.Dense(units, activation="selu", kernel_initializer="lecun_normal"))

    model.add(keras.layers.Dense(32, activation="linear"))
    return model

# Set these to your actual hyperparameters
n_layers = 6
n_units = 4096
embedding_method = "linear_relu"  # "none", "linear_relu", "periodic", "piecewise_linear_relu"
embed_dim = 12
n_bins = 48

new = build_model(
        input_dim=8,
        n_layers=n_layers, 
        units=n_units,
        embedding_method=embedding_method,
        embed_dim=embed_dim,
        n_bins=n_bins
    )
new.summary()


Using TensorFlow backend


Building model with embedding linear_relu, 6 layers, 4096 units per layer, embed_dim 12, and n_bins 48


In [4]:
# Keras 3 will match by name/shape for H5 weights
status = new.load_weights(os.path.join(old_path, weights_path), skip_mismatch=False)
print(status)  # optional; shows matched variables


None


In [12]:
print("\n--- OLD (tf.keras) variable names ---")
for v in old.weights:
    print(v.name, v.shape)

print("\n--- NEW (keras3) variable names ---")
for v in new.weights:
    print(v.name, v.shape)



--- OLD (tf.keras) variable names ---
linear_re_lu_embeddings/linear_embeddings/weight:0 (8, 12)
linear_re_lu_embeddings/linear_embeddings/bias:0 (8, 12)
dense/kernel:0 (96, 4096)
dense/bias:0 (4096,)
dense_1/kernel:0 (4096, 4096)
dense_1/bias:0 (4096,)
dense_2/kernel:0 (4096, 4096)
dense_2/bias:0 (4096,)
dense_3/kernel:0 (4096, 4096)
dense_3/bias:0 (4096,)
dense_4/kernel:0 (4096, 4096)
dense_4/bias:0 (4096,)
dense_5/kernel:0 (4096, 4096)
dense_5/bias:0 (4096,)
dense_6/kernel:0 (4096, 32)
dense_6/bias:0 (32,)

--- NEW (keras3) variable names ---
weight (8, 12)
bias (8, 12)
kernel (96, 4096)
bias (4096,)
kernel (4096, 4096)
bias (4096,)
kernel (4096, 4096)
bias (4096,)
kernel (4096, 4096)
bias (4096,)
kernel (4096, 4096)
bias (4096,)
kernel (4096, 4096)
bias (4096,)
kernel (4096, 32)
bias (32,)


In [14]:
import numpy as np

def compare_weights(old_model, new_model, atol=1e-6, rtol=1e-6):
    # backend-agnostic to-numpy
    try:
        from keras_core import ops as kops
        to_np = lambda v: kops.convert_to_numpy(v)
    except Exception:
        to_np = lambda v: v.numpy()

    old = {w.name: to_np(w) for w in old_model.weights}
    new = {w.name: to_np(w) for w in new_model.weights}

    missing = sorted(set(old) - set(new))
    diffs = []
    for name in sorted(set(old) & set(new)):
        a, b = old[name], new[name]
        if not np.allclose(a, b, atol=atol, rtol=rtol):
            diffs.append((name, a.shape, float(np.max(np.abs(a-b)))))

    return missing, diffs

missing, diffs = compare_weights(old, new)
print("Missing in new:", missing)
print("Mismatched:", diffs[:5])  # show a few if any

x = np.random.randn(8, old.input_shape[-1]).astype("float32")
y_old = old.predict(x, verbose=0)
y_new = new.predict(x, verbose=0)
print("max |Δ| =", np.max(np.abs(y_old - y_new)))

Missing in new: ['dense/bias:0', 'dense/kernel:0', 'dense_1/bias:0', 'dense_1/kernel:0', 'dense_2/bias:0', 'dense_2/kernel:0', 'dense_3/bias:0', 'dense_3/kernel:0', 'dense_4/bias:0', 'dense_4/kernel:0', 'dense_5/bias:0', 'dense_5/kernel:0', 'dense_6/bias:0', 'dense_6/kernel:0', 'linear_re_lu_embeddings/linear_embeddings/bias:0', 'linear_re_lu_embeddings/linear_embeddings/weight:0']
Mismatched: []


2025-09-18 14:42:00.680983: I external/local_xla/xla/service/service.cc:168] XLA service 0x8e29a90 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-09-18 14:42:00.681008: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 2070, Compute Capability 7.5
2025-09-18 14:42:00.689633: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-09-18 14:42:00.737505: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8904


max |Δ| = 3.874302e-07


I0000 00:00:1758242521.174109  621717 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1758242521.183270  621717 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


In [15]:
new_path = "data_d1_bootstrap_b1_model_init1_train_size_0.1_neg.keras"
new_weights_path = "../../models/model_size_investigation_optuna_09122025"
new.save(os.path.join(new_weights_path, new_path))


In [None]:
import os
import keras_core as keras
from rtdl_num_embeddings_tf import (
    LinearEmbeddings as LinearEmbeddings,
    LinearReLUEmbeddings as LinearReLUEmbeddings,
    PeriodicEmbeddings as PeriodicEmbeddings,
)
new_weights_path = "../../models/model_size_investigation_optuna_09122025"
new_path = "data_d1_bootstrap_b1_model_init1_train_size_0.1_neg.keras"

new2 = keras.models.load_model(os.path.join(new_weights_path, new_path))
new2.summary()



# Now let's do it for every file in the directory

In [1]:
import os
import sys
import numpy as np
from tensorflow import keras as tfk
from rtdl_num_embeddings_tf_old import LinearEmbeddings as LinearEmbeddings_old
from rtdl_num_embeddings_tf_old import LinearReLUEmbeddings as LinearReLUEmbeddings_old
from rtdl_num_embeddings_tf_old import PeriodicEmbeddings as PeriodicEmbeddings_old

os.environ.pop("KERAS_BACKEND", None)  # or set to "jax"/"tensorflow"/"torch" explicitly
import keras_core as keras

# import the Keras-3 versions of the layers
from rtdl_num_embeddings_tf import (
    LinearEmbeddings as LinearEmbeddings,
    LinearReLUEmbeddings as LinearReLUEmbeddings,
    PeriodicEmbeddings as PeriodicEmbeddings,
)

def build_model(input_dim, n_layers, units, embedding_method, embed_dim=12, n_bins=48):
    print(f"Building model with embedding {embedding_method}, {n_layers} layers, {units} units per layer, embed_dim {embed_dim}, and n_bins {n_bins}")

    model = keras.Sequential([keras.Input(shape=(input_dim,), dtype="float32")])

    # Tabular embedding layer
    if embedding_method == "linear":
        model.add(LinearEmbeddings(input_dim, embed_dim))
        model.add(keras.layers.Flatten())
    elif embedding_method == "linear_relu":
        model.add(LinearReLUEmbeddings(input_dim, embed_dim))
        model.add(keras.layers.Flatten())
    elif embedding_method == "periodic":
        # Defaults: k=64, sigma=0.02, activation=True (you can change)
        model.add(PeriodicEmbeddings(input_dim, embed_dim))
        model.add(keras.layers.Flatten())
    else:
        # No embedding, use raw inputs
        pass

    # If you’re using SELU, pair with lecun_normal + AlphaDropout (recommended for SELU)
    for _ in range(n_layers):
        model.add(keras.layers.Dense(units, activation="selu", kernel_initializer="lecun_normal"))

    model.add(keras.layers.Dense(32, activation="linear"))
    return model

2025-09-18 15:20:33.716655: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-09-18 15:20:33.716688: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-09-18 15:20:33.718205: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-09-18 15:20:33.726799: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Using TensorFlow backend


In [None]:
datas = ["d1", "d2", "d3"]
inits = ["init1", "init2", "init3"]
sizes = [0.1, 0.3, 0.5, 0.7, 0.9]
old_path = "../../models/model_size_investigation_optuna_09122025_wrongkeras"
new_weights_path = "../../models/model_size_investigation_optuna_09122025"

for data in datas:
    for init in inits:
        for size in sizes:
            filename = f"data_{data}_bootstrap_b1_model_{init}_train_size_{size}_neg"
            print (f"\nProcessing {filename}...")

            old_name = filename + ".keras"
            old = tfk.models.load_model(
                os.path.join(old_path, old_name),
                custom_objects={
                    "LinearEmbeddings": LinearEmbeddings_old,
                    "LinearReLUEmbeddings": LinearReLUEmbeddings_old,
                    "PeriodicEmbeddings": PeriodicEmbeddings_old,
                },
            )

            weights_path = filename + ".weights.h5"
            old.save_weights(os.path.join(old_path, weights_path))   # writes HDF5 weights
            
            # Set these to your actual hyperparameters
            n_layers = 6
            n_units = 4096
            embedding_method = "linear_relu"  # "none", "linear_relu", "periodic", "piecewise_linear_relu"
            embed_dim = 12
            n_bins = 48

            new = build_model(
                input_dim=8,
                n_layers=n_layers, 
                units=n_units,
                embedding_method=embedding_method,
                embed_dim=embed_dim,
                n_bins=n_bins
            )

            status = new.load_weights(os.path.join(old_path, weights_path), skip_mismatch=False)
            print(status)  # optional; shows matched variables

            x = np.random.randn(8, old.input_shape[-1]).astype("float32")
            y_old = old.predict(x, verbose=0)
            y_new = new.predict(x, verbose=0)
            print("max |Δ| =", np.max(np.abs(y_old - y_new)))

            new_path = filename + ".keras"
            new.save(os.path.join(new_weights_path, new_path))

            del old, new, y_old, y_new, x


Processing data_d1_bootstrap_b1_model_init2_train_size_0.1_neg...


2025-09-18 15:20:38.357968: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6638 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2070, pci bus id: 0000:1b:00.0, compute capability: 7.5


Building model with embedding linear_relu, 6 layers, 4096 units per layer, embed_dim 12, and n_bins 48
None


2025-09-18 15:21:20.136104: I external/local_xla/xla/service/service.cc:168] XLA service 0x829f160 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-09-18 15:21:20.136133: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 2070, Compute Capability 7.5
2025-09-18 15:21:20.144805: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-09-18 15:21:20.164769: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8904
I0000 00:00:1758244880.537802  629841 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1758244880.545791  629841 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


max |Δ| = 5.9604645e-07

Processing data_d1_bootstrap_b1_model_init2_train_size_0.3_neg...
Building model with embedding linear_relu, 6 layers, 4096 units per layer, embed_dim 12, and n_bins 48
None


W0000 00:00:1758244922.457933  629839 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


max |Δ| = 4.172325e-07

Processing data_d1_bootstrap_b1_model_init2_train_size_0.5_neg...
Building model with embedding linear_relu, 6 layers, 4096 units per layer, embed_dim 12, and n_bins 48
None


W0000 00:00:1758244963.778621  629838 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


max |Δ| = 4.7683716e-07

Processing data_d1_bootstrap_b1_model_init2_train_size_0.7_neg...
Building model with embedding linear_relu, 6 layers, 4096 units per layer, embed_dim 12, and n_bins 48
None


W0000 00:00:1758245004.226229  629839 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


max |Δ| = 4.172325e-07
