In [1]:
import os
os.environ.pop("TF_USE_LEGACY_KERAS", None)  # ensure we are not forcing tf_keras

import pickle
import numpy as np
import pandas as pd
from sklearn.preprocessing import RobustScaler
import tensorflow as tf
from tensorflow import keras
from keras_tuner.tuners import BayesianOptimization
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# file paths
# input_path: root of the simulated 2D dataset; the loading code further down
#             reads the train/ and val/ pickles from beneath it.
# model_path: output directory that receives the fitted scaler, the tuner's
#             search state, and the saved model files.
input_path = '../../data/simulated/simulated_2d/'
model_path = './model_2d_tuned/'
os.makedirs(model_path, exist_ok=True)  # create the output directory up front; no error if it already exists

# feature parameters
ndim = 2
feature_grid_range = [-4, 4]
feature_grid_nbins = 100


def extract_features(
    data,
    feature_grid_range=feature_grid_range,
    feature_grid_nbins=feature_grid_nbins
):
    """
    Turn one standardized sample set into a min-max scaled density histogram.

    Arguments:
        data: array-like; after squeezing singleton axes it must be either
              - 1D with shape (n_samples,), or
              - 2D with shape (n_samples, 2)
        feature_grid_range: [low, high] limits of the histogram grid
              (the same limits are used on both axes in the 2D case)
        feature_grid_nbins: number of equal-width bins per axis
    Returns:
        features: array of shape (feature_grid_nbins,) for 1D input or
                  (feature_grid_nbins, feature_grid_nbins) for 2D input,
                  rescaled so its smallest value is 0 and its largest is 1
    Raises:
        ValueError: if the squeezed input is neither 1D nor 2D with 2 columns
        AssertionError: if mean/std deviate from 0/1 by 1e-2 or more
    """

    samples = np.asarray(data).squeeze()

    # classify the input once; anything else is rejected immediately
    is_1d = samples.ndim == 1
    is_2d_pair = samples.ndim == 2 and samples.shape[1] == 2
    if not (is_1d or is_2d_pair):
        raise ValueError("Input must be standardized 1D array or 2D array with 2 features")

    # the histogram grid is fixed, so the data must already be standardized
    if is_1d:
        assert np.abs(samples.mean()) < 1e-2, 'Error: data not standardized'
        assert np.abs(samples.std() - 1) < 1e-2, 'Error: data not standardized'
    else:
        assert np.allclose(samples.mean(axis=0), 0, atol=1e-2), 'Error: data not standardized'
        assert np.allclose(samples.std(axis=0), 1, atol=1e-2), 'Error: data not standardized'

    # shared bin edges (reused for both axes of the 2D histogram)
    edges = np.linspace(
        feature_grid_range[0],
        feature_grid_range[1],
        feature_grid_nbins + 1
    )

    if is_1d:
        density, _ = np.histogram(samples, bins=edges, density=True)
    else:
        density, _, _ = np.histogram2d(
            samples[:, 0], samples[:, 1],
            bins=edges,
            density=True
        )

    # min-max scale the densities to [0, 1]
    lowest = density.min()
    highest = density.max()
    return (density - lowest) / (highest - lowest)
    

def _load_pickle(path):
    """Load a single pickled object from `path` and close the file afterwards.

    NOTE(review): unpickling can execute arbitrary code, so this must only be
    pointed at trusted, locally generated files.
    """
    with open(path, 'rb') as f:
        return pickle.load(f)


# load data (each file is opened in a context manager so no handle is leaked)
x_train = _load_pickle(os.path.join(input_path, 'train', 'input_data.pkl'))
y_train = _load_pickle(os.path.join(input_path, 'train', 'target_data.pkl'))
sizes_train = _load_pickle(os.path.join(input_path, 'train', 'sizes.pkl'))
x_val = _load_pickle(os.path.join(input_path, 'val', 'input_data.pkl'))
y_val = _load_pickle(os.path.join(input_path, 'val', 'target_data.pkl'))
sizes_val = _load_pickle(os.path.join(input_path, 'val', 'sizes.pkl'))

# prepare features: one normalized histogram per sample set
x_train = [extract_features(i) for i in x_train]
x_train = np.array(x_train).squeeze()
x_val = [extract_features(i) for i in x_val]
x_val = np.array(x_val).squeeze()

# standardize outputs; the scaler is fitted on the training targets only and
# stored next to the model so predictions can be mapped back later
scaler = RobustScaler()
y_train = scaler.fit_transform(y_train)
y_val = scaler.transform(y_val)
with open(os.path.join(model_path, 'scaler.pkl'), 'wb') as f:
    pickle.dump(scaler, f)

# add channel axis (trailing axis of size 1 expected by the conv layers)
x_train = x_train[..., np.newaxis]
x_val = x_val[..., np.newaxis]


2025-12-14 20:00:53.197129: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Sanity check on the training features: one histogram grid per sample plus the trailing channel axis.
x_train.shape


(13000, 100, 100, 1)

In [3]:
# Same check for the validation features; all axes after the first should match x_train.
x_val.shape


(1000, 100, 100, 1)

In [None]:


def build_model(hp):
    """Build and compile the CNN regressor for one hyperparameter sample.

    Arguments:
        hp: hyperparameter container supplied by the tuner; every tunable
            value is declared on it through `hp.Choice`.
    Returns:
        A compiled `tf.keras.Model` mapping a histogram (with channel axis)
        to a linear output vector of size 3 (ndim == 1) or 6 (otherwise).
        It reports 'mae' and 'mse' as metrics regardless of the sampled loss.
    """
    layers = tf.keras.layers

    # Layer family and tensor shapes follow the module-level `ndim`
    if ndim == 1:
        conv_layer, pooling_layer = layers.Conv1D, layers.MaxPooling1D
        input_dim = (feature_grid_nbins, 1)
        output_dim = 3
    else:
        conv_layer, pooling_layer = layers.Conv2D, layers.MaxPooling2D
        input_dim = (feature_grid_nbins, feature_grid_nbins, 1)
        output_dim = 6

    # Search space (declaration order is kept as-is: it fixes the order in
    # which the hyperparameters are registered with the tuner)
    num_conv_blocks = hp.Choice('num_conv_blocks', [2, 3])  # conv+pool blocks
    kernel_size = hp.Choice('kernel_size', [5, 9, 13])      # conv kernel width
    pool_size = hp.Choice('pool_size', [2, 3])              # pooling window
    dense_units = hp.Choice('dense_units', [32, 64])        # hidden dense width
    dropout_rate = 0.2                                      # fixed, not tuned
    learning_rate = hp.Choice('learning_rate', [1e-2, 5e-3, 1e-3, 5e-4])
    optimizer_choice = hp.Choice('optimizer', ['adam', 'sgd_momentum'])
    loss_choice = hp.Choice('loss', ['mse', 'mae', 'huber'])

    # Convolutional feature extractor: 32 filters in the first block, 64 after
    inputs = layers.Input(shape=input_dim)
    hidden = inputs
    for block_idx in range(num_conv_blocks):
        n_filters = 64 if block_idx else 32
        hidden = conv_layer(
            n_filters,
            kernel_size=kernel_size,
            activation='relu',
            padding='same',
        )(hidden)
        hidden = pooling_layer(pool_size=pool_size, padding='same')(hidden)

    # Regression head
    hidden = layers.Flatten()(hidden)
    hidden = layers.Dense(dense_units, activation='relu')(hidden)
    if dropout_rate > 0.0:
        hidden = layers.Dropout(dropout_rate)(hidden)
    outputs = layers.Dense(output_dim, activation='linear')(hidden)

    model = tf.keras.Model(inputs, outputs)

    # Optimizer: Adam, or SGD with momentum 0.9 for the 'sgd_momentum' choice
    if optimizer_choice == 'adam':
        optimizer_cls, optimizer_kwargs = tf.keras.optimizers.Adam, {}
    else:
        optimizer_cls, optimizer_kwargs = tf.keras.optimizers.SGD, {'momentum': 0.9}
    optimizer = optimizer_cls(learning_rate=learning_rate, **optimizer_kwargs)

    # 'mae' is always tracked so that 'val_mae' is available as an objective
    # whichever loss was sampled
    model.compile(optimizer=optimizer, loss=loss_choice, metrics=['mae', 'mse'])

    return model



# Bayesian search over the hyperparameter space declared in build_model.
tuner = BayesianOptimization(
    build_model,
    objective='val_mae',          # minimize validation MAE
    max_trials=50,                # total configurations to try
    num_initial_points=10,        # random warmup before Bayesian starts
    directory=model_path,
    project_name='rinet_v2_2d_bayesian_search'
)

# No ModelCheckpoint is passed to the search on purpose. A single fixed
# checkpoint path would be overwritten by every trial, and trials differ in
# architecture, so the file would end up holding weights of whichever trial
# ran last (or was interrupted). KerasTuner already stores the best epoch of
# each trial under `directory/project_name`, and the best model's weights are
# written to model_path explicitly once the search has finished.

# Stop a trial when val_mae has not improved for 5 epochs and roll the model
# back to its best epoch.
earlystop_cb = EarlyStopping(
    monitor='val_mae',
    patience=5,
    restore_best_weights=True
)

tuner.search(
    x_train, y_train,
    epochs=50,
    validation_data=(x_val, y_val),
    batch_size=32,
    callbacks=[earlystop_cb],
    verbose=2
)


Trial 15 Complete [00h 01m 04s]
val_mae: 0.32968807220458984

Best val_mae So Far: 0.30388593673706055
Total elapsed time: 00h 20m 48s

Search: Running Trial #16

Value             |Best Value So Far |Hyperparameter
2                 |3                 |num_conv_blocks
5                 |9                 |kernel_size
2                 |2                 |pool_size
32                |32                |dense_units
0.005             |0.01              |learning_rate
adam              |sgd_momentum      |optimizer
mse               |mae               |loss





Epoch 1/50


In [6]:
# Rebuild the best trial's model from the tuner's stored state.
model = tuner.get_best_models(num_models=1)[0]

# evaluate() returns the loss first, followed by the compiled metrics in
# order ('mae', 'mse'). The first value is the training loss (mse / mae /
# huber, depending on the winning trial), so it must not be reported as MAE.
val_loss, val_mae, val_mse = model.evaluate(x_val, y_val, verbose=0)
print("Best model val MAE:", val_mae)

# Write the best model's weights to the weights file in model_path.
model.save_weights(model_path + 'model.weights.h5')


  saveable.load_own_variables(weights_store.get(inner_path))


Best model val MAE: 0.29549986124038696


In [7]:
# Also save the best model via model.save() to a .keras file in model_path,
# in addition to the weights-only file.
model.save(model_path + 'best_model.keras')


In [8]:
# One record per trial known to the oracle: its hyperparameter values plus
# the objective score, stored under 'val_mae'.
rows = [
    {**trial.hyperparameters.values, 'val_mae': trial.score}
    for trial in tuner.oracle.trials.values()
]

# Lowest val_mae first; the table is written to disk and displayed.
df = pd.DataFrame(rows).sort_values('val_mae')
df.to_csv('./model_2d_tuning_summary.csv')
df


Unnamed: 0,num_conv_blocks,kernel_size,pool_size,dense_units,learning_rate,optimizer,loss,val_mae
23,3,13,2,64,0.01,sgd_momentum,mae,0.2955
26,3,13,2,64,0.01,sgd_momentum,huber,0.298483
25,3,9,2,64,0.01,sgd_momentum,mae,0.301143
18,3,9,2,64,0.01,sgd_momentum,huber,0.301453
0,3,9,2,32,0.01,sgd_momentum,mae,0.303886
28,3,9,2,32,0.01,sgd_momentum,mae,0.306661
29,3,9,2,32,0.01,sgd_momentum,mae,0.30712
4,3,13,3,64,0.01,sgd_momentum,mae,0.307196
17,3,13,2,32,0.01,sgd_momentum,huber,0.308504
2,3,9,2,64,0.01,sgd_momentum,mse,0.308909
