In [1]:
pip install pyarrow

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd 
import pyarrow

In [3]:
df = pd.read_parquet("../data/full_dataset_feature_engineering_v2.parquet")

In [4]:
import pickle
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

# Load the scalers and feature list
with open('./pickles/hybrid_feature_scaler.pkl', 'rb') as file:
    feature_scaler = pickle.load(file)

with open('./pickles/hybrid_target_scaler.pkl', 'rb') as file:
    target_scaler = pickle.load(file)

with open('./pickles/hybrid_selected_features.pkl', 'rb') as file:
    selected_features = pickle.load(file)

2025-04-21 05:15:30.055085: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-21 05:15:30.264566: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-21 05:15:30.355602: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745212530.468935   22074 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745212530.508195   22074 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1745212530.785423   22074 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linkin

In [5]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

X_df = df[selected_features].values
y_df = df['return_forward'].values  # Replace with your target column name
original_indexes = df.index.tolist()

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X_df)

# If your target needs scaling too (for regression problems)
y_scaler = MinMaxScaler()
y_scaled = y_scaler.fit_transform(y_df.reshape(-1, 1))

# Define sequence length (time steps to look back)
sequence_length = 12  # Adjust based on your specific problem

# Create sequences for transformer
X_sequences = []
y_sequences = []
sequence_indexes = []

for i in range(len(X_scaled) - sequence_length):
    X_sequences.append(X_scaled[i:i+sequence_length])
    y_sequences.append(y_scaled[i+sequence_length])
    sequence_indexes.append(original_indexes[i+sequence_length])

# Convert to numpy arrays
X_sequences = np.array(X_sequences)
y_sequences = np.array(y_sequences)

# Check the resulting shapes
print(f"X shape: {X_sequences.shape}")  # Should be (samples, sequence_length, num_features)
print(f"y shape: {y_sequences.shape}")  # Should be (samples, 1) or (samples,)

# Split into training and testing sets

X_train, X_test, y_train, y_test ,train_idx, test_idx = train_test_split(
    X_sequences, y_sequences, sequence_indexes, test_size=0.25062, shuffle=False
)

print(f"Total sequences: {len(sequence_indexes)}")
print(f"Training sequences: {len(train_idx)}")
print(f"Testing sequences: {len(test_idx)}")


X shape: (35046, 12, 15)
y shape: (35046, 1)
Total sequences: 35046
Training sequences: 26262
Testing sequences: 8784


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers # Required for Transformer components
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Use scikeras wrapper for Regressor
from scikeras.wrappers import KerasRegressor

from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint, loguniform # For parameter distributions if needed later

# --- Ensure Transformer Components are Defined or Imported ---
# (Assuming positional_encoding and TransformerEncoderBlock are defined as before)
# Helper function for Positional Encoding
def positional_encoding(length, depth):
    # (Implementation from previous example)
    depth = depth / 2
    positions = np.arange(length)[:, np.newaxis]     # (seq, 1)
    depths = np.arange(depth)[np.newaxis, :]/depth   # (1, depth)
    angle_rates = 1 / (10000**depths)                # (1, depth)
    angle_rads = positions * angle_rates             # (pos, depth)
    pos_encoding = np.concatenate(
        [np.sin(angle_rads), np.cos(angle_rads)],
        axis=-1)
    return tf.cast(pos_encoding, dtype=tf.float32)[tf.newaxis, :, :]

# Transformer Encoder Block Layer
class TransformerEncoderBlock(layers.Layer):
    # (Implementation from previous example, including get_config)
    def __init__(self, d_model, num_heads, ff_dim, dropout_rate=0.1, activation='relu', **kwargs):
        super().__init__(**kwargs)
        self.d_model = d_model
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.dropout_rate = dropout_rate
        self.activation = activation
        if d_model % num_heads != 0:
             raise ValueError(f"d_model ({d_model}) must be divisible by num_heads ({num_heads})")
        self.key_dim = d_model // num_heads
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=self.key_dim, dropout=dropout_rate)
        self.ffn = keras.Sequential([layers.Dense(ff_dim, activation=activation), layers.Dense(d_model)])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(dropout_rate)
        self.dropout2 = layers.Dropout(dropout_rate)

    def call(self, inputs, training=False):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layernorm2(out1 + ffn_output)
        return out2

    def get_config(self):
        config = super().get_config()
        config.update({'d_model': self.d_model, 'num_heads': self.num_heads, 'ff_dim': self.ff_dim,
                       'dropout_rate': self.dropout_rate, 'activation': self.activation})
        return config

# --- Ensure Transformer Model Creation Function is Defined or Imported ---
# (Assuming create_transformer_model is defined as before)
def create_transformer_model(
    d_model=128, num_heads=8, num_encoder_layers=4, learning_rate=0.001,
    n_timesteps=20, n_features=5, ff_dim_factor=4, dropout_rate=0.1,
    activation='relu', optimizer_name='adam', loss_function='mean_squared_error'
    ):
    # (Implementation from previous example)
    ff_dim = d_model * ff_dim_factor
    inputs = keras.Input(shape=(n_timesteps, n_features))
    x = layers.TimeDistributed(layers.Dense(d_model))(inputs)
    pos_enc = positional_encoding(length=n_timesteps, depth=d_model)
    x += pos_enc[:, :n_timesteps, :]
    x = layers.Dropout(dropout_rate)(x)
    for _ in range(num_encoder_layers):
        encoder_block = TransformerEncoderBlock(d_model=d_model, num_heads=num_heads, ff_dim=ff_dim,
                                                dropout_rate=dropout_rate, activation=activation)
        x = encoder_block(x)
    x = layers.GlobalAveragePooling1D(data_format="channels_last")(x)
    x = layers.Dropout(dropout_rate)(x)
    outputs = layers.Dense(1, activation="linear", dtype='float32')(x) # Ensure output is float32 for mixed precision
    model = keras.Model(inputs=inputs, outputs=outputs, name=f"Transformer_Reg")
    if optimizer_name.lower() == 'adam': optimizer = Adam(learning_rate=learning_rate)
    else: optimizer = Adam(learning_rate=learning_rate) # Defaulting to Adam
    model.compile(loss=loss_function, optimizer=optimizer, metrics=['mae', 'mse'])
    return model


# --- Function to Perform Randomized Search for Key Transformer Hyperparameters ---

def tune_transformer_hyperparameters(
    X_train: np.ndarray,
    y_train: np.ndarray,
    n_iter: int = 5,
    cv: int = 2,
    # --- Define search ranges for the 3 key parameters ---
    d_model_options: list = [64, 128, 256],
    num_heads_options: list = [4, 8], # Ensure compatibility with d_model options
    num_layers_range: tuple = (2, 7), # Min layers (inclusive), Max layers (exclusive)
    # --- Specify Fixed Parameters for this tuning run ---
    fixed_learning_rate: float = 0.001,
    fixed_batch_size: int = 64, # Can be tuned separately or fixed
    fixed_ff_dim_factor: int = 4,
    fixed_dropout_rate: float = 0.1,
    fixed_activation: str = 'relu',
    fixed_epochs: int = 100,
    early_stopping_patience: int = 10,
    scoring_metric: str = 'neg_mean_squared_error', # For regression
    random_state: int = 42
    ) -> RandomizedSearchCV:
    """
    Performs RandomizedSearchCV for a Transformer regression model, focusing on
    d_model, num_encoder_layers, and num_heads.

    Args:
        X_train: Training features (samples, timesteps, features).
        y_train: Training target values.
        n_iter: Number of parameter settings sampled by RandomizedSearchCV.
        cv: Number of cross-validation folds.
        d_model_options: List of choices for model dimensionality.
        num_heads_options: List of choices for attention heads.
        num_layers_range: Tuple (min, max) for randint sampling of encoder layers.
        fixed_learning_rate: Learning rate to use (fixed for this search).
        fixed_batch_size: Batch size to use (fixed for this search).
        fixed_ff_dim_factor: Factor to determine feedforward dim (d_model * factor).
        fixed_dropout_rate: Dropout rate to use.
        fixed_activation: Activation function ('relu' or 'gelu').
        fixed_epochs: Max epochs for training each model (used with EarlyStopping).
        early_stopping_patience: Patience for EarlyStopping callback.
        scoring_metric: Scikit-learn scorer name for evaluation.
        random_state: Seed for reproducibility.

    Returns:
        Fitted RandomizedSearchCV object containing the results.
    """
    tf.random.set_seed(random_state)
    np.random.seed(random_state)

    # Derive input shapes from data
    if X_train.ndim != 3:
        raise ValueError("X_train must be 3-dimensional (samples, timesteps, features)")
    n_timesteps = X_train.shape[1]
    n_features = X_train.shape[2]

    print(f"--- Starting Transformer Tuning ---")
    print(f"Input shape: (Timesteps={n_timesteps}, Features={n_features})")
    print(f"Tuning Params: d_model, num_encoder_layers, num_heads")
    print(f"Fixed Params: LR={fixed_learning_rate}, Batch={fixed_batch_size}, "
          f"Dropout={fixed_dropout_rate}, FF_Factor={fixed_ff_dim_factor}, Act={fixed_activation}")
    print(f"Search: {n_iter} iterations, {cv} folds")
    print("-" * 30)

    # --- Define Hyperparameter Search Space ---
    param_distributions = {
        # Tuned Model Hyperparameters
        'model__d_model': d_model_options,
        'model__num_heads': num_heads_options,
        'model__num_encoder_layers': randint(num_layers_range[0], num_layers_range[1]),

        # Fixed Model Hyperparameters (passed directly to the estimator)
        # Note: These are set in KerasRegressor below, not sampled here.

        # Fixed Training Hyperparameters (passed directly to the estimator)
        'batch_size': [fixed_batch_size] # Needs to be a list for RandomizedSearchCV, even if fixed
    }

    # --- Set up KerasRegressor ---
    keras_regressor = KerasRegressor(
        model=create_transformer_model,
        # Pass fixed model parameters required by create_transformer_model
        model__n_timesteps=n_timesteps,
        model__n_features=n_features,
        model__learning_rate=fixed_learning_rate, # Fixed LR
        model__ff_dim_factor=fixed_ff_dim_factor,
        model__dropout_rate=fixed_dropout_rate,
        model__activation=fixed_activation,
        model__loss_function='mean_squared_error', # Assuming MSE loss
        # Pass other fixed wrapper/compile params
        loss='mean_squared_error',
        optimizer='adam', # Optimizer type fixed here
        metrics=['mae', 'mse'],
        # Pass fixed fit parameters
        epochs=fixed_epochs,
        # Other fixed settings
        random_state=random_state,
        verbose=0 # Suppress Keras fit logs during search
    )

    # --- Define Callbacks ---
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=early_stopping_patience,
        restore_best_weights=True,
        verbose=0
    )

    # --- Setup Randomized Search CV ---
    random_search_transformer = RandomizedSearchCV(
        estimator=keras_regressor,
        param_distributions=param_distributions,
        n_iter=n_iter,
        cv=cv,
        scoring=scoring_metric,
        verbose=2, # Show progress during search
        n_jobs=1, # Safer for GPU memory management
        random_state=random_state,
        error_score='raise' # See errors during fitting
    )

    # --- Run the Search ---
    try:
        print("Fitting RandomizedSearchCV...")
        search_result = random_search_transformer.fit(
            X_train, y_train,
            callbacks=[early_stopping],
            validation_split=0.2 # Use portion of training data in each fold for early stopping
        )
        print("\nRandomized Search Finished.")
        return search_result

    except ValueError as e:
        print(f"\nAn error occurred during Randomized Search, potentially related to incompatible d_model/num_heads.")
        print(f"Error message: {e}")
        print("Consider adjusting d_model_options and num_heads_options.")
        return None # Indicate failure


# --- Example Usage ---
if __name__ == '__main__':
    # Generate dummy data for demonstration
    N_SAMPLES_DEMO = 500
    N_TIMESTEPS_DEMO = 20
    N_FEATURES_DEMO = 5

    def generate_regression_data(n_samples, n_timesteps, n_features):
        X = np.random.rand(n_samples, n_timesteps, n_features)
        y = np.sum(X[:, :, 0], axis=1) * 2.5 + np.random.normal(0, 0.5, n_samples)
        return X, y

    # X_demo, y_demo = generate_regression_data(N_SAMPLES_DEMO, N_TIMESTEPS_DEMO, N_FEATURES_DEMO)
    # # In a real scenario, you'd use your actual train/test split
    # # X_train_real, _, y_train_real, _ = train_test_split(...)

    # print(f"Demo Data Shapes: X={X_demo.shape}, y={y_demo.shape}")

    # --- Call the tuning function ---
    # Reduce n_iter for quick demo
    search_results = tune_transformer_hyperparameters(
        X_train=X_train,
        y_train=y_train,
        n_iter=5, # Reduced for speed in demo
        cv=2,      # Reduced for speed in demo
        d_model_options=[64, 128],       # Example options
        num_heads_options=[4,8],           # Example options (ensure compatibility)
        num_layers_range=(2, 4),         # Example range (2 or 3 layers)
        fixed_learning_rate=0.005,       # Example fixed LR
        fixed_batch_size=32,
        random_state=42 # Assuming SEED is defined (e.g., SEED=42)
    )

    # --- Process Results ---
    if search_results:
        print(f"\nBest Score ({search_results.scorer_.__name__}): {search_results.best_score_:.4f}")
        print("Best Parameters Found:")
        best_params_display = {k.replace('model__', ''): v for k, v in search_results.best_params_.items()}
        print(best_params_display)

        # You can access the best model refitted on the full training data (used in tuning)
        # best_model = search_results.best_estimator_
        # performance = best_model.evaluate(X_test, y_test) # Evaluate on actual test set
        # print(f"Test performance of best model: {performance}")
    else:
        print("\nHyperparameter tuning failed.")


--- Starting Transformer Tuning ---
Input shape: (Timesteps=12, Features=15)
Tuning Params: d_model, num_encoder_layers, num_heads
Fixed Params: LR=0.005, Batch=32, Dropout=0.1, FF_Factor=4, Act=relu
Search: 5 iterations, 2 folds
------------------------------
Fitting RandomizedSearchCV...
Fitting 2 folds for each of 5 candidates, totalling 10 fits


2025-04-21 05:15:35.887757: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)
2025-04-21 05:15:36.563482: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_15}}
2025-04-21 05:15:47.889869: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_un

[CV] END batch_size=32, model__d_model=64, model__num_encoder_layers=3, model__num_heads=4; total time=10.3min


2025-04-21 05:26:06.190761: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_15}}
2025-04-21 05:36:03.601274: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),

[CV] END batch_size=32, model__d_model=64, model__num_encoder_layers=3, model__num_heads=4; total time=10.2min


2025-04-21 05:36:14.687584: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_15}}
2025-04-21 05:43:36.847503: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),

[CV] END batch_size=32, model__d_model=64, model__num_encoder_layers=2, model__num_heads=4; total time= 7.5min


2025-04-21 05:43:47.942595: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_15}}
2025-04-21 05:52:39.051803: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),

[CV] END batch_size=32, model__d_model=64, model__num_encoder_layers=2, model__num_heads=4; total time= 9.0min


2025-04-21 05:52:54.908977: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_15}}
2025-04-21 06:05:31.940613: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),

[CV] END batch_size=32, model__d_model=64, model__num_encoder_layers=3, model__num_heads=4; total time=12.9min


2025-04-21 06:05:47.543204: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_15}}
2025-04-21 06:20:14.361334: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),

[CV] END batch_size=32, model__d_model=64, model__num_encoder_layers=3, model__num_heads=4; total time=14.7min


2025-04-21 06:20:29.330645: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_15}}
2025-04-21 06:29:42.736308: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),

[CV] END batch_size=32, model__d_model=64, model__num_encoder_layers=2, model__num_heads=4; total time= 9.4min


2025-04-21 06:29:53.335069: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_15}}
2025-04-21 06:37:43.551045: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),

[CV] END batch_size=32, model__d_model=64, model__num_encoder_layers=2, model__num_heads=4; total time= 8.0min


2025-04-21 06:37:58.002083: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_15}}
2025-04-21 06:50:11.526699: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),

[CV] END batch_size=32, model__d_model=64, model__num_encoder_layers=3, model__num_heads=4; total time=12.5min


2025-04-21 06:50:27.239660: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_15}}
2025-04-21 07:02:58.327693: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),

[CV] END batch_size=32, model__d_model=64, model__num_encoder_layers=3, model__num_heads=4; total time=12.8min


2025-04-21 07:03:21.276903: E tensorflow/core/framework/node_def_util.cc:680] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_15}}



Randomized Search Finished.


AttributeError: '_PredictScorer' object has no attribute '__name__'

In [7]:
print(search_results)

RandomizedSearchCV(cv=2, error_score='raise',
                   estimator=KerasRegressor(epochs=100, loss='mean_squared_error', metrics=['mae', 'mse'], model=<function create_transformer_model at 0x7ed487f0c680>, model__activation='relu', model__dropout_rate=0.1, model__ff_dim_factor=4, model__learning_rate=0.005, model__loss_function='mean_squared_error', model__n_features=15, model__n_timesteps=12, optimizer='adam', random_state=42, verbose=0),
                   n_iter=5, n_jobs=1,
                   param_distributions={'batch_size': [32],
                                        'model__d_model': [64, 128],
                                        'model__num_encoder_layers': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x7ed489077a40>,
                                        'model__num_heads': [4]},
                   random_state=42, scoring='neg_mean_squared_error',
                   verbose=2)


In [8]:
print(search_results.best_params_)

{'batch_size': 32, 'model__d_model': 64, 'model__num_encoder_layers': 3, 'model__num_heads': 4}
