# Spaceship Titanic GRANDE

In [1]:
import os
# Environment variables are set before the remaining imports.
# NOTE(review): presumably so the deep-learning backend used by GRANDE reads them
# at import time (only CUDA device 0 visible, GPU memory allocated on demand) — confirm.
os.environ['CUDA_VISIBLE_DEVICES']      = '0'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

import pandas as pd
from sklearn.model_selection import train_test_split
from GRANDE import GRANDE
import optuna
from sklearn.metrics import accuracy_score
import numpy as np
import warnings
# Silence FutureWarning messages so they do not flood the training output.
warnings.filterwarnings('ignore', category=FutureWarning)

# Single seed reused for the train/validation split, the Optuna sampler and GRANDE.
SEED = 42

## Fitting the GRANDE model

The model-fitting code is taken from the [GRANDE GitHub repository](https://github.com/s-marton/GRANDE).

In [2]:
# 1. Load the train and test tables.
# NOTE(review): no preprocessing happens in this cell; the "_processed" file names
# suggest it was done upstream — confirm where those files are produced.
df = pd.read_csv('data/train_processed.csv')
test_df = pd.read_csv('data/test_processed.csv')

In [3]:
# List the dtype of every training column (features plus PassengerId / Transported).
print(df.dtypes)

PassengerId                   object
Age                          float64
RoomService                  float64
FoodCourt                    float64
ShoppingMall                 float64
Spa                          float64
VRDeck                       float64
Transported                     bool
CabinNum                       int64
HomePlanet_Europa               bool
HomePlanet_Mars                 bool
CryoSleep_True                  bool
Destination_PSO J318.5-22       bool
Destination_TRAPPIST-1e         bool
VIP_True                        bool
Deck_B                          bool
Deck_C                          bool
Deck_D                          bool
Deck_E                          bool
Deck_F                          bool
Deck_G                          bool
Deck_T                          bool
Side_S                          bool
dtype: object


In [4]:
# List the dtype of every test column, for comparison with the training table.
print(test_df.dtypes)

PassengerId                   object
Age                          float64
RoomService                  float64
FoodCourt                    float64
ShoppingMall                 float64
Spa                          float64
VRDeck                       float64
CabinNum                       int64
HomePlanet_Europa               bool
HomePlanet_Mars                 bool
CryoSleep_True                  bool
Destination_PSO J318.5-22       bool
Destination_TRAPPIST-1e         bool
VIP_True                        bool
Deck_B                          bool
Deck_C                          bool
Deck_D                          bool
Deck_E                          bool
Deck_F                          bool
Deck_G                          bool
Deck_T                          bool
Side_S                          bool
dtype: object


In [None]:
# Feature matrix: every column except the identifier and the target.
# The frame mixes float64, int64 and bool columns, so a plain `.values` would fall
# back to an object-dtype array. Request float64 explicitly so the model receives
# a homogeneous numeric matrix (bool columns become 0.0 / 1.0).
X = df.drop(columns=['PassengerId','Transported']).to_numpy(dtype=np.float64)
# Target as 0/1 integers.
y = df['Transported'].astype(int).values
# Seeded 80/20 hold-out split, stratified on the target.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y,
    test_size=0.2,
    random_state=SEED,
    stratify=y
)

# No column is declared categorical; the empty list is passed to GRANDE as 'cat_idx'.
# NOTE(review): the dtypes listing shows HomePlanet/Deck/... as separate bool columns,
# which looks like one-hot encoding done upstream — confirm.
categorical_feature_indices = []

# Fixed settings shared by every Optuna trial and by the final model
# (the tuned hyperparameters are passed separately through `params`).
args = {
    'epochs': 100,
    'early_stopping_epochs': 10,
    'batch_size': 64,
    'cat_idx': categorical_feature_indices,
    'objective': 'binary',
    'random_seed': SEED,
    'verbose': 1,
}

# Helper GRANDE instance created with empty params; the Optuna objective only
# uses it to call `define_trial_parameters`.
sampler = GRANDE(params={}, args=args)

def objective(trial):
    """Optuna objective: hold-out accuracy of one GRANDE configuration.

    Draws hyperparameters for ``trial`` through ``sampler.define_trial_parameters``,
    fits a fresh GRANDE model on the training split (the validation split is
    handed to ``fit`` as well) and returns the accuracy on ``X_valid``.
    """
    trial_params = sampler.define_trial_parameters(trial, args)
    candidate = GRANDE(params=trial_params, args=args)
    candidate.fit(X_train=X_train, y_train=y_train, X_val=X_valid, y_val=y_valid)

    # Column 1 of the prediction output is thresholded at 0.5 to get hard 0/1 labels.
    val_scores = candidate.predict(X_valid)
    positive_score = val_scores[:, 1]
    val_labels = (positive_score > 0.5).astype(int)
    return accuracy_score(y_valid, val_labels)

# Search for the hyperparameters that maximise the objective, using a seeded
# TPE sampler and 20 trials.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=SEED)
)
study.optimize(objective, n_trials=20)

# The objective scores a single train/validation split (no cross-validation is
# performed), so the reported number is a hold-out validation accuracy.
print("Best validation accuracy:", study.best_value)
print("Best params:", study.best_params)

# Refit a final model with the best hyperparameters found by the study.
# NOTE(review): the same validation split is passed to `fit` here and was used to
# select the hyperparameters, so accuracy measured on it is an optimistic estimate.
best_params = study.best_params
final_model = GRANDE(params=best_params, args=args)
final_model.fit(
    X_train=X_train,
    y_train=y_train,
    X_val=X_valid,
    y_val=y_valid
)

[I 2025-04-27 17:14:05,144] A new study created in memory with name: no-name-e2daa48c-0a49-4f46-a220-5ba045b05abb


Epoch 1/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 60ms/step - loss: 0.7228 - val_loss: 0.6305
Epoch 2/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 60ms/step - loss: 0.5946 - val_loss: 0.5673
Epoch 3/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 62ms/step - loss: 0.5526 - val_loss: 0.5535
Epoch 4/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 63ms/step - loss: 0.5601 - val_loss: 0.5489
Epoch 5/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 63ms/step - loss: 0.5519 - val_loss: 0.5394
Epoch 6/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 63ms/step - loss: 0.5238 - val_loss: 0.5346
Epoch 7/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 64ms/step - loss: 0.5167 - val_loss: 0.5492
Epoch 8/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 62ms/step - loss: 0.5534 - val_loss: 0.5297
Epoch 9/100
[1m108/108

[I 2025-04-27 17:18:31,152] Trial 0 finished with value: 0.7947096032202415 and parameters: {'depth': 4, 'n_estimators': 1973, 'learning_rate_weights': 0.18302528605867013, 'learning_rate_index': 0.14970475520083945, 'learning_rate_values': 0.03908905824656489, 'learning_rate_leaf': 0.03908303063201704, 'cosine_decay_steps': 100, 'dropout': 0.5, 'selected_variables': 1.0, 'data_subset_fraction': 0.8, 'focal_loss': True, 'temperature': 0.25}. Best is trial 0 with value: 0.7947096032202415.


Epoch 1/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - loss: 0.7160 - val_loss: 0.5912
Epoch 2/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - loss: 0.6047 - val_loss: 0.5447
Epoch 3/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - loss: 0.5585 - val_loss: 0.5509
Epoch 4/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - loss: 0.5885 - val_loss: 0.5461
Epoch 5/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - loss: 0.5639 - val_loss: 0.5268
Epoch 6/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - loss: 0.5432 - val_loss: 0.5112
Epoch 7/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - loss: 0.5403 - val_loss: 0.5523
Epoch 8/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - loss: 0.5741 - val_loss: 0.5435
Epoch 9/100
[1m109/109

[I 2025-04-27 17:19:16,830] Trial 1 finished with value: 0.7745830937320299 and parameters: {'depth': 3, 'n_estimators': 961, 'learning_rate_weights': 0.09165382463909356, 'learning_rate_index': 0.11407188905583729, 'learning_rate_values': 0.1963154727521141, 'learning_rate_leaf': 0.0499984781613741, 'cosine_decay_steps': 100, 'dropout': 0, 'selected_variables': 0.75, 'data_subset_fraction': 1.0, 'focal_loss': True, 'temperature': 0.25}. Best is trial 0 with value: 0.7947096032202415.


Epoch 1/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 45ms/step - loss: 0.6668 - val_loss: 0.5085
Epoch 2/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 43ms/step - loss: 0.5406 - val_loss: 0.4869
Epoch 3/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 43ms/step - loss: 0.5261 - val_loss: 0.4771
Epoch 4/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 44ms/step - loss: 0.5185 - val_loss: 0.4713
Epoch 5/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 44ms/step - loss: 0.5067 - val_loss: 0.4585
Epoch 6/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 43ms/step - loss: 0.4980 - val_loss: 0.4529
Epoch 7/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 44ms/step - loss: 0.4920 - val_loss: 0.4512
Epoch 8/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 45ms/step - loss: 0.4847 - val_loss: 0.4509
Epoch 9/100
[1m109/109

[I 2025-04-27 17:20:49,293] Trial 2 finished with value: 0.8085106382978723 and parameters: {'depth': 3, 'n_estimators': 1909, 'learning_rate_weights': 0.06476911740184423, 'learning_rate_index': 0.1656643188600601, 'learning_rate_values': 0.0779965979147438, 'learning_rate_leaf': 0.13006499849233494, 'cosine_decay_steps': 1000, 'dropout': 0.25, 'selected_variables': 0.75, 'data_subset_fraction': 1.0, 'focal_loss': False, 'temperature': 0.25}. Best is trial 2 with value: 0.8085106382978723.


Epoch 1/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 43ms/step - loss: 0.6295 - val_loss: 0.5176
Epoch 2/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 42ms/step - loss: 0.5354 - val_loss: 0.5799
Epoch 3/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 42ms/step - loss: 0.5305 - val_loss: 0.4883
Epoch 4/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 42ms/step - loss: 0.5205 - val_loss: 0.5074
Epoch 5/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 42ms/step - loss: 0.5492 - val_loss: 0.5239
Epoch 6/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 42ms/step - loss: 0.5629 - val_loss: 0.6741
Epoch 7/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 42ms/step - loss: 0.5690 - val_loss: 0.5443
Epoch 8/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 41ms/step - loss: 0.5801 - val_loss: 0.6087
Epoch 9/100
[1m109/109

[I 2025-04-27 17:21:49,668] Trial 3 finished with value: 0.6607245543415756 and parameters: {'depth': 4, 'n_estimators': 943, 'learning_rate_weights': 0.1357197511812463, 'learning_rate_index': 0.03531696382119319, 'learning_rate_values': 0.2005690254904345, 'learning_rate_leaf': 0.01873020585557473, 'cosine_decay_steps': 0, 'dropout': 0.25, 'selected_variables': 0.75, 'data_subset_fraction': 1.0, 'focal_loss': True, 'temperature': 0}. Best is trial 2 with value: 0.8085106382978723.


Epoch 1/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 47ms/step - loss: 0.6739 - val_loss: 0.5496
Epoch 2/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 47ms/step - loss: 0.5859 - val_loss: 0.5872
Epoch 3/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 47ms/step - loss: 0.5919 - val_loss: 0.5531
Epoch 4/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 0.5831 - val_loss: 0.5174
Epoch 5/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 47ms/step - loss: 0.5678 - val_loss: 0.5418
Epoch 6/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 47ms/step - loss: 0.5544 - val_loss: 0.5014
Epoch 7/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 46ms/step - loss: 0.5388 - val_loss: 0.4969
Epoch 8/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 47ms/step - loss: 0.5336 - val_loss: 0.4941
Epoch 9/100
[1m109/109

[I 2025-04-27 17:23:27,784] Trial 4 finished with value: 0.7280046003450259 and parameters: {'depth': 4, 'n_estimators': 1011, 'learning_rate_weights': 0.1824285839666822, 'learning_rate_index': 0.15942561209166778, 'learning_rate_values': 0.22181446436982402, 'learning_rate_leaf': 0.11810650979797113, 'cosine_decay_steps': 1000, 'dropout': 0.25, 'selected_variables': 1.0, 'data_subset_fraction': 1.0, 'focal_loss': True, 'temperature': 0.25}. Best is trial 2 with value: 0.8085106382978723.


Epoch 1/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 37ms/step - loss: 0.6981 - val_loss: 0.6346
Epoch 2/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 36ms/step - loss: 0.6239 - val_loss: 0.5696
Epoch 3/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 36ms/step - loss: 0.5646 - val_loss: 0.5570
Epoch 4/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 36ms/step - loss: 0.5837 - val_loss: 0.5416
Epoch 5/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 36ms/step - loss: 0.5538 - val_loss: 0.5266
Epoch 6/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 36ms/step - loss: 0.5162 - val_loss: 0.5117
Epoch 7/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 36ms/step - loss: 0.5140 - val_loss: 0.5264
Epoch 8/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 36ms/step - loss: 0.5521 - val_loss: 0.5341
Epoch 9/100
[1m108/108

[I 2025-04-27 17:25:56,448] Trial 5 finished with value: 0.80448533640023 and parameters: {'depth': 4, 'n_estimators': 1142, 'learning_rate_weights': 0.18891222952190787, 'learning_rate_index': 0.057276661556356454, 'learning_rate_values': 0.01933727946621537, 'learning_rate_leaf': 0.07250888808315063, 'cosine_decay_steps': 100, 'dropout': 0.25, 'selected_variables': 0.75, 'data_subset_fraction': 0.8, 'focal_loss': True, 'temperature': 0.25}. Best is trial 2 with value: 0.8085106382978723.


Epoch 1/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 582ms/step - loss: 0.7304 - val_loss: 0.5707
Epoch 2/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 585ms/step - loss: 0.5660 - val_loss: 0.5400
Epoch 3/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 564ms/step - loss: 0.5441 - val_loss: 0.5270
Epoch 4/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 566ms/step - loss: 0.5308 - val_loss: 0.5184
Epoch 5/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 568ms/step - loss: 0.5225 - val_loss: 0.5123
Epoch 6/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 569ms/step - loss: 0.5133 - val_loss: 0.5069
Epoch 7/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 569ms/step - loss: 0.5051 - val_loss: 0.5029
Epoch 8/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 566ms/step - loss: 0.5006 - val_loss: 0.5008
Epoch 9/

[I 2025-04-27 18:04:56,017] Trial 6 finished with value: 0.8010350776308223 and parameters: {'depth': 7, 'n_estimators': 1834, 'learning_rate_weights': 0.001837337419744557, 'learning_rate_index': 0.1277357509141337, 'learning_rate_values': 0.10441100968687987, 'learning_rate_leaf': 0.05560474183663549, 'cosine_decay_steps': 1000, 'dropout': 0.5, 'selected_variables': 0.75, 'data_subset_fraction': 0.8, 'focal_loss': True, 'temperature': 0.25}. Best is trial 2 with value: 0.8085106382978723.


Epoch 1/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 56ms/step - loss: 0.6001 - val_loss: 0.4995
Epoch 2/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 55ms/step - loss: 0.4850 - val_loss: 0.4276
Epoch 3/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 55ms/step - loss: 0.4431 - val_loss: 0.4172
Epoch 4/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 55ms/step - loss: 0.4475 - val_loss: 0.4042
Epoch 5/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 55ms/step - loss: 0.4349 - val_loss: 0.3964
Epoch 6/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 56ms/step - loss: 0.4204 - val_loss: 0.3883
Epoch 7/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 55ms/step - loss: 0.4194 - val_loss: 0.4032
Epoch 8/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 55ms/step - loss: 0.4302 - val_loss: 0.4012
Epoch 9/100
[1m109/109

[I 2025-04-27 18:07:20,431] Trial 7 finished with value: 0.8004600345025877 and parameters: {'depth': 5, 'n_estimators': 591, 'learning_rate_weights': 0.0697337514127292, 'learning_rate_index': 0.22707564490306678, 'learning_rate_values': 0.05996651647767641, 'learning_rate_leaf': 0.03630922853559665, 'cosine_decay_steps': 100, 'dropout': 0.25, 'selected_variables': 1.0, 'data_subset_fraction': 1.0, 'focal_loss': False, 'temperature': 0}. Best is trial 2 with value: 0.8085106382978723.


Epoch 1/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 31ms/step - loss: 0.5852 - val_loss: 0.4543
Epoch 2/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 31ms/step - loss: 0.4832 - val_loss: 0.4291
Epoch 3/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 31ms/step - loss: 0.4724 - val_loss: 0.4369
Epoch 4/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 29ms/step - loss: 0.4681 - val_loss: 0.4222
Epoch 5/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 29ms/step - loss: 0.4556 - val_loss: 0.4174
Epoch 6/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 30ms/step - loss: 0.4386 - val_loss: 0.4053
Epoch 7/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 29ms/step - loss: 0.4317 - val_loss: 0.4029
Epoch 8/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 29ms/step - loss: 0.4270 - val_loss: 0.3978
Epoch 9/100
[1m109/109

[I 2025-04-27 18:08:19,615] Trial 8 finished with value: 0.6940770557791834 and parameters: {'depth': 3, 'n_estimators': 1420, 'learning_rate_weights': 0.1694233340243864, 'learning_rate_index': 0.004245298449071252, 'learning_rate_values': 0.12807205526899032, 'learning_rate_leaf': 0.056701294221964696, 'cosine_decay_steps': 1000, 'dropout': 0.25, 'selected_variables': 0.5, 'data_subset_fraction': 1.0, 'focal_loss': False, 'temperature': 0}. Best is trial 2 with value: 0.8085106382978723.


Epoch 1/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - loss: 0.6798 - val_loss: 0.5473
Epoch 2/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - loss: 0.5789 - val_loss: 0.5889
Epoch 3/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - loss: 0.5659 - val_loss: 0.5493
Epoch 4/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - loss: 0.5540 - val_loss: 0.5107
Epoch 5/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 27ms/step - loss: 0.5427 - val_loss: 0.4998
Epoch 6/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - loss: 0.5319 - val_loss: 0.4923
Epoch 7/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 27ms/step - loss: 0.5181 - val_loss: 0.4842
Epoch 8/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 27ms/step - loss: 0.5138 - val_loss: 0.4778
Epoch 9/100
[1m109/109

[I 2025-04-27 18:09:11,138] Trial 9 finished with value: 0.750431282346176 and parameters: {'depth': 4, 'n_estimators': 655, 'learning_rate_weights': 0.22431421791253636, 'learning_rate_index': 0.2251144724851163, 'learning_rate_values': 0.15831205417258967, 'learning_rate_leaf': 0.0848235447830703, 'cosine_decay_steps': 1000, 'dropout': 0, 'selected_variables': 0.5, 'data_subset_fraction': 1.0, 'focal_loss': False, 'temperature': 0.25}. Best is trial 2 with value: 0.8085106382978723.


Epoch 1/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 554ms/step - loss: 0.5307 - val_loss: 0.4044
Epoch 2/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 523ms/step - loss: 0.4125 - val_loss: 0.3887
Epoch 3/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 516ms/step - loss: 0.3945 - val_loss: 0.3859
Epoch 4/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 521ms/step - loss: 0.3859 - val_loss: 0.3848
Epoch 5/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 536ms/step - loss: 0.3813 - val_loss: 0.3851
Epoch 6/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 546ms/step - loss: 0.3770 - val_loss: 0.3834
Epoch 7/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 533ms/step - loss: 0.3781 - val_loss: 0.3884
Epoch 8/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 533ms/step - loss: 0.3735 - val_loss: 0.3855
Epoch 9/

[I 2025-04-27 18:28:21,925] Trial 10 finished with value: 0.8056354226566993 and parameters: {'depth': 7, 'n_estimators': 1597, 'learning_rate_weights': 0.02187747125407699, 'learning_rate_index': 0.18475251528359143, 'learning_rate_values': 0.08297121686044934, 'learning_rate_leaf': 0.21500377842177668, 'cosine_decay_steps': 0, 'dropout': 0, 'selected_variables': 0.75, 'data_subset_fraction': 0.8, 'focal_loss': False, 'temperature': 0}. Best is trial 2 with value: 0.8085106382978723.


Epoch 1/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 541ms/step - loss: 0.5314 - val_loss: 0.4026
Epoch 2/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 520ms/step - loss: 0.4104 - val_loss: 0.3888
Epoch 3/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 531ms/step - loss: 0.3922 - val_loss: 0.3853
Epoch 4/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 545ms/step - loss: 0.3834 - val_loss: 0.3891
Epoch 5/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 531ms/step - loss: 0.3766 - val_loss: 0.3864
Epoch 6/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 532ms/step - loss: 0.3712 - val_loss: 0.3833
Epoch 7/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 528ms/step - loss: 0.3672 - val_loss: 0.3801
Epoch 8/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 514ms/step - loss: 0.3683 - val_loss: 0.3817
Epoch 9/

[I 2025-04-27 18:44:23,281] Trial 11 finished with value: 0.8102357676825762 and parameters: {'depth': 7, 'n_estimators': 1600, 'learning_rate_weights': 0.01704828491387897, 'learning_rate_index': 0.18868618778213594, 'learning_rate_values': 0.07922517076629519, 'learning_rate_leaf': 0.21421108172170697, 'cosine_decay_steps': 0, 'dropout': 0, 'selected_variables': 0.75, 'data_subset_fraction': 0.8, 'focal_loss': False, 'temperature': 0}. Best is trial 11 with value: 0.8102357676825762.


Epoch 1/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 217ms/step - loss: 0.5282 - val_loss: 0.4148
Epoch 2/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 216ms/step - loss: 0.4254 - val_loss: 0.4045
Epoch 3/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 217ms/step - loss: 0.4199 - val_loss: 0.4013
Epoch 4/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 216ms/step - loss: 0.4194 - val_loss: 0.4142
Epoch 5/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 216ms/step - loss: 0.4242 - val_loss: 0.3962
Epoch 6/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 226ms/step - loss: 0.4261 - val_loss: 0.4094
Epoch 7/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 225ms/step - loss: 0.4225 - val_loss: 0.4046
Epoch 8/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 225ms/step - loss: 0.4272 - val_loss: 0.4057
Epoch 9/

[I 2025-04-27 18:50:23,712] Trial 12 finished with value: 0.8050603795284647 and parameters: {'depth': 6, 'n_estimators': 1693, 'learning_rate_weights': 0.049061628643559695, 'learning_rate_index': 0.19209874046594938, 'learning_rate_values': 0.13054548666028276, 'learning_rate_leaf': 0.1986830396237681, 'cosine_decay_steps': 0, 'dropout': 0, 'selected_variables': 0.75, 'data_subset_fraction': 0.8, 'focal_loss': False, 'temperature': 0}. Best is trial 11 with value: 0.8102357676825762.


Epoch 1/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 298ms/step - loss: 0.5387 - val_loss: 0.4140
Epoch 2/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 297ms/step - loss: 0.4357 - val_loss: 0.4021
Epoch 3/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 295ms/step - loss: 0.4218 - val_loss: 0.4113
Epoch 4/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 294ms/step - loss: 0.4124 - val_loss: 0.4037
Epoch 5/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 297ms/step - loss: 0.4021 - val_loss: 0.3865
Epoch 6/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 299ms/step - loss: 0.3952 - val_loss: 0.3826
Epoch 7/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 299ms/step - loss: 0.3942 - val_loss: 0.3892
Epoch 8/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 309ms/step - loss: 0.4078 - val_loss: 0.3803
Epoch 9/

[I 2025-04-27 19:00:08,938] Trial 13 finished with value: 0.7964347326049454 and parameters: {'depth': 6, 'n_estimators': 1995, 'learning_rate_weights': 0.10985190746086779, 'learning_rate_index': 0.09337725997209825, 'learning_rate_values': 0.06985443237853083, 'learning_rate_leaf': 0.17055651357525178, 'cosine_decay_steps': 0, 'dropout': 0, 'selected_variables': 0.75, 'data_subset_fraction': 0.8, 'focal_loss': False, 'temperature': 0}. Best is trial 11 with value: 0.8102357676825762.


Epoch 1/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 328ms/step - loss: 0.6767 - val_loss: 0.5847
Epoch 2/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 325ms/step - loss: 0.6168 - val_loss: 0.5888
Epoch 3/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 326ms/step - loss: 0.6024 - val_loss: 0.5876
Epoch 4/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 329ms/step - loss: 0.5856 - val_loss: 0.5890
Epoch 5/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 343ms/step - loss: 0.5708 - val_loss: 0.5895
Epoch 6/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 333ms/step - loss: 0.5643 - val_loss: 0.5916
Epoch 7/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 325ms/step - loss: 0.5454 - val_loss: 0.5918
Epoch 8/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 340ms/step - loss: 0.5467 - val_loss: 0.5898
Epoch 9/

[I 2025-04-27 19:06:48,866] Trial 14 finished with value: 0.7636572742955722 and parameters: {'depth': 6, 'n_estimators': 1482, 'learning_rate_weights': 0.040505484048725865, 'learning_rate_index': 0.2448584553840088, 'learning_rate_values': 0.0027211004124337995, 'learning_rate_leaf': 0.2443188897522101, 'cosine_decay_steps': 1000, 'dropout': 0.5, 'selected_variables': 0.75, 'data_subset_fraction': 1.0, 'focal_loss': False, 'temperature': 0.25}. Best is trial 11 with value: 0.8102357676825762.


Epoch 1/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 98ms/step - loss: 0.5443 - val_loss: 0.4260
Epoch 2/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 96ms/step - loss: 0.4401 - val_loss: 0.4286
Epoch 3/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 97ms/step - loss: 0.4390 - val_loss: 0.4035
Epoch 4/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 97ms/step - loss: 0.4360 - val_loss: 0.4010
Epoch 5/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 96ms/step - loss: 0.4326 - val_loss: 0.4169
Epoch 6/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 96ms/step - loss: 0.4352 - val_loss: 0.4020
Epoch 7/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 97ms/step - loss: 0.4352 - val_loss: 0.4127
Epoch 8/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 96ms/step - loss: 0.4414 - val_loss: 0.4106
Epoch 9/100
[1m

[I 2025-04-27 19:09:16,758] Trial 15 finished with value: 0.7981598619896493 and parameters: {'depth': 5, 'n_estimators': 1728, 'learning_rate_weights': 0.07009728008981919, 'learning_rate_index': 0.19040163194083662, 'learning_rate_values': 0.10720767356218552, 'learning_rate_leaf': 0.14456597712846647, 'cosine_decay_steps': 0, 'dropout': 0.25, 'selected_variables': 0.5, 'data_subset_fraction': 0.8, 'focal_loss': False, 'temperature': 0}. Best is trial 11 with value: 0.8102357676825762.


Epoch 1/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 606ms/step - loss: 0.6706 - val_loss: 0.4918
Epoch 2/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 607ms/step - loss: 0.5186 - val_loss: 0.4655
Epoch 3/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 601ms/step - loss: 0.4963 - val_loss: 0.4540
Epoch 4/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 601ms/step - loss: 0.4835 - val_loss: 0.4487
Epoch 5/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 608ms/step - loss: 0.4775 - val_loss: 0.4449
Epoch 6/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 602ms/step - loss: 0.4710 - val_loss: 0.4444
Epoch 7/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 604ms/step - loss: 0.4663 - val_loss: 0.4426
Epoch 8/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 603ms/step - loss: 0.4628 - val_loss: 0.4451
Epoch 9/

[I 2025-04-27 19:28:02,754] Trial 16 finished with value: 0.816561242093157 and parameters: {'depth': 7, 'n_estimators': 1289, 'learning_rate_weights': 0.012274737330179399, 'learning_rate_index': 0.15721258434952878, 'learning_rate_values': 0.04540455877277873, 'learning_rate_leaf': 0.1180834315364944, 'cosine_decay_steps': 0, 'dropout': 0, 'selected_variables': 0.75, 'data_subset_fraction': 1.0, 'focal_loss': False, 'temperature': 0.25}. Best is trial 16 with value: 0.816561242093157.


Epoch 1/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 416ms/step - loss: 0.5646 - val_loss: 0.4217
Epoch 2/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 414ms/step - loss: 0.4214 - val_loss: 0.3947
Epoch 3/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 413ms/step - loss: 0.3946 - val_loss: 0.3845
Epoch 4/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 416ms/step - loss: 0.3792 - val_loss: 0.3780
Epoch 5/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 420ms/step - loss: 0.3686 - val_loss: 0.3746
Epoch 6/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 417ms/step - loss: 0.3605 - val_loss: 0.3718
Epoch 7/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 417ms/step - loss: 0.3528 - val_loss: 0.3699
Epoch 8/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 419ms/step - loss: 0.3476 - val_loss: 0.3678
Epoch 9/

[I 2025-04-27 19:45:25,825] Trial 17 finished with value: 0.8154111558366878 and parameters: {'depth': 7, 'n_estimators': 1288, 'learning_rate_weights': 0.002030407069880641, 'learning_rate_index': 0.09792839446945038, 'learning_rate_values': 0.04247842419399915, 'learning_rate_leaf': 0.10050391075659731, 'cosine_decay_steps': 0, 'dropout': 0, 'selected_variables': 0.75, 'data_subset_fraction': 0.8, 'focal_loss': False, 'temperature': 0}. Best is trial 16 with value: 0.816561242093157.


Epoch 1/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 175ms/step - loss: 0.5656 - val_loss: 0.4242
Epoch 2/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 174ms/step - loss: 0.4239 - val_loss: 0.3985
Epoch 3/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 175ms/step - loss: 0.3969 - val_loss: 0.3883
Epoch 4/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 174ms/step - loss: 0.3821 - val_loss: 0.3820
Epoch 5/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 180ms/step - loss: 0.3727 - val_loss: 0.3779
Epoch 6/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 176ms/step - loss: 0.3632 - val_loss: 0.3737
Epoch 7/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 176ms/step - loss: 0.3563 - val_loss: 0.3726
Epoch 8/100
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 192ms/step - loss: 0.3498 - val_loss: 0.3714
Epoch 9/

[I 2025-04-27 19:52:11,438] Trial 18 finished with value: 0.8154111558366878 and parameters: {'depth': 6, 'n_estimators': 1288, 'learning_rate_weights': 0.0029555761671426306, 'learning_rate_index': 0.08412792474148581, 'learning_rate_values': 0.04091892611341133, 'learning_rate_leaf': 0.09507735001199624, 'cosine_decay_steps': 0, 'dropout': 0, 'selected_variables': 1.0, 'data_subset_fraction': 0.8, 'focal_loss': False, 'temperature': 0}. Best is trial 16 with value: 0.816561242093157.


Epoch 1/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 610ms/step - loss: 0.6683 - val_loss: 0.5657
Epoch 2/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 603ms/step - loss: 0.5615 - val_loss: 0.5302
Epoch 3/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 585ms/step - loss: 0.5468 - val_loss: 0.5116
Epoch 4/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 580ms/step - loss: 0.5327 - val_loss: 0.4886
Epoch 5/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 603ms/step - loss: 0.5329 - val_loss: 0.5052
Epoch 6/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 588ms/step - loss: 0.5547 - val_loss: 0.5209
Epoch 7/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 592ms/step - loss: 0.5342 - val_loss: 0.5030
Epoch 8/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 583ms/step - loss: 0.5234 - val_loss: 0.5016
Epoch 9/

[I 2025-04-27 20:07:18,548] Trial 19 finished with value: 0.7889591719378953 and parameters: {'depth': 7, 'n_estimators': 1235, 'learning_rate_weights': 0.24837124339341893, 'learning_rate_index': 0.12465014895329563, 'learning_rate_values': 0.03752482242956686, 'learning_rate_leaf': 0.16440266648322213, 'cosine_decay_steps': 0, 'dropout': 0, 'selected_variables': 0.5, 'data_subset_fraction': 1.0, 'focal_loss': False, 'temperature': 0.25}. Best is trial 16 with value: 0.816561242093157.


Best CV accuracy: 0.816561242093157
Best params: {'depth': 7, 'n_estimators': 1289, 'learning_rate_weights': 0.012274737330179399, 'learning_rate_index': 0.15721258434952878, 'learning_rate_values': 0.04540455877277873, 'learning_rate_leaf': 0.1180834315364944, 'cosine_decay_steps': 0, 'dropout': 0, 'selected_variables': 0.75, 'data_subset_fraction': 1.0, 'focal_loss': False, 'temperature': 0.25}
Epoch 1/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 625ms/step - loss: 0.6706 - val_loss: 0.4918
Epoch 2/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 636ms/step - loss: 0.5186 - val_loss: 0.4655
Epoch 3/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 650ms/step - loss: 0.4963 - val_loss: 0.4540
Epoch 4/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 620ms/step - loss: 0.4835 - val_loss: 0.4487
Epoch 5/100
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 622ms/step - loss: 0.4775

## Prediction

In [None]:
# Predict on the hold-out split and take the highest-scoring column as the label.
y_pred_raw = final_model.predict(X_valid)
y_pred = np.argmax(y_pred_raw, axis=1)

# Compute accuracy
# NOTE(review): X_valid was also used when fitting and tuning the model, so this
# figure is not an unbiased estimate of test performance.
accuracy = accuracy_score(y_valid, y_pred)
print(f"Validation Accuracy: {accuracy:.4f}")

In [None]:
# Reload the test table so this cell can be re-run on its own.
test_df = pd.read_csv('data/test_processed.csv')

# Select the test features by the training column names rather than by dropping
# columns: this guarantees the same columns in the same order as the matrix the
# model was fitted on, and raises KeyError if a feature is missing instead of
# silently misaligning columns.
feature_columns = df.columns.drop(['PassengerId', 'Transported'])
# Explicit float64 avoids the object-dtype array that `.values` produces for a
# frame mixing float, int and bool columns.
X_test = test_df[feature_columns].to_numpy(dtype=np.float64)

# Generate predictions with your tuned GRANDE model
preds = final_model.predict(X_test)
# Convert to boolean labels: column 1 thresholded at 0.5, as in the Optuna objective
y_test_pred = (preds[:, 1] > 0.5)

# Build submission DataFrame
submission = pd.DataFrame({
    'PassengerId': test_df['PassengerId'],
    'Transported': y_test_pred
})
# Save to CSV
submission.to_csv('submission_grande.csv', index=False)