In [34]:
import torch
import torch.nn as nn

import pandas as pd
import numpy as np

from torch.utils.data import DataLoader
from torch.utils.data import Dataset

from sklearn.model_selection import TimeSeriesSplit

import random

import os
import json

import optuna
from optuna.samplers import TPESampler
from optuna.pruners import SuccessiveHalvingPruner
from datetime import datetime

from model import TCNRegressor
from tuning import ray_train

import train
import utils
import tuning
import data
from data import transform_data 
from data import TimeSeriesDataset


if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [35]:
# Number of trials for tuning
trials = 100

# Fixed parameters (not tuned)
fixed_params = {
    "lag_periods": [1, 3, 12],
    "features": ['BIR', 'BOC', 'Other Offices', 'TotalTrade_PHPMN', 'NominalGDP_disagg', 'Pop_disagg'],
    "labels": ["BIR", 'BOC', 'Other Offices'],
    "dummy_vars": ['COVID-19', 'TRAIN', 'CREATE', 'FIST', 'BIR_COMM'],
    "experiment_name": "test"
}

# Resource detection (equivalent to your resources setup)
if torch.cuda.is_available():
    device = "cuda"
    n_jobs = 4  # Run 4 trials concurrently (equivalent to max_concurrent_trials=4)
    print("Using CUDA GPU")
elif torch.backends.mps.is_available():
    device = "mps"
    n_jobs = 1
    print("Using MPS (Mac GPU) - GPU allocation not specified")
else:
    device = "cpu"
    n_jobs = 4
    print("Using CPU only")

def objective(trial):
    """
    Optuna objective function - equivalent to ray_train_with_resources
    Replace the train_model call with your actual ray_train function
    """
    
    # Define search space (same as your Ray Tune search_space)
    config = {
        # === MODEL ARCHITECTURE ===
            # === MODEL ARCHITECTURE ===
        "num_channels": trial.suggest_categorical("num_channels", [[64], [64, 64], [64, 64, 64], [128], [128, 128], [128, 128, 128], [256], [256, 256], [256, 256, 256]]),
        "kernel_size": trial.suggest_categorical("kernel_size", [2, 3, 4, 5]),
        "dropout": trial.suggest_float("dropout", 0.1, 0.4),
        "output_size": 3,  # Fixed: number of output variables (BIR, BOC, Other Offices)
        
        # === OPTIMIZER ===
        "lr": trial.suggest_float("lr", 5e-4, 5e-2, log=True),
        "wd": trial.suggest_float("wd", 1e-6, 1e-4, log=True),
        
        # === SCHEDULER ===
        "factor": trial.suggest_float("factor", 0.2, 0.5),
        "patience": trial.suggest_categorical("patience", [5, 10]),
        
        # === TRAINING ===
        "batch_size": trial.suggest_categorical("batch_size", [16, 32, 64]),
        "seq_len": trial.suggest_categorical("seq_len", [3, 6, 12, 18, 24]),
        "l1_lambda": trial.suggest_float("l1_lambda", 1e-5, 1e-2, log=True),
        
        # === DEVICE ===
        "device": device,
    }
    
    # Add fixed parameters
    config.update(fixed_params)
    
    try:
        # REPLACE THIS with your actual ray_train function call
        # Your ray_train should accept config and optionally trial for intermediate reporting
        result = ray_train(config, trial=trial)
        
        # Handle result (loss, std) or just loss
        if isinstance(result, tuple):
            loss, std = result
            trial.set_user_attr("std", std)
        else:
            loss = result
            trial.set_user_attr("std", None)
        
        return loss
    
    except optuna.TrialPruned:
        raise
    except Exception as e:
        print(f"Trial {trial.number} failed: {str(e)}")
        return float('inf')

# ASHA Scheduler equivalent (same parameters as your ASHAScheduler)
# max_t=50, grace_period=10, reduction_factor=2
pruner = SuccessiveHalvingPruner(
    min_resource=10,        # grace_period
    reduction_factor=2,     # reduction_factor
    min_early_stopping_rate=0
)

# Optuna Search (TPE sampler - same as OptunaSearch)
sampler = TPESampler(
    seed=42,
    n_startup_trials=10,
    multivariate=True
)

# Create study (equivalent to creating Tuner)
study = optuna.create_study(
    direction="minimize",  # mode="min"
    sampler=sampler,
    pruner=pruner,
    study_name="gru_tuning"
)

# Progress reporter (equivalent to CLIReporter)
class ProgressReporter:
    def __init__(self, max_trials, metric_columns=None):
        self.max_trials = max_trials
        self.metric_columns = metric_columns or ["loss", "std"]
        self.trials_completed = 0
        self.trials_running = 0
        self.best_loss = float('inf')
        self.recent_trials = []
        
    def __call__(self, study, trial):
        self.trials_completed += 1
        
        # Update best loss
        if study.best_value < self.best_loss:
            self.best_loss = study.best_value
        
        # Get trial info
        state = trial.state.name
        loss = trial.value if trial.value is not None else float('inf')
        std = trial.user_attrs.get("std", None)
        
        # Store recent trial info
        trial_info = {
            'trial_number': trial.number,
            'state': state,
            'loss': loss,
            'std': std,
            'params': trial.params
        }
        self.recent_trials.append(trial_info)
        
        # Keep only last 15 trials (max_progress_rows=15)
        if len(self.recent_trials) > 15:
            self.recent_trials.pop(0)
        
        # Print intermediate table (similar to CLIReporter)
        if self.trials_completed % 5 == 0 or self.trials_completed == 1:
            self._print_table()
    
    def _print_table(self):
        print("\n" + "="*100)
        print(f"Progress: {self.trials_completed}/{self.max_trials} trials | Best loss: {self.best_loss:.4f}")
        print("="*100)
        
        # Header
        print(f"{'Trial':<8} {'State':<12} {'Loss':<12} {'Std':<12} {'hidden_size':<12} {'lr':<12} {'batch_size':<12}")
        print("-"*100)
        
        # Sorted by loss (sort_by_metric=True)
        sorted_trials = sorted(
            [t for t in self.recent_trials if t['loss'] != float('inf')],
            key=lambda x: x['loss']
        )
        
        for t in sorted_trials[:15]:  # max_progress_rows=15
            loss_str = f"{t['loss']:.4f}" if t['loss'] != float('inf') else "N/A"
            std_str = f"{t['std']:.4f}" if t['std'] is not None else "N/A"
            hidden_size = t['params'].get('hidden_size', 'N/A')
            lr = f"{t['params'].get('lr', 0):.2e}" if 'lr' in t['params'] else "N/A"
            batch_size = t['params'].get('batch_size', 'N/A')
            
            print(f"{t['trial_number']:<8} {t['state']:<12} {loss_str:<12} {std_str:<12} {hidden_size:<12} {lr:<12} {batch_size:<12}")
        
        print("="*100 + "\n")

# Statistics tracker (prints every 30 seconds worth of trials)
class StatsTracker:
    def __init__(self):
        self.start_time = datetime.now()
        self.last_report_time = self.start_time
        
    def __call__(self, study, trial):
        current_time = datetime.now()
        elapsed_since_report = (current_time - self.last_report_time).total_seconds()
        
        # Report every ~30 seconds (max_report_frequency=30)
        if elapsed_since_report >= 30:
            self.last_report_time = current_time
            
            completed = len([t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE])
            pruned = len([t for t in study.trials if t.state == optuna.trial.TrialState.PRUNED])
            failed = len([t for t in study.trials if t.state == optuna.trial.TrialState.FAIL])
            
            total_elapsed = (current_time - self.start_time).total_seconds()
            
            print(f"\n{'#'*100}")
            print(f"STATUS UPDATE (after {total_elapsed/60:.1f} minutes)")
            print(f"{'#'*100}")
            print(f"Completed: {completed} | Pruned: {pruned} | Failed: {failed}")
            if completed > 0:
                print(f"Best loss so far: {study.best_value:.4f}")
                print(f"Average time per trial: {total_elapsed/trial.number:.1f}s")
            print(f"{'#'*100}\n")

# Initialize reporters
reporter = ProgressReporter(max_trials=trials, metric_columns=["loss", "std"])
stats_tracker = StatsTracker()

# Run optimization (equivalent to tuner.fit())
print("="*100)
print("STARTING HYPERPARAMETER TUNING")
print("="*100)
print(f"Total trials: {trials}")
print(f"Concurrent trials: {n_jobs}")
print(f"Device: {device}")
print(f"Study name: gru_tuning")
print(f"Storage: ray_results/ (Optuna equivalent)")
print("="*100 + "\n")

# Create results directory (equivalent to storage_path)
results_dir = os.path.abspath("ray_results/gru_tuning")
os.makedirs(results_dir, exist_ok=True)

# Optimize
study.optimize(
    objective,
    n_trials=trials,
    callbacks=[reporter, stats_tracker],
    n_jobs=n_jobs,  # max_concurrent_trials=4
    show_progress_bar=True,
    gc_after_trial=True
)

# Get best result (equivalent to results.get_best_result)
best_trial = study.best_trial
best_config = best_trial.params.copy()
best_config.update(fixed_params)  # Add fixed params

# Print results (same format as your original code)
print("\n" + "="*50)
print("Best Hyperparameters:")
print("="*50)
for key, value in best_config.items():
    print(f"{key}: {value}")
print(f"\nBest CV Loss: {best_trial.value:.4f}")

# Save to exp_folder (same structure as your original code)
exp_name = best_config.get('experiment_name', 'default')
exp_folder = f"configs/{exp_name}"
os.makedirs(exp_folder, exist_ok=True)

# Save best config (same path as your original code)
config_path = f"{exp_folder}/best_config.json"
with open(config_path, "w") as f:
    json.dump(best_config, f, indent=4)

print(f"\nConfig saved to: {config_path}")

# Save metadata (same structure as your original code)
metadata = {
    'best_loss': best_trial.value,
    'best_std': best_trial.user_attrs.get('std', None),
    'num_trials': len(study.trials),
    'num_completed': len([t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]),
    'num_pruned': len([t for t in study.trials if t.state == optuna.trial.TrialState.PRUNED]),
    'num_failed': len([t for t in study.trials if t.state == optuna.trial.TrialState.FAIL]),
    'timestamp': pd.Timestamp.now().isoformat()
}
metadata_path = f"{exp_folder}/metadata.json"
with open(metadata_path, "w") as f:
    json.dump(metadata, f, indent=4)

print(f"Metadata saved to: {metadata_path}")

# Save study results to ray_results directory
study_results_path = f"{results_dir}/study.pkl"
import joblib
joblib.dump(study, study_results_path)

# Save trials dataframe
trials_df = study.trials_dataframe()
trials_df.to_csv(f"{results_dir}/trials.csv", index=False)

print(f"\nStudy results saved to: {results_dir}/")
print("  - study.pkl (complete study object)")
print("  - trials.csv (all trial results)")

# Optional: Generate visualizations
try:
    import plotly
    
    fig1 = optuna.visualization.plot_optimization_history(study)
    fig1.write_html(f"{exp_folder}/optimization_history.html")
    
    fig2 = optuna.visualization.plot_param_importances(study)
    fig2.write_html(f"{exp_folder}/param_importances.html")
    
    fig3 = optuna.visualization.plot_parallel_coordinate(study)
    fig3.write_html(f"{exp_folder}/parallel_coordinate.html")
    
    print(f"\nVisualizations saved to {exp_folder}/")
    
except ImportError:
    print("\nInstall plotly for visualizations: pip install plotly kaleido")

print("\n" + "="*50)
print("TUNING COMPLETE")
print("="*50)


Argument ``multivariate`` is an experimental feature. The interface can change in the future.

[I 2026-02-11 22:47:59,925] A new study created in memory with name: gru_tuning


Using CPU only
STARTING HYPERPARAMETER TUNING
Total trials: 100
Concurrent trials: 4
Device: cpu
Study name: gru_tuning
Storage: ray_results/ (Optuna equivalent)




Choices for a categorical distribution should be a tuple of None, bool, int, float and str for persistent storage but contains [64] which is of type list.


Choices for a categorical distribution should be a tuple of None, bool, int, float and str for persistent storage but contains [64, 64] which is of type list.


Choices for a categorical distribution should be a tuple of None, bool, int, float and str for persistent storage but contains [64, 64, 64] which is of type list.


Choices for a categorical distribution should be a tuple of None, bool, int, float and str for persistent storage but contains [128] which is of type list.


Choices for a categorical distribution should be a tuple of None, bool, int, float and str for persistent storage but contains [128, 128] which is of type list.


Choices for a categorical distribution should be a tuple of None, bool, int, float and str for persistent storage but contains [128, 128, 128] which is of type list.


Choices for a categorical d

Trial 0 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:47:59,949] Trial 0 finished with value: inf and parameters: {'num_channels': [256, 256], 'kernel_size': 3, 'dropout': 0.18012931121174408, 'lr': 0.0027922133130790848, 'wd': 1.056526508740203e-05, 'factor': 0.4246376248898365, 'patience': 10, 'batch_size': 32, 'seq_len': 18, 'l1_lambda': 0.007027019122754872}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:   0%|          | 0/100 [00:00<?, ?it/s]


Progress: 1/100 trials | Best loss: inf
Trial    State        Loss         Std          hidden_size  lr           batch_size  
----------------------------------------------------------------------------------------------------

Trial 1 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:   1%|          | 1/100 [00:00<00:30,  3.21it/s]

Trial 2 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 3 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:00,234] Trial 1 finished with value: inf and parameters: {'num_channels': [256], 'kernel_size': 2, 'dropout': 0.2781614331301506, 'lr': 0.004183991320937839, 'wd': 4.459222134519694e-06, 'factor': 0.2280299262523953, 'patience': 5, 'batch_size': 64, 'seq_len': 6, 'l1_lambda': 0.00035230976765284486}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:   1%|          | 1/100 [00:00<00:30,  3.21it/s]

Trial 4 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:00,241] Trial 2 finished with value: inf and parameters: {'num_channels': [256, 256], 'kernel_size': 3, 'dropout': 0.2969845100248468, 'lr': 0.0027257599950559954, 'wd': 9.413536248445449e-06, 'factor': 0.38682163156707816, 'patience': 10, 'batch_size': 32, 'seq_len': 6, 'l1_lambda': 2.573848078353802e-05}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:   3%|▎         | 3/100 [00:00<00:34,  2.85it/s]

[I 2026-02-11 22:48:00,259] Trial 3 finished with value: inf and parameters: {'num_channels': [64], 'kernel_size': 4, 'dropout': 0.1626315528843461, 'lr': 0.0013499936009127589, 'wd': 1.7952451567118597e-06, 'factor': 0.4277300282041418, 'patience': 5, 'batch_size': 64, 'seq_len': 12, 'l1_lambda': 0.00010893024179918024}. Best is trial 0 with value: inf.
Trial 5 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 6 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:   4%|▍         | 4/100 [00:01<00:40,  2.35it/s]

[I 2026-02-11 22:48:00,521] Trial 4 finished with value: inf and parameters: {'num_channels': [64, 64], 'kernel_size': 3, 'dropout': 0.23006058061242357, 'lr': 0.0030888745446504707, 'wd': 3.7601943114213497e-05, 'factor': 0.35088642595229, 'patience': 10, 'batch_size': 16, 'seq_len': 24, 'l1_lambda': 0.00033543185786597386}. Best is trial 0 with value: inf.

Progress: 5/100 trials | Best loss: inf
Trial    State        Loss         Std          hidden_size  lr           batch_size  
----------------------------------------------------------------------------------------------------



Best trial: 0. Best value: inf:   4%|▍         | 4/100 [00:01<00:40,  2.35it/s]

Trial 7 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:01,106] Trial 5 finished with value: inf and parameters: {'num_channels': [128], 'kernel_size': 3, 'dropout': 0.2124113453217253, 'lr': 0.013705379481076448, 'wd': 8.052007864101989e-06, 'factor': 0.20388963731940063, 'patience': 5, 'batch_size': 32, 'seq_len': 6, 'l1_lambda': 0.0004673180046407225}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:   6%|▌         | 6/100 [00:01<00:24,  3.88it/s]

[I 2026-02-11 22:48:01,109] Trial 6 finished with value: inf and parameters: {'num_channels': [64, 64], 'kernel_size': 2, 'dropout': 0.2505277183380963, 'lr': 0.02843362617912793, 'wd': 9.308872042472524e-06, 'factor': 0.4921435678234348, 'patience': 10, 'batch_size': 64, 'seq_len': 12, 'l1_lambda': 3.5250564673810216e-05}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:   6%|▌         | 6/100 [00:01<00:24,  3.88it/s]

Trial 8 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 9 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:01,410] Trial 7 finished with value: inf and parameters: {'num_channels': [256, 256, 256], 'kernel_size': 2, 'dropout': 0.15206322451052856, 'lr': 0.0037212405016984655, 'wd': 1.1080692735435845e-05, 'factor': 0.2287437040668925, 'patience': 10, 'batch_size': 64, 'seq_len': 18, 'l1_lambda': 1.5627427132745966e-05}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:   8%|▊         | 8/100 [00:02<00:27,  3.40it/s]

[I 2026-02-11 22:48:01,916] Trial 8 finished with value: inf and parameters: {'num_channels': [256], 'kernel_size': 4, 'dropout': 0.3988148004747134, 'lr': 0.013000822576685112, 'wd': 2.268866352494443e-05, 'factor': 0.3576479948599145, 'patience': 10, 'batch_size': 64, 'seq_len': 18, 'l1_lambda': 0.0002457767368220712}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:   8%|▊         | 8/100 [00:02<00:27,  3.40it/s]

Trial 10 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 11 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:01,922] Trial 9 finished with value: inf and parameters: {'num_channels': [256], 'kernel_size': 3, 'dropout': 0.15992730793350687, 'lr': 0.0005644073717156815, 'wd': 5.106675863114928e-06, 'factor': 0.27385723135401174, 'patience': 5, 'batch_size': 16, 'seq_len': 24, 'l1_lambda': 0.0004116752127717905}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:   9%|▉         | 9/100 [00:02<00:31,  2.92it/s]


Progress: 10/100 trials | Best loss: inf
Trial    State        Loss         Std          hidden_size  lr           batch_size  
----------------------------------------------------------------------------------------------------

[I 2026-02-11 22:48:02,427] Trial 10 finished with value: inf and parameters: {'num_channels': [256, 256], 'kernel_size': 3, 'dropout': 0.16830596786538923, 'lr': 0.005349921072382946, 'wd': 2.4704695834450173e-06, 'factor': 0.3913913334557391, 'patience': 10, 'batch_size': 32, 'seq_len': 3, 'l1_lambda': 0.004293101367555992}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  11%|█         | 11/100 [00:03<00:29,  2.99it/s]

Trial 12 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:02,444] Trial 11 finished with value: inf and parameters: {'num_channels': [256, 256], 'kernel_size': 3, 'dropout': 0.1541965758799672, 'lr': 0.002512981712633329, 'wd': 5.56616955704307e-05, 'factor': 0.35630650340347364, 'patience': 10, 'batch_size': 32, 'seq_len': 18, 'l1_lambda': 0.0070215008283279114}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  12%|█▏        | 12/100 [00:03<00:28,  3.11it/s]

Trial 14 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 13 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:02,942] Trial 12 finished with value: inf and parameters: {'num_channels': [128, 128, 128], 'kernel_size': 2, 'dropout': 0.3283618789820619, 'lr': 0.00457432316024354, 'wd': 4.896339889797126e-06, 'factor': 0.30755875148705725, 'patience': 5, 'batch_size': 64, 'seq_len': 6, 'l1_lambda': 0.00047312722340273126}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  12%|█▏        | 12/100 [00:03<00:28,  3.11it/s]

Trial 15 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:03,227] Trial 14 finished with value: inf and parameters: {'num_channels': [256], 'kernel_size': 2, 'dropout': 0.26175684039839825, 'lr': 0.0007597256147768654, 'wd': 3.504966678439189e-06, 'factor': 0.24035361317777004, 'patience': 5, 'batch_size': 64, 'seq_len': 6, 'l1_lambda': 0.0016227271986013134}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  13%|█▎        | 13/100 [00:04<00:32,  2.65it/s]

[I 2026-02-11 22:48:03,227] Trial 13 finished with value: inf and parameters: {'num_channels': [256, 256, 256], 'kernel_size': 2, 'dropout': 0.1676758687864154, 'lr': 0.0014787985732083956, 'wd': 1.1356278661178378e-06, 'factor': 0.29919530039308306, 'patience': 5, 'batch_size': 64, 'seq_len': 6, 'l1_lambda': 0.0010526547274094609}. Best is trial 0 with value: inf.

Progress: 15/100 trials | Best loss: inf
Trial    State        Loss         Std          hidden_size  lr           batch_size  
----------------------------------------------------------------------------------------------------



Best trial: 0. Best value: inf:  15%|█▌        | 15/100 [00:04<00:22,  3.84it/s]

Trial 16 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:03,508] Trial 15 finished with value: inf and parameters: {'num_channels': [64, 64], 'kernel_size': 3, 'dropout': 0.11955822733823399, 'lr': 0.004463528310467936, 'wd': 8.729208082036479e-06, 'factor': 0.4957598865358125, 'patience': 10, 'batch_size': 64, 'seq_len': 18, 'l1_lambda': 0.003563050766975382}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  16%|█▌        | 16/100 [00:04<00:22,  3.77it/s]

Trial 18 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 17 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:04,048] Trial 16 finished with value: inf and parameters: {'num_channels': [256], 'kernel_size': 5, 'dropout': 0.30568844099398257, 'lr': 0.007612016584959578, 'wd': 1.54307527541786e-06, 'factor': 0.23360400842060072, 'patience': 5, 'batch_size': 64, 'seq_len': 6, 'l1_lambda': 1.8150756327798236e-05}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  17%|█▋        | 17/100 [00:04<00:22,  3.73it/s]

Trial 19 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:04,340] Trial 18 finished with value: inf and parameters: {'num_channels': [256, 256], 'kernel_size': 5, 'dropout': 0.18770419242616698, 'lr': 0.005965631484813097, 'wd': 9.819847834339692e-06, 'factor': 0.4034463614115393, 'patience': 5, 'batch_size': 32, 'seq_len': 18, 'l1_lambda': 0.009964637465635853}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  18%|█▊        | 18/100 [00:04<00:21,  3.75it/s]

Trial 20 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:04,343] Trial 17 finished with value: inf and parameters: {'num_channels': [256], 'kernel_size': 2, 'dropout': 0.3035243054008977, 'lr': 0.02112672268068842, 'wd': 1.5620211692429738e-06, 'factor': 0.27086183081967147, 'patience': 5, 'batch_size': 16, 'seq_len': 12, 'l1_lambda': 0.0013048435542569016}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  19%|█▉        | 19/100 [00:05<00:21,  3.74it/s]

Trial 21 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:04,612] Trial 19 finished with value: inf and parameters: {'num_channels': [256], 'kernel_size': 2, 'dropout': 0.31507510039217473, 'lr': 0.000600267677628392, 'wd': 1.0201187525712638e-05, 'factor': 0.4001545450207983, 'patience': 10, 'batch_size': 32, 'seq_len': 18, 'l1_lambda': 0.007291075334246648}. Best is trial 0 with value: inf.

Progress: 20/100 trials | Best loss: inf
Trial    State        Loss         Std          hidden_size  lr           batch_size  
----------------------------------------------------------------------------------------------------



Best trial: 0. Best value: inf:  20%|██        | 20/100 [00:05<00:21,  3.72it/s]

Trial 22 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:04,870] Trial 20 finished with value: inf and parameters: {'num_channels': [128, 128, 128], 'kernel_size': 2, 'dropout': 0.20727098139435973, 'lr': 0.0340688877767162, 'wd': 1.015736028405891e-06, 'factor': 0.2569942681646822, 'patience': 5, 'batch_size': 64, 'seq_len': 6, 'l1_lambda': 0.004416208428924843}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  20%|██        | 20/100 [00:05<00:21,  3.72it/s]

[I 2026-02-11 22:48:05,138] Trial 21 finished with value: inf and parameters: {'num_channels': [256, 256], 'kernel_size': 5, 'dropout': 0.3081508802294892, 'lr': 0.007192465684644395, 'wd': 1.6922195506293753e-05, 'factor': 0.36797448230610325, 'patience': 10, 'batch_size': 32, 'seq_len': 6, 'l1_lambda': 4.918943942943547e-05}. Best is trial 0 with value: inf.
Trial 23 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  22%|██▏       | 22/100 [00:06<00:24,  3.23it/s]

[I 2026-02-11 22:48:05,415] Trial 22 finished with value: inf and parameters: {'num_channels': [128, 128, 128], 'kernel_size': 2, 'dropout': 0.26392401963278317, 'lr': 0.0011412031446903007, 'wd': 1.4913922982412222e-05, 'factor': 0.4006434335914314, 'patience': 10, 'batch_size': 32, 'seq_len': 12, 'l1_lambda': 1.922282928042215e-05}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  22%|██▏       | 22/100 [00:06<00:24,  3.23it/s]

Trial 24 failed: ray_train() got an unexpected keyword argument 'trial'[I 2026-02-11 22:48:05,687] Trial 23 finished with value: inf and parameters: {'num_channels': [256], 'kernel_size': 3, 'dropout': 0.28924980789913735, 'lr': 0.004559176060829472, 'wd': 8.15440032534404e-06, 'factor': 0.43607348958255654, 'patience': 10, 'batch_size': 32, 'seq_len': 3, 'l1_lambda': 8.274232626690785e-05}. Best is trial 0 with value: inf.



Best trial: 0. Best value: inf:  24%|██▍       | 24/100 [00:06<00:28,  2.62it/s]

Trial 25 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:06,231] Trial 24 finished with value: inf and parameters: {'num_channels': [64], 'kernel_size': 3, 'dropout': 0.2524789642222669, 'lr': 0.009811746594465197, 'wd': 5.929535159645735e-06, 'factor': 0.4600774545755337, 'patience': 10, 'batch_size': 32, 'seq_len': 18, 'l1_lambda': 0.0035582616919280947}. Best is trial 0 with value: inf.

Progress: 25/100 trials | Best loss: inf
Trial    State        Loss         Std          hidden_size  lr           batch_size  
----------------------------------------------------------------------------------------------------



Best trial: 0. Best value: inf:  24%|██▍       | 24/100 [00:06<00:28,  2.62it/s]

Trial 26 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 27 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:06,494] Trial 25 finished with value: inf and parameters: {'num_channels': [256, 256], 'kernel_size': 3, 'dropout': 0.3429533909214738, 'lr': 0.0016581069565247603, 'wd': 8.932273847588728e-06, 'factor': 0.46274800120488874, 'patience': 10, 'batch_size': 64, 'seq_len': 6, 'l1_lambda': 3.498858204020434e-05}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  25%|██▌       | 25/100 [00:07<00:27,  2.74it/s]

[I 2026-02-11 22:48:06,803] Trial 26 finished with value: inf and parameters: {'num_channels': [256, 256, 256], 'kernel_size': 2, 'dropout': 0.12306543595105371, 'lr': 0.001987204552782561, 'wd': 3.945387487168286e-06, 'factor': 0.4506648706912759, 'patience': 10, 'batch_size': 32, 'seq_len': 18, 'l1_lambda': 0.0028795149892071052}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  26%|██▌       | 26/100 [00:07<00:25,  2.95it/s]

Trial 28 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:06,807] Trial 27 finished with value: inf and parameters: {'num_channels': [64, 64], 'kernel_size': 2, 'dropout': 0.2288163696794242, 'lr': 0.00785416261549385, 'wd': 4.0826938102390785e-06, 'factor': 0.2329036801199061, 'patience': 10, 'batch_size': 64, 'seq_len': 6, 'l1_lambda': 0.0005059835482332615}. Best is trial 0 with value: inf.
Trial 29 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  28%|██▊       | 28/100 [00:07<00:22,  3.13it/s]

[I 2026-02-11 22:48:07,394] Trial 28 finished with value: inf and parameters: {'num_channels': [256], 'kernel_size': 2, 'dropout': 0.19308960933397695, 'lr': 0.009203684076505344, 'wd': 7.118860085702622e-06, 'factor': 0.2223504492278817, 'patience': 5, 'batch_size': 64, 'seq_len': 6, 'l1_lambda': 0.0001703863413009771}. Best is trial 0 with value: inf.
Trial 30 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  29%|██▉       | 29/100 [00:08<00:18,  3.81it/s]

[I 2026-02-11 22:48:07,398] Trial 29 finished with value: inf and parameters: {'num_channels': [256], 'kernel_size': 2, 'dropout': 0.20549784698132428, 'lr': 0.005872503227476658, 'wd': 3.0330156210881065e-06, 'factor': 0.28442696200245743, 'patience': 5, 'batch_size': 64, 'seq_len': 3, 'l1_lambda': 9.953377634229378e-05}. Best is trial 0 with value: inf.

Progress: 30/100 trials | Best loss: inf
Trial    State        Loss         Std          hidden_size  lr           batch_size  
----------------------------------------------------------------------------------------------------



Best trial: 0. Best value: inf:  30%|███       | 30/100 [00:08<00:18,  3.70it/s]

[I 2026-02-11 22:48:07,671] Trial 30 finished with value: inf and parameters: {'num_channels': [256, 256], 'kernel_size': 3, 'dropout': 0.23664245377688892, 'lr': 0.0008418500010827547, 'wd': 3.082200975495583e-05, 'factor': 0.49830491092728724, 'patience': 10, 'batch_size': 16, 'seq_len': 18, 'l1_lambda': 0.0054926959143042214}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  31%|███       | 31/100 [00:08<00:18,  3.73it/s]

Trial 32 failed: ray_train() got an unexpected keyword argument 'trial'Trial 31 failed: ray_train() got an unexpected keyword argument 'trial'

[I 2026-02-11 22:48:08,481] Trial 31 finished with value: inf and parameters: {'num_channels': [64], 'kernel_size': 4, 'dropout': 0.16272511364390208, 'lr': 0.0015019491539135694, 'wd': 1.6803152792632603e-06, 'factor': 0.4472318338012714, 'patience': 5, 'batch_size': 64, 'seq_len': 12, 'l1_lambda': 0.00020137139153076305}. Best is trial 0 with value: inf.
Trial 33 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  32%|███▏      | 32/100 [00:08<00:18,  3.62it/s]

[I 2026-02-11 22:48:08,481] Trial 32 finished with value: inf and parameters: {'num_channels': [128, 128], 'kernel_size': 4, 'dropout': 0.11214078958098697, 'lr': 0.002926681526996086, 'wd': 1.2313512942819592e-06, 'factor': 0.3827245017692029, 'patience': 5, 'batch_size': 64, 'seq_len': 12, 'l1_lambda': 4.317275975537833e-05}. Best is trial 0 with value: inf.
Trial 34 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  32%|███▏      | 32/100 [00:09<00:18,  3.62it/s]

Trial 35 failed: ray_train() got an unexpected keyword argument 'trial'[I 2026-02-11 22:48:08,515] Trial 33 finished with value: inf and parameters: {'num_channels': [64, 64, 64], 'kernel_size': 3, 'dropout': 0.37002457036181285, 'lr': 0.0008355580716448893, 'wd': 1.1071034231045056e-05, 'factor': 0.31875003677184044, 'patience': 10, 'batch_size': 32, 'seq_len': 6, 'l1_lambda': 0.0003725861251319246}. Best is trial 0 with value: inf.



Best trial: 0. Best value: inf:  34%|███▍      | 34/100 [00:09<00:24,  2.68it/s]

[I 2026-02-11 22:48:08,797] Trial 34 finished with value: inf and parameters: {'num_channels': [128], 'kernel_size': 3, 'dropout': 0.283567023658527, 'lr': 0.0014128119499858824, 'wd': 1.2599685443968616e-05, 'factor': 0.3338309846018662, 'patience': 5, 'batch_size': 32, 'seq_len': 3, 'l1_lambda': 1.7652089246778844e-05}. Best is trial 0 with value: inf.

Progress: 35/100 trials | Best loss: inf
Trial    State        Loss         Std          hidden_size  lr           batch_size  
----------------------------------------------------------------------------------------------------



Best trial: 0. Best value: inf:  35%|███▌      | 35/100 [00:09<00:23,  2.74it/s]

[I 2026-02-11 22:48:09,076] Trial 35 finished with value: inf and parameters: {'num_channels': [256, 256], 'kernel_size': 3, 'dropout': 0.254268530623899, 'lr': 0.0041874517912947095, 'wd': 9.173691859632896e-06, 'factor': 0.36621812186011293, 'patience': 5, 'batch_size': 32, 'seq_len': 6, 'l1_lambda': 0.00010757633014849547}. Best is trial 0 with value: inf.
Trial 36 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  35%|███▌      | 35/100 [00:10<00:23,  2.74it/s]

[I 2026-02-11 22:48:09,663] Trial 36 finished with value: inf and parameters: {'num_channels': [256], 'kernel_size': 2, 'dropout': 0.36297333806798526, 'lr': 0.0021493883756059586, 'wd': 4.406122503171936e-06, 'factor': 0.23339661421940514, 'patience': 5, 'batch_size': 32, 'seq_len': 3, 'l1_lambda': 0.00017832174725689533}. Best is trial 0 with value: inf.
Trial 37 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  37%|███▋      | 37/100 [00:10<00:22,  2.79it/s]

Trial 38 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:10,216] Trial 37 finished with value: inf and parameters: {'num_channels': [256, 256], 'kernel_size': 3, 'dropout': 0.3165627245680598, 'lr': 0.0023870996795534354, 'wd': 6.2654269626480195e-06, 'factor': 0.23628876826001718, 'patience': 5, 'batch_size': 64, 'seq_len': 12, 'l1_lambda': 0.0007638836694215405}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  37%|███▋      | 37/100 [00:10<00:22,  2.79it/s]

[I 2026-02-11 22:48:10,223] Trial 38 finished with value: inf and parameters: {'num_channels': [128, 128], 'kernel_size': 3, 'dropout': 0.30202119510943554, 'lr': 0.006869821235363129, 'wd': 5.181981214000243e-06, 'factor': 0.35926356352845595, 'patience': 10, 'batch_size': 16, 'seq_len': 6, 'l1_lambda': 7.019926444851748e-05}. Best is trial 0 with value: inf.
Trial 39 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  38%|███▊      | 38/100 [00:10<00:24,  2.49it/s]

[I 2026-02-11 22:48:10,527] Trial 39 finished with value: inf and parameters: {'num_channels': [128], 'kernel_size': 5, 'dropout': 0.21624335482316123, 'lr': 0.0017893072667160867, 'wd': 3.044035446156851e-06, 'factor': 0.3780351195163443, 'patience': 5, 'batch_size': 32, 'seq_len': 12, 'l1_lambda': 0.00029695854461010576}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  39%|███▉      | 39/100 [00:11<00:24,  2.46it/s]



Progress: 40/100 trials | Best loss: inf
Trial    State        Loss         Std          hidden_size  lr           batch_size  
----------------------------------------------------------------------------------------------------

[I 2026-02-11 22:48:11,047] Trial 40 finished with value: inf and parameters: {'num_channels': [256, 256], 'kernel_size': 3, 'dropout': 0.3365937885979656, 'lr': 0.003434420182987731, 'wd': 4.81025861291252e-06, 'factor': 0.3643287808972578, 'patience': 10, 'batch_size': 32, 'seq_len': 12, 'l1_lambda': 4.3831624056990424e-05}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  41%|████      | 41/100 [00:11<00:23,  2.49it/s]

Trial 41 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 42 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:11,317] Trial 41 finished with value: inf and parameters: {'num_channels': [64, 64], 'kernel_size': 3, 'dropout': 0.24366618331452952, 'lr': 0.00481038761504218, 'wd': 5.0317028667197223e-05, 'factor': 0.3926265832383268, 'patience': 10, 'batch_size': 16, 'seq_len': 24, 'l1_lambda': 6.456066138886808e-05}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  41%|████      | 41/100 [00:11<00:23,  2.49it/s]

[I 2026-02-11 22:48:11,347] Trial 42 finished with value: inf and parameters: {'num_channels': [64], 'kernel_size': 4, 'dropout': 0.14915997925770977, 'lr': 0.0011792116701021026, 'wd': 2.0196610705315512e-06, 'factor': 0.35098995809206046, 'patience': 5, 'batch_size': 32, 'seq_len': 18, 'l1_lambda': 3.309095550607986e-05}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  43%|████▎     | 43/100 [00:12<00:21,  2.71it/s]

Trial 44 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 43 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 45 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:11,890] Trial 44 finished with value: inf and parameters: {'num_channels': [128, 128], 'kernel_size': 3, 'dropout': 0.22720096842444765, 'lr': 0.0016427969126982657, 'wd': 4.379172753255439e-05, 'factor': 0.38609717749579275, 'patience': 10, 'batch_size': 32, 'seq_len': 24, 'l1_lambda': 0.0006732842328962085}. Best is trial 0 with value: inf.
Trial 46 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  43%|████▎     | 43/100 [00:12<00:21,  2.71it/s]

[I 2026-02-11 22:48:11,892] Trial 43 finished with value: inf and parameters: {'num_channels': [64, 64], 'kernel_size': 3, 'dropout': 0.26944204249126413, 'lr': 0.004688330487786305, 'wd': 7.332771125981668e-05, 'factor': 0.2879047854691493, 'patience': 10, 'batch_size': 64, 'seq_len': 24, 'l1_lambda': 0.0005698295038836825}. Best is trial 0 with value: inf.

Progress: 45/100 trials | Best loss: inf
Trial    State        Loss         Std          hidden_size  lr           batch_size  
----------------------------------------------------------------------------------------------------



Best trial: 0. Best value: inf:  45%|████▌     | 45/100 [00:12<00:19,  2.80it/s]

[I 2026-02-11 22:48:11,931] Trial 45 finished with value: inf and parameters: {'num_channels': [128], 'kernel_size': 4, 'dropout': 0.29004751839986237, 'lr': 0.005344165127490481, 'wd': 5.572086929524101e-06, 'factor': 0.40834196582832233, 'patience': 10, 'batch_size': 32, 'seq_len': 6, 'l1_lambda': 2.93421501854875e-05}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  46%|████▌     | 46/100 [00:13<00:21,  2.48it/s]

[I 2026-02-11 22:48:11,938] Trial 46 finished with value: inf and parameters: {'num_channels': [256], 'kernel_size': 2, 'dropout': 0.3395746470269362, 'lr': 0.010877214242781048, 'wd': 6.766590262383835e-06, 'factor': 0.2550255903763545, 'patience': 5, 'batch_size': 64, 'seq_len': 18, 'l1_lambda': 0.0005815808146222483}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  47%|████▋     | 47/100 [00:13<00:21,  2.48it/s]

Trial 47 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:13,028] Trial 47 finished with value: inf and parameters: {'num_channels': [64, 64, 64], 'kernel_size': 5, 'dropout': 0.1381276177847376, 'lr': 0.0009996398796155515, 'wd': 1.5077578291045076e-06, 'factor': 0.36838378884116396, 'patience': 5, 'batch_size': 64, 'seq_len': 6, 'l1_lambda': 9.591845466595935e-05}. Best is trial 0 with value: inf.
Trial 48 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 49 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 50 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  48%|████▊     | 48/100 [00:13<00:16,  3.14it/s]

[I 2026-02-11 22:48:13,056] Trial 48 finished with value: inf and parameters: {'num_channels': [64, 64, 64], 'kernel_size': 3, 'dropout': 0.24204615322052014, 'lr': 0.008020760054492622, 'wd': 7.4751973721201315e-06, 'factor': 0.2793140104068282, 'patience': 10, 'batch_size': 16, 'seq_len': 24, 'l1_lambda': 0.0006049939012884556}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  49%|████▉     | 49/100 [00:13<00:15,  3.21it/s]

[I 2026-02-11 22:48:13,071] Trial 49 finished with value: inf and parameters: {'num_channels': [64, 64, 64], 'kernel_size': 3, 'dropout': 0.27066978677475634, 'lr': 0.006236624131828528, 'wd': 1.595929103935536e-05, 'factor': 0.4038632822071128, 'patience': 10, 'batch_size': 16, 'seq_len': 24, 'l1_lambda': 0.0021355035262826624}. Best is trial 0 with value: inf.

Progress: 50/100 trials | Best loss: inf
Trial    State        Loss         Std          hidden_size  lr           batch_size  
----------------------------------------------------------------------------------------------------

Trial 51 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  50%|█████     | 50/100 [00:14<00:15,  3.28it/s]

[I 2026-02-11 22:48:13,103] Trial 50 finished with value: inf and parameters: {'num_channels': [64, 64], 'kernel_size': 5, 'dropout': 0.2949021328768152, 'lr': 0.003907325802310597, 'wd': 6.317000527197607e-05, 'factor': 0.3315515411967328, 'patience': 10, 'batch_size': 16, 'seq_len': 3, 'l1_lambda': 0.0006004453070852604}. Best is trial 0 with value: inf.
Trial 52 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  50%|█████     | 50/100 [00:14<00:15,  3.28it/s]

[I 2026-02-11 22:48:13,922] Trial 51 finished with value: inf and parameters: {'num_channels': [128], 'kernel_size': 3, 'dropout': 0.1680738187420229, 'lr': 0.04209959241823498, 'wd': 6.132982345872949e-06, 'factor': 0.21598221996206385, 'patience': 5, 'batch_size': 32, 'seq_len': 6, 'l1_lambda': 9.47972793054999e-05}. Best is trial 0 with value: inf.
Trial 53 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  52%|█████▏    | 52/100 [00:14<00:18,  2.65it/s]

[I 2026-02-11 22:48:14,263] Trial 52 finished with value: inf and parameters: {'num_channels': [128], 'kernel_size': 5, 'dropout': 0.25222745319083095, 'lr': 0.010818685817498973, 'wd': 4.058346833636923e-06, 'factor': 0.24379187606568814, 'patience': 5, 'batch_size': 32, 'seq_len': 6, 'l1_lambda': 0.000223474125553001}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  53%|█████▎    | 53/100 [00:14<00:13,  3.49it/s]

[I 2026-02-11 22:48:14,297] Trial 53 finished with value: inf and parameters: {'num_channels': [128, 128], 'kernel_size': 5, 'dropout': 0.2147318896840632, 'lr': 0.0026801318205264286, 'wd': 3.43829134045582e-05, 'factor': 0.45614398001544915, 'patience': 10, 'batch_size': 64, 'seq_len': 18, 'l1_lambda': 0.005923075201102764}. Best is trial 0 with value: inf.
Trial 54 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 55 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  54%|█████▍    | 54/100 [00:15<00:13,  3.45it/s]

[I 2026-02-11 22:48:14,853] Trial 54 finished with value: inf and parameters: {'num_channels': [256, 256], 'kernel_size': 3, 'dropout': 0.1803000691544361, 'lr': 0.0023846245734941373, 'wd': 1.5888060958695544e-05, 'factor': 0.46632355242542156, 'patience': 10, 'batch_size': 32, 'seq_len': 6, 'l1_lambda': 0.005654926118876091}. Best is trial 0 with value: inf.
Trial 56 failed: ray_train() got an unexpected keyword argument 'trial'

Progress: 55/100 trials | Best loss: inf
Trial    State        Loss         Std          hidden_size  lr           batch_size  
----------------------------------------------------------------------------------------------------



Best trial: 0. Best value: inf:  55%|█████▌    | 55/100 [00:15<00:16,  2.68it/s]

[I 2026-02-11 22:48:14,855] Trial 55 finished with value: inf and parameters: {'num_channels': [128, 128, 128], 'kernel_size': 3, 'dropout': 0.17620252846252002, 'lr': 0.004644773975741823, 'wd': 1.3525312026987046e-05, 'factor': 0.42889979371721515, 'patience': 10, 'batch_size': 32, 'seq_len': 18, 'l1_lambda': 0.004321372828849195}. Best is trial 0 with value: inf.
Trial 57 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  55%|█████▌    | 55/100 [00:15<00:16,  2.68it/s]

[I 2026-02-11 22:48:15,178] Trial 56 finished with value: inf and parameters: {'num_channels': [128], 'kernel_size': 3, 'dropout': 0.2778068938381306, 'lr': 0.0038688784204482427, 'wd': 1.5371430972448425e-05, 'factor': 0.21221239789283938, 'patience': 5, 'batch_size': 32, 'seq_len': 6, 'l1_lambda': 0.0003591381264511439}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  56%|█████▌    | 56/100 [00:16<00:15,  2.85it/s]

Trial 58 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:15,784] Trial 57 finished with value: inf and parameters: {'num_channels': [128, 128], 'kernel_size': 3, 'dropout': 0.31349583213663984, 'lr': 0.0007734081859504382, 'wd': 1.53387407940644e-05, 'factor': 0.43558639940601035, 'patience': 10, 'batch_size': 32, 'seq_len': 6, 'l1_lambda': 1.580363644378732e-05}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  58%|█████▊    | 58/100 [00:16<00:10,  3.91it/s]

Trial 59 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:16,076] Trial 58 finished with value: inf and parameters: {'num_channels': [256, 256], 'kernel_size': 4, 'dropout': 0.1238288126065007, 'lr': 0.001855589921846553, 'wd': 9.103709993207756e-06, 'factor': 0.48355274746920845, 'patience': 10, 'batch_size': 32, 'seq_len': 18, 'l1_lambda': 0.0033775141254536773}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  59%|█████▉    | 59/100 [00:17<00:13,  3.02it/s]

[I 2026-02-11 22:48:16,368] Trial 59 finished with value: inf and parameters: {'num_channels': [256, 256], 'kernel_size': 3, 'dropout': 0.20243590343786422, 'lr': 0.0011849360311062137, 'wd': 8.664672812000852e-06, 'factor': 0.4089948901635024, 'patience': 10, 'batch_size': 32, 'seq_len': 6, 'l1_lambda': 1.9547129011439746e-05}. Best is trial 0 with value: inf.

Progress: 60/100 trials | Best loss: inf
Trial    State        Loss         Std          hidden_size  lr           batch_size  
----------------------------------------------------------------------------------------------------



Best trial: 0. Best value: inf:  60%|██████    | 60/100 [00:17<00:13,  3.02it/s]

Trial 60 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 61 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 62 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 63 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:16,955] Trial 60 finished with value: inf and parameters: {'num_channels': [128], 'kernel_size': 3, 'dropout': 0.1137231385624185, 'lr': 0.013232161660353198, 'wd': 9.526992413280435e-06, 'factor': 0.23783800923823134, 'patience': 5, 'batch_size': 16, 'seq_len': 18, 'l1_lambda': 0.0007937079735040925}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  61%|██████    | 61/100 [00:17<00:12,  3.09it/s]

[I 2026-02-11 22:48:16,985] Trial 61 finished with value: inf and parameters: {'num_channels': [256, 256], 'kernel_size': 3, 'dropout': 0.20306630411187174, 'lr': 0.017274794856559285, 'wd': 1.82291798230315e-05, 'factor': 0.23670436219671373, 'patience': 5, 'batch_size': 32, 'seq_len': 6, 'l1_lambda': 0.008802958209837565}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  62%|██████▏   | 62/100 [00:17<00:10,  3.69it/s]

[I 2026-02-11 22:48:16,999] Trial 62 finished with value: inf and parameters: {'num_channels': [64, 64], 'kernel_size': 2, 'dropout': 0.23167620115021761, 'lr': 0.02571978336846969, 'wd': 1.637155104977999e-06, 'factor': 0.444063235718666, 'patience': 10, 'batch_size': 64, 'seq_len': 12, 'l1_lambda': 2.7519624825231348e-05}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  62%|██████▏   | 62/100 [00:17<00:10,  3.69it/s]

[I 2026-02-11 22:48:17,004] Trial 63 finished with value: inf and parameters: {'num_channels': [64, 64], 'kernel_size': 2, 'dropout': 0.16978172603784994, 'lr': 0.022052911114594172, 'wd': 1.6475502463096498e-05, 'factor': 0.47541504809356927, 'patience': 10, 'batch_size': 64, 'seq_len': 12, 'l1_lambda': 3.1727868479085105e-05}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  64%|██████▍   | 64/100 [00:18<00:11,  3.01it/s]

Trial 64 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 65 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:18,211] Trial 64 finished with value: inf and parameters: {'num_channels': [64, 64], 'kernel_size': 4, 'dropout': 0.2647830957472645, 'lr': 0.03474154553187729, 'wd': 1.2660314842935049e-05, 'factor': 0.4577162609614889, 'patience': 5, 'batch_size': 64, 'seq_len': 12, 'l1_lambda': 2.688536854802255e-05}. Best is trial 0 with value: inf.

Progress: 65/100 trials | Best loss: inf
Trial    State        Loss         Std          hidden_size  lr           batch_size  
----------------------------------------------------------------------------------------------------



Best trial: 0. Best value: inf:  65%|██████▌   | 65/100 [00:18<00:11,  3.13it/s]

Trial 66 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 67 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:18,214] Trial 65 finished with value: inf and parameters: {'num_channels': [256, 256], 'kernel_size': 3, 'dropout': 0.13354227251348116, 'lr': 0.0007759380540053023, 'wd': 3.9403428874266645e-06, 'factor': 0.41059510837180685, 'patience': 10, 'batch_size': 64, 'seq_len': 18, 'l1_lambda': 0.006359391921703271}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  65%|██████▌   | 65/100 [00:18<00:11,  3.13it/s]

[I 2026-02-11 22:48:18,511] Trial 66 finished with value: inf and parameters: {'num_channels': [256], 'kernel_size': 4, 'dropout': 0.2398670339663987, 'lr': 0.0030440775739910735, 'wd': 5.401041214766441e-06, 'factor': 0.2385382087512161, 'patience': 5, 'batch_size': 64, 'seq_len': 24, 'l1_lambda': 0.0002787699516458652}. Best is trial 0 with value: inf.
Trial 68 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  67%|██████▋   | 67/100 [00:19<00:11,  2.94it/s]

[I 2026-02-11 22:48:18,514] Trial 67 finished with value: inf and parameters: {'num_channels': [256, 256, 256], 'kernel_size': 3, 'dropout': 0.21164389501657285, 'lr': 0.011877589486382855, 'wd': 4.614306669519208e-06, 'factor': 0.2006192102533471, 'patience': 5, 'batch_size': 32, 'seq_len': 3, 'l1_lambda': 0.00024105215220064324}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  68%|██████▊   | 68/100 [00:19<00:10,  3.05it/s]

Trial 69 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:18,791] Trial 68 finished with value: inf and parameters: {'num_channels': [256, 256], 'kernel_size': 3, 'dropout': 0.18105213403645598, 'lr': 0.0024561789042742066, 'wd': 7.419630502356009e-06, 'factor': 0.42933664665509536, 'patience': 10, 'batch_size': 32, 'seq_len': 18, 'l1_lambda': 0.009303642019128878}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  68%|██████▊   | 68/100 [00:19<00:10,  3.05it/s]

[I 2026-02-11 22:48:19,360] Trial 69 finished with value: inf and parameters: {'num_channels': [64, 64, 64], 'kernel_size': 3, 'dropout': 0.17118712193999897, 'lr': 0.0010077154985144636, 'wd': 5.4439018190533916e-05, 'factor': 0.34107040800581045, 'patience': 10, 'batch_size': 16, 'seq_len': 24, 'l1_lambda': 8.331955990394292e-05}. Best is trial 0 with value: inf.Trial 70 failed: ray_train() got an unexpected keyword argument 'trial'


Progress: 70/100 trials | Best loss: inf
Trial    State        Loss         Std          hidden_size  lr           batch_size  
----------------------------------------------------------------------------------------------------



Best trial: 0. Best value: inf:  69%|██████▉   | 69/100 [00:19<00:11,  2.65it/s]

[I 2026-02-11 22:48:19,626] Trial 70 finished with value: inf and parameters: {'num_channels': [128], 'kernel_size': 5, 'dropout': 0.30895577575908545, 'lr': 0.022491150186924113, 'wd': 1.2210899657694369e-05, 'factor': 0.49701731672844146, 'patience': 10, 'batch_size': 64, 'seq_len': 24, 'l1_lambda': 8.706167797590194e-05}. Best is trial 0 with value: inf.
Trial 71 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 72 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  71%|███████   | 71/100 [00:20<00:08,  3.52it/s]

[I 2026-02-11 22:48:20,161] Trial 71 finished with value: inf and parameters: {'num_channels': [64], 'kernel_size': 4, 'dropout': 0.22884918523559322, 'lr': 0.0019173956649654387, 'wd': 1.4439201749302884e-06, 'factor': 0.3648812821469113, 'patience': 5, 'batch_size': 16, 'seq_len': 12, 'l1_lambda': 3.577311202306873e-05}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  72%|███████▏  | 72/100 [00:20<00:07,  3.55it/s]

[I 2026-02-11 22:48:20,189] Trial 72 finished with value: inf and parameters: {'num_channels': [256, 256, 256], 'kernel_size': 2, 'dropout': 0.11048136767748083, 'lr': 0.010101093284609038, 'wd': 2.845917833115491e-06, 'factor': 0.21517260686684125, 'patience': 10, 'batch_size': 64, 'seq_len': 18, 'l1_lambda': 1.45370795240426e-05}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  73%|███████▎  | 73/100 [00:20<00:07,  3.44it/s]

Trial 73 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:20,823] Trial 73 finished with value: inf and parameters: {'num_channels': [256, 256], 'kernel_size': 4, 'dropout': 0.15231054788739964, 'lr': 0.0024329531286551237, 'wd': 1.659090474271898e-05, 'factor': 0.20188672873697272, 'patience': 10, 'batch_size': 64, 'seq_len': 18, 'l1_lambda': 1.6082012641190676e-05}. Best is trial 0 with value: inf.
Trial 76 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 74 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  74%|███████▍  | 74/100 [00:21<00:08,  3.20it/s]

Trial 75 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:20,882] Trial 76 finished with value: inf and parameters: {'num_channels': [64, 64, 64], 'kernel_size': 2, 'dropout': 0.2572615482511811, 'lr': 0.010133714192655336, 'wd': 9.926568013478114e-06, 'factor': 0.4546246027698872, 'patience': 10, 'batch_size': 16, 'seq_len': 12, 'l1_lambda': 2.0545013090018643e-05}. Best is trial 0 with value: inf.

Progress: 75/100 trials | Best loss: inf
Trial    State        Loss         Std          hidden_size  lr           batch_size  
----------------------------------------------------------------------------------------------------



Best trial: 0. Best value: inf:  74%|███████▍  | 74/100 [00:21<00:08,  3.20it/s]

[I 2026-02-11 22:48:20,893] Trial 74 finished with value: inf and parameters: {'num_channels': [256, 256, 256], 'kernel_size': 3, 'dropout': 0.12078698932261173, 'lr': 0.001733940593737401, 'wd': 1.1838459678213705e-05, 'factor': 0.2191752263833761, 'patience': 10, 'batch_size': 64, 'seq_len': 18, 'l1_lambda': 1.853036405170486e-05}. Best is trial 0 with value: inf.
Trial 77 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  76%|███████▌  | 76/100 [00:21<00:08,  2.98it/s]

[I 2026-02-11 22:48:21,181] Trial 75 finished with value: inf and parameters: {'num_channels': [256, 256, 256], 'kernel_size': 4, 'dropout': 0.12291057737422446, 'lr': 0.00438775234165334, 'wd': 1.6669292953662294e-05, 'factor': 0.23108565196020883, 'patience': 5, 'batch_size': 64, 'seq_len': 18, 'l1_lambda': 2.3502111184930586e-05}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  77%|███████▋  | 77/100 [00:22<00:07,  3.02it/s]

[I 2026-02-11 22:48:21,738] Trial 77 finished with value: inf and parameters: {'num_channels': [64, 64], 'kernel_size': 3, 'dropout': 0.2134090608293586, 'lr': 0.0032116312095624207, 'wd': 2.6657597924449172e-05, 'factor': 0.334715855761734, 'patience': 10, 'batch_size': 16, 'seq_len': 24, 'l1_lambda': 0.001359241613304547}. Best is trial 0 with value: inf.
Trial 78 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  78%|███████▊  | 78/100 [00:22<00:07,  3.09it/s]

Trial 79 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:22,055] Trial 78 finished with value: inf and parameters: {'num_channels': [64, 64], 'kernel_size': 2, 'dropout': 0.17821715001616456, 'lr': 0.004559853520882641, 'wd': 1.9884944254167693e-05, 'factor': 0.2778515198900361, 'patience': 10, 'batch_size': 64, 'seq_len': 18, 'l1_lambda': 2.1641712815827697e-05}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  78%|███████▊  | 78/100 [00:22<00:07,  3.09it/s]

[I 2026-02-11 22:48:22,343] Trial 79 finished with value: inf and parameters: {'num_channels': [256, 256, 256], 'kernel_size': 3, 'dropout': 0.29124441549770114, 'lr': 0.003847220376336277, 'wd': 3.125723697653674e-06, 'factor': 0.27500525562958067, 'patience': 10, 'batch_size': 32, 'seq_len': 6, 'l1_lambda': 2.2675869118656623e-05}. Best is trial 0 with value: inf.
Trial 80 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  80%|████████  | 80/100 [00:23<00:07,  2.73it/s]


Progress: 80/100 trials | Best loss: inf
Trial    State        Loss         Std          hidden_size  lr           batch_size  
----------------------------------------------------------------------------------------------------

Trial 81 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:22,695] Trial 80 finished with value: inf and parameters: {'num_channels': [64, 64], 'kernel_size': 3, 'dropout': 0.31410576174529553, 'lr': 0.0067903209405756485, 'wd': 1.3340106472363604e-05, 'factor': 0.3625240089743154, 'patience': 10, 'batch_size': 32, 'seq_len': 6, 'l1_lambda': 2.0352835903864194e-05}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  81%|████████  | 81/100 [00:23<00:07,  2.39it/s]

Trial 82 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:23,003] Trial 81 finished with value: inf and parameters: {'num_channels': [256, 256, 256], 'kernel_size': 2, 'dropout': 0.11852413858159094, 'lr': 0.0025050052942288733, 'wd': 2.0447831535064847e-05, 'factor': 0.22123552475697095, 'patience': 10, 'batch_size': 16, 'seq_len': 18, 'l1_lambda': 1.85506818976431e-05}. Best is trial 0 with value: inf.
Trial 83 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  82%|████████▏ | 82/100 [00:23<00:07,  2.39it/s]

[I 2026-02-11 22:48:23,290] Trial 82 finished with value: inf and parameters: {'num_channels': [256], 'kernel_size': 4, 'dropout': 0.36862671255366347, 'lr': 0.008696131389011201, 'wd': 2.3782314030385043e-05, 'factor': 0.3643568717432273, 'patience': 10, 'batch_size': 64, 'seq_len': 18, 'l1_lambda': 0.00041510077207274976}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  83%|████████▎ | 83/100 [00:24<00:05,  3.06it/s]

[I 2026-02-11 22:48:23,580] Trial 83 finished with value: inf and parameters: {'num_channels': [256], 'kernel_size': 2, 'dropout': 0.2933170604157092, 'lr': 0.004533057589442989, 'wd': 1.5055183912830267e-06, 'factor': 0.2759049231549428, 'patience': 5, 'batch_size': 64, 'seq_len': 6, 'l1_lambda': 0.0001974336817018802}. Best is trial 0 with value: inf.
Trial 85 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 84 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  83%|████████▎ | 83/100 [00:24<00:05,  3.06it/s]

[I 2026-02-11 22:48:23,971] Trial 85 finished with value: inf and parameters: {'num_channels': [64], 'kernel_size': 4, 'dropout': 0.15137720816470593, 'lr': 0.002349403337947591, 'wd': 4.644619751898043e-06, 'factor': 0.3798737807808141, 'patience': 10, 'batch_size': 64, 'seq_len': 24, 'l1_lambda': 0.00011492445063018569}. Best is trial 0 with value: inf.

Progress: 85/100 trials | Best loss: inf
Trial    State        Loss         Std          hidden_size  lr           batch_size  
----------------------------------------------------------------------------------------------------



Best trial: 0. Best value: inf:  85%|████████▌ | 85/100 [00:24<00:05,  2.83it/s]

[I 2026-02-11 22:48:23,975] Trial 84 finished with value: inf and parameters: {'num_channels': [256, 256, 256], 'kernel_size': 2, 'dropout': 0.15894375371757838, 'lr': 0.0031377555046991344, 'wd': 1.1529120146954207e-05, 'factor': 0.2769957947837343, 'patience': 10, 'batch_size': 64, 'seq_len': 12, 'l1_lambda': 5.525757190607219e-05}. Best is trial 0 with value: inf.
Trial 86 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  85%|████████▌ | 85/100 [00:24<00:05,  2.83it/s]

[I 2026-02-11 22:48:24,856] Trial 86 finished with value: inf and parameters: {'num_channels': [256, 256, 256], 'kernel_size': 4, 'dropout': 0.24953371475462033, 'lr': 0.0013252007353955777, 'wd': 2.0134549184215078e-06, 'factor': 0.4259979491459471, 'patience': 5, 'batch_size': 64, 'seq_len': 18, 'l1_lambda': 9.63742515812265e-05}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  87%|████████▋ | 87/100 [00:25<00:04,  2.78it/s]

Trial 87 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 90 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 88 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 89 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:25,147] Trial 87 finished with value: inf and parameters: {'num_channels': [128, 128], 'kernel_size': 3, 'dropout': 0.3748514155776259, 'lr': 0.015070703829408071, 'wd': 8.828210171053185e-06, 'factor': 0.4575541697036888, 'patience': 10, 'batch_size': 64, 'seq_len': 12, 'l1_lambda': 3.534831599192668e-05}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  88%|████████▊ | 88/100 [00:25<00:04,  2.80it/s]

[I 2026-02-11 22:48:25,199] Trial 90 finished with value: inf and parameters: {'num_channels': [128, 128, 128], 'kernel_size': 4, 'dropout': 0.3919245336236182, 'lr': 0.013166080111305411, 'wd': 3.235858441964197e-05, 'factor': 0.4162263908553214, 'patience': 10, 'batch_size': 32, 'seq_len': 18, 'l1_lambda': 2.6322354773456814e-05}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  89%|████████▉ | 89/100 [00:25<00:03,  3.00it/s]

[I 2026-02-11 22:48:25,201] Trial 88 finished with value: inf and parameters: {'num_channels': [128, 128], 'kernel_size': 5, 'dropout': 0.23771991762817263, 'lr': 0.031014115381391677, 'wd': 5.2758190832184365e-06, 'factor': 0.44990716101313466, 'patience': 10, 'batch_size': 64, 'seq_len': 12, 'l1_lambda': 9.400703137261927e-05}. Best is trial 0 with value: inf.

Progress: 90/100 trials | Best loss: inf
Trial    State        Loss         Std          hidden_size  lr           batch_size  
----------------------------------------------------------------------------------------------------



Best trial: 0. Best value: inf:  89%|████████▉ | 89/100 [00:26<00:03,  3.00it/s]

Trial 91 failed: ray_train() got an unexpected keyword argument 'trial'[I 2026-02-11 22:48:25,240] Trial 89 finished with value: inf and parameters: {'num_channels': [128], 'kernel_size': 4, 'dropout': 0.1663468436803403, 'lr': 0.016505389536241897, 'wd': 1.0725855160634628e-05, 'factor': 0.20086402613279036, 'patience': 10, 'batch_size': 32, 'seq_len': 6, 'l1_lambda': 0.0015140991575309539}. Best is trial 0 with value: inf.



Best trial: 0. Best value: inf:  90%|█████████ | 90/100 [00:26<00:04,  2.50it/s]

Trial 92 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:26,104] Trial 91 finished with value: inf and parameters: {'num_channels': [256], 'kernel_size': 3, 'dropout': 0.1304537851733832, 'lr': 0.0005123085479490382, 'wd': 3.717299476039799e-06, 'factor': 0.33021666635092506, 'patience': 5, 'batch_size': 16, 'seq_len': 24, 'l1_lambda': 0.00583883158038874}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  92%|█████████▏| 92/100 [00:27<00:03,  2.49it/s]

[I 2026-02-11 22:48:26,358] Trial 92 finished with value: inf and parameters: {'num_channels': [256], 'kernel_size': 3, 'dropout': 0.10490459067630561, 'lr': 0.0008347291482229618, 'wd': 5.32641976776715e-06, 'factor': 0.23863878396981014, 'patience': 5, 'batch_size': 16, 'seq_len': 24, 'l1_lambda': 0.0002121407865866019}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  93%|█████████▎| 93/100 [00:27<00:02,  2.67it/s]

Trial 93 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:26,953] Trial 93 finished with value: inf and parameters: {'num_channels': [128, 128], 'kernel_size': 5, 'dropout': 0.19056697554142782, 'lr': 0.0007603761128163897, 'wd': 2.4699976114959983e-06, 'factor': 0.29675204710007647, 'patience': 5, 'batch_size': 16, 'seq_len': 24, 'l1_lambda': 0.0003839537793656521}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  94%|█████████▍| 94/100 [00:27<00:02,  2.77it/s]

Trial 95 failed: ray_train() got an unexpected keyword argument 'trial'
Trial 94 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:27,288] Trial 95 finished with value: inf and parameters: {'num_channels': [64, 64], 'kernel_size': 2, 'dropout': 0.3231604115429817, 'lr': 0.03401523532422929, 'wd': 1.1729912152039511e-05, 'factor': 0.45998880254152963, 'patience': 10, 'batch_size': 64, 'seq_len': 12, 'l1_lambda': 0.00010700764247236302}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  94%|█████████▍| 94/100 [00:27<00:02,  2.77it/s]


Progress: 95/100 trials | Best loss: inf
Trial    State        Loss         Std          hidden_size  lr           batch_size  
----------------------------------------------------------------------------------------------------

Trial 96 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:27,314] Trial 94 finished with value: inf and parameters: {'num_channels': [256, 256], 'kernel_size': 2, 'dropout': 0.2588230713488581, 'lr': 0.006897668446324357, 'wd': 5.385968486382567e-06, 'factor': 0.48485059167859196, 'patience': 10, 'batch_size': 64, 'seq_len': 12, 'l1_lambda': 0.00018644296779741443}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  96%|█████████▌| 96/100 [00:28<00:01,  2.61it/s]

Trial 97 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:27,576] Trial 96 finished with value: inf and parameters: {'num_channels': [256], 'kernel_size': 2, 'dropout': 0.2742012538641218, 'lr': 0.0006694114207556116, 'wd': 2.140183555462775e-05, 'factor': 0.370163411247817, 'patience': 5, 'batch_size': 16, 'seq_len': 24, 'l1_lambda': 0.0003150697041191905}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  97%|█████████▋| 97/100 [00:28<00:01,  2.77it/s]

Trial 98 failed: ray_train() got an unexpected keyword argument 'trial'
[I 2026-02-11 22:48:27,928] Trial 97 finished with value: inf and parameters: {'num_channels': [256, 256], 'kernel_size': 3, 'dropout': 0.398288178535458, 'lr': 0.012074920747470465, 'wd': 1.4731860247244122e-05, 'factor': 0.4062763233046294, 'patience': 10, 'batch_size': 64, 'seq_len': 18, 'l1_lambda': 0.0006224268266008826}. Best is trial 0 with value: inf.
Trial 99 failed: ray_train() got an unexpected keyword argument 'trial'


Best trial: 0. Best value: inf:  97%|█████████▋| 97/100 [00:28<00:01,  2.77it/s]

[I 2026-02-11 22:48:28,221] Trial 98 finished with value: inf and parameters: {'num_channels': [256], 'kernel_size': 3, 'dropout': 0.23179112528636286, 'lr': 0.000510337553979427, 'wd': 6.129562423547996e-06, 'factor': 0.22185213841067541, 'patience': 5, 'batch_size': 32, 'seq_len': 24, 'l1_lambda': 0.0005849531600666792}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf:  99%|█████████▉| 99/100 [00:28<00:00,  2.71it/s]

[I 2026-02-11 22:48:28,236] Trial 99 finished with value: inf and parameters: {'num_channels': [128], 'kernel_size': 3, 'dropout': 0.15799491185309084, 'lr': 0.0011839865028727922, 'wd': 5.288421426915471e-06, 'factor': 0.2904766852451249, 'patience': 5, 'batch_size': 16, 'seq_len': 24, 'l1_lambda': 0.0005054031705634077}. Best is trial 0 with value: inf.


Best trial: 0. Best value: inf: 100%|██████████| 100/100 [00:29<00:00,  3.42it/s]
[W 2026-02-11 22:48:29,296] Trial 0 is omitted in visualization because its objective value is inf or nan.
[W 2026-02-11 22:48:29,303] Trial 1 is omitted in visualization because its objective value is inf or nan.
[W 2026-02-11 22:48:29,304] Trial 2 is omitted in visualization because its objective value is inf or nan.
[W 2026-02-11 22:48:29,305] Trial 3 is omitted in visualization because its objective value is inf or nan.
[W 2026-02-11 22:48:29,305] Trial 4 is omitted in visualization because its objective value is inf or nan.
[W 2026-02-11 22:48:29,306] Trial 5 is omitted in visualization because its objective value is inf or nan.
[W 2026-02-11 22:48:29,306] Trial 6 is omitted in visualization because its objective value is inf or nan.
[W 2026-02-11 22:48:29,306] Trial 7 is omitted in visualization because its objective value is inf or nan.
[W 2026-02-11 22:48:29,307] Trial 8 is omitted in visualizatio


Progress: 100/100 trials | Best loss: inf
Trial    State        Loss         Std          hidden_size  lr           batch_size  
----------------------------------------------------------------------------------------------------


Best Hyperparameters:
num_channels: [256, 256]
kernel_size: 3
dropout: 0.18012931121174408
lr: 0.0027922133130790848
wd: 1.056526508740203e-05
factor: 0.4246376248898365
patience: 10
batch_size: 32
seq_len: 18
l1_lambda: 0.007027019122754872
lag_periods: [1, 3, 12]
features: ['BIR', 'BOC', 'Other Offices', 'TotalTrade_PHPMN', 'NominalGDP_disagg', 'Pop_disagg']
labels: ['BIR', 'BOC', 'Other Offices']
dummy_vars: ['COVID-19', 'TRAIN', 'CREATE', 'FIST', 'BIR_COMM']
experiment_name: test

Best CV Loss: inf

Config saved to: configs/test/best_config.json
Metadata saved to: configs/test/metadata.json

Study results saved to: c:\Users\Jojie\Desktop\School\Masters\Sem 2\01 Capstone\TCN\ray_results\gru_tuning/
  - study.pkl (complete study object)
  - trials.csv (al

[W 2026-02-11 22:48:29,335] Trial 61 is omitted in visualization because its objective value is inf or nan.
[W 2026-02-11 22:48:29,336] Trial 62 is omitted in visualization because its objective value is inf or nan.
[W 2026-02-11 22:48:29,337] Trial 63 is omitted in visualization because its objective value is inf or nan.
[W 2026-02-11 22:48:29,339] Trial 64 is omitted in visualization because its objective value is inf or nan.
[W 2026-02-11 22:48:29,339] Trial 65 is omitted in visualization because its objective value is inf or nan.
[W 2026-02-11 22:48:29,340] Trial 66 is omitted in visualization because its objective value is inf or nan.
[W 2026-02-11 22:48:29,340] Trial 67 is omitted in visualization because its objective value is inf or nan.
[W 2026-02-11 22:48:29,340] Trial 68 is omitted in visualization because its objective value is inf or nan.
[W 2026-02-11 22:48:29,341] Trial 69 is omitted in visualization because its objective value is inf or nan.
[W 2026-02-11 22:48:29,341] 


Visualizations saved to configs/test/

TUNING COMPLETE


In [36]:
exp_folder = "configs/test"

config_path = f"{exp_folder}/best_config.json"

with open(config_path, "r") as f:
     best_config = json.load(f)
# Create args with best config
args = train.Arguments(
    **best_config,
    seed=1,
    epoch=200,  # Train longer for final model
)

# Set device
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
    
args.device = device
args.use_lags = True
args.lag_periods = [1, 3]
# Load dataset
dataset = train.load_dataset(args)

train.set_seed(args.seed)
exp_name = args.experiment_name if hasattr(args, 'experiment_name') else 'final'

# Split cv_data into train/val
train_size = int(0.85 * len(dataset['cv_data']))
final_train_data = dataset['cv_data'][:train_size]
final_train_labels = dataset['cv_labels'][:train_size]
final_val_data = dataset['cv_data'][train_size:]
final_val_labels = dataset['cv_labels'][train_size:]

# Scale data
exp_name = args.experiment_name if hasattr(args, 'experiment_name') else 'final'

# Scale data with organized folder structure
final_train_scaled = transform_data(final_train_data, f"Transforms/{exp_name}/train_scaled.pkl")
final_train_labels_scaled = transform_data(final_train_labels, f"Transforms/{exp_name}/labels_scaled.pkl")
final_val_scaled = transform_data(final_val_data, f"Transforms/{exp_name}/train_scaled.pkl")
final_val_labels_scaled = transform_data(final_val_labels, f"Transforms/{exp_name}/labels_scaled.pkl")
final_test_scaled = transform_data(dataset['test_data'], f"Transforms/{exp_name}/test_scaled.pkl")
final_test_labels_scaled = transform_data(dataset['test_labels'], f"Transforms/{exp_name}/labels_scaled.pkl")

# Create datasets
train_dataset = TimeSeriesDataset(final_train_scaled, final_train_labels_scaled, seq_len=args.seq_len)
val_dataset = TimeSeriesDataset(final_val_scaled, final_val_labels_scaled, seq_len=args.seq_len)
test_dataset = TimeSeriesDataset(final_test_scaled, final_test_labels_scaled, seq_len=args.seq_len)

# Create loaders
train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False)

In [44]:
# Create final model
train.set_seed(args.seed)
final_model = TCNRegressor(
    input_size=dataset['input_size'],  
    num_channels=args.num_channels,
    output_size=dataset['output_size'],  
    kernel_size=args.kernel_size,
    dropout=args.dropout
).to(device)

args.train_criterion = nn.HuberLoss(delta=1.0)
#args.train_criterion = nn.MSELoss()
args.test_criterion = train.MAPELoss()
args.final = True
# Train and evaluate
test_loss, test_preds, train_losses, val_losses = train.run(
    final_model,
    train_loader,
    val_loader,
    test_loader,
    args,
    fold=None
)

print(f"\n{'='*50}")
print(f"Final Model Results:")
print(f"{'='*50}")
print(f"Final Test Loss (MAPE): {test_loss:.4f}%")


`torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`.



RuntimeError: Given groups=1, weight of size [256, 23, 3], expected input[32, 18, 23] to have 23 channels, but got 18 channels instead

In [None]:
# Debug: Check actual input shape
print("="*50)
print("Dataset Info:")
print("="*50)
print(f"Reported input_size: {dataset['input_size']}")
print(f"Reported output_size: {dataset['output_size']}")

# Get actual shape from dataloader
for batch_x, batch_y in train_loader:
    print(f"\nActual batch input shape: {batch_x.shape}")
    print(f"Actual batch output shape: {batch_y.shape}")
    actual_input_size = batch_x.shape[1]  # Number of channels
    print(f"Actual input channels: {actual_input_size}")
    print(f"Actual output channels: {batch_y.shape[1]}")
    break

# Create final model with actual input size
train.set_seed(args.seed)
final_model = TCNRegressor(
    input_size=actual_input_size,  # Use actual shape from data
    num_channels=args.num_channels,
    output_size=batch_y.shape[1],  # Use actual output channels
    kernel_size=args.kernel_size,
    dropout=args.dropout
).to(device)

print(f"\n{'='*50}")
print(f"Model instantiated with:")
print(f"  input_size={actual_input_size}")
print(f"  num_channels={args.num_channels}")
print(f"  output_size={batch_y.shape[1]}")
print(f"  kernel_size={args.kernel_size}")
print(f"  dropout={args.dropout}")
print(f"{'='*50}\n")

args.train_criterion = nn.HuberLoss(delta=1.0)
args.test_criterion = train.MAPELoss()
args.final = True

# Train and evaluate
test_loss, test_preds, train_losses, val_losses = train.run(
    final_model,
    train_loader,
    val_loader,
    test_loader,
    args,
    fold=None
)

print(f"\n{'='*50}")
print(f"Final Model Results:")
print(f"{'='*50}")
print(f"Final Test Loss (MAPE): {test_loss:.4f}%")

Dataset Info:
Reported input_size: 23
Reported output_size: 3

Actual batch input shape: torch.Size([32, 18, 23])
Actual batch output shape: torch.Size([32, 3])
Actual input channels: 18
Actual output channels: 3

Model instantiated with:
  input_size=18
  num_channels=[256, 256]
  output_size=3
  kernel_size=3
  dropout=0.18012931121174408




`torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`.



Epoch 1/200 - Train Loss: 82.4182, Val Loss: 0.1186
Epoch 100/200 - Train Loss: 1.2167, Val Loss: 0.0369
Epoch 200/200 - Train Loss: 1.2058, Val Loss: 0.0353

Final Model Results:
Final Test Loss (MAPE): 26.6850%


In [45]:
# DEBUG: Track which channels/features are included
import os

base_dir = os.path.dirname(os.path.abspath(train.__file__))
data_dir = os.path.join(base_dir, "Data")

# Load raw data
btr_data = pd.read_csv(os.path.join(data_dir, "cordata.csv")).set_index("Date")
macro_data = pd.read_csv(os.path.join(data_dir, "disaggregated.csv")).fillna(0)
macro_data = macro_data.rename(columns={'Unnamed: 3': 'Date'}).set_index('Date')
dummy = pd.read_csv(os.path.join(data_dir, "dummy.csv")).fillna(0).set_index("Date")

# Check what's available
print("="*80)
print("CHANNEL AVAILABILITY ANALYSIS")
print("="*80)

features = args.features if hasattr(args, 'features') else []
labels = args.labels if hasattr(args, 'labels') else []
dummy_vars = args.dummy_vars if hasattr(args, 'dummy_vars') else []

print(f"\nFeatures requested: {features}")
print(f"Features found in data: {[f for f in features if f in btr_data.columns or f in macro_data.columns]}")
print(f"Features MISSING: {[f for f in features if f not in btr_data.columns and f not in macro_data.columns]}")

print(f"\nLabels requested: {labels}")
print(f"Labels found: {[l for l in labels if l in btr_data.columns]}")

print(f"\nDummy vars requested: {dummy_vars}")
print(f"Dummy vars available in dummy.csv: {list(dummy.columns)}")
print(f"Dummy vars FOUND in data: {[d for d in dummy_vars if d in dummy.columns]}")
print(f"Dummy vars MISSING: {[d for d in dummy_vars if d not in dummy.columns]}")

# Calculate expected channels
lag_count = len(args.lag_periods) * len(labels)
main_features_count = len([f for f in features if f in btr_data.columns or f in macro_data.columns])
dummy_count = len([d for d in dummy_vars if d in dummy.columns])
seasonal_features = ['month_sin', 'month_cos', 'quarter_sin', 'quarter_cos', 'is_tax_season', 'is_year_end']
seasonal_count = len(seasonal_features)

print("\n" + "="*80)
print("CHANNEL COUNT BREAKDOWN:")
print("="*80)
print(f"Main features: {main_features_count}")
print(f"Lag features (labels × lags): {lag_count} ({len(labels)} labels × {len(args.lag_periods)} lags)")
print(f"Dummy variables: {dummy_count}")
print(f"Seasonal features: {seasonal_count}")
print(f"{'─'*80}")
print(f"EXPECTED total: {main_features_count + lag_count + dummy_count + seasonal_count}")
print(f"ACTUAL (from data): {dataset['input_size']}")
print(f"DIFFERENCE: {main_features_count + lag_count + dummy_count + seasonal_count - dataset['input_size']}")
print("="*80)

CHANNEL AVAILABILITY ANALYSIS

Features requested: ['BIR', 'BOC', 'Other Offices', 'TotalTrade_PHPMN', 'NominalGDP_disagg', 'Pop_disagg']
Features found in data: ['BIR', 'BOC', 'Other Offices', 'TotalTrade_PHPMN', 'NominalGDP_disagg', 'Pop_disagg']
Features MISSING: []

Labels requested: ['BIR', 'BOC', 'Other Offices']
Labels found: ['BIR', 'BOC', 'Other Offices']

Dummy vars requested: ['COVID-19', 'TRAIN', 'CREATE', 'FIST', 'BIR_COMM']
Dummy vars available in dummy.csv: ['COVID-19', 'TRAIN', 'CREATE', 'FIST', 'BIR_COMM', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13', 'Unnamed: 14', 'Unnamed: 15', 'Unnamed: 16', 'Unnamed: 17', 'Unnamed: 18', 'Unnamed: 19', 'Unnamed: 20']
Dummy vars FOUND in data: ['COVID-19', 'TRAIN', 'CREATE', 'FIST', 'BIR_COMM']
Dummy vars MISSING: []

CHANNEL COUNT BREAKDOWN:
Main features: 6
Lag features (labels × lags): 6 (3 labels × 2 lags)
Dummy variables: 5
Seasonal features: 6
──────────────

In [48]:
batch_x

tensor([[[0.0807, 0.0347, 0.0601,  ..., 0.0000, 1.0000, 0.0000],
         [0.0021, 0.0126, 0.0559,  ..., 0.0000, 1.0000, 0.0000],
         [0.0155, 0.0428, 0.0250,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0389, 0.0904, 0.1077,  ..., 0.5000, 0.0000, 0.0000],
         [0.0325, 0.0836, 0.0934,  ..., 0.5000, 0.0000, 0.0000],
         [0.0127, 0.0768, 0.1220,  ..., 0.5000, 0.0000, 0.0000]],

        [[0.0021, 0.0126, 0.0559,  ..., 0.0000, 1.0000, 0.0000],
         [0.0155, 0.0428, 0.0250,  ..., 0.0000, 0.0000, 0.0000],
         [0.0466, 0.0311, 0.0684,  ..., 0.5000, 0.0000, 0.0000],
         ...,
         [0.0325, 0.0836, 0.0934,  ..., 0.5000, 0.0000, 0.0000],
         [0.0127, 0.0768, 0.1220,  ..., 0.5000, 0.0000, 0.0000],
         [0.0471, 0.0913, 0.0637,  ..., 1.0000, 0.0000, 0.0000]],

        [[0.0155, 0.0428, 0.0250,  ..., 0.0000, 0.0000, 0.0000],
         [0.0466, 0.0311, 0.0684,  ..., 0.5000, 0.0000, 0.0000],
         [0.0217, 0.0259, 0.0999,  ..., 0.5000, 0.0000, 0.