In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pandas as pd
import os
import json
import time
from itertools import product
from tqdm import tqdm 
import optuna

In [2]:
class LSTMTemporalFeatureExtractor(nn.Module):
    """
    Recurrent encoder that turns an input sequence into a 32-dimensional temporal
    feature vector, with a small regression head stacked on top of those features.
    Compatible with: CapsNet-LSTM-LightGBM, CNN-LSTM-LightGBM, CapsNet-LSTM
    """
    def __init__(self, input_size, hidden_size=64, num_layers=1, dropout=0.2, 
                 activation='relu', lstm_dropout=0.0):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Recurrent dropout is only requested for stacked LSTMs; with a single
        # layer it is forced to 0.
        lstm_config = dict(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=lstm_dropout if num_layers > 1 else 0,
            batch_first=True,
        )
        self.lstm = nn.LSTM(**lstm_config)

        # Name -> module class lookup; any unrecognised name falls back to ReLU.
        activation_types = {'relu': nn.ReLU, 'tanh': nn.Tanh, 'gelu': nn.GELU}
        self.activation = activation_types.get(activation, nn.ReLU)()

        self.dropout = nn.Dropout(dropout)

        # Projection to the fixed 32-dim feature vector shared with the hybrid models
        self.temporal_feature_layer = nn.Linear(hidden_size, 32)

        # Scalar head used when the LSTM is trained/evaluated without LightGBM
        self.prediction_layer = nn.Linear(32, 1)

    def forward(self, x, return_features_only=False):
        """
        Encode a batch of sequences.

        Args:
            x: Input sequences (batch_size, seq_len, features)
            return_features_only: When True, only the 32-dim features are returned.
                                  When False, a (features, prediction) tuple is returned.

        Returns:
            temporal_features: 32-dimensional temporal features for hybrid models
            prediction: output of the regression head (only if return_features_only=False)
        """
        sequence_output, _ = self.lstm(x)
        final_step = sequence_output[:, -1, :]  # output at the last timestep

        # Project -> activate -> dropout to obtain the 32-dim features
        projected = self.temporal_feature_layer(final_step)
        temporal_features = self.dropout(self.activation(projected))

        if return_features_only:
            return temporal_features

        # For CapsNet-LSTM model (without LightGBM)
        return temporal_features, self.prediction_layer(temporal_features)

In [3]:
class LSTMTemporalFeatureGenerator:
    """
    Trains an LSTMTemporalFeatureExtractor on sliding windows of temporal data and
    returns its 32-dimensional features for a train/validation pair.

    Designed to work within full pipeline cross-validation: every call to
    `train_and_extract_features` builds a fresh model and refits the feature
    scaler on that call's training data, so one instance can be reused per fold.
    """
    def __init__(self, best_params=None):
        """
        Args:
            best_params: Optional mapping of hyperparameters (e.g. the dict
                returned by the tuning step). Keys that are missing fall back to
                the defaults below, so a partial dict is accepted.
        """
        # Default hyperparameters (should be determined through separate tuning)
        self.default_params = {
            'hidden_size': 64,
            'num_layers': 1,
            'dropout': 0.2,
            'activation': 'relu',
            'learning_rate': 0.001,
            'batch_size': 32,
            'epochs': 50,
            'timesteps': 60,  # Default temporal window size
            'weight_decay': 0.0,
            'grad_clip': 1.0,
            'lstm_dropout': 0.0
        }

        # Overlay the supplied values on the defaults instead of replacing them,
        # so required keys such as 'batch_size' or 'epochs' are always present.
        self.params = {**self.default_params, **(best_params or {})}
        self.scaler = None
        self.model = None

    def prepare_temporal_sequences(self, temporal_data, targets, timesteps=10):
        """
        Scale the features and cut them into sliding windows.

        If no scaler has been fitted yet (`self.scaler is None`) a MinMaxScaler is
        fitted on `temporal_data`; otherwise the existing scaler is only applied.

        Args:
            temporal_data: 2-D array of shape (n_rows, n_features).
            targets: Sequence of length n_rows.
            timesteps: Window length. Window i covers rows [i, i + timesteps) and
                is paired with targets[i + timesteps].

        Returns:
            (X, y) numpy arrays holding n_rows - timesteps windows and their
            targets; both are empty when n_rows <= timesteps.
        """
        # Scale temporal features
        if self.scaler is None:
            self.scaler = MinMaxScaler()
            temporal_data_scaled = self.scaler.fit_transform(temporal_data)
        else:
            temporal_data_scaled = self.scaler.transform(temporal_data)

        # Create sequences
        n_windows = len(temporal_data_scaled) - timesteps
        X = [temporal_data_scaled[i:i + timesteps] for i in range(n_windows)]
        y = [targets[i + timesteps] for i in range(n_windows)]

        return np.array(X), np.array(y)

    def train_and_extract_features(self, train_temporal_data, train_targets, val_temporal_data, val_targets, timesteps=None):
        """
        Train LSTM and extract temporal features for current CV fold
        This is called by the full pipeline during each fold

        IMPORTANT: val_temporal_data is validation data from the 80% learning set,
                   NOT the 20% hold-out test set (to prevent data leakage)

        Args:
            train_temporal_data: 2-D array (n_train, n_features) used for fitting
                the scaler and the LSTM.
            train_targets: Targets aligned with train_temporal_data.
            val_temporal_data: 2-D array (n_val, n_features); only transformed.
            val_targets: Targets aligned with val_temporal_data.
            timesteps: Window length; defaults to params['timesteps'].

        Returns:
            train_features: 32D temporal features, one row per training window
            val_features: 32D temporal features, one row per validation window
            y_train: targets aligned with train_features (the first `timesteps`
                     training rows have no window and are dropped)
            n_val_sequences: number of validation windows (rows of val_features)

        Raises:
            ValueError: if either split has no more than `timesteps` rows, so no
                window can be built from it.
        """
        # Use timesteps from params or default
        if timesteps is None:
            timesteps = self.params.get('timesteps', 60)

        # Refit the scaler on THIS call's training data. Without the reset, a
        # generator reused across CV folds would keep scaling with the statistics
        # of the first fold it ever saw.
        self.scaler = None

        # Prepare sequences for training
        X_train, y_train = self.prepare_temporal_sequences(train_temporal_data, train_targets, timesteps)
        if len(X_train) == 0:
            raise ValueError(
                f"Training data has {len(train_temporal_data)} rows; more than "
                f"timesteps={timesteps} are required to build a sequence"
            )

        # Convert to tensors
        X_train_tensor = torch.FloatTensor(X_train)
        y_train_tensor = torch.FloatTensor(y_train).unsqueeze(1)

        # Create data loader
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=self.params['batch_size'], shuffle=True)

        # Initialize LSTM model
        self.model = LSTMTemporalFeatureExtractor(
            input_size=train_temporal_data.shape[1],
            hidden_size=self.params['hidden_size'],
            num_layers=self.params['num_layers'],
            dropout=self.params['dropout'],
            activation=self.params['activation'],
            lstm_dropout=self.params.get('lstm_dropout', 0.0)
        )

        criterion = nn.MSELoss()
        optimizer = optim.Adam(
            self.model.parameters(), 
            lr=self.params['learning_rate'],
            weight_decay=self.params.get('weight_decay', 0.0)
        )

        # Training loop: the regression head supplies the loss that shapes the
        # 32-dim features.
        self.model.train()
        for epoch in range(self.params['epochs']):
            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                _, predictions = self.model(batch_X, return_features_only=False)
                loss = criterion(predictions, batch_y)
                loss.backward()

                # Gradient clipping
                torch.nn.utils.clip_grad_norm_(
                    self.model.parameters(), 
                    max_norm=self.params.get('grad_clip', 1.0)
                )

                optimizer.step()

        # Extract features from training data (eval mode disables dropout)
        self.model.eval()
        with torch.no_grad():
            train_features = self.model(X_train_tensor, return_features_only=True).numpy()

        # Prepare validation sequences (scaler is only applied, not refitted)
        X_val, _ = self.prepare_temporal_sequences(val_temporal_data, val_targets, timesteps)
        if len(X_val) == 0:
            raise ValueError(
                f"Validation data has {len(val_temporal_data)} rows; more than "
                f"timesteps={timesteps} are required to build a sequence"
            )
        X_val_tensor = torch.FloatTensor(X_val)

        with torch.no_grad():
            val_features = self.model(X_val_tensor, return_features_only=True).numpy()

        # Debug: Verify numpy arrays are generated correctly
        print(f"  LSTM Debug - Train features shape: {train_features.shape}, type: {type(train_features)}")
        print(f"  LSTM Debug - Val features shape: {val_features.shape}, type: {type(val_features)}")
        print(f"  LSTM Debug - Feature sample: {train_features[0][:5]}...")  # First 5 values

        return train_features, val_features, y_train, X_val.shape[0]

In [4]:
class TemporalDataLoader:
    """
    Data loader for temporal features across all days
    Supports full pipeline cross-validation
    """
    def __init__(self, days=None, data_dir='../../dataset/c_matched_spatio_temporal_data'):
        """
        Args:
            days: Iterable of day identifiers; defaults to
                ['7_24', '10_19', '11_10'] when omitted.
            data_dir: Directory that contains the `matched_<day>.csv` files.
        """
        # Build a fresh list per instance; a list literal as the default argument
        # would be shared (and mutable) across every loader.
        self.days = list(days) if days is not None else ['7_24', '10_19', '11_10']
        self.data_dir = data_dir
        self.temporal_features = ['pm10', 'temperature', 'humidity']

    def load_temporal_data(self, day):
        """
        Load temporal data for a specific day
        Returns raw temporal data and targets for CV splitting

        Args:
            day: Day identifier used to build the file name `matched_<day>.csv`.

        Returns:
            temporal_data: 2-D array of the available temporal feature columns,
                ordered by timestamp, with incomplete rows removed.
            targets: 1-D array of the 'pm2.5' column for the same rows.
            available_features: names of the feature columns found in the file.

        Raises:
            FileNotFoundError: if the CSV for `day` does not exist.
            KeyError: if the 'timestamp' or 'pm2.5' column is missing.
            ValueError: if none of the expected temporal features is present.
        """
        matched_file = f'{self.data_dir}/matched_{day}.csv'
        if not os.path.exists(matched_file):
            raise FileNotFoundError(f"Matched data not found: {matched_file}")

        df = pd.read_csv(matched_file)

        # Fail early with a message that names the file and the missing columns
        missing_cols = [col for col in ('timestamp', 'pm2.5') if col not in df.columns]
        if missing_cols:
            raise KeyError(f"Required column(s) {missing_cols} not found in {matched_file}")

        # Sort by timestamp for proper time series handling. A stable sort keeps
        # rows that share a timestamp in file order, so reloading is deterministic.
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df = df.sort_values('timestamp', kind='mergesort').reset_index(drop=True)

        # Get available temporal features
        available_features = [col for col in self.temporal_features if col in df.columns]

        if not available_features:
            raise ValueError(f"No temporal features found in {day}")

        # Remove rows with missing values
        required_cols = available_features + ['pm2.5']
        df_clean = df.dropna(subset=required_cols)

        temporal_data = df_clean[available_features].values
        targets = df_clean['pm2.5'].values

        return temporal_data, targets, available_features

In [None]:
def hyperparameter_tuning_lstm(day, n_trials=20):
    """
    Hyperparameter tuning for LSTM component using Optuna.

    The first 80% of the day's rows form the learning set (the remainder is left
    untouched); the learning set is split 80/20 again, in time order, into a
    tuning-train and a tuning-validation part. Each trial trains the LSTM on the
    tuning-train part, fits a linear regression on the extracted features and is
    scored by that regression's MSE on the tuning-validation part.

    Args:
        day: Day identifier passed to TemporalDataLoader.load_temporal_data.
        n_trials: Number of Optuna trials to run.

    Returns:
        dict with the best hyperparameters found. The same dict is written to
        `models/lstm_temporal/<day>_best_params.json`.
    """
    # Imported once here rather than inside the objective on every trial.
    from sklearn.linear_model import LinearRegression

    print(f"LSTM Hyperparameter Tuning for {day}")
    print("=" * 50)

    # Load data
    data_loader = TemporalDataLoader()
    temporal_data, targets, feature_names = data_loader.load_temporal_data(day)

    n_total = len(temporal_data)
    n_learning = int(n_total * 0.8)
    learning_temporal = temporal_data[:n_learning]
    learning_targets = targets[:n_learning]

    n_tune_train = int(n_learning * 0.8)
    tune_train_temporal = learning_temporal[:n_tune_train]
    tune_train_targets = learning_targets[:n_tune_train]
    tune_val_temporal = learning_temporal[n_tune_train:]
    tune_val_targets = learning_targets[n_tune_train:]

    def objective(trial):
        """Train one configuration and return its validation MSE (inf on failure)."""
        params = {
            'hidden_size': trial.suggest_categorical('hidden_size', [32, 64, 128, 256]),
            'num_layers': trial.suggest_int('num_layers', 1, 3),
            # suggest_float takes (name, low, high): a continuous range, not a
            # list of candidate values.
            'dropout': trial.suggest_float('dropout', 0.1, 0.4),
            'activation': trial.suggest_categorical('activation', ['relu', 'tanh', 'gelu']),
            # Log-uniform over [1e-4, 1e-2]; replaces the deprecated suggest_loguniform.
            'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True),
            'batch_size': trial.suggest_categorical('batch_size', [16, 32, 64]),
            'epochs': trial.suggest_categorical('epochs', [10, 30, 50]),
            'timesteps': trial.suggest_categorical('timesteps', [10, 30, 40, 60]),
            'weight_decay': trial.suggest_categorical('weight_decay', [0.0, 1e-5, 1e-4]),
            'grad_clip': trial.suggest_categorical('grad_clip', [0.5, 1.0, 2.0]),
            'lstm_dropout': trial.suggest_categorical('lstm_dropout', [0.0, 0.1, 0.2])
        }
        lstm_gen = LSTMTemporalFeatureGenerator(params)
        try:
            train_features, val_features, train_y, val_len = lstm_gen.train_and_extract_features(
                tune_train_temporal, tune_train_targets, tune_val_temporal, tune_val_targets,
                timesteps=params['timesteps']
            )
            # The first `timesteps` validation rows have no full window, so the
            # features start at target index `timesteps`.
            val_y = tune_val_targets[params['timesteps']:]
            if len(val_y) != len(val_features):
                return float('inf')

            lr = LinearRegression()
            lr.fit(train_features, train_y)
            val_pred = lr.predict(val_features)
            return mean_squared_error(val_y, val_pred)
        except Exception as e:
            # Best effort: a failing configuration is reported and scored as the
            # worst possible value so the study can continue.
            print(f"Optuna trial error: {e}")
            return float('inf')

    study = optuna.create_study(direction='minimize')
    study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

    best_params = study.best_params
    best_score = study.best_value

    os.makedirs('models/lstm_temporal', exist_ok=True)
    with open(f'models/lstm_temporal/{day}_best_params.json', 'w') as f:
        json.dump(best_params, f, indent=2)

    print(f"Best parameters for {day}: {best_params}")
    # The objective is an MSE, so its square root is reported as RMSE.
    print(f"Best validation RMSE: {np.sqrt(best_score):.4f}")

    return best_params

In [6]:
def main():
    """
    Main execution for LSTM temporal feature extraction
    Focused on component preparation for full pipeline CV

    Runs hyperparameter tuning for every day, reports which days succeeded, and
    prints a usage example for integrating the LSTM component into the 5-fold
    cross-validated pipeline. A failure on one day is reported and does not stop
    the remaining days.
    """
    print("="*80)
    print("LSTM TEMPORAL FEATURE EXTRACTION - PIPELINE READY")
    print("="*80)
    print("Purpose: Prepare LSTM component for full pipeline cross-validation")
    print("- Hyperparameter tuning for each day")
    print("- Component ready for CV integration")
    print("- No standalone evaluation (done in full pipeline)")
    print("="*80)
    
    days = ['7_24', '10_19', '11_10']
    failed_days = []  # days whose tuning raised, so the summary stays truthful
    
    # Step 1: Hyperparameter tuning for each day
    print("\nStep 1: LSTM Hyperparameter Tuning")
    print("-" * 50)
    
    for day in days:
        try:
            print(f"\nTuning LSTM parameters for {day}...")
            hyperparameter_tuning_lstm(day, n_trials=20)
            print(f" {day} tuning complete")
            
        except Exception as e:
            failed_days.append(day)
            print(f" Error tuning {day}: {str(e)}")
    
    print(f"\n{'='*80}")
    print("LSTM COMPONENT PREPARATION COMPLETE")
    print("="*80)
    if failed_days:
        tuned_days = [day for day in days if day not in failed_days]
        print(f"Best parameters saved for: {', '.join(tuned_days) if tuned_days else 'none'}")
        print(f"Tuning FAILED for: {', '.join(failed_days)}")
    else:
        print("Best parameters saved for each day")
    print("LSTMTemporalFeatureGenerator class ready")
    print("TemporalDataLoader class ready")
    print("\nNext Steps:")
    print("1. Integration team: Use LSTMTemporalFeatureGenerator in full pipeline")
    print("2. Full pipeline CV: Wrap entire models in 5-fold cross-validation")
    print("3. Models to evaluate: CapsNet-LSTM-LightGBM, CNN-LSTM-LightGBM, CapsNet-LSTM")
    print("4. Statistical analysis: Compare 5-fold results across models")
    print("="*80)
    
    # Example usage for integration team
    print("\n" + "="*80)
    print("CORRECT 5-FOLD CV INTEGRATION:")
    print("="*80)
    print("""
# PHASE 1: Hyperparameter Tuning (run once per day)
from lstm_temporal_feature_generator import hyperparameter_tuning_lstm
best_params = hyperparameter_tuning_lstm('7_24', n_trials=20)

# PHASE 2: 5-Fold Cross-Validation (main pipeline)
from lstm_temporal_feature_generator import LSTMTemporalFeatureGenerator, TemporalDataLoader
from sklearn.model_selection import TimeSeriesSplit

# Initialize components
data_loader = TemporalDataLoader()
lstm_generator = LSTMTemporalFeatureGenerator(best_params)
timesteps = lstm_generator.params['timesteps']

# Load data and split into learning (80%) and hold-out (20%)
temporal_data, targets, _ = data_loader.load_temporal_data('7_24')
n_total = len(temporal_data)
n_learning = int(n_total * 0.8)

learning_temporal = temporal_data[:n_learning]  # 80% learning set
learning_targets = targets[:n_learning]
holdout_temporal = temporal_data[n_learning:]   # 20% hold-out (untouched)
holdout_targets = targets[n_learning:]

# 5-Fold TimeSeriesSplit on 80% learning set
tscv = TimeSeriesSplit(n_splits=5)
fold_scores = []

for fold, (train_idx, val_idx) in enumerate(tscv.split(learning_temporal)):
    print(f"Fold {fold + 1}/5")
    
    # Split data for this fold (expanding window)
    train_temporal = learning_temporal[train_idx]  # Growing train set
    val_temporal = learning_temporal[val_idx]      # Fixed-size val set
    train_targets = learning_targets[train_idx]
    val_targets = learning_targets[val_idx]
    
    # Extract temporal features for this fold.
    # The LSTM needs `timesteps` rows of history, so the features (and the
    # returned train_seq_targets) start at row `timesteps` of each split.
    train_temp_features, val_temp_features, train_seq_targets, _ = lstm_generator.train_and_extract_features(
        train_temporal, train_targets, val_temporal, val_targets
    )
    val_seq_targets = val_targets[timesteps:]
    
    # Extract spatial features (CapsNet/CNN)
    # NOTE: spatial features must cover the same rows as the temporal features
    # (i.e. drop the first `timesteps` rows of each split) before concatenating.
    train_spatial_features, val_spatial_features = capsnet_generator.train_and_extract_features(
        train_images, train_targets, val_images, val_targets
    )
    
    # Combine features
    train_combined = np.concatenate([train_temp_features, train_spatial_features], axis=1)
    val_combined = np.concatenate([val_temp_features, val_spatial_features], axis=1)
    
    # Train LightGBM meta-learner
    lightgbm_model = LGBMRegressor(**best_lightgbm_params)
    lightgbm_model.fit(train_combined, train_seq_targets)
    
    # Validate on this fold
    val_predictions = lightgbm_model.predict(val_combined)
    fold_rmse = np.sqrt(mean_squared_error(val_seq_targets, val_predictions))
    fold_scores.append(fold_rmse)
    
    print(f"  Fold {fold + 1} RMSE: {fold_rmse:.4f}")

# Calculate cross-validation performance
cv_mean = np.mean(fold_scores)
cv_std = np.std(fold_scores)
print(f"5-Fold CV Results: {cv_mean:.4f} ± {cv_std:.4f}")
""")
    print("="*80)

In [None]:
# Entry point: run the tuning workflow only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

[I 2025-07-20 21:40:14,452] A new study created in memory with name: no-name-2469927d-d3fa-48e4-8393-a18f74de4271


LSTM TEMPORAL FEATURE EXTRACTION - PIPELINE READY
Purpose: Prepare LSTM component for full pipeline cross-validation
- Hyperparameter tuning for each day
- Component ready for CV integration
- No standalone evaluation (done in full pipeline)

Step 1: LSTM Hyperparameter Tuning
--------------------------------------------------

Tuning LSTM parameters for 7_24...
LSTM Hyperparameter Tuning for 7_24


  0%|          | 0/20 [00:00<?, ?it/s]

  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (41346, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (10330, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [-0.9999484  -0.9999732  -0.9998846   0.99989736  0.9998976 ]...
[I 2025-07-20 21:43:56,738] Trial 0 finished with value: 27.931700504416252 and parameters: {'hidden_size': 256, 'num_layers': 2, 'dropout': 0.24825862760482284, 'activation': 'tanh', 'learning_rate': 0.004990872393387714, 'batch_size': 32, 'epochs': 10, 'timesteps': 10, 'weight_decay': 0.0001, 'grad_clip': 1.0, 'lstm_dropout': 0.1}. Best is trial 0 with value: 27.931700504416252.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (41316, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (10300, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [-0.532787    0.5655723  -0.5365905   0.5017635  -0.32525575]...
[I 2025-07-20 22:10:39,568] Trial 1 finished with value: 27.781587308463614 and parameters: {'hidden_size': 256, 'num_layers': 1, 'dropout': 0.1625644375764122, 'activation': 'tanh', 'learning_rate': 0.000174740621040118, 'batch_size': 32, 'epochs': 50, 'timesteps': 40, 'weight_decay': 0.0, 'grad_clip': 0.5, 'lstm_dropout': 0.0}. Best is trial 1 with value: 27.781587308463614.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (41316, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (10300, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [ 0.23215638  0.23297954 -0.2530873   0.24394113  0.19467811]...
[I 2025-07-20 22:17:14,139] Trial 2 finished with value: 30.332522726196114 and parameters: {'hidden_size': 128, 'num_layers': 1, 'dropout': 0.16197944380275936, 'activation': 'tanh', 'learning_rate': 0.005307987461054677, 'batch_size': 64, 'epochs': 30, 'timesteps': 40, 'weight_decay': 0.0001, 'grad_clip': 1.0, 'lstm_dropout': 0.1}. Best is trial 1 with value: 27.781587308463614.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (41326, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (10310, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [3.459734  3.6476264 2.4506316 2.9092717 2.802512 ]...
[I 2025-07-20 22:24:10,555] Trial 3 finished with value: 36.43886524050005 and parameters: {'hidden_size': 32, 'num_layers': 2, 'dropout': 0.15621609634952724, 'activation': 'relu', 'learning_rate': 0.00047961376598915534, 'batch_size': 32, 'epochs': 30, 'timesteps': 30, 'weight_decay': 0.0, 'grad_clip': 2.0, 'lstm_dropout': 0.2}. Best is trial 1 with value: 27.781587308463614.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (41326, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (10310, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [0.        0.        1.2896491 0.        1.2323089]...
[I 2025-07-20 22:37:52,146] Trial 4 finished with value: 37.04295588890999 and parameters: {'hidden_size': 64, 'num_layers': 2, 'dropout': 0.30538749786380187, 'activation': 'relu', 'learning_rate': 0.0006104724852012556, 'batch_size': 32, 'epochs': 50, 'timesteps': 30, 'weight_decay': 0.0, 'grad_clip': 1.0, 'lstm_dropout': 0.0}. Best is trial 1 with value: 27.781587308463614.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (41326, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (10310, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [-0.43513814 -0.050433   -0.99179256  0.8921299  -0.97314376]...
[I 2025-07-20 23:19:57,591] Trial 5 finished with value: 42.30087757851335 and parameters: {'hidden_size': 128, 'num_layers': 3, 'dropout': 0.36703084450987844, 'activation': 'tanh', 'learning_rate': 0.0023183479150438255, 'batch_size': 32, 'epochs': 50, 'timesteps': 30, 'weight_decay': 1e-05, 'grad_clip': 0.5, 'lstm_dropout': 0.2}. Best is trial 1 with value: 27.781587308463614.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (41346, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (10330, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [ 0.6623071  -0.15031438 -0.14016235 -0.15714167 -0.15930524]...
[I 2025-07-20 23:25:46,571] Trial 6 finished with value: 34.84665224726374 and parameters: {'hidden_size': 256, 'num_layers': 1, 'dropout': 0.3915039579424653, 'activation': 'gelu', 'learning_rate': 0.0006384689723116393, 'batch_size': 16, 'epochs': 30, 'timesteps': 10, 'weight_decay': 0.0001, 'grad_clip': 1.0, 'lstm_dropout': 0.1}. Best is trial 1 with value: 27.781587308463614.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (41296, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (10280, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [1.7109332 0.        1.378523  1.4397821 0.       ]...
[I 2025-07-20 23:29:13,790] Trial 7 finished with value: 27.442024484940397 and parameters: {'hidden_size': 64, 'num_layers': 1, 'dropout': 0.24904618157403233, 'activation': 'relu', 'learning_rate': 0.0006165507072469879, 'batch_size': 32, 'epochs': 30, 'timesteps': 60, 'weight_decay': 0.0, 'grad_clip': 0.5, 'lstm_dropout': 0.2}. Best is trial 7 with value: 27.442024484940397.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (41296, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (10280, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [ 0.99999416  0.9999995  -0.9999984  -0.9999242   0.99999976]...
[I 2025-07-20 23:52:54,032] Trial 8 finished with value: 27.33516524486622 and parameters: {'hidden_size': 256, 'num_layers': 3, 'dropout': 0.39362222854359386, 'activation': 'tanh', 'learning_rate': 0.00040439843490336266, 'batch_size': 32, 'epochs': 10, 'timesteps': 60, 'weight_decay': 0.0, 'grad_clip': 0.5, 'lstm_dropout': 0.1}. Best is trial 8 with value: 27.33516524486622.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (41296, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (10280, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [ 2.0821428  -0.1699417  -0.16960336  2.083252   -0.16996965]...
[I 2025-07-21 00:09:43,301] Trial 9 finished with value: 33.660629492362474 and parameters: {'hidden_size': 64, 'num_layers': 3, 'dropout': 0.22742265989462473, 'activation': 'gelu', 'learning_rate': 0.00016012923707732631, 'batch_size': 16, 'epochs': 30, 'timesteps': 60, 'weight_decay': 0.0001, 'grad_clip': 0.5, 'lstm_dropout': 0.1}. Best is trial 8 with value: 27.33516524486622.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (41296, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (10280, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [-0.9957112  -0.9839221   0.98153263 -0.9880384   0.9861653 ]...
[I 2025-07-21 00:11:37,363] Trial 10 finished with value: 43.55297534654669 and parameters: {'hidden_size': 32, 'num_layers': 3, 'dropout': 0.3306938818334361, 'activation': 'tanh', 'learning_rate': 0.0017770447761885763, 'batch_size': 64, 'epochs': 10, 'timesteps': 60, 'weight_decay': 1e-05, 'grad_clip': 2.0, 'lstm_dropout': 0.1}. Best is trial 8 with value: 27.33516524486622.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (41296, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (10280, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [0. 0. 0. 0. 0.]...
[I 2025-07-21 00:14:16,778] Trial 11 finished with value: 34.02895912959291 and parameters: {'hidden_size': 64, 'num_layers': 2, 'dropout': 0.2838980888456125, 'activation': 'relu', 'learning_rate': 0.0002735713552096249, 'batch_size': 32, 'epochs': 10, 'timesteps': 60, 'weight_decay': 0.0, 'grad_clip': 0.5, 'lstm_dropout': 0.2}. Best is trial 8 with value: 27.33516524486622.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (41296, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (10280, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [2.0132709 0.        1.793696  2.007837  2.0295784]...
[I 2025-07-21 00:15:27,524] Trial 12 finished with value: 32.540958250724664 and parameters: {'hidden_size': 64, 'num_layers': 1, 'dropout': 0.21243893283473625, 'activation': 'relu', 'learning_rate': 0.0013086659102409864, 'batch_size': 32, 'epochs': 10, 'timesteps': 60, 'weight_decay': 0.0, 'grad_clip': 0.5, 'lstm_dropout': 0.2}. Best is trial 8 with value: 27.33516524486622.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (41296, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (10280, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [7.243396 0.       0.       0.       6.292638]...
[I 2025-07-21 01:41:43,648] Trial 13 finished with value: 32.8588419758404 and parameters: {'hidden_size': 256, 'num_layers': 3, 'dropout': 0.10593545229019105, 'activation': 'relu', 'learning_rate': 0.00033157364368797607, 'batch_size': 64, 'epochs': 30, 'timesteps': 60, 'weight_decay': 0.0, 'grad_clip': 0.5, 'lstm_dropout': 0.2}. Best is trial 8 with value: 27.33516524486622.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (41296, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (10280, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [ 4.1491838   4.982781    4.397761   -0.11286008 -0.16977699]...
[I 2025-07-21 02:02:31,793] Trial 14 finished with value: 38.05210856330205 and parameters: {'hidden_size': 256, 'num_layers': 2, 'dropout': 0.2796606958668747, 'activation': 'gelu', 'learning_rate': 0.000914103698749428, 'batch_size': 32, 'epochs': 10, 'timesteps': 60, 'weight_decay': 0.0, 'grad_clip': 0.5, 'lstm_dropout': 0.0}. Best is trial 8 with value: 27.33516524486622.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (41296, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (10280, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [-0.39827484 -0.39070684 -0.43401685 -0.39898676 -0.44104162]...
[I 2025-07-21 02:13:41,046] Trial 15 finished with value: 32.01594893561809 and parameters: {'hidden_size': 64, 'num_layers': 1, 'dropout': 0.3424061047951089, 'activation': 'tanh', 'learning_rate': 0.0002862615986732835, 'batch_size': 16, 'epochs': 30, 'timesteps': 60, 'weight_decay': 0.0, 'grad_clip': 0.5, 'lstm_dropout': 0.1}. Best is trial 8 with value: 27.33516524486622.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (41296, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (10280, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [4.7541575 5.460004  5.7692413 0.        0.       ]...
[I 2025-07-21 02:22:13,329] Trial 16 finished with value: 32.66314827020818 and parameters: {'hidden_size': 128, 'num_layers': 3, 'dropout': 0.2055610449626293, 'activation': 'relu', 'learning_rate': 0.00010079392539744863, 'batch_size': 32, 'epochs': 10, 'timesteps': 60, 'weight_decay': 1e-05, 'grad_clip': 2.0, 'lstm_dropout': 0.2}. Best is trial 8 with value: 27.33516524486622.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (41346, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (10330, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [ 0.46256256 -0.17318773  0.49364138 -0.37554586  0.4324383 ]...
[I 2025-07-21 02:22:59,249] Trial 17 finished with value: 34.0472598755481 and parameters: {'hidden_size': 32, 'num_layers': 2, 'dropout': 0.3920626204471143, 'activation': 'tanh', 'learning_rate': 0.0023623525808321236, 'batch_size': 32, 'epochs': 10, 'timesteps': 10, 'weight_decay': 0.0, 'grad_clip': 0.5, 'lstm_dropout': 0.1}. Best is trial 8 with value: 27.33516524486622.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (41316, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (10300, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [0. 0. 0. 0. 0.]...
[I 2025-07-21 12:51:24,932] Trial 18 finished with value: 27.744520330698414 and parameters: {'hidden_size': 256, 'num_layers': 3, 'dropout': 0.26875015283320963, 'activation': 'relu', 'learning_rate': 0.009436485120621502, 'batch_size': 64, 'epochs': 30, 'timesteps': 40, 'weight_decay': 0.0, 'grad_clip': 0.5, 'lstm_dropout': 0.2}. Best is trial 8 with value: 27.33516524486622.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (41296, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (10280, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [-0.09192718  0.05786441 -0.07157504  0.15026644 -0.0912802 ]...
[I 2025-07-21 13:00:17,936] Trial 19 finished with value: 33.55156220446712 and parameters: {'hidden_size': 64, 'num_layers': 1, 'dropout': 0.33066074397871553, 'activation': 'gelu', 'learning_rate': 0.001001661186988162, 'batch_size': 16, 'epochs': 50, 'timesteps': 60, 'weight_decay': 1e-05, 'grad_clip': 2.0, 'lstm_dropout': 0.0}. Best is trial 8 with value: 27.33516524486622.
Best parameters for 7_24: {'hidden_size': 256, 'num_layers': 3, 'dropout': 0.39362222854359386, 'activation': 'tanh', 'learning_rate': 0.00040439843490336266, 'batch_size': 32, 'epochs': 10, 'timesteps': 60, 'weight_decay': 0.0, 'grad_clip': 0.5, 'lstm_dropout': 0.1}
Best validation RMSE: 5.2283
 7_24 tuning complete

Tuning LSTM parameters for 10_19...
LSTM Hype

[I 2025-07-21 13:00:18,490] A new study created in memory with name: no-name-b82a3a87-ad7b-4f60-bfc4-18069d1dc79e


  0%|          | 0/20 [00:00<?, ?it/s]

  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (79208, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (19757, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [-0.87896407  0.86469793  0.85259837 -0.88333374 -0.87272924]...
[I 2025-07-21 13:52:48,898] Trial 0 finished with value: 45.178733589937636 and parameters: {'hidden_size': 64, 'num_layers': 2, 'dropout': 0.3802901620631436, 'activation': 'tanh', 'learning_rate': 0.00035606760221285077, 'batch_size': 16, 'epochs': 30, 'timesteps': 60, 'weight_decay': 0.0001, 'grad_clip': 1.0, 'lstm_dropout': 0.1}. Best is trial 0 with value: 45.178733589937636.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (79258, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (19807, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [ 0.9998746 -0.9998711 -0.9998178  0.9996819  0.9997524]...
[I 2025-07-21 15:15:15,515] Trial 1 finished with value: 87.42861981247785 and parameters: {'hidden_size': 256, 'num_layers': 3, 'dropout': 0.12357491844550512, 'activation': 'tanh', 'learning_rate': 0.0016567218750908282, 'batch_size': 64, 'epochs': 30, 'timesteps': 10, 'weight_decay': 0.0001, 'grad_clip': 1.0, 'lstm_dropout': 0.0}. Best is trial 0 with value: 45.178733589937636.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (79258, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (19807, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [ 0.20415491  0.18454538  0.2150461  -0.1846387   0.2279506 ]...
[I 2025-07-21 15:41:02,684] Trial 2 finished with value: 49.14972700665197 and parameters: {'hidden_size': 256, 'num_layers': 1, 'dropout': 0.3605639341265797, 'activation': 'tanh', 'learning_rate': 0.0005315253477015005, 'batch_size': 32, 'epochs': 50, 'timesteps': 10, 'weight_decay': 0.0, 'grad_clip': 2.0, 'lstm_dropout': 0.2}. Best is trial 0 with value: 45.178733589937636.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (79238, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (19787, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [ 2.4640687  -0.15881956  1.9915441   2.8513205  -0.16736428]...
[I 2025-07-21 15:51:50,038] Trial 3 finished with value: 47.57757611105058 and parameters: {'hidden_size': 32, 'num_layers': 2, 'dropout': 0.1261923690304398, 'activation': 'gelu', 'learning_rate': 0.0018453412867667834, 'batch_size': 64, 'epochs': 30, 'timesteps': 30, 'weight_decay': 0.0, 'grad_clip': 0.5, 'lstm_dropout': 0.1}. Best is trial 0 with value: 45.178733589937636.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),


  LSTM Debug - Train features shape: (79208, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Val features shape: (19757, 32), type: <class 'numpy.ndarray'>
  LSTM Debug - Feature sample: [ 4.0146585  -0.1699429   4.028287    4.410253   -0.16994147]...
[I 2025-07-21 16:12:26,676] Trial 4 finished with value: 44.35473092818972 and parameters: {'hidden_size': 128, 'num_layers': 1, 'dropout': 0.2507464297862117, 'activation': 'gelu', 'learning_rate': 0.0004063409215497555, 'batch_size': 16, 'epochs': 30, 'timesteps': 60, 'weight_decay': 0.0, 'grad_clip': 2.0, 'lstm_dropout': 0.0}. Best is trial 4 with value: 44.35473092818972.


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 1e-2),
