In [31]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import os
import json
from itertools import product
import time
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tqdm.notebook import tqdm

In [32]:
DAY_PREFIXES = ['11_10_', '7_24_', '10_19_']

In [33]:
class EnhancedLSTM(nn.Module):
    
    def __init__(self, input_size, hidden_size=64, num_layers=1, dropout=0.2, 
                 activation='relu', output_size=1):
        super(EnhancedLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        # LSTM layers with configurable parameters
        self.lstm = nn.LSTM(
            input_size=input_size, 
            hidden_size=hidden_size, 
            num_layers=num_layers,
            dropout=dropout if num_layers > 1 else 0,  # Dropout only for multi-layer
            batch_first=True
        )
        
        # Configurable activation function
        if activation == 'relu':
            self.activation = nn.ReLU()
        elif activation == 'tanh':
            self.activation = nn.Tanh()
        elif activation == 'leaky_relu':
            self.activation = nn.LeakyReLU()
        elif activation == 'gelu':
            self.activation = nn.GELU()
        else:
            self.activation = nn.ReLU()  # Default
        
        # Fully connected layers with dropout
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(hidden_size, 32)
        self.fc2 = nn.Linear(32, output_size)
        
    def forward(self, x):
        # LSTM forward pass
        lstm_out, (hn, cn) = self.lstm(x)
        
        # Use the last output
        last_output = lstm_out[:, -1, :]
        
        # Fully connected layers with activation and dropout
        out = self.activation(self.fc1(last_output))
        out = self.dropout(out)
        out = self.fc2(out)
        
        return out

In [34]:
class LSTMHyperparameterTuner:
    def __init__(self):
        # Define hyperparameter search space
        self.hyperparameter_space = {
            'hidden_size': [32, 64, 128, 256],
            'num_layers': [1, 2, 3],
            'dropout': [0.1, 0.2, 0.3, 0.5],
            'activation': ['relu', 'tanh', 'leaky_relu', 'gelu'],
            'learning_rate': [0.0001, 0.001, 0.01, 0.1],
            'batch_size': [8, 16, 32, 64],
            'epochs': [2, 3]
        }
        
        self.best_params = {}
        self.best_score = float('inf')
        self.tuning_results = []
        
    def create_hyperparameter_combinations(self, max_combinations=50):

        # Get all possible combinations
        keys = list(self.hyperparameter_space.keys())
        values = list(self.hyperparameter_space.values())
        all_combinations = list(product(*values))
        
        # If too many combinations, sample intelligently
        if len(all_combinations) > max_combinations:
            # Prioritize certain combinations
            np.random.seed(42)  # For reproducibility
            selected_indices = np.random.choice(
                len(all_combinations), 
                size=max_combinations, 
                replace=False
            )
            combinations = [all_combinations[i] for i in selected_indices]
        else:
            combinations = all_combinations
        
        # Convert to list of dictionaries
        param_combinations = []
        for combo in combinations:
            param_dict = dict(zip(keys, combo))
            param_combinations.append(param_dict)
        
        return param_combinations
    
    def validate_hyperparameters(self, params, X_train, y_train, X_val, y_val, input_size):
 
        try:
            # Convert to PyTorch tensors
            X_train_tensor = torch.FloatTensor(X_train)
            y_train_tensor = torch.FloatTensor(y_train).unsqueeze(1)
            X_val_tensor = torch.FloatTensor(X_val)
            y_val_tensor = torch.FloatTensor(y_val).unsqueeze(1)
            
            # Create data loaders
            train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
            val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
            
            train_loader = DataLoader(
                train_dataset, 
                batch_size=params['batch_size'], 
                shuffle=True
            )
            val_loader = DataLoader(
                val_dataset, 
                batch_size=params['batch_size'], 
                shuffle=False
            )
            
            # Initialize model with hyperparameters
            model = EnhancedLSTM(
                input_size=input_size,
                hidden_size=params['hidden_size'],
                num_layers=params['num_layers'],
                dropout=params['dropout'],
                activation=params['activation']
            )
            
            # Loss and optimizer
            criterion = nn.MSELoss()
            optimizer = optim.Adam(model.parameters(), lr=params['learning_rate'])
            
            # Training loop with early stopping
            best_val_loss = float('inf')
            total_epochs = params['epochs']
            patience = int(0.2 * total_epochs)
            patience_counter = 0
            
            for epoch in tqdm(range(params['epochs']), desc="Epochs"):
                # Training
                model.train()
                train_loss = 0
                for batch_X, batch_y in train_loader:
                    optimizer.zero_grad()
                    outputs = model(batch_X)
                    loss = criterion(outputs, batch_y)
                    loss.backward()
                    
                    # Gradient clipping to prevent exploding gradients
                    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                    
                    optimizer.step()
                    train_loss += loss.item()
                
                # Validation
                model.eval()
                val_loss = 0
                with torch.no_grad():
                    for batch_X, batch_y in val_loader:
                        outputs = model(batch_X)
                        loss = criterion(outputs, batch_y)
                        val_loss += loss.item()
                
                train_loss /= len(train_loader)
                val_loss /= len(val_loader)
                
                # Early stopping check
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    patience_counter = 0
                else:
                    patience_counter += 1
                    if patience_counter >= patience:
                        break
            
            # Calculate additional metrics
            model.eval()
            val_predictions = []
            val_targets = []
            
            with torch.no_grad():
                for batch_X, batch_y in val_loader:
                    outputs = model(batch_X)
                    val_predictions.extend(outputs.squeeze().numpy())
                    val_targets.extend(batch_y.squeeze().numpy())
            
            val_predictions = np.array(val_predictions)
            val_targets = np.array(val_targets)
            
            # Calculate comprehensive metrics
            mse = mean_squared_error(val_targets, val_predictions)
            mae = mean_absolute_error(val_targets, val_predictions)
            r2 = r2_score(val_targets, val_predictions)
            rmse = np.sqrt(mse)
            
            n = len(val_targets)
            p = val_predictions.shape[0] if val_predictions.ndim > 1 else 1
            adj_r2 = 1 - (1 - r2) * (n - 1) / (n - p - 1)

            
            
            # Convert all numpy.float32 to standard Python float for JSON serialization
            return {
                'val_loss': float(best_val_loss),
                'mse': float(mse),
                'mae': float(mae),
                'rmse': float(rmse),
                'r2': float(r2),
                'adj_r2': float(adj_r2),
                'model': model,
                'epochs_trained': int(epoch + 1) # Ensure integer
            }
            
        except Exception as e:
            print(f"Error with hyperparameters {params}: {str(e)}")
            return {
                'val_loss': float('inf'),
                'mse': float('inf'),
                'mae': float('inf'),
                'rmse': float('inf'),
                'r2': -float('inf'),
                'model': None,
                'epochs_trained': 0
            }
    
    def tune_hyperparameters(self, X_train, y_train, X_val, y_val, input_size, 
                           day_prefix, max_combinations=3):

        print(f"\n{'='*60}")
        print(f"HYPERPARAMETER TUNING FOR {day_prefix}")
        print(f"{'='*60}")
        print(f"Framework: Train/Val split only (Test set untouched)")
        print(f"Search space: {max_combinations} combinations")
        print(f"Validation strategy: Time series split compliance")
        print(f"{'='*60}")
        
        # Generate hyperparameter combinations
        param_combinations = self.create_hyperparameter_combinations(max_combinations)
        
        print(f"Testing {len(param_combinations)} hyperparameter combinations...")
        
        best_model = None
        best_params = None
        best_score = float('inf')
        
        for i, params in enumerate(tqdm(param_combinations, desc="Testing hyperparameters")):
            print(f"\nCombination {i+1}/{len(param_combinations)}")
            print(f"Params: {params}")
            
            start_time = time.time()
            
            # Validate hyperparameters (ONLY using train/val data)
            results = self.validate_hyperparameters(
                params, X_train, y_train, X_val, y_val, input_size
            )
            
            end_time = time.time()
            
            # Store results
            result_entry = {
                'combination': i + 1,
                'params': params,
                'val_loss': results['val_loss'],
                'mse': results['mse'],
                'mae': results['mae'],
                'rmse': results['rmse'],
                'r2': results['r2'],
                'adj_r2': results['adj_r2'],
                'epochs_trained': results['epochs_trained'],
                'training_time': end_time - start_time
            }
            
            self.tuning_results.append(result_entry)
            
            print(f"Val Loss: {results['val_loss']:.4f}, RMSE: {results['rmse']:.4f}, "
            f"MAE: {results['mae']:.4f}, R2: {results['r2']:.4f}, Adj R2: {results['adj_r2']:.4f}, "
            f"Time: {end_time - start_time:.2f}s")
            
            # Update best model
            if results['val_loss'] < best_score:
                best_score = results['val_loss']
                best_params = params.copy()
                best_model = results['model']
                print(f"[BEST] NEW BEST MODEL! Val Loss: {best_score:.4f}")
        
        # Save best hyperparameters and results
        self.best_params[day_prefix] = best_params
        self.best_score = best_score
        
        # Save tuning results
        results_path = f'{day_prefix}hyperparameter_tuning_results.json'
        with open(results_path, 'w') as f:
            json.dump(self.tuning_results, f, indent=2)
        
        print(f"\n{'='*60}")
        print(f"HYPERPARAMETER TUNING COMPLETE FOR {day_prefix}")
        print(f"{'='*60}")
        print(f"Best hyperparameters:")
        for key, value in best_params.items():
            print(f"  {key}: {value}")
        print(f"Best validation loss: {best_score:.4f}")
        print(f"Results saved to: {results_path}")
        print(f"{'='*60}")
        
        return best_model, best_params

In [35]:
def train_optimized_lstm(X_train, y_train, X_val, y_val, input_size, 
                        day_prefix, tune_hyperparameters=True):

    if tune_hyperparameters:
        # Perform hyperparameter tuning
        tuner = LSTMHyperparameterTuner()
        best_model, best_params = tuner.tune_hyperparameters(
            X_train, y_train, X_val, y_val, input_size, day_prefix
        )
        
        # Save the best model
        model_path = f'{day_prefix}lstm_model_optimized.pth'
        torch.save(best_model.state_dict(), model_path)
        
        # Save hyperparameters
        params_path = f'{day_prefix}best_hyperparameters.json'
        with open(params_path, 'w') as f:
            json.dump(best_params, f, indent=2)
        
        print(f"Optimized model saved: {model_path}")
        print(f"Best hyperparameters saved: {params_path}")
        
        return best_model, best_params
    
    else:
        # Use default hyperparameters (original model)
        print(f"Training with default hyperparameters for {day_prefix}")
        
        # Convert to PyTorch tensors
        X_train_tensor = torch.FloatTensor(X_train)
        y_train_tensor = torch.FloatTensor(y_train).unsqueeze(1)
        X_val_tensor = torch.FloatTensor(X_val)
        y_val_tensor = torch.FloatTensor(y_val).unsqueeze(1)
        
        # Create data loaders
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
        
        train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
        
        # Initialize model with default parameters
        model = EnhancedLSTM(input_size=input_size)
        
        # Loss and optimizer
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        
        # Training loop
        best_val_loss = float('inf')
        patience = 10
        patience_counter = 0
        
        for epoch in range(100):
            # Training
            model.train()
            train_loss = 0
            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
            
            # Validation
            model.eval()
            val_loss = 0
            with torch.no_grad():
                for batch_X, batch_y in val_loader:
                    outputs = model(batch_X)
                    loss = criterion(outputs, batch_y)
                    val_loss += loss.item()
            
            train_loss /= len(train_loader)
            val_loss /= len(val_loader)
            
            if epoch % 10 == 0:
                print(f'Epoch {epoch}: Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')
            
            # Early stopping
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                patience_counter = 0
                model_path = f'{day_prefix}lstm_model_default.pth'
                torch.save(model.state_dict(), model_path)
            else:
                patience_counter += 1
                if patience_counter >= patience:
                    print(f"Early stopping at epoch {epoch}")
                    break
        
        return model, None

In [36]:
if __name__ == "__main__":
    print("="*80)
    print("ENHANCED LSTM TRAINING WITH HYPERPARAMETER TUNING")
    print("="*80)
    print("Framework Compliance:")
    print("[OK] Test set remains untouched during training and validation")
    print("[OK] Hyperparameter tuning uses only train/val splits")
    print("[OK] Time series split methodology maintained")
    print("[OK] No data leakage prevention implemented")
    print("="*80)
    
    # Configuration
    TUNE_HYPERPARAMETERS = True  # Set to False for default training
    
    for prefix in DAY_PREFIXES:
        npz_path = f'../../{prefix}lstm_preprocessed_data.npz'
        if not os.path.exists(npz_path):
            print(f"File not found: {npz_path}, skipping.")
            continue

        # Load preprocessed data
        data = np.load(npz_path, allow_pickle=True)
        X_train, y_train = data['X_train'], data['y_train']
        X_val, y_val = data['X_val'], data['y_val']
        # NOTE: X_test and y_test are NOT used during training/tuning
        
        input_size = X_train.shape[2]  # Number of features

        print(f"\nProcessing {prefix}:")
        print(f"Train shape: {X_train.shape}, Val shape: {X_val.shape}")
        print(f"Input features: {input_size}")
        print(f"Framework compliance: Test data not accessed")

        # Train with hyperparameter tuning or default parameters
        model, best_params = train_optimized_lstm(
            X_train, y_train, X_val, y_val, input_size, 
            prefix, tune_hyperparameters=TUNE_HYPERPARAMETERS
        )

        if best_params:
            print(f"Model for {prefix} trained with optimized hyperparameters")
        else:
            print(f"Model for {prefix} trained with default hyperparameters")

ENHANCED LSTM TRAINING WITH HYPERPARAMETER TUNING
Framework Compliance:
[OK] Test set remains untouched during training and validation
[OK] Hyperparameter tuning uses only train/val splits
[OK] Time series split methodology maintained
[OK] No data leakage prevention implemented

Processing 11_10_:
Train shape: (83346, 60, 3), Val shape: (11851, 60, 3)
Input features: 3
Framework compliance: Test data not accessed

HYPERPARAMETER TUNING FOR 11_10_
Framework: Train/Val split only (Test set untouched)
Search space: 3 combinations
Validation strategy: Time series split compliance
Testing 3 hyperparameter combinations...


Testing hyperparameters:   0%|          | 0/3 [00:00<?, ?it/s]


Combination 1/3
Params: {'hidden_size': 128, 'num_layers': 3, 'dropout': 0.5, 'activation': 'relu', 'learning_rate': 0.001, 'batch_size': 8, 'epochs': 3}


Epochs:   0%|          | 0/3 [00:00<?, ?it/s]

Val Loss: 134.6281, RMSE: 11.6190, MAE: 9.8357, R2: -0.0096, Adj R2: -0.0097, Time: 325.01s
[BEST] NEW BEST MODEL! Val Loss: 134.6281

Combination 2/3
Params: {'hidden_size': 128, 'num_layers': 3, 'dropout': 0.1, 'activation': 'tanh', 'learning_rate': 0.1, 'batch_size': 64, 'epochs': 3}


Epochs:   0%|          | 0/3 [00:00<?, ?it/s]

Val Loss: 138.5528, RMSE: 11.8848, MAE: 10.2239, R2: -0.0563, Adj R2: -0.0564, Time: 298.63s

Combination 3/3
Params: {'hidden_size': 64, 'num_layers': 3, 'dropout': 0.2, 'activation': 'leaky_relu', 'learning_rate': 0.1, 'batch_size': 32, 'epochs': 2}


Epochs:   0%|          | 0/2 [00:00<?, ?it/s]

Val Loss: 135.4642, RMSE: 11.6304, MAE: 9.9744, R2: -0.0116, Adj R2: -0.0117, Time: 108.90s

HYPERPARAMETER TUNING COMPLETE FOR 11_10_
Best hyperparameters:
  hidden_size: 128
  num_layers: 3
  dropout: 0.5
  activation: relu
  learning_rate: 0.001
  batch_size: 8
  epochs: 3
Best validation loss: 134.6281
Results saved to: 11_10_hyperparameter_tuning_results.json
Optimized model saved: 11_10_lstm_model_optimized.pth
Best hyperparameters saved: 11_10_best_hyperparameters.json
Model for 11_10_ trained with optimized hyperparameters

Processing 7_24_:
Train shape: (45174, 60, 1), Val shape: (6402, 60, 1)
Input features: 1
Framework compliance: Test data not accessed

HYPERPARAMETER TUNING FOR 7_24_
Framework: Train/Val split only (Test set untouched)
Search space: 3 combinations
Validation strategy: Time series split compliance
Testing 3 hyperparameter combinations...


Testing hyperparameters:   0%|          | 0/3 [00:00<?, ?it/s]


Combination 1/3
Params: {'hidden_size': 128, 'num_layers': 3, 'dropout': 0.5, 'activation': 'relu', 'learning_rate': 0.001, 'batch_size': 8, 'epochs': 3}


Epochs:   0%|          | 0/3 [00:00<?, ?it/s]

Val Loss: 40.6437, RMSE: 6.3770, MAE: 3.6727, R2: -0.1396, Adj R2: -0.1397, Time: 292.90s
[BEST] NEW BEST MODEL! Val Loss: 40.6437

Combination 2/3
Params: {'hidden_size': 128, 'num_layers': 3, 'dropout': 0.1, 'activation': 'tanh', 'learning_rate': 0.1, 'batch_size': 64, 'epochs': 3}


Epochs:   0%|          | 0/3 [00:00<?, ?it/s]

Val Loss: 37.2871, RMSE: 7.2644, MAE: 4.5466, R2: -0.4788, Adj R2: -0.4790, Time: 107.16s
[BEST] NEW BEST MODEL! Val Loss: 37.2871

Combination 3/3
Params: {'hidden_size': 64, 'num_layers': 3, 'dropout': 0.2, 'activation': 'leaky_relu', 'learning_rate': 0.1, 'batch_size': 32, 'epochs': 2}


Epochs:   0%|          | 0/2 [00:00<?, ?it/s]

Val Loss: 39.8337, RMSE: 6.3195, MAE: 3.6576, R2: -0.1191, Adj R2: -0.1193, Time: 57.90s

HYPERPARAMETER TUNING COMPLETE FOR 7_24_
Best hyperparameters:
  hidden_size: 128
  num_layers: 3
  dropout: 0.1
  activation: tanh
  learning_rate: 0.1
  batch_size: 64
  epochs: 3
Best validation loss: 37.2871
Results saved to: 7_24_hyperparameter_tuning_results.json
Optimized model saved: 7_24_lstm_model_optimized.pth
Best hyperparameters saved: 7_24_best_hyperparameters.json
Model for 7_24_ trained with optimized hyperparameters

Processing 10_19_:
Train shape: (86641, 60, 3), Val shape: (12325, 60, 3)
Input features: 3
Framework compliance: Test data not accessed

HYPERPARAMETER TUNING FOR 10_19_
Framework: Train/Val split only (Test set untouched)
Search space: 3 combinations
Validation strategy: Time series split compliance
Testing 3 hyperparameter combinations...


Testing hyperparameters:   0%|          | 0/3 [00:00<?, ?it/s]


Combination 1/3
Params: {'hidden_size': 128, 'num_layers': 3, 'dropout': 0.5, 'activation': 'relu', 'learning_rate': 0.001, 'batch_size': 8, 'epochs': 3}


Epochs:   0%|          | 0/3 [00:00<?, ?it/s]

Val Loss: 46.1481, RMSE: 6.7937, MAE: 4.5546, R2: -0.0902, Adj R2: -0.0902, Time: 531.09s
[BEST] NEW BEST MODEL! Val Loss: 46.1481

Combination 2/3
Params: {'hidden_size': 128, 'num_layers': 3, 'dropout': 0.1, 'activation': 'tanh', 'learning_rate': 0.1, 'batch_size': 64, 'epochs': 3}


Epochs:   0%|          | 0/3 [00:00<?, ?it/s]

Val Loss: 42.4871, RMSE: 6.5370, MAE: 4.4922, R2: -0.0093, Adj R2: -0.0094, Time: 204.19s
[BEST] NEW BEST MODEL! Val Loss: 42.4871

Combination 3/3
Params: {'hidden_size': 64, 'num_layers': 3, 'dropout': 0.2, 'activation': 'leaky_relu', 'learning_rate': 0.1, 'batch_size': 32, 'epochs': 2}


Epochs:   0%|          | 0/2 [00:00<?, ?it/s]

Val Loss: 42.3982, RMSE: 6.5364, MAE: 4.3857, R2: -0.0091, Adj R2: -0.0092, Time: 106.71s
[BEST] NEW BEST MODEL! Val Loss: 42.3982

HYPERPARAMETER TUNING COMPLETE FOR 10_19_
Best hyperparameters:
  hidden_size: 64
  num_layers: 3
  dropout: 0.2
  activation: leaky_relu
  learning_rate: 0.1
  batch_size: 32
  epochs: 2
Best validation loss: 42.3982
Results saved to: 10_19_hyperparameter_tuning_results.json
Optimized model saved: 10_19_lstm_model_optimized.pth
Best hyperparameters saved: 10_19_best_hyperparameters.json
Model for 10_19_ trained with optimized hyperparameters
