In [1]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib
from itertools import product
import os

def create_sequences(X, y, time_steps=10):
    """
    Slice aligned feature/target arrays into sliding windows for an LSTM.

    Window ``k`` holds the rows ``X[k : k + time_steps]`` and is paired with
    the target located right after it, ``y[k + time_steps]``.

    Parameters:
    X (array): Features, sliced along the first axis
    y (array): Target, indexed along the first axis
    time_steps (int): Number of consecutive rows in each window

    Returns:
    X_seq, y_seq: NumPy arrays holding the windows and their matching
        targets; there are ``len(X) - time_steps`` of each (none when that
        difference is not positive)
    """
    # Every valid window start; a range can be iterated more than once
    window_starts = range(len(X) - time_steps)

    windows = [X[start:start + time_steps] for start in window_starts]
    targets = [y[start + time_steps] for start in window_starts]

    return np.array(windows), np.array(targets)

In [2]:
def build_lstm_model(input_shape, units=50, dropout_rate=0.2, learning_rate=0.001):
    """
    Assemble and compile a two-layer stacked LSTM regressor.

    Layer order: LSTM(units, returning sequences) -> Dropout ->
    LSTM(units // 2, returning the last output only) -> Dropout -> Dense(1).
    Both LSTM layers use a recurrent dropout of half ``dropout_rate``.

    Parameters:
    input_shape (tuple): Shape of input data (time_steps, features)
    units (int): Number of units in the first LSTM layer; the second gets units // 2
    dropout_rate (float): Rate for the Dropout layers
    learning_rate (float): Learning rate for the Adam optimizer

    Returns:
    model: Keras Sequential model compiled with MSE loss and an MAE metric
    """
    # Shared by both recurrent layers
    recurrent_rate = dropout_rate / 2

    layer_stack = [
        # First recurrent layer emits the full sequence so another LSTM can consume it
        LSTM(units=units,
             return_sequences=True,
             input_shape=input_shape,
             recurrent_dropout=recurrent_rate),
        Dropout(dropout_rate),
        # Second recurrent layer collapses the sequence to its final output
        LSTM(units=units // 2,
             return_sequences=False,
             recurrent_dropout=recurrent_rate),
        Dropout(dropout_rate),
        # Single-value output
        Dense(1),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    return model

In [5]:
def perform_simplified_grid_search(X_train, y_train, X_val, y_val, hyperparams):
    """
    Simplified grid search that uses validation data for faster evaluation

    Every combination of the supplied hyperparameter values is used to build a
    fresh model with ``build_lstm_model``, which is trained for 10 epochs and
    scored by the validation loss of its final epoch. Keras global state is
    cleared after each trial so that memory does not grow with the number of
    combinations.

    Parameters:
    X_train, y_train: Training data; X_train must be 3-D because its second
        and third dimensions are used as the model input shape
    X_val, y_val: Validation data
    hyperparams: Dictionary with the keys 'units', 'learning_rate',
        'dropout_rate' and 'batch_size', each mapping to an iterable of
        candidate values

    Returns:
    best_params: Dictionary of best hyperparameters (same four keys), or None
        when no combination produced a validation loss below infinity
        (for example when every loss is NaN, or a candidate list is empty)
    """
    # Function-scope import: the backend handle is only needed to release
    # per-trial Keras state.
    from tensorflow.keras import backend as keras_backend

    best_params = None
    best_val_loss = float('inf')
    input_shape = (X_train.shape[1], X_train.shape[2])

    # Generate all combinations of hyperparameters
    param_combinations = list(product(
        hyperparams['units'],
        hyperparams['learning_rate'],
        hyperparams['dropout_rate'],
        hyperparams['batch_size']
    ))

    total_combinations = len(param_combinations)
    print(f"Testing {total_combinations} hyperparameter combinations...")

    for i, (units, learning_rate, dropout_rate, batch_size) in enumerate(param_combinations, start=1):
        print(f"Combination {i}/{total_combinations}: units={units}, lr={learning_rate}, dropout={dropout_rate}, batch_size={batch_size}")

        # Build model with current parameters
        model = build_lstm_model(input_shape, units, dropout_rate, learning_rate)

        # Train for only a few epochs to save time
        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=10,
            batch_size=batch_size,
            verbose=0
        )

        # Score the trial by the validation loss of the last epoch
        val_loss = history.history['val_loss'][-1]
        print(f"Validation loss: {val_loss:.6f}")

        # A NaN loss never compares as smaller, so diverged trials are skipped
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_params = {
                'units': units,
                'learning_rate': learning_rate,
                'dropout_rate': dropout_rate,
                'batch_size': batch_size
            }
            print("New best parameters found!")

        # Drop the trial model and reset Keras global state; otherwise each
        # model built in this loop keeps consuming memory until the search ends.
        del model, history
        keras_backend.clear_session()

    return best_params

In [6]:

def train_lstm_model(X_train_scaled, y_train_scaled, X_test_scaled, y_test_scaled, time_steps=20):
    """
    Train an LSTM model with hyperparameter optimization

    The inputs are converted to NumPy arrays, windowed with
    ``create_sequences``, searched over a fixed hyperparameter grid with
    ``perform_simplified_grid_search``, and the best configuration is then
    trained for up to 100 epochs with early stopping, learning-rate reduction
    and checkpointing. The best checkpoint is written to
    'models/best_lstm_model.h5' and the returned model to
    'models/lstm_model.h5'.

    NOTE(review): the test split is used as validation data both for the
    hyperparameter search and for early stopping/checkpointing, so metrics
    later computed on the same split are optimistically biased. Consider
    passing a dedicated validation split instead.

    Parameters:
    X_train_scaled, y_train_scaled: Scaled training data
    X_test_scaled, y_test_scaled: Scaled testing data
    time_steps (int): Number of time steps for sequences

    Returns:
    best_model: Trained LSTM model
    history: Training history
    X_test_seq: Sequence test data for predictions
    y_test_seq: Sequence test targets
    best_params: Dictionary of the hyperparameters selected by the grid search

    Raises:
    ValueError: If either split has no more than ``time_steps`` rows, or if
        the grid search did not select any hyperparameter combination
    """
    def _to_feature_array(features):
        # DataFrames are unwrapped to their underlying NumPy array
        if isinstance(features, pd.DataFrame):
            return features.values
        return features

    def _to_column_target(target):
        # Series/DataFrames are unwrapped; 1-D arrays become a single column
        if isinstance(target, (pd.Series, pd.DataFrame)):
            target = target.values
        if isinstance(target, np.ndarray) and target.ndim == 1:
            target = target.reshape(-1, 1)
        return target

    # Normalise each input independently. The previous if/elif chain handled
    # at most one of the two targets, leaving train and test targets with
    # different ranks for some input combinations.
    X_train_scaled = _to_feature_array(X_train_scaled)
    X_test_scaled = _to_feature_array(X_test_scaled)
    y_train_scaled = _to_column_target(y_train_scaled)
    y_test_scaled = _to_column_target(y_test_scaled)

    # Each split needs more rows than one window to yield any sequence
    if len(X_train_scaled) <= time_steps or len(X_test_scaled) <= time_steps:
        raise ValueError(
            f"Need more than time_steps={time_steps} rows in each split to build sequences "
            f"(train has {len(X_train_scaled)}, test has {len(X_test_scaled)})"
        )

    # Create sequences
    X_train_seq, y_train_seq = create_sequences(X_train_scaled, y_train_scaled, time_steps)
    X_test_seq, y_test_seq = create_sequences(X_test_scaled, y_test_scaled, time_steps)

    print(f"Training sequence shape: {X_train_seq.shape}")
    print(f"Testing sequence shape: {X_test_seq.shape}")

    # Define hyperparameters for optimization
    lstm_hyperparams = {
        'units': [32, 64, 128],
        'learning_rate': [0.001, 0.0005],
        'dropout_rate': [0.2, 0.3],
        'batch_size': [32, 64]
    }

    # Find best hyperparameters (simplified version to save time)
    best_params = perform_simplified_grid_search(
        X_train_seq, y_train_seq, X_test_seq, y_test_seq, lstm_hyperparams
    )

    print(f"Best hyperparameters: {best_params}")

    # The search returns None when no trial produced a comparable loss
    if best_params is None:
        raise ValueError(
            "Hyperparameter search did not select any combination "
            "(all validation losses were NaN or infinite)"
        )

    # Create model with best parameters
    input_shape = (X_train_seq.shape[1], X_train_seq.shape[2])
    best_model = build_lstm_model(
        input_shape,
        units=best_params['units'],
        dropout_rate=best_params['dropout_rate'],
        learning_rate=best_params['learning_rate']
    )

    # Ensure the output directory exists BEFORE training: ModelCheckpoint
    # writes into it during fit(), not only the final save below.
    os.makedirs('models', exist_ok=True)

    # Define callbacks
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=0.0001),
        ModelCheckpoint('models/best_lstm_model.h5', monitor='val_loss', save_best_only=True, verbose=1)
    ]

    # Train the model
    history = best_model.fit(
        X_train_seq, y_train_seq,
        validation_data=(X_test_seq, y_test_seq),
        epochs=100,
        batch_size=best_params['batch_size'],
        callbacks=callbacks,
        verbose=1
    )

    # Save model
    best_model.save('models/lstm_model.h5')

    return best_model, history, X_test_seq, y_test_seq, best_params