In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam, AdamW
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.callbacks import EarlyStopping
from tensorflow.keras.layers import LSTM, Dense, BatchNormalization, Dropout, LayerNormalization
from tensorflow.keras.layers import LeakyReLU, Input, RepeatVector,TimeDistributed
import pickle
import os


: 

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from torch.utils.data import DataLoader, TensorDataset
import pickle
import os

: 

In [None]:
def generate_lstm_multi_step(X_train, y_train):
    """Build and compile a Keras encoder-decoder LSTM for multi-step output.

    Layer sizes are derived from the array shapes: ``X_train.shape[1]`` is used
    as the number of input timesteps, ``X_train.shape[2]`` as the number of
    features (for both the input and the final Dense layer), and
    ``y_train.shape[1]`` as the number of output steps fed to ``RepeatVector``.

    Args:
        X_train: Array-like exposing a ``shape`` with at least three entries.
        y_train: Array-like exposing a ``shape`` with at least two entries.

    Returns:
        A compiled ``Sequential`` model (AdamW optimizer, MSE loss).
    """
    n_timesteps = X_train.shape[1]
    n_features = X_train.shape[2]
    n_outputs = y_train.shape[1]

    def _lstm(units, **extra):
        # Every recurrent layer shares the same activations, recurrent
        # dropout and L2 kernel penalty; only units/shape flags differ.
        return LSTM(units,
                    activation='tanh', recurrent_activation='sigmoid',
                    recurrent_dropout=0.2,
                    kernel_regularizer=l2(0.001),
                    **extra)

    model = Sequential([
        # Encoder: two stacked LSTMs, the second one emits only its last state.
        _lstm(256, input_shape=(n_timesteps, n_features), return_sequences=True),
        _lstm(256),
        # Repeat the encoded vector once per output step.
        RepeatVector(n_outputs),
        # Decoder: two stacked LSTMs emitting one vector per output step.
        _lstm(128, return_sequences=True),
        _lstm(128, return_sequences=True),
        # Per-step projection back to the feature dimension.
        TimeDistributed(Dense(64, activation='tanh')),
        TimeDistributed(Dense(n_features)),
    ])

    model.compile(optimizer=AdamW(learning_rate=0.001, weight_decay=1e-5),
                  loss='mse')
    return model

: 

In [None]:
def train_lstm_multi_step(model, checkpoint_path, X_train, y_train,
                          epochs=100, batch_size = 100,
                          validation_split=0.05,
                          patience = 100,
                          verbose = 0):
    """Load weights from ``checkpoint_path`` if possible, otherwise train.

    If ``model.load_weights(checkpoint_path)`` succeeds, no training is done
    and the previously pickled training history is returned when it can be
    read (``None`` otherwise). If loading fails, the model is fitted with a
    best-only ``ModelCheckpoint`` on ``val_loss``, an identity
    ``LearningRateScheduler`` and ``EarlyStopping``; the object returned by
    ``model.fit`` is pickled next to the checkpoint and returned.

    The history file lives in the checkpoint's directory and is named after
    the checkpoint file name up to its first dot (no extension).

    Args:
        model: Compiled Keras model exposing ``load_weights`` and ``fit``.
        checkpoint_path: Path the best model is saved to / loaded from.
        X_train, y_train: Training inputs and targets passed to ``model.fit``.
        epochs: Maximum number of epochs.
        batch_size: Mini-batch size.
        validation_split: Fraction of the data Keras holds out for validation.
        patience: Epochs without ``val_loss`` improvement before stopping.
        verbose: Verbosity forwarded to ``fit`` and ``ModelCheckpoint``.

    Returns:
        The object returned by ``model.fit`` after training, or the unpickled
        history / ``None`` when existing weights were loaded instead.
    """
    # Keep the history beside the checkpoint. For the './<dir>/<file>' layout
    # this resolves to the same file as the former split('/')-based lookup,
    # but it no longer raises IndexError for paths without a '/'.
    history_path = os.path.join(os.path.dirname(checkpoint_path),
                                os.path.basename(checkpoint_path).split('.')[0])

    try:
        model.load_weights(checkpoint_path)
    except Exception:
        # Any loading failure means "no usable checkpoint": train from the
        # model's current weights. KeyboardInterrupt/SystemExit propagate.
        weights_loaded = False
    else:
        weights_loaded = True

    if weights_loaded:
        history = None
        if os.path.isfile(history_path):
            try:
                # NOTE: pickle can execute arbitrary code; only load history
                # files that were written by this function.
                with open(history_path, 'rb') as f:
                    history = pickle.load(f)
            except Exception:
                # Best effort: the weights are loaded, a missing or unreadable
                # history must not make the call fail.
                history = None
        return history

    def lr_scheduler(epoch, lr):
        # Identity schedule: the learning rate is kept as-is every epoch.
        return lr

    lr_scheduler_callback = LearningRateScheduler(lr_scheduler)

    checkpoint = ModelCheckpoint(filepath=checkpoint_path,
                                 monitor='val_loss',
                                 save_best_only=True,
                                 mode='min',
                                 verbose=verbose)

    early_stopping = EarlyStopping(monitor='val_loss', patience=patience,
                                   restore_best_weights=True, verbose=1)

    history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
                        validation_split=validation_split,
                        verbose=verbose,
                        callbacks=[checkpoint, lr_scheduler_callback, early_stopping])

    with open(history_path, 'wb') as f:
        pickle.dump(history, f)
    return history

: 

In [None]:
class LSTMModel(nn.Module):
    """Encoder-decoder LSTM that maps an input sequence to ``n_outputs`` steps.

    Two single-layer LSTMs encode the input; the encoder output at the last
    time step is repeated ``n_outputs`` times and decoded by two more
    single-layer LSTMs, followed by a tanh dense layer and a linear projection
    back to ``n_features``. A dropout layer follows every LSTM and the first
    dense layer.

    Args:
        n_features: Size of the last dimension of the input and of the output.
        n_outputs: Number of output time steps.
        hidden_size: Hidden size of all four LSTMs (default 512).
        dense_size: Width of the intermediate dense layer (default 256).
        dropout: Probability used by every ``nn.Dropout`` layer (default 0.2).
    """

    def __init__(self, n_features, n_outputs, hidden_size=512, dense_size=256, dropout=0.2):
        super().__init__()
        # nn.LSTM's own `dropout` argument only acts between stacked layers;
        # with num_layers=1 it does nothing except emit a UserWarning, so it
        # is not passed. Regularisation comes from the explicit Dropout layers.
        self.lstm1 = nn.LSTM(input_size=n_features, hidden_size=hidden_size, num_layers=1, batch_first=True)
        self.dropout1 = nn.Dropout(dropout)  # applied to lstm1 output
        self.lstm2 = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size, num_layers=1, batch_first=True)
        self.dropout2 = nn.Dropout(dropout)  # applied to lstm2 output
        self.repeat = n_outputs  # number of times the encoded step is repeated
        self.lstm3 = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size, num_layers=1, batch_first=True)
        self.dropout3 = nn.Dropout(dropout)  # applied to lstm3 output
        self.lstm4 = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size, num_layers=1, batch_first=True)
        self.dropout4 = nn.Dropout(dropout)  # applied to lstm4 output
        self.dense1 = nn.Linear(hidden_size, dense_size)
        self.dropout5 = nn.Dropout(dropout)  # applied after the first dense layer
        self.dense2 = nn.Linear(dense_size, n_features)

    def forward(self, x):
        """Run the network on a batch-first sequence tensor.

        Args:
            x: Tensor whose last dimension is ``n_features``; the LSTMs are
                batch-first, so dimension 0 is the batch and 1 is time.

        Returns:
            Tensor with ``n_outputs`` steps along dimension 1 and
            ``n_features`` values along the last dimension.
        """
        # Encoder
        x, _ = self.lstm1(x)
        x = self.dropout1(x)
        x, _ = self.lstm2(x)
        x = self.dropout2(x)
        # Keep only the last encoder step and repeat it once per output step.
        x = x[:, -1, :].unsqueeze(1).repeat(1, self.repeat, 1)
        # Decoder
        x, _ = self.lstm3(x)
        x = self.dropout3(x)
        x, _ = self.lstm4(x)
        x = self.dropout4(x)
        # Per-step projection head
        x = torch.tanh(self.dense1(x))
        x = self.dropout5(x)
        x = self.dense2(x)
        return x

def generate_lstm_multi_step(X_train, y_train):
    """Create an ``LSTMModel`` sized from the data and move it to ``device``.

    ``X_train.shape[2]`` is used as the feature count and ``y_train.shape[1]``
    as the number of output steps.

    Returns:
        The ``LSTMModel`` instance after ``.to(device)``.
    """
    n_features = X_train.shape[2]
    n_outputs = y_train.shape[1]
    # NOTE(review): `device` is not defined in this cell; it is presumably a
    # module-level torch device set elsewhere in the notebook - confirm.
    return LSTMModel(n_features, n_outputs).to(device)

def train_lstm_multi_step(model, checkpoint_path, X_train, y_train, epochs=100, batch_size=100, validation_split=0.05, patience=10, verbose=0):
    """Train ``model`` with AdamW/MSE, checkpointing and early stopping.

    The last ``validation_split`` fraction of the samples is held out for
    validation. After every epoch the mean per-batch training and validation
    losses are recorded; whenever the monitored loss improves the model's
    ``state_dict`` is saved to ``checkpoint_path``, and training stops once it
    has not improved for ``patience`` consecutive epochs. The monitored loss
    is the validation loss, or the training loss when the split leaves no
    validation samples (``val_loss`` is then recorded as NaN).

    When training ends, ``{"val_loss": <best monitored loss>}`` is pickled to
    the checkpoint path with its extension replaced by ``.pkl``. The model
    itself keeps the weights of the last epoch run; the best weights are the
    ones stored at ``checkpoint_path``.

    Args:
        model: ``nn.Module`` to train (updated in place).
        checkpoint_path: File the best ``state_dict`` is written to.
        X_train, y_train: Input and target tensors, indexed along dimension 0.
        epochs: Maximum number of epochs.
        batch_size: Mini-batch size for both loaders.
        validation_split: Fraction of trailing samples used for validation.
        patience: Epochs without improvement before early stopping.
        verbose: If truthy, print the losses after every epoch.

    Returns:
        dict with per-epoch lists under ``'train_loss'`` and ``'val_loss'``.

    Raises:
        ValueError: If the split leaves no training samples.
    """
    # Creating training and validation datasets
    train_size = int((1 - validation_split) * X_train.size(0))
    if train_size <= 0:
        raise ValueError(
            f"validation_split={validation_split} leaves no training samples "
            f"out of {X_train.size(0)}"
        )
    train_dataset = TensorDataset(X_train[:train_size], y_train[:train_size])
    val_dataset = TensorDataset(X_train[train_size:], y_train[train_size:])
    has_validation = len(val_dataset) > 0

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size) if has_validation else None

    optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-5)
    criterion = nn.MSELoss()
    scheduler = LambdaLR(optimizer, lr_lambda=lambda epoch: 1)  # Adjust lambda function as needed

    # Batches are moved to wherever the model lives; this is a no-op when the
    # data is already on that device.
    model_device = next(model.parameters()).device

    best_loss = float('inf')
    patience_counter = 0

    history = {'train_loss':[],'val_loss':[]}
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(model_device)
            y_batch = y_batch.to(model_device)
            optimizer.zero_grad()
            output = model(X_batch)
            loss = criterion(output, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader)

        # Validation
        model.eval()
        if has_validation:
            val_loss = 0
            with torch.no_grad():
                for X_batch, y_batch in val_loader:
                    X_batch = X_batch.to(model_device)
                    y_batch = y_batch.to(model_device)
                    output = model(X_batch)
                    val_loss += criterion(output, y_batch).item()
            val_loss /= len(val_loader)
            monitored_loss = val_loss
        else:
            # No held-out data: record NaN and fall back to the training loss
            # for checkpointing / early stopping instead of dividing by zero.
            val_loss = float('nan')
            monitored_loss = train_loss

        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_loss)

        if verbose:
            # train_loss is already the per-batch mean; do not divide again.
            print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss}, Val Loss: {val_loss}")

        if monitored_loss < best_loss:
            best_loss = monitored_loss
            patience_counter = 0
            torch.save(model.state_dict(), checkpoint_path)
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping")
                break

        scheduler.step()

    with open(f"{os.path.splitext(checkpoint_path)[0]}.pkl", "wb") as f:
        pickle.dump({"val_loss": best_loss}, f)
    return history

# Example usage:
# X_train, y_train are assumed to be preprocessed and loaded as torch tensors
# checkpoint_path = "./checkpoint.pth"
# model = generate_lstm_multi_step(X_train, y_train)
# history = train_lstm_multi_step(model, checkpoint_path, X_train, y_train, batch_size=128, epochs=100, verbose=1)

: 