In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Define the BiLSTM model
class BiLSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super(BiLSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # Define the LSTM layer
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, bidirectional=True)

        # Define the output layer
        self.linear = nn.Linear(hidden_dim*2, output_dim)

    def forward(self, x):
        # Initialize hidden state with zeros
        h0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_dim).to(x.device)

        # Initialize cell state
        c0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_dim).to(x.device)

        # We need to detach as we are doing truncated backpropagation through time (BPTT)
        out, _ = self.lstm(x, (h0.detach(), c0.detach()))

        # Decode the hidden state of the last time step
        out = self.linear(out[:, -1, :])

        return out
    

# Generate synthetic dataset
N = 1000 # Number of samples
T = 10   # Sequence length
input_dim = 1
hidden_dim = 32
num_layers = 2
output_dim = 1

# Randomly initialize the dataset
X = torch.randn(N, T, input_dim)
Y = torch.randn(N, output_dim)

# Split the dataset into training and test sets
train_x = X[:800]
train_y = Y[:800]
test_x = X[800:]
test_y = Y[800:]

# Initialize the model, loss function and optimizer
model = BiLSTM(input_dim, hidden_dim, output_dim, num_layers)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training
epochs = 100
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()

    outputs = model(train_x)
    loss = criterion(outputs, train_y)

    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print('Epoch: {}/{}.............'.format(epoch+1, epochs), end=' ')
        print("Loss: {:.4f}".format(loss.item()))

# Testing
model.eval()
with torch.no_grad():
    predicted = model(test_x)
    loss = criterion(predicted, test_y)
print('Test Loss: {:.4f}'.format(loss.item()))


Epoch: 10/100............. Loss: 1.0396
Epoch: 20/100............. Loss: 1.0376
Epoch: 30/100............. Loss: 1.0349
Epoch: 40/100............. Loss: 1.0215
Epoch: 50/100............. Loss: 1.0001
Epoch: 60/100............. Loss: 0.9510
Epoch: 70/100............. Loss: 0.8840
Epoch: 80/100............. Loss: 0.7668
Epoch: 90/100............. Loss: 0.6515
Epoch: 100/100............. Loss: 0.5292
Test Loss: 1.5937


In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Define the LSTM model
class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # Define the LSTM layer
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)

        # Define the output layer
        self.linear = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Initialize hidden state with zeros
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)

        # Initialize cell state
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)

        # We need to detach as we are doing truncated backpropagation through time (BPTT)
        out, _ = self.lstm(x, (h0.detach(), c0.detach()))

        # Decode the hidden state of the last time step
        out = self.linear(out[:, -1, :])

        return out

# Generate synthetic dataset
N = 1000 # Number of samples
T = 10   # Sequence length
input_dim = 1
hidden_dim = 32
num_layers = 2
output_dim = 2  # We are predicting both the next and previous states

# Randomly initialize the dataset
X_prior = torch.randn(N, T, input_dim)
X = X_prior[1:-1]
Y_forward = X_prior[0:-2]  # Forward 
Y_backward = X_prior[2:]  #Backward


# Create input-output pairs for training that include both the next and previous states
Y_pairs = torch.zeros(X.shape[0], T, output_dim)
Y_pairs[:, :, 0] = Y_forward
Y_pairs[:, :, 1] = Y_backward

# Split the dataset into training and test sets
train_x = X[:800]
train_y = Y_pairs[:800]
test_x = X[800:]
test_y = Y_pairs[800:]

# Initialize the model, loss function and optimizer
model = LSTM(input_dim, hidden_dim, output_dim, num_layers)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training
epochs = 100
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()

    outputs = model(train_x)
    print (train_x.shape, outputs.shape,train_y.shape)
    loss = criterion(outputs, train_y)

    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print('Epoch: {}/{}.............'.format(epoch+1, epochs), end=' ')
        print("Loss: {:.4f}".format(loss.item()))


RuntimeError: expand(torch.FloatTensor{[998, 10, 1]}, size=[998, 10]): the number of sizes provided (2) must be greater or equal to the number of dimensions in the tensor (3)

In [12]:
Y_pairs.shape

torch.Size([998, 10, 2])