# Note
The below is a very barebones training of an LSTM for sequence prediction, mostly to test that
my code works. \
It will be replaced by a more extensive example on music generation trained on 
the Bach Chorale dataset. 

In [1]:
import numpy as np

# Generate sine wave data
timesteps = 1000  # total timesteps
data = np.sin(np.linspace(0, 20 * np.pi, timesteps))  # sine wave

# Function to create dataset
def create_dataset(data, look_back):
    X, Y = [], []
    for i in range(len(data) - look_back):
        X.append(data[i:(i + look_back)])
        Y.append(data[i + look_back])
    return np.array(X), np.array(Y)

# Create dataset
look_back = 10  # number of previous time steps to consider
X, Y = create_dataset(data, look_back)
print(X.shape)
print(Y.shape)

# Split into train and test sets
train_size = int(len(X) * 0.67)
test_size = len(X) - train_size
X_train, Y_train = X[:train_size], Y[:train_size]
X_test, Y_test = X[train_size:], Y[train_size:]

(990, 10)
(990,)


In [2]:
from layers import LSTM 
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
import torch.nn.functional as F

# Convert data to PyTorch tensors and create DataLoader for batch processing
train_data = TensorDataset(torch.Tensor(X_train), torch.Tensor(Y_train))
test_data = TensorDataset(torch.Tensor(X_test), torch.Tensor(Y_test))

batch_size = 64
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_data, batch_size=batch_size)


# Instantiate the LSTM model
input_dim = 1  # As we are using a single feature (sine wave value)
hidden_dim = 50  # Number of features in the hidden state, can be tuned
num_layers = 1  # Number of recurrent layers
model = LSTM(in_dim=input_dim, hidden_dim=hidden_dim, num_layers=num_layers)

In [3]:
# Define loss function and optimizer
loss_function = torch.nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    for x_batch, y_batch in train_loader:
        # Adding the feature dimension and transpose (batch, len, 1) to (len, batch, 1)
        x_batch = torch.transpose(x_batch.unsqueeze(-1), 0, 1)  
        optimizer.zero_grad()
        output, (h_n, c_n) = model(x_batch)
        loss = loss_function(output[-1, :, -1], y_batch)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item()}")

model.eval()
test_loss = 0
with torch.no_grad():
    for x_batch, y_batch in test_loader:
        # Adding the feature dimension and transpose (batch, len, 1) to (len, batch, 1)
        x_batch = torch.transpose(x_batch.unsqueeze(-1), 0, 1)
        output, _ = model(x_batch)
        # Extracting the last element of the output sequence and comparing with scalar target
        test_loss += loss_function(output[-1, :, -1], y_batch).item()

test_loss /= len(test_loader)
print(f"Test Loss: {test_loss}")


Epoch 1, Loss: 0.41942521929740906
Epoch 2, Loss: 0.34045082330703735
Epoch 3, Loss: 0.3728897273540497
Epoch 4, Loss: 0.3077889084815979
Epoch 5, Loss: 0.36738166213035583
Epoch 6, Loss: 0.34604257345199585
Epoch 7, Loss: 0.35525238513946533
Epoch 8, Loss: 0.26072826981544495
Epoch 9, Loss: 0.39337876439094543
Epoch 10, Loss: 0.21983441710472107
Epoch 11, Loss: 0.23176570236682892
Epoch 12, Loss: 0.2676360011100769
Epoch 13, Loss: 0.24578633904457092
Epoch 14, Loss: 0.22348769009113312
Epoch 15, Loss: 0.2264477014541626
Epoch 16, Loss: 0.14031729102134705
Epoch 17, Loss: 0.21162086725234985
Epoch 18, Loss: 0.1525958627462387
Epoch 19, Loss: 0.18989117443561554
Epoch 20, Loss: 0.17115388810634613
Epoch 21, Loss: 0.20130911469459534
Epoch 22, Loss: 0.14241954684257507
Epoch 23, Loss: 0.13836641609668732
Epoch 24, Loss: 0.2191215604543686
Epoch 25, Loss: 0.1494072675704956
Epoch 26, Loss: 0.12727472186088562
Epoch 27, Loss: 0.14751796424388885
Epoch 28, Loss: 0.17398634552955627
Epoch 29