In [None]:
pip install torch

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
import numpy as np
import random
import math

## LSTM

In [None]:
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear head applied to the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced by the linear head.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # batch_first=True: the LSTM consumes and returns (batch, seq, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch of sequences to one prediction per sequence.

        Args:
            x: Tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, output_size).
        """
        # Zero initial hidden and cell states. new_zeros allocates directly on
        # x's device AND with x's dtype, so the model also works after
        # .double()/.half() (a float32 state would be rejected by nn.LSTM).
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)

        # Forward propagate LSTM; the final (h_n, c_n) tuple is not needed here.
        out, _ = self.lstm(x, (h0, c0))

        # Decode the top-layer output of the last time step.
        return self.fc(out[:, -1, :])



In [None]:
# Seed PyTorch's RNG so the dummy data, the weight initialisation and the
# printed losses are identical on every "Restart Kernel -> Run All".
torch.manual_seed(0)

# Hyperparameters
input_size = 10   # number of features per time step
hidden_size = 50  # width of the LSTM hidden state
num_layers = 2    # number of stacked LSTM layers
output_size = 1   # number of output features
num_epochs = 100
learning_rate = 0.001

# Generate dummy data
# Inputs are (batch_size, sequence_length, input_size); targets are
# (batch_size, output_size). Both are random noise, so the loop below only
# shows that the model can fit this single batch.
batch_size = 32
sequence_length = 15
x_train = torch.randn(batch_size, sequence_length, input_size)
y_train = torch.randn(batch_size, output_size)

# Initialize the model, loss function, and optimizer
model = LSTMModel(input_size, hidden_size, num_layers, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training mode is selected once; nothing inside the loop switches it back.
model.train()

# Training loop (full batch: one optimizer step per epoch)
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(x_train)
    loss = criterion(outputs, y_train)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

print("Training finished.")


Epoch [10/100], Loss: 1.1983
Epoch [20/100], Loss: 1.0617
Epoch [30/100], Loss: 0.7005
Epoch [40/100], Loss: 0.4617
Epoch [50/100], Loss: 0.3072
Epoch [60/100], Loss: 0.2030
Epoch [70/100], Loss: 0.1107
Epoch [80/100], Loss: 0.0565
Epoch [90/100], Loss: 0.0227
Epoch [100/100], Loss: 0.0068
Training finished.


## Encoder

In [None]:
class EncoderPredictor(nn.Module):
    """LSTM encoder whose final top-layer hidden state feeds a linear head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced by the linear head.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # batch_first=True: the LSTM consumes (batch, seq, feature) inputs.
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Encode x and predict from the summary state.

        Args:
            x: Tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, output_size).
        """
        # Zero initial states created with x's device and dtype; building them
        # with torch.zeros(...) would pin them to float32 and break the model
        # under .double()/.half().
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)

        # Only the final hidden state is used; per-step outputs and the final
        # cell state are discarded.
        _, (hn, _) = self.lstm(x, (h0, c0))

        # hn is (num_layers, batch, hidden_size); hn[-1] is the top layer.
        return self.fc(hn[-1])


In [None]:
# Seed PyTorch's RNG so the dummy data, the weight initialisation and the
# printed metrics are identical on every "Restart Kernel -> Run All".
torch.manual_seed(0)

# Hyperparameters
input_size = 10   # Number of features
hidden_size = 50
num_layers = 2
output_size = 1   # Number of output features (for regression)
num_epochs = 100
learning_rate = 0.001

# Generate dummy data: random (batch, seq, feature) inputs and random targets.
batch_size = 32
sequence_length = 15
x_train = torch.randn(batch_size, sequence_length, input_size)
y_train = torch.randn(batch_size, output_size)

# Initialize the model, loss function, and optimizer
model = EncoderPredictor(input_size, hidden_size, num_layers, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training mode is selected once; it is only switched (to eval) after the loop.
model.train()

# Training loop (full batch: one optimizer step per epoch)
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(x_train)
    loss = criterion(outputs, y_train)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

print("Training finished.")

# Evaluation
# NOTE: this scores the model on the same batch it was trained on, so the RMSE
# measures how well the batch was fitted, not generalisation.
model.eval()  # Set model to evaluation mode
with torch.no_grad():
    predictions = model(x_train)
    mse = criterion(predictions, y_train)
    rmse = torch.sqrt(mse)
    print(f'RMSE: {rmse.item():.4f}')


Epoch [10/100], Loss: 0.8159
Epoch [20/100], Loss: 0.7734
Epoch [30/100], Loss: 0.6840
Epoch [40/100], Loss: 0.5553
Epoch [50/100], Loss: 0.3609
Epoch [60/100], Loss: 0.2358
Epoch [70/100], Loss: 0.1342
Epoch [80/100], Loss: 0.0505
Epoch [90/100], Loss: 0.0139
Epoch [100/100], Loss: 0.0042
Training finished.
RMSE: 0.0618


## Decoder

In [None]:
class DecoderPredictor(nn.Module):
    """LSTM that starts from a caller-supplied state and predicts from the last step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced by the linear head.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # batch_first=True: the LSTM consumes and returns (batch, seq, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden=None):
        """Run the LSTM from `hidden` and decode the last time step.

        Args:
            x: Tensor of shape (batch, seq_len, input_size).
            hidden: Optional ``(h0, c0)`` tuple, each of shape
                (num_layers, batch, hidden_size). When omitted, nn.LSTM starts
                from all-zero states, so callers without an encoder state do
                not have to build the zero tensors themselves.

        Returns:
            Tensor of shape (batch, output_size).
        """
        # The final (h_n, c_n) state is not needed for the prediction.
        out, _ = self.lstm(x, hidden)
        return self.fc(out[:, -1, :])

In [None]:
# Seed PyTorch's RNG so the dummy data, the weight initialisation and the
# printed metrics are identical on every "Restart Kernel -> Run All".
torch.manual_seed(0)

# Hyperparameters
input_size = 10   # Number of features
hidden_size = 50  # Number of features in the hidden state
num_layers = 2    # Number of stacked LSTM layers
output_size = 1   # Number of output features (for regression)
num_epochs = 100  # Number of training epochs
learning_rate = 0.001  # Learning rate

# Generate dummy data: random (batch, seq, feature) inputs and random targets.
batch_size = 32
sequence_length = 15
x_train = torch.randn(batch_size, sequence_length, input_size)
y_train = torch.randn(batch_size, output_size)

# Initialize the model, loss function, and optimizer
decoder = DecoderPredictor(input_size, hidden_size, num_layers, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

# Initialize hidden state and cell state
# These are constants (not parameters), so the same zero tensors are reused
# for every forward pass.
h0 = torch.zeros(num_layers, batch_size, hidden_size)
c0 = torch.zeros(num_layers, batch_size, hidden_size)

# Training mode is selected once; it is only switched (to eval) after the loop.
decoder.train()

# Training loop (full batch: one optimizer step per epoch)
for epoch in range(num_epochs):
    # Forward pass
    outputs = decoder(x_train, (h0, c0))
    loss = criterion(outputs, y_train)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

print("Training finished.")

# Evaluation
# NOTE: this scores the model on the same batch it was trained on, so the RMSE
# measures how well the batch was fitted, not generalisation.
decoder.eval()  # Set model to evaluation mode
with torch.no_grad():
    predictions = decoder(x_train, (h0, c0))
    mse = criterion(predictions, y_train)
    rmse = torch.sqrt(mse)
    print(f'RMSE: {rmse.item():.4f}')


Epoch [10/100], Loss: 0.7202
Epoch [20/100], Loss: 0.6732
Epoch [30/100], Loss: 0.5511
Epoch [40/100], Loss: 0.2877
Epoch [50/100], Loss: 0.1333
Epoch [60/100], Loss: 0.0543
Epoch [70/100], Loss: 0.0204
Epoch [80/100], Loss: 0.0056
Epoch [90/100], Loss: 0.0014
Epoch [100/100], Loss: 0.0006
Training finished.
RMSE: 0.0247


## Transformer

In [None]:
class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position encodings to a sequence-first tensor.

    Args:
        d_model: Size of the feature dimension (even or odd).
        max_len: Number of positions for which encodings are precomputed.
    """

    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        # One frequency per (sin, cos) pair: 10000^(-2i / d_model).
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one fewer odd column than even column, so
        # the last frequency has no cosine slot; trimming div_term keeps the
        # assignment shape-compatible (it is a no-op for even d_model).
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        # Shape (max_len, 1, d_model): the singleton axis broadcasts over batch.
        pe = pe.unsqueeze(1)
        # A buffer follows .to()/.cuda() and is saved in state_dict, but is
        # not returned by parameters(), so the optimizer never updates it.
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return x plus the encodings of its first x.size(0) positions.

        Args:
            x: Tensor of shape (seq_len, batch, d_model), seq_len <= max_len.

        Returns:
            Tensor with the same shape as x.
        """
        return x + self.pe[:x.size(0), :]

class TransformerModel(nn.Module):
    """Encoder-decoder nn.Transformer with linear input/output projections.

    Tensors are sequence-first, i.e. (seq_len, batch, features), because the
    underlying nn.Transformer is created with its default layout.

    Args:
        input_size: Number of features per source time step.
        hidden_size: Transformer model width (d_model); must be divisible by nhead.
        num_layers: Number of encoder layers and of decoder layers.
        output_size: Number of features per target/output time step.
        nhead: Number of attention heads.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, nhead=8):
        super().__init__()
        self.model_type = 'Transformer'
        self.pos_encoder = PositionalEncoding(hidden_size)
        # Source projection: input_size -> hidden_size.
        self.encoder = nn.Linear(input_size, hidden_size)
        # Target projection: output_size -> hidden_size. The target sequence
        # has output_size features, so pushing it through `self.encoder` only
        # works when input_size happens to equal output_size.
        self.tgt_encoder = nn.Linear(output_size, hidden_size)
        self.transformer = nn.Transformer(hidden_size, nhead, num_layers, num_layers, dropout=0.1)
        self.decoder = nn.Linear(hidden_size, output_size)
        self.hidden_size = hidden_size

    def forward(self, src, tgt, tgt_mask=None):
        """Run the encoder-decoder and project back to output_size.

        Args:
            src: Source tensor of shape (src_len, batch, input_size).
            tgt: Decoder input of shape (tgt_len, batch, output_size).
            tgt_mask: Optional attention mask for the decoder self-attention,
                forwarded to nn.Transformer. Pass
                ``nn.Transformer.generate_square_subsequent_mask(tgt.size(0))``
                to stop each position from attending to later positions.
                ``None`` (the default) applies no mask.

        Returns:
            Tensor of shape (tgt_len, batch, output_size).
        """
        # Scale the projections by sqrt(d_model) before adding the position
        # encodings.
        scale = math.sqrt(self.hidden_size)
        src = self.pos_encoder(self.encoder(src) * scale)
        tgt = self.pos_encoder(self.tgt_encoder(tgt) * scale)
        output = self.transformer(src, tgt, tgt_mask=tgt_mask)
        return self.decoder(output)

In [None]:
# Seed PyTorch's RNG so the dummy data, the weight initialisation and the
# dropout masks (hence the printed metrics) are reproducible on every
# "Restart Kernel -> Run All".
torch.manual_seed(0)

# Hyperparameters
input_size = 5     # features per source time step
hidden_size = 512  # transformer model width
num_layers = 6     # encoder layers and decoder layers
output_size = 5    # features per target time step
num_epochs = 10
learning_rate = 0.005

# Generate dummy data
# Sequence-first layout: (sequence_length, batch_size, features).
batch_size = 32
sequence_length = 15
x_train = torch.randn(sequence_length, batch_size, input_size)
y_train = torch.randn(sequence_length, batch_size, output_size)

# Initialize the model, loss function, and optimizer
model = TransformerModel(input_size, hidden_size, num_layers, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training mode is selected once; it is only switched (to eval) after the loop.
model.train()

# Training loop (full batch: one optimizer step per epoch)
# NOTE: y_train is used both as the decoder input and as the regression
# target, so the model is shown the values it is asked to predict. The loss
# therefore only demonstrates that the pipeline runs; it is not a measure of
# predictive quality.
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(x_train, y_train)
    loss = criterion(outputs, y_train)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch+1) % 2 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

print("Training finished.")

# Evaluation
# Scored on the training batch itself, with y_train again fed to the decoder.
model.eval()  # Set model to evaluation mode
with torch.no_grad():
    predictions = model(x_train, y_train)
    mse = criterion(predictions, y_train)
    rmse = torch.sqrt(mse)
    print(f'RMSE: {rmse.item():.4f}')


Epoch [2/10], Loss: 23.9361
Epoch [4/10], Loss: 1.7833
Epoch [6/10], Loss: 2.4093
Epoch [8/10], Loss: 1.5820
