In [248]:
import math

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import CIFAR10, MNIST
from transformers import BertModel, BertTokenizer

In [239]:
# All computation in this notebook is pinned to the CPU.
# (An Apple-silicon "mps" device was tried previously and is left disabled.)
device = torch.device("cpu")

In [273]:
class TimeSeriesEncoder(nn.Module):
    """Transformer encoder that maps a multivariate time series to one vector.

    Pipeline: per-timestep linear embedding -> positional encoding ->
    stack of Transformer encoder layers -> mean over time -> linear projection.

    Args:
        num_features: Number of input features per timestep.
        num_hidden: Transformer model width (``d_model``); must be divisible
            by ``num_heads``.
        num_layers: Number of stacked encoder layers.
        num_heads: Number of attention heads in each layer.
        output_size: Size of the returned embedding.
    """

    def __init__(self, num_features, num_hidden, num_layers, num_heads, output_size):
        super().__init__()

        self.num_hidden = num_hidden

        # Learned offset shared by every timestep. On its own it carries no
        # positional information, so forward() also adds a sinusoidal
        # per-timestep encoding. The parameter is kept so attribute names and
        # state_dict keys stay the same.
        self.position_encoding = nn.Parameter(torch.randn(1, 1, num_hidden))
        self.feature_embedding = nn.Linear(num_features, num_hidden)

        # batch_first=True is required because forward() feeds tensors laid
        # out as (batch, timesteps, hidden). With the default
        # (batch_first=False) attention would run across the batch axis and
        # mix unrelated samples.
        # Note: TransformerEncoder deep-copies this layer num_layers times, so
        # this attribute only serves as the template for the stack.
        self.transformer_layer = TransformerEncoderLayer(
            d_model=num_hidden,
            nhead=num_heads,
            dim_feedforward=num_hidden * 4,
            dropout=0.1,
            activation='relu',
            batch_first=True,
        )
        self.transformer_encoder = TransformerEncoder(
            encoder_layer=self.transformer_layer,
            num_layers=num_layers
        )

        self.projection_layer = nn.Linear(num_hidden, output_size)

    def _sinusoidal_encoding(self, timesteps, device, dtype):
        """Return fixed sine/cosine position codes of shape (1, timesteps, num_hidden)."""
        position = torch.arange(timesteps, device=device, dtype=torch.float32).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, self.num_hidden, 2, device=device, dtype=torch.float32)
            * (-math.log(10000.0) / self.num_hidden)
        )
        angles = position * div_term  # (timesteps, ceil(num_hidden / 2))

        encoding = torch.zeros(timesteps, self.num_hidden, device=device, dtype=torch.float32)
        encoding[:, 0::2] = torch.sin(angles)
        # For an odd num_hidden there is one fewer cosine column than sine.
        encoding[:, 1::2] = torch.cos(angles[:, : self.num_hidden // 2])
        return encoding.unsqueeze(0).to(dtype)

    def forward(self, x):
        """Encode a batch of sequences.

        Args:
            x: Tensor of shape (batch_size, timesteps, num_features).

        Returns:
            Tensor of shape (batch_size, output_size).
        """
        # Unpacking three values also rejects inputs that are not 3-D.
        _, timesteps, _ = x.size()

        x = self.feature_embedding(x)
        # Broadcasting adds the shared learned offset (1, 1, H) and the
        # position-dependent codes (1, T, H) to every sample.
        x = x + self.position_encoding + self._sinusoidal_encoding(timesteps, x.device, x.dtype)

        x = self.transformer_encoder(x)
        x = x.mean(dim=1)  # average over the time axis
        x = self.projection_layer(x)

        return x


In [274]:
# Encoder hyperparameters.
num_features, num_hidden = 20, 256
num_layers, num_heads = 6, 8
output_size = 2048

# Build the encoder and show its module tree.
model = TimeSeriesEncoder(
    num_features=num_features,
    num_hidden=num_hidden,
    num_layers=num_layers,
    num_heads=num_heads,
    output_size=output_size,
)
print(model)


TimeSeriesEncoder(
  (feature_embedding): Linear(in_features=20, out_features=256, bias=True)
  (transformer_layer): TransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
    )
    (linear1): Linear(in_features=256, out_features=1024, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
    (linear2): Linear(in_features=1024, out_features=256, bias=True)
    (norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.1, inplace=False)
    (dropout2): Dropout(p=0.1, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-5): 6 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
        )
        (linear1): Linear(in_features=256,

In [280]:

# Smoke test: push one random batch, laid out as
# (batch_size, timesteps, features), through the encoder.
input_tensor = torch.randn((32, 500, num_features))
output = model(input_tensor)
print(output.size())  # expected: torch.Size([32, 2048])

torch.Size([32, 2048])


In [282]:
# Display the shape of the encoder output as the cell result.
output.size()

torch.Size([32, 2048])