In [None]:
import torch
from torch import nn
from torch.utils.data import Dataset,DataLoader

In [None]:
class Time2Vec(nn.Module):
    def __init__(self, k):
        super().__init__()
        self.k = k # Dimension of time2vec vector
        self.w = nn.Parameter(torch.randn(k)) # Learnable parameters for linear part
        self.b = nn.Parameter(torch.randn(k)) # Learnable parameters for linear part
        self.w_sin = nn.Parameter(torch.randn(k)) # Learnable parameters for periodic part
        self.b_sin = nn.Parameter(torch.randn(k)) # Learnable parameters for periodic part
        self.w_cos = nn.Parameter(torch.randn(k)) # Learnable parameters for periodic part
        self.b_cos = nn.Parameter(torch.randn(k)) # Learnable parameters for periodic part
    
    def forward(self, x):
        x = x.unsqueeze(-1) # Add extra dimension for vectorization
        linear = self.w * x + self.b # Linear transformation (k features)
        sin_trans = torch.sin(self.w_sin * x + self.b_sin) # Periodic transformation (k features)
        cos_trans = torch.cos(self.w_cos * x + self.b_cos) # Periodic transformation (k features)
        return torch.cat([linear, sin_trans, cos_trans], -1) # Concatenate along last dimension

In [155]:
import numpy as np

# Define hyperparameters
batch_size = 32 # Number of samples per batch
seq_len = 100 # Length of each time series
in_features = 1
n_classes = 20 # Number of classes to predict

# Create synthetic dataset (X: input features, y: labels)
# X = np.random.randn(100,batch_size * seq_len * in_features).reshape(100, batch_size , seq_len)
# y = np.random.randint(0, n_classes , size=(100,batch_size))
def gen_ds(samples = 32_000,name = "dl",batch_size = 32):
    assert samples%n_classes == 0
    xcont = []
    ycont = []
    for i in range(n_classes):
        cnumb = int(samples/n_classes)
        ox = np.random.randn(samples,seq_len*in_features).reshape(samples,seq_len)
        lx = ox + i
        ly = np.full((samples),i)
        print()
        xcont.append(lx)
        ycont.append(ly)
        print(np.mean(ox),np.mean(lx),np.mean(ly),i)


    X = np.concatenate(xcont)
    Y = np.concatenate(ycont)

    # Convert numpy arrays to tensors 
    X = torch.from_numpy(X).float()
    y = torch.from_numpy(Y).long()

    class tds(Dataset):
        def __init__(self,x,y,name) -> None:
            super().__init__()
            self.name = name
            self.X = x
            self.Y = y
            self.pspace = len(self.X)
        def __len__(self):
            return self.pspace
        def __getitem__(self, index):
            return X[index],Y[index]
    ds = tds(X,y,name)
    dl = DataLoader(ds,batch_size=batch_size,shuffle=True)
    return dl
train_dl = gen_ds(samples = 800*20,name = "train_dl")
test_dl = gen_ds(1000,name = "test_dl",batch_size=250)


0.0008363328694443018 0.0008363328694443018 0.0 0

0.00028057973042173843 1.0002805797304206 1.0 1

-0.0012012589032809732 1.9987987410967183 2.0 2

-9.272215855407339e-05 2.999907277841447 3.0 3

0.0008383334331666662 4.000838333433165 4.0 4

0.000314786672998356 5.000314786672999 5.0 5

0.001342008886545417 6.001342008886546 6.0 6

0.0002904069749306065 7.000290406974934 7.0 7

0.0005950789711064216 8.000595078971102 8.0 8

0.0005970212269094795 9.00059702122691 9.0 9

5.848621977691566e-06 10.000005848621973 10.0 10

-9.752237062478575e-05 10.999902477629375 11.0 11

-0.00023057688362219508 11.999769423116371 12.0 12

-0.00043008894132254456 12.99956991105868 13.0 13

-0.001177259016758164 13.99882274098325 14.0 14

-9.294548892638851e-05 14.999907054511073 15.0 15

0.001507606353922871 16.001507606353925 16.0 16

0.0012731548423445455 17.001273154842345 17.0 17

0.0007398046807101274 18.000739804680702 18.0 18

0.0006868269015479005 19.000686826901553 19.0 19

-0.00104482196045784

In [156]:
# Import modules
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

n_features = 16 # Number of features per time step
n_layers = 4 # Number of transformer encoder layers
n_heads = 16 # Number of attention heads per layer
hidden_size = 2048 # Size of hidden state in sublayers 
dropout_rate = 0.2 # Dropout rate for regularization


# Define classifier model 
class Classifier(nn.Module):
    def __init__(self , n_layers , n_features , n_heads , hidden_size , dropout_rate , n_classes,length = 100):
        super(Classifier , self).__init__()
        self.batch_norm = nn.BatchNorm1d(length)
        self.t2v = Time2Vec(n_features)
        self.transformer_encoder = nn.TransformerEncoder(nn.TransformerEncoderLayer(n_features*3,n_heads,hidden_size,dropout_rate),n_layers) # Transformer encoder layer 
        self.pooling= nn.AdaptiveAvgPool1d(1) # Global average pooling layer 
        self.linear= nn.Linear(n_features*3,n_classes) # Linear layer with softmax activation 

    def forward(self,x):
        # x shape: (batch_size ,seq_len ,n_features)
        x = self.batch_norm(x).to("cuda:0")
        # print(x.shape)
        x = self.t2v(x)
        # print(x.shape)
        x= self.transformer_encoder(x) # Apply transformer encoder on x 
        # print(x.shape)
        x= x.permute(0 ,2 ,1) # Permute x to match expected shape for pooling (batch_size ,n_features ,seq_len)
        x= self.pooling(x) # Apply pooling on x 
        x= x.squeeze(-1) # Remove last dimension 
        x= self.linear(x) # Apply linear layer on x 
        return nn.functional.softmax(x,dim=-1) #Return class probabilities (batch_size ,n_classes)






# """
# Source: Conversation with Bing, 17/02/2023(1) Transformers for Time Series — Transformer 0.3.0 documentation. https://timeseriestransformer.readthedocs.io/en/latest/README.html Accessed 17/02/2023.
# (2) Transformer Time Series Prediction - GitHub. https://github.com/oliverguhr/transformer-time-series-prediction Accessed 17/02/2023.
# (3) Transformers for Time Series - GitHub. https://github.com/maxjcohen/transformer Accessed 17/02/2023.
# (4) GitHub - gzerveas/mvts_transformer: Multivariate Time Series .... https://github.com/gzerveas/mvts_transformer Accessed 17/02/2023.
# (5) Timeseries classification with a Transformer model - Keras. https://keras.io/examples/timeseries/timeseries_transformer_classification/ Accessed 17/02/2023. """

In [None]:

model = Classifier(n_layers=n_layers,n_features = n_features,n_heads=n_heads,hidden_size=hidden_size,dropout_rate=dropout_rate,n_classes=n_classes,length=100).to("cuda:0")
# Define loss function and optimizer 
loss_fn= nn.CrossEntropyLoss().to("cuda:0") # Cross entropy loss function 
optimizer= optim.AdamW(model.parameters(),lr = 1e-5) #Adam optimizer 

In [158]:
n_epochs = 100
# Train the model on the training data 
for epoch in range(n_epochs): #Iterate over epochs 
    epochloss = 0
    ldl = len(train_dl)
    for bnum,(batch_x,batch_y) in enumerate(train_dl): #Iterate over batches 
        # print(batch_x.shape,batch_y.shape)
        batch_x = batch_x.to("cuda:0")
        batch_y = batch_y.to("cuda:0").to(torch.long)

        optimizer.zero_grad() #Clear previous gradients 
        
        output=model(batch_x) #Get model output for current batch (batch_step ,n_classes)
        # print(output.shape)
        loss=loss_fn(output,batch_y) #Compute loss for current batch 
        loss.backward() #Backpropagate loss 
        
        optimizer.step() #Update parameters 
        
        print(f"Epoch {epoch} Batch {bnum}/{ldl} : Loss {loss.item()}",end = "\r",flush=True) #Print epoch and loss 
        epochloss+=loss.item()
    epochloss/=(bnum+1)

    print(f"\nEpoch loss: {epochloss}")
    print("Evaluating with",end = " ")
    with torch.no_grad(): # Disable gradient computation 
        accsum = []
        for bnum,(batch_x,batch_y) in enumerate(train_dl):
            batch_x = batch_x.to("cuda:0")
            batch_y = batch_y.to("cuda:0").to(torch.long)
            output=model(batch_x) #Get model output for validation data (val_batch_size ,n_classes)
            pred=torch.argmax(output,dim=-1) # Get predicted classes (val_batch_size)
            accsum.append((pred==batch_y).float()) # Compute accuracy (%)
            # print(inp.shape, output.shape, pred.shape, rout.shape,acc)
        # print(f"Accuracy on validation data : {acc}%")# Print accuracy
        equals= torch.cat(accsum)
        means = torch.mean(equals)
        print("mean total accuracy:", means.item(),"\n","-=<{|}>=-"*8)
    # Evaluate the model on the validation data 

Epoch 0 Batch 9999/10000 : Loss 2.0882122516632083
Epoch loss: 2.3003254974603653
Evaluating with mean total accuracy: 0.7921093702316284 
 -=<{|}>=--=<{|}>=--=<{|}>=--=<{|}>=--=<{|}>=--=<{|}>=--=<{|}>=--=<{|}>=-
Epoch 1 Batch 906/10000 : Loss 2.2379522323608476

In [None]:
with torch.no_grad(): # Disable gradient computation 
    accsum = []
    for bnum,(batch_x,batch_y) in enumerate(test_dl):
        batch_x = batch_x.to("cuda:0")
        batch_y = batch_y.to("cuda:0")
        print(bnum)
        output=model(batch_x) #Get model output for validation data (val_batch_size ,n_classes)
        pred=torch.argmax(output,dim=-1) # Get predicted classes (val_batch_size)
        accsum.append((pred==batch_y).float()) # Compute accuracy (%)
        # print(inp.shape, output.shape, pred.shape, rout.shape,acc)
    # print(f"Accuracy on validation data : {acc}%")# Print accuracy
    equals= torch.cat(accsum)
    means = torch.mean(equals)
    print(means)

In [None]:
torch.set_printoptions(threshold=500_000)
print(equals)
torch.set_printoptions(profile='default')