Step 1: We import stuff that we needed

In [21]:
# import os
import numpy as np
import random
import torch
from torch import nn
from torch.nn import TransformerEncoder, TransformerEncoderLayer, TransformerDecoderLayer, TransformerDecoder
from torch.utils.data import Dataset, DataLoader, random_split
from torch.utils.data.dataset import TensorDataset
from torch.optim import Adam
from itertools import product


print("###Imported essential modules and functionality")

###Imported essential modules and functionality


Step 2: We now define the Transformer

In [22]:
class TransformerAutoencoder(nn.Module):
    def __init__(self, 
                 encoder_input_dim, 
                 decoder_input_dim, 
                 hidden_dim,
                 num_heads, 
                 encoder_embedding_dim, 
                 decoder_embedding_dim,
                 num_layers, 
                 dropout):
        super(TransformerAutoencoder, self).__init__()
        self.encoder_input_dim = encoder_input_dim
        self.decoder_input_dim = decoder_input_dim
        self.encoder_embedding_dim = encoder_embedding_dim
        self.decoder_embedding_dim = decoder_embedding_dim
        self.hidden_dim = hidden_dim
        self.num_heads = num_heads
        self.num_layers = num_layers
        self.dropout = dropout


        # Encoder Embedding
        self.encoder_embedding = nn.Linear(self.encoder_input_dim, self.encoder_embedding_dim)

        # Encoder
        self.encoder_layer = TransformerEncoderLayer(d_model=self.encoder_embedding_dim,
                                                     nhead=self.num_heads,
                                                     dim_feedforward=self.hidden_dim,
                                                     dropout=self.dropout,
                                                     batch_first=True)
        self.encoder = TransformerEncoder(self.encoder_layer,
                                          num_layers=self.num_layers)

        # Decoder Embedding
        self.decoder_embedding = nn.Linear(self.decoder_input_dim, self.decoder_embedding_dim)

        # Decoder
        self.decoder_layer = TransformerDecoderLayer(d_model=self.decoder_embedding_dim,
                                                     nhead=self.num_heads,
                                                     dim_feedforward=self.hidden_dim,
                                                     dropout=self.dropout,
                                                     batch_first=True)
        self.decoder = TransformerDecoder(self.decoder_layer, num_layers=self.num_layers)

        # Final output layer
        self.out = nn.Linear(self.decoder_embedding_dim, self.decoder_input_dim)

    def forward(self, x, y):
        # Use the mask to create a version of x with missing values set to zero.
        
        # Encode the masked input.
        x = self.encoder_embedding(x)
        x_encoded = self.encoder(x)

        # Decode the encoded representation.
        y = self.decoder_embedding(y)
        x_decoded = self.decoder(y, x_encoded)

        # Apply the final output layer
        x_out = self.out(x_decoded)

        return x_out


Step 3: We define our torch DataSet Class for our measurements Y_data and
target values X_data

In [23]:
class GridDataset(Dataset):
    def _load_data_from_memory(self):
        pass
    def __init__(self, inputData, targetData):
        # if data is passed with Y_data then this is the input measurements, the estimated states
        # if data is passed with X_data then this is the target, the groundtruth values
        self.num_samples = inputData.shape[0]
        self.input_dim = inputData.shape[2]
        self.output_dim = targetData.shape[2]

        self.inputs = inputData # features is target

        # apply mask of missing features to the feature set
        # temp_mask = torch.ones(self.num_samples, self.input_dim)
        # temp_mask[:, [0, 1, 16, 17, 24, 25]] = 0 # index of buses missing measurements.
        # self.mask = temp_mask # index of buses missing measurements.
        # self.mask = self.mask.float()
        # self.features = self.features * self.mask

        # targets
        self.targets = targetData

    def __len__(self):
        return self.num_samples
    def __getitem__(self, idx):
        return self.inputs[idx], self.targets[idx]
    
    print("###Defined Classes TransformerAutoEncoder, GridDataset")

###Defined Classes TransformerAutoEncoder, GridDataset


Step 4: We then import the data necessary excluding the initial condition

In [24]:
datachoice = "Lorenz" # $$ acceptable values Lorrentz, KS, Burgers

if datachoice=="Lorenz":
    datafile = "../Data/lorenz_data.npz" # also there is "Data/lorenz_data_diff_ic.npz"
    

# elif datachoice=="KS":
#     datafile = "Data/ks_data.npz"
#     # TODO
# elif datachoice=="burgers":
#     datafile= "Data/burgers_data.npz"
#     # TODO


rawData = np.load(datafile)
allInputs = torch.Tensor(rawData["Y_data"])
allTargets = torch.Tensor(rawData["X_data"])
# TODO 
# exclude initial condition. issue: [1:] makes num samples from 1 to 49 avaialbl and not 1 to 1001 
print("## Loaded data")

## Loaded data


Step 4: We do the splitting of the data set

In [25]:
gridDataSet = GridDataset(allInputs, allTargets)
trainPercent = 0.8
testPercent = 0.1
validatePercent = 0.1
data = [d for d in gridDataSet]
splitDataList = random_split(gridDataSet, [trainPercent, testPercent, validatePercent], torch.Generator().manual_seed(42))

trainingData = splitDataList[0]
testingData = splitDataList[1]
validatingData = splitDataList[2]
lenTrainingData = len(trainingData)
lenTestingData = len(testingData)
lenValidatingData = len(validatingData)
print("## Split the data:\n")
print(f"Training data set size   : {lenTrainingData}\n",
      f"Validation data set size : {lenTestingData}\n",
      f"Test data set size       : {lenValidatingData}")

trainingTensorDataSet = TensorDataset(trainingData.dataset.inputs,
                                      trainingData.dataset.targets)
testingTensorDataSet = TensorDataset(testingData.dataset.inputs,
                                      testingData.dataset.targets)
validatingTensorDataSet = TensorDataset(validatingData.dataset.inputs,
                                      validatingData.dataset.targets)

print(trainingData[0][0].shape)


## Split the data:

Training data set size   : 40
 Validation data set size : 5
 Test data set size       : 5
torch.Size([1001, 1])


In [26]:
def evaluateLossFromTrainingData(train_dataloader, criterion):
    sum_loss = 0
    for i, batch in enumerate(train_dataloader):
        inputs, targets = batch
        optimizer.zero_grad()
        outputs = model(inputs, targets)
        loss = criterion(outputs, targets)
        sum_loss+=loss.item()
        loss.backward()
        optimizer.step()
    avg_loss = sum_loss/len(train_dataloader)
    return avg_loss

In [27]:


# Store best results
best_result = dict({
    'model':[],
    'learn_rate':[],
    'num_epochs':[],
    'encoder_embedding_dim':[],
    'decoder_embedding_dim':[],
    'hidden_dim':[],
    'num_heads':[],
    'weight_decay':[],
    'dropout':[],
    'avg_training_loss':[],
    'best_validation_loss':[]
})


In [None]:

# Define constants
encoder_input_dim = gridDataSet.input_dim
decoder_input_dim = gridDataSet.output_dim
output_dim = gridDataSet.output_dim
num_layers = 4
parameters = dict(num_epochs = [64],
                  hidden_dim = [64],
                  num_heads = [4],
                  encoder_embedding_dim = [16],
                  decoder_embedding_dim = [16],
                  learn_rate = [0.1],
                  weight_decay = [0.005],
                  dropout = [0.05])

param_values = [v for v in parameters.values()]

for run_id, (num_epochs,
             hidden_dim,
             num_heads,
             encoder_embedding_dim, 
             decoder_embedding_dim,
             learn_rate,
             weight_decay, 
             dropout) in enumerate(product(*param_values)):
    criterion = nn.MSELoss()
    trainDataSetLoader =  DataLoader(trainingTensorDataSet, batch_size = 5, shuffle = True)
    validationDataSetLoader = DataLoader(validatingTensorDataSet, batch_size = 5, shuffle = True)

    model = TransformerAutoencoder(encoder_input_dim, 
                                   decoder_input_dim,
                                   hidden_dim, 
                                   num_heads, 
                                   encoder_embedding_dim,
                                   decoder_embedding_dim, 
                                   num_layers, 
                                   dropout)
    optimizer = Adam(model.parameters(), lr=learn_rate, weight_decay=weight_decay)
    best_validation_loss = 1_000_000.
    avg_training_loss = 0.
    avg_validation_loss = 0.

    for epoch in range(num_epochs):
        print(f"EPOCH NUMBER: {epoch+1}")
        model.train(True)
        avg_training_loss = evaluateLossFromTrainingData(trainDataSetLoader, criterion)
        model.eval()
        sum_validation_loss = 0.0
        with torch.no_grad():
            for i, batch in enumerate(validationDataSetLoader):
                inputs, targets = batch
                outputs = model(inputs, targets)
                val_loss = criterion(outputs, targets)
                sum_validation_loss += val_loss.item()
        avg_validation_loss = sum_validation_loss / len(validationDataSetLoader)
        print(f"AVERAGE TRAINING LOSS  : {avg_training_loss}\nAVERAGE VALIDATION LOSS: {avg_validation_loss}")

        if avg_validation_loss < best_validation_loss:
            best_validation_loss = avg_validation_loss
            print(f'BEST VALIDATION LOSS: {best_validation_loss} at EPOCH {epoch+1}')
            best_result['model']=model.state_dict()
            best_result['learn_rate']=learn_rate
            best_result['encoder_embedding_dim']=encoder_embedding_dim
            best_result['decoder_embedding_dim'] =decoder_embedding_dim
            best_result['num_epochs']=num_epochs
            best_result['hidden_dim']=hidden_dim
            best_result['num_heads']=num_heads
            best_result['weight_decay']=weight_decay
            best_result['dropout']=dropout
            best_result['avg_training_loss']=avg_training_loss
            best_result['best_validation_loss']=best_validation_loss

EPOCH NUMBER: 1
AVERAGE TRAINING LOSS  : 226.04327850341798
AVERAGE VALIDATION LOSS: 206.39358215332032
BEST VALIDATION LOSS: 206.39358215332032 at EPOCH 1
EPOCH NUMBER: 2
AVERAGE TRAINING LOSS  : 192.12095336914064
AVERAGE VALIDATION LOSS: 172.75513458251953
BEST VALIDATION LOSS: 172.75513458251953 at EPOCH 2
EPOCH NUMBER: 3
AVERAGE TRAINING LOSS  : 157.1037139892578
AVERAGE VALIDATION LOSS: 136.71775817871094
BEST VALIDATION LOSS: 136.71775817871094 at EPOCH 3
EPOCH NUMBER: 4
AVERAGE TRAINING LOSS  : 121.9678466796875
AVERAGE VALIDATION LOSS: 104.11313400268554
BEST VALIDATION LOSS: 104.11313400268554 at EPOCH 4
EPOCH NUMBER: 5
AVERAGE TRAINING LOSS  : 93.25796432495117
AVERAGE VALIDATION LOSS: 81.54179306030274
BEST VALIDATION LOSS: 81.54179306030274 at EPOCH 5
EPOCH NUMBER: 6
AVERAGE TRAINING LOSS  : 76.32467803955078
AVERAGE VALIDATION LOSS: 71.43019256591796
BEST VALIDATION LOSS: 71.43019256591796 at EPOCH 6
EPOCH NUMBER: 7
AVERAGE TRAINING LOSS  : 68.20082931518554
AVERAGE VALID

In [None]:
min_loss_model = TransformerAutoencoder(encoder_input_dim, 
                                        decoder_input_dim, 
                                        hidden_dim, 
                                        num_heads,
                                        encoder_embedding_dim, 
                                        decoder_embedding_dim, 
                                        num_layers, 
                                        dropout)

min_loss_model.load_state_dict(best_result["model"])

TransformerAutoencoder(
  (encoder_embedding): Linear(in_features=1, out_features=16, bias=True)
  (encoder_layer): TransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=16, out_features=16, bias=True)
    )
    (linear1): Linear(in_features=16, out_features=64, bias=True)
    (dropout): Dropout(p=0.05, inplace=False)
    (linear2): Linear(in_features=64, out_features=16, bias=True)
    (norm1): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.05, inplace=False)
    (dropout2): Dropout(p=0.05, inplace=False)
  )
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-3): 4 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=16, out_features=16, bias=True)
        )
        (linear1): Linear(in_features=16, out_features=64, bias=

Step 5: Predictions

In [42]:
def predict(model, dataSetLoader, criterion):
    predictions = []
    labels = []
    sum_loss = 0
    with torch.no_grad():
        for batch in dataSetLoader:
            inputs, targets = batch
            prediction = model(inputs, targets)
            predictions.append(prediction)
            labels.append(targets)
            test_loss = criterion(prediction,targets)
            sum_loss+=test_loss
        avg_test_loss = sum_loss/len(dataSetLoader)
    
    print(f"AVERATE TEST LOSS: {avg_test_loss}")



In [43]:
criterion = nn.MSELoss()
predict(min_loss_model, testingTensorDataSet,criterion)

AVERATE TEST LOSS: 41.233917236328125


tensor([[ 7.3766,  8.4234, 26.1450],
        [ 7.0173,  7.9017, 26.4439],
        [ 7.0534,  8.2067, 25.8682],
        ...,
        [-5.2139, -4.9795, 23.9318],
        [-4.9670, -4.7275, 24.3176],
        [ 7.1414,  8.0633, 26.2787]])
