In [1]:
from torch.utils.data import DataLoader
from utils import *
from matplotlib import pyplot
import os

In [2]:
torch.set_default_dtype(torch.float64)
# Okay, we kind of have a way of loading the data
# We need to collect the data and feed it to the transformer model
# Now how do we that ?


# Torch Tensor data !
path = './Datasets/intracardiac_dataset/'
VmTrainData, pECGTrainData, VmDataTest, pECGTestData  = fileReader(path)
print('Data loading from files - complete')

VmTrainData = (VmTrainData - torch.min(VmTrainData))/(torch.max(VmTrainData) - torch.min(VmTrainData))
pECGTrainData = (pECGTrainData - torch.min(pECGTrainData))/(torch.max(pECGTrainData) - torch.min(pECGTrainData))

VmDataTest = (VmDataTest - torch.min(VmDataTest))/(torch.max(VmDataTest) - torch.min(VmDataTest))

pECGTestData = (pECGTestData - torch.min(pECGTestData))/(torch.max(pECGTestData) - torch.min(pECGTestData))
print('Normalization - complete!')


Loading datafiles : 100%|██████████| 16117/16117 [00:55<00:00, 291.68it/s]


Data loading from files - complete
Normalization - complete!


In [3]:
## Model parameters
dim_val = 512
n_heads = 2
n_decoder_layers = 4
n_encoder_layers = 4
input_size = 12
dec_seq_len = 10 
enc_seq_len = 10
output_sequence_length = 2
max_seq_len = enc_seq_len
train_batch_size = 16
test_batch_size = 10
batch_first= False
output_size = 75

In [4]:
from dataset import TransformerDataset
# Get the indices of the sequences
# The idea is: start - stop, where stop - start is window_size
# This means, each tuple in VmInd and pECGInd is 50 steps
datInd = get_indices_entire_sequence(VmData = VmTrainData, 
                                            ECGData = pECGTrainData, 
                                            window_size= enc_seq_len + output_sequence_length, 
                                            step_size = 1)

# Now let's collect the training data in the Transformer Dataset class
TrainData = TransformerDataset(VmData = VmTrainData,
                                    datInd=datInd,
                                    ECGData = pECGTrainData,
                                    enc_seq_len = enc_seq_len,
                                    dec_seq_len = dec_seq_len,
                                    target_seq_len = output_sequence_length
                                )


TrainData = DataLoader(TrainData, train_batch_size)

datInd = get_indices_entire_sequence(VmData = VmDataTest, 
                                            ECGData = pECGTestData, 
                                            window_size= enc_seq_len + output_sequence_length, 
                                            step_size = 1)


# Now, let's load the test data
TestData = TransformerDataset(VmData = VmDataTest, 
                                    ECGData = pECGTestData,
                                    datInd=datInd,
                                    enc_seq_len = enc_seq_len,
                                    dec_seq_len = dec_seq_len,
                                    target_seq_len = output_sequence_length
                                )

TestData = DataLoader(TestData, test_batch_size)


In [5]:
from models import TimeSeriesTransformer
model = TimeSeriesTransformer(
    dim_val=dim_val,
    batch_first=batch_first,
    input_size=input_size, 
    dec_seq_len=dec_seq_len,
    out_seq_len=output_sequence_length, 
    n_decoder_layers=n_decoder_layers,
    n_encoder_layers=n_encoder_layers,
    n_heads=n_heads,
    num_predicted_features=output_size
)

# Define the MSE loss
criterion = torch.nn.MSELoss()

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-4)




In [6]:
EPOCHS = 1
for epoch in tqdm(range(EPOCHS), desc = 'Training ', unit='epochs'):
    batch = 0
    for src, trg, trg_y in tqdm(TrainData, unit = 'batches'):
        optimizer.zero_grad()
        
        # Do it for each step in the sequence and predict the loss
        seqLen = src.shape[2]
        loss = 0
        predList = []
        for i in range(seqLen):

            srciSeq = src[:, :, i, :]
            trgiSeq = trg[:, :, i, :]
            trg_yiSeq = trg_y[:, :, i, :]
            src_mask = generate_square_subsequent_mask(
                dim1=output_sequence_length,
                dim2=enc_seq_len
            )
            tgt_mask = generate_square_subsequent_mask(
                dim1=output_sequence_length,
                dim2=output_sequence_length
            )
            srciSeq = srciSeq.permute(1, 0, 2)
            trgiSeq = trgiSeq.permute(1, 0, 2)
            prediction = model(
                src=srciSeq,
                tgt=trgiSeq,
                src_mask=src_mask,
                tgt_mask=tgt_mask
            )
            
            predList.append(prediction.permute(1,0,2))
        
        loss = criterion(trg_y, torch.stack(predList, axis = 2))
        print("Epoch: {epoch}    sequence: {i}/{seqLen}    Loss: {loss}    batch: {batch}/{total_batches}".format(epoch=epoch+1, i=i+1, seqLen=seqLen, loss=loss.item(),batch=batch+1, total_batches=len(TrainData)))
        loss.backward()
        optimizer.step()

        batch+=1

    

Training :   0%|          | 0/1 [00:00<?, ?epochs/s]

Epoch: 1    sequence: 488/488    Loss: 0.588380103094737    batch: 1/31




Epoch: 1    sequence: 488/488    Loss: 0.26923707422368176    batch: 2/31




Epoch: 1    sequence: 488/488    Loss: 0.16643682063210213    batch: 3/31


  6%|▋         | 2/31 [19:38<4:44:43, 589.08s/batches]
Training :   0%|          | 0/1 [19:38<?, ?epochs/s]


KeyboardInterrupt: 

In [16]:
# EPOCHS = 1
sequences = []
flag = True
sequences = None
with torch.no_grad():
    for src, trg, trg_y in tqdm(TestData, unit = 'batches'):
        optimizer.zero_grad()
        
        # Do it for each step in the sequence and predict the loss
        seqLen = src.shape[2]
        loss = 0
        predList = []
        for i in range(seqLen):

            srciSeq = src[:, :, i, :]
            trgiSeq = trg[:, :, i, :]
            trg_yiSeq = trg_y[:, :, i, :]
            src_mask = generate_square_subsequent_mask(
                dim1=output_sequence_length,
                dim2=enc_seq_len
            )
            tgt_mask = generate_square_subsequent_mask(
                dim1=output_sequence_length,
                dim2=output_sequence_length
            )
            srciSeq = srciSeq.permute(1, 0, 2)
            trgiSeq = trgiSeq.permute(1, 0, 2)
            prediction = model(
                src=srciSeq,
                tgt=trgiSeq,
                src_mask=src_mask,
                tgt_mask=tgt_mask
            )
            if flag:
                output = prediction.permute(1,0,2)[:, -1, :].unsqueeze(1)
                flag = False
            
            else:
                output = torch.cat((output, prediction.permute(1,0,2)[:, -1, :].unsqueeze(1)), axis = 1)
            
         
        if sequences is None:
            sequences = output
        else:
            sequences = torch.cat([sequences, output], axis =0)
        flag = False
    

  2%|▏         | 1/49 [00:59<47:13, 59.04s/batches]


RuntimeError: torch.cat(): Sizes of tensors must match except in dimension 0. Got 488 and 976 in dimension 1 (The offending index is 1)

In [8]:
print(prediction.permute(1, 0, 2).shape)

torch.Size([16, 2, 75])
