In [1]:
import numpy as np
import pandas as pd
import torch

from imputation_utils import *

In [2]:
class TransformerImputer:
    """Transformer-based imputer for per-day multivariate time series.

    Bundles a `TSTransformerEncoder` with its loss, optimiser and training
    history. `VARIABLES`, `TSTransformerEncoder`, `MaskedMSELoss`, `masker`,
    `masked_prediction`, `tqdm` and `plt` are not defined in this notebook;
    they are expected to be provided by the `imputation_utils` star import.
    """

    def __init__(self):
        """Build the model, loss and optimiser from the hard-coded hyper-parameters.

        Reads the module-level `VARIABLES` to size the model input, so that
        name must be bound before the class is instantiated.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(self.device)

        # model params
        self.n_variables = len(VARIABLES)
        self.sequence_length = 48 # max: 1440 (full day)
        self.d_model = 16
        self.n_heads = 4
        self.n_encoder_layers = 3
        self.lm = 43                # forwarded to masker(lm=...)
        self.masking_ratio = 0.21   # forwarded to masker(masking_ratio=...)
        self.dropout_rate = 0.1

        self.model = TSTransformerEncoder(feat_dim=self.n_variables,
                                          max_len=self.sequence_length,
                                          d_model=self.d_model,
                                          n_heads=self.n_heads,
                                          num_layers=self.n_encoder_layers,
                                          dim_feedforward=self.d_model,
                                          dropout=self.dropout_rate,
                                          pos_encoding='fixed',
                                          activation='gelu',
                                          norm='BatchNorm',
                                          freeze=False)
        self.model.to(self.device)

        # learning params
        self.epochs = 2000
        self.learning_rate = 0.001
        self.history = {'train_loss': []}

        self.criterion = MaskedMSELoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)

    def n_params(self):
        """Return the total number of elements over all model parameters."""
        return sum(p.numel() for p in self.model.parameters())

    def train(self, data):
        """Train the model for `self.epochs` epochs on masked reconstruction.

        Each day is cut into consecutive windows of `self.sequence_length`
        time steps (a trailing partial window is dropped) and the windows of
        one day form one optimisation step. The mean per-day loss of every
        epoch is appended to `self.history['train_loss']`, and the loss curve
        is plotted at the end.

        Args:
            data: collection indexable by 0..len(data)-1 whose items support
                `.copy()` and `.transpose().to_numpy()` (presumably DataFrames
                with variables as rows and time steps as columns - confirm
                against `data_to_days`).
        """
        self.model.train() # set model into training mode
        train_indices = np.arange(len(data))
        seq_len = self.sequence_length

        print(f'Starting training for {self.epochs} epochs')
        with tqdm(total=self.epochs) as pbar:
            for epoch in range(self.epochs):
                epoch_loss = 0.0

                # one optimisation step per day
                for batch_index in train_indices:
                    # load training batch (copy so masking cannot touch the caller's data)
                    full_day = data[batch_index].copy()

                    # create mask (on-the-fly) (0: available, 1: masked (purposefully set to NaN), 2: missing (NaN from beginning))
                    mask = masker(full_day, lm=self.lm, masking_ratio=self.masking_ratio)

                    # reshape so that time is the first axis
                    full_day = full_day.transpose().to_numpy() # pandas -> numpy + reshape
                    mask = mask.transpose()

                    # split day into windows of seq_len steps
                    n_batches = full_day.shape[0] // seq_len
                    batch = np.array([full_day[(k * seq_len):((k + 1) * seq_len), :] for k in range(n_batches)])
                    mask = np.array([mask[(k * seq_len):((k + 1) * seq_len), :] for k in range(n_batches)])

                    # targets keep their original values; inputs get every
                    # non-zero mask entry (masked or missing) replaced by 0.0
                    y_true = torch.tensor(batch).float().to(self.device)
                    batch[mask.astype(bool)] = 0.0
                    X = torch.tensor(batch).float().to(self.device)

                    # predict (padding mask all True: no position is padding)
                    self.optimizer.zero_grad()
                    padding_masks = torch.ones(n_batches, seq_len).bool().to(self.device)
                    y_pred = self.model(X, padding_masks=padding_masks)

                    # update model; the loss only looks at purposely masked
                    # entries (mask == 1), never at genuinely missing data
                    mask = torch.tensor(mask == 1.0).bool().to(self.device)
                    loss = self.criterion(y_pred, y_true, mask=mask)
                    loss.backward()
                    epoch_loss += loss.item()
                    self.optimizer.step()

                # on epoch end shuffle data. This must happen outside the loop
                # above: shuffling the array while it is being iterated makes
                # an epoch revisit some days and skip others.
                np.random.shuffle(train_indices)

                # save data
                mean_loss = epoch_loss / len(train_indices)
                self.history['train_loss'].append(mean_loss)

                # print
                pbar.update(1)
                pbar.set_description(f'Epochs {epoch + 1}/{self.epochs} - train loss: {mean_loss}')

        # plot training curve
        plt.plot(self.history['train_loss'])
        plt.xlabel('epochs')
        plt.ylabel('training loss')
        plt.title('Training loss')

    def impute(self, data):
        """Run `masked_prediction` on every day and return the results.

        Args:
            data: collection indexable by 0..len(data)-1, one item per day.

        Returns:
            list of `pd.DataFrame`, one per day, in input order.
        """
        self.model.eval()

        data_imputed = []
        for day in range(len(data)):
            data_day = data[day] # data for current day
            data_imputed.append(pd.DataFrame(masked_prediction(self.model, data_day, self.device)))

        return data_imputed

    @staticmethod
    def _read_history(path):
        """Parse a history file written as `str(dict)`; return None if unavailable."""
        import ast
        import os

        if not os.path.isfile(path):
            return None
        with open(path, 'r') as dat:
            text = dat.read().strip()
        if not text:
            return None
        try:
            history = ast.literal_eval(text)
        except (ValueError, SyntaxError):
            # e.g. the file contains `nan`, which is not a Python literal
            print(f'Could not parse training history in {path}; keeping current history')
            return None
        return history if isinstance(history, dict) else None

    def load(self):
        """Load the pickled model and, if present, the saved training history.

        The model is read from './Models/transformer_imputation_final' and the
        history from 'transformer_imputation_final_history.txt' (both relative
        to the working directory). The history file is only read, never
        written; when it is missing or unparsable `self.history` is kept.
        """
        # model
        # NOTE(review): torch.load unpickles a complete module object, so only
        # load files you trust. Recent PyTorch releases default to
        # weights_only=True, which rejects such files - confirm the installed
        # version before relying on this call.
        self.model = torch.load('./Models/transformer_imputation_final', map_location=self.device)
        self.model.to(self.device)

        # the optimiser created in __init__ still points at the parameters of
        # the discarded model; rebind it so a later train() updates this one
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)

        # history
        history = self._read_history('transformer_imputation_final_history.txt')
        if history is not None:
            self.history = history

    # save() is intentionally disabled (kept from the original notebook):
    # def save(self):
    #     torch.save(self.model, f'./Models/transformer_imputation_final')

In [3]:
# Build the imputer with the hyper-parameters hard-coded in __init__, then
# replace its freshly initialised model with the pickled one from ./Models/.
transformer = TransformerImputer()

transformer.load()

cuda


In [5]:
# Show the module tree of the loaded model.
transformer.model

TSTransformerEncoder(
  (dropout1): Dropout(p=0.1, inplace=False)
  (output_layer): Linear(in_features=16, out_features=10, bias=True)
  (project_inp): Linear(in_features=10, out_features=16, bias=True)
  (pos_enc): FixedPositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerBatchNormEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=16, out_features=16, bias=True)
        )
        (dropout1): Dropout(p=0.1, inplace=False)
        (norm1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (linear1): Linear(in_features=16, out_features=16, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=16, out_features=16, bias=True)
        (dropout2): Dropout(p=0.1, inplace=False)
        (norm2): BatchNorm1d(16, eps=1e-05, momentum=0.1, a

In [9]:
from imputation_utils import *

# Import data

In [10]:
# Load the dataset. This also rebinds VARIABLES, the name that
# TransformerImputer.__init__ reads (len(VARIABLES)) to size the model input.
data, VARIABLES = import_data(discard_variables=True, discard_days=True, THRESHOLD=60)

discarded variables: ['GalvanicSkinResponse', 'ActivityClass']
discarded days (less than 60min of data): [312]
discarded days (sensor out all day): [114]


# Imputation

Format data into daily dataframes (for imputation model)

In [11]:
# Split the data per day; data_daily[0] is used further down as the first day.
data_daily = data_to_days(data)

100%|██████████| 408/408 [02:05<00:00,  3.24it/s]


Normalize each variable separately within each day (zero mean, unit standard deviation)

In [12]:
# The return value is not captured and the call prints the mean/std of every
# variable of every day. Presumably data_daily is normalised in place -
# TODO confirm in imputation_utils.
normalize_by_day(data_daily)

day 0 - ActivityCounts: mean: 1.2688263138573217e-17, std: 0.9999999999999959
day 0 - Barometer: mean: -5.938107148852266e-14, std: 0.9999999999999997
day 0 - BloodPerfusion: mean: 2.3770417019099194e-16, std: 1.000000000000001
day 0 - BloodPulseWave: mean: 1.092154295472125e-16, std: 1.0000000000000004
day 0 - EnergyExpenditure: mean: -5.075305255429287e-17, std: 1.0000000000000064
day 0 - HR: mean: -9.765144288927236e-16, std: 1.0
day 0 - HRV: mean: 1.5650721051984586e-16, std: 0.9999999999999997
day 0 - RESP: mean: -3.760906729921135e-16, std: 0.9999999999999998
day 0 - Steps: mean: -3.6478756523398e-17, std: 0.9999999999999938
day 0 - SkinTemperature: mean: 2.816794416763254e-15, std: 0.999999999999999
day 1 - ActivityCounts: mean: 0.0, std: 0.9999999999999909
day 1 - Barometer: mean: 7.088903107932162e-14, std: 0.9999999999999981
day 1 - BloodPerfusion: mean: -3.120978341552417e-17, std: 0.9999999999999994
day 1 - BloodPulseWave: mean: 6.241956683104833e-17, std: 0.999999999999999

In [13]:
# Masking hyper-parameters; same values as TransformerImputer.lm and
# TransformerImputer.masking_ratio. Not referenced by the cells shown here.
lm = 43
r = 0.21

In [39]:
def embedding(model, X, padding_masks):
    """Return the activated encoder output of `model` for `X`.

    Only the first stages of the model are applied: input projection,
    positional encoding, the transformer encoder stack and the activation.
    The later stages (permute back to batch-first, final dropout, zeroing of
    padded positions, output layer) are deliberately not applied, so the
    result stays sequence-first and has `d_model` features.

    Args:
        model: object exposing `project_inp`, `d_model`, `pos_enc`,
            `transformer_encoder` and `act`.
        X: batch-first input tensor; axes 0 and 1 are swapped before use.
        padding_masks: boolean tensor, True for positions to keep. It is
            inverted before being handed to the encoder because
            `src_key_padding_mask` uses the opposite convention.

    Returns:
        Tensor laid out as (seq_length, batch_size, d_model).
    """
    # PyTorch transformers expect [seq_length, batch_size, feat_dim]
    sequence_first = X.permute(1, 0, 2)

    # project every time step into the d_model-dimensional space and rescale
    scale = math.sqrt(model.d_model)
    projected = model.project_inp(sequence_first) * scale

    # add positional encoding
    positioned = model.pos_enc(projected)

    # encoder: True in src_key_padding_mask means "ignore", hence the inversion
    ignore_positions = ~padding_masks
    hidden = model.transformer_encoder(positioned, src_key_padding_mask=ignore_positions)

    # the encoder output carries no non-linearity of its own
    return model.act(hidden)

In [40]:
def batchify_day(data_day, sequence_length=None, device=None):
    """Cut one day into fixed-length windows and return them as a float tensor.

    The frame is transposed, then split along its first axis into consecutive
    windows of `sequence_length` rows; a trailing partial window is dropped.
    NaNs are replaced by 0.0. `data_day` itself is not modified.

    Args:
        data_day: DataFrame for one day; after `.transpose()` the rows are
            the steps that get windowed.
        sequence_length: window length. Defaults to the global
            `transformer.sequence_length`.
        device: target device of the returned tensor. Defaults to the global
            `transformer.device`.

    Returns:
        float32 tensor of shape (n_windows, sequence_length, n_columns), where
        n_columns is the second dimension of the transposed frame. A day
        shorter than one window yields n_windows == 0.
    """
    # fall back to the notebook-level imputer only when not told otherwise
    if sequence_length is None:
        sequence_length = transformer.sequence_length
    if device is None:
        device = transformer.device

    # reshape
    values = data_day.transpose().to_numpy() # pandas -> numpy + reshape

    # split day into batches; np.array(...) copies, so the NaN replacement
    # below can never write through to the DataFrame
    n_batches = values.shape[0] // sequence_length
    n_features = values.shape[1]
    usable = n_batches * sequence_length
    batch = np.array(values[:usable], order='C').reshape(n_batches, sequence_length, n_features)

    # mask missing data
    batch[np.isnan(batch)] = 0.0 # missing data -> 0.0 for transformer

    return torch.tensor(batch).float().to(device)

In [41]:
# Window the first day into model-sized sequences (NaNs become 0.0).
X = batchify_day(data_daily[0])

In [42]:
# Inspect the windowed tensor; NaNs were replaced by 0.0 in batchify_day.
print(X)

tensor([[[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0

In [43]:
# (n_windows, sequence_length, n_variables)
X.shape

torch.Size([30, 48, 10])

In [44]:
# predict
# Encoder embeddings for every window of the day.
# - eval() + no_grad(): inference only, so dropout is switched off and no
#   autograd graph is kept (impute() likewise calls eval() first).
# - The padding mask is sized from X instead of a hard-coded 1440-step day, so
#   it always matches the tensor that is actually passed in. All True means
#   no position is treated as padding.
transformer.model.eval()
with torch.no_grad():
    padding_masks = torch.ones(X.shape[:2], dtype=torch.bool, device=X.device)
    y_pred = embedding(transformer.model, X, padding_masks=padding_masks)

In [45]:
# (sequence_length, n_windows, d_model): embedding() returns sequence-first output
y_pred.shape

torch.Size([48, 30, 16])

In [46]:
# Re-display the input tensor after the embedding call.
X

tensor([[[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0