# ©2023 EDF
Adrien PETRALIA - EDF R&D and Université Paris Cité (LIPADE)

# ADF & TransApp - Notebook example
## A Transformer-Based Framework for Appliance Detection Using Smart Meter Consumption Series 

In [1]:
import os, sys
import numpy as np
import pandas as pd
from pathlib import Path

import torch
import torch.nn as nn

root = Path(os.getcwd()).resolve().parents[0]
sys.path.append(str(root))
from experiments.data_utils import *
from src.TransAppModel.TransApp import *
from src.AD_Framework.Framework import *
from src.utils.losses import *

from IPython.display import display, HTML
# Inject a CSS rule that sets the `.container` element width to 80%.
display(HTML(data="<style>.container { width:80% !important; }</style>"))

from torch.utils.data import DataLoader, Dataset
from torchinfo import summary

  from .autonotebook import tqdm as notebook_tqdm


## Instantiation of a TransApp Model

In [13]:
def get_model_inst(m, win, dim_model, mode="pretraining", large_version=False, path_select_core=None):
    """
    Description:
        Build a TransApp model instance and, optionally, load pretrained weights into it.

    Parameters:
        m: int - number of channels of the input time series (passed as `c_in`)
        win: int - length of the input subsequence (passed as `max_len`; useful for positional encoding, if any)
        dim_model: int - inner dimension of the model (passed as `d_model`)
        mode: str - 'pretraining' or 'classif' (type of head)
        large_version: boolean - if true, use 5 encoder layers instead of 3
        path_select_core: str - path to a checkpoint of a pretrained TransApp instance;
                                the weights are read from its 'model_state_dict' entry

    Returns:
        The TransApp instance, with the checkpoint weights loaded when `path_select_core` is given.
    """

    TApp = TransApp(max_len=win, c_in=m,
                    mode=mode,
                    n_embed_blocks=1,
                    encoding_type='noencoding',
                    n_encoder_layers=5 if large_version else 3,
                    kernel_size=5,
                    d_model=dim_model, pffn_ratio=2, n_head=4,
                    prenorm=True, norm="LayerNorm",
                    activation='gelu',
                    store_att=False, attn_dp_rate=0.2, head_dp_rate=0., dp_rate=0.2,
                    att_param={'attenc_mask_diag': True, 'attenc_mask_flag': False, 'learnable_scale_enc': False},
                    c_reconstruct=1, apply_gap=True, nb_class=2)

    if path_select_core is not None:
        # map_location='cpu' lets a checkpoint written during a GPU run be read on a
        # machine without CUDA; load_state_dict then copies the values into the
        # model's own parameters, whatever device they are on.
        checkpoint = torch.load(path_select_core, map_location='cpu')
        TApp.load_state_dict(checkpoint['model_state_dict'])

    return TApp

## Self-supervised pretraining

In [4]:
# Sizes reused by the following cells
m       = 5     # Number of channels of the input time series (i.e. consumption series, hours encoded as sin/cos, days encoded as sin/cos)
win     = 1024  # Chosen length of the slicing window
d_model = 64    # Inner dimension of the model

# TransApp model built in pretraining mode
TransAppInstance = get_model_inst(
    m=m,
    win=win,
    dim_model=d_model,
    mode="pretraining",
)

# Show the TransApp architecture with the pretraining head
summary(
    TransAppInstance,
    input_size=(1, m, win),
    mode="train",
    device='cpu',
)

Layer (type:depth-idx)                                  Output Shape              Param #
TransApp                                                [1, 1, 1024]              --
├─Sequential: 1-1                                       [1, 1024, 64]             --
│    └─DilatedBlock: 2-1                                [1, 64, 1024]             --
│    │    └─Sequential: 3-1                             [1, 64, 1024]             64,192
│    └─Transpose: 2-2                                   [1, 1024, 64]             --
├─Sequential: 1-2                                       [1, 1024, 64]             37,888
│    └─EncoderLayer: 2-3                                [1, 1024, 64]             16,832
│    │    └─LayerNorm: 3-4                              [1, 1024, 64]             (recursive)
│    │    └─AttentionLayer: 3-3                         [1, 1024, 64]             16,640
│    │    └─LayerNorm: 3-4                              [1, 1024, 64]             (recursive)
│    │    └─LayerNorm: 3-1

### Get pretraining data

In [5]:
# Fetch the CER pretraining data, requesting the hour and day cos/sin exogenous variables.
data_pretraining = CER_get_data_pretraining(
    exo_variable=[
        'hours_cos', 'hours_sin',
        'days_cos', 'days_sin',
    ],
)

### Pretrainer instance

In [8]:
# Hyper-parameters of the pretraining run
dict_params = {'lr': 1e-4, 'wd': 1e-4, 'batch_size': 16, 'epochs': 2}
save_path = str(root) + '/tmp/TransAppPT' # Model save path

# Dataset and loader over the pretraining data (scaling enabled on dimension 0)
pretraining_dataset = TSDataset(data_pretraining, scaler=True, scale_dim=[0])
train_loader = torch.utils.data.DataLoader(pretraining_dataset, batch_size=dict_params['batch_size'], shuffle=True)

GeomMask = GeometricMask(mean_length=24, masking_ratio=0.5, type_corrupt='zero', dim_masked=0) # Mask used to corrupt the input time series

# The device falls back to CPU when CUDA is unavailable, so the cell also runs
# (more slowly) on machines without a GPU instead of failing.
model_pretrainer = self_pretrainer(TransAppInstance,
                                   train_loader, valid_loader=None,
                                   learning_rate=dict_params['lr'], weight_decay=dict_params['wd'],
                                   name_scheduler='CosineAnnealingLR',
                                   dict_params_scheduler={'T_max': dict_params['epochs'], 'eta_min': 1e-6},
                                   warmup_duration=None,
                                   criterion=MaskedMSELoss(type_loss='L1'), mask=GeomMask,
                                   device="cuda" if torch.cuda.is_available() else "cpu", all_gpu=False,
                                   verbose=True, plotloss=True,
                                   save_fig=False, path_fig=None,
                                   save_only_core=False,
                                   save_checkpoint=True, path_checkpoint=save_path)

Adjusting learning rate of group 0 to 1.0000e-04.


### Training process

In [9]:
# Run the pretraining for the number of epochs configured in dict_params.
model_pretrainer.train(dict_params['epochs'])

Epoch [1/2]
    Train loss : 0.462713
Adjusting learning rate of group 0 to 5.0500e-05.
Epoch [2/2]
    Train loss : 0.458446
Adjusting learning rate of group 0 to 1.0000e-06.


## Finetuning the pretrained model for Appliance Detection (i.e., a chosen classification case)

In [14]:
# Two ways to get a model with a classification head - use ONE of them.
#
# Option A: keep the in-memory pretrained instance and switch its mode:
#     TransAppInstance.mode = "classif"
#
# Option B (used here): build a fresh model in classification mode and load the
# previously saved pretrained instance. Running Option A before Option B is
# pointless, because the reassignment below replaces the instance it modified.
TransAppInstance = get_model_inst(m=m, win=win, dim_model=d_model, mode="classif",
                                  path_select_core=str(root) + '/tmp/TransAppPT.pt')

summary(TransAppInstance, input_size=(1, m, win), mode="train", device='cpu') # show TransApp architecture with classification head

Layer (type:depth-idx)                                  Output Shape              Param #
TransApp                                                [1, 2]                    65
├─Sequential: 1-1                                       [1, 1024, 64]             --
│    └─DilatedBlock: 2-1                                [1, 64, 1024]             --
│    │    └─Sequential: 3-1                             [1, 64, 1024]             64,192
│    └─Transpose: 2-2                                   [1, 1024, 64]             --
├─Sequential: 1-2                                       [1, 1024, 64]             --
│    └─EncoderLayer: 2-3                                [1, 1024, 64]             --
│    │    └─LayerNorm: 3-2                              [1, 1024, 64]             128
│    │    └─AttentionLayer: 3-3                         [1, 1024, 64]             16,640
│    │    └─LayerNorm: 3-4                              [1, 1024, 64]             128
│    │    └─PositionWiseFeedForward: 3-5          

### Select one of the available detection cases on the CER dataset

- cooker_case
- dishwasher_case
- waterheater_case
- pluginheater_case
- tumbledryer_case
- tv_greater21inch_case
- tv_lessr21inch_case
- desktopcomputer_case
- laptopcomputer_case

In [5]:
case = 'cooker_case' # example: detecting the cooker in consumption series

# Pass the selected `case` (rather than a hard-coded name) so that changing the
# variable above actually changes the data that is loaded.
datas_tuple = CER_get_data_case(case, seed=0, exo_variable=['hours_cos', 'hours_sin', 'days_cos', 'days_sin'], win=win)

### AD Framework instance and training

In [19]:
# Hyper-parameters of the finetuning run
dict_params = {'lr': 1e-4, 'wd': 1e-3, 'batch_size': 16, 'epochs': 2, 'p_es': 5, 'p_rlr': 3, 'n_warmup_epochs': 0}
save_path = str(root) + '/tmp/TransAppPTFinetuned'

# Sliced data for training (entries 0-5 of datas_tuple)
X_train, y_train, X_valid, y_valid, X_test, y_test = datas_tuple[:6]

# Entire curves data for evaluating the model (entries 6-11 of datas_tuple)
(X_train_voter, y_train_voter,
 X_valid_voter, y_valid_voter,
 X_test_voter,  y_test_voter) = datas_tuple[6:12]

# Datasets
train_dataset = TSDataset(X_train, y_train, scaler=True, scale_dim=[0])
valid_dataset = TSDataset(X_valid, y_valid, scaler=True, scale_dim=[0])
test_dataset  = TSDataset(X_test,  y_test,  scaler=True, scale_dim=[0])

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=dict_params['batch_size'], shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=1, shuffle=True)

# AD Framework trainer
# The device falls back to CPU when CUDA is unavailable, so the cell also runs
# (more slowly) on machines without a GPU instead of failing.
model_trainer = AD_Framework(TransAppInstance,
                             train_loader=train_loader, valid_loader=valid_loader,
                             learning_rate=dict_params['lr'], weight_decay=dict_params['wd'],
                             criterion=nn.CrossEntropyLoss(),
                             patience_es=dict_params['p_es'], patience_rlr=dict_params['p_rlr'],
                             f_metrics=getmetrics(),
                             n_warmup_epochs=dict_params['n_warmup_epochs'],
                             scale_by_subseq_in_voter=True, scale_dim=[0],
                             verbose=True, plotloss=True,
                             save_fig=False, path_fig=None,
                             device="cuda" if torch.cuda.is_available() else "cpu", all_gpu=False,
                             save_checkpoint=True, path_checkpoint=save_path)

### Training process

In [20]:
# Run the finetuning for the number of epochs configured in dict_params.
model_trainer.train(dict_params['epochs'])

Epoch [1/2]
    Train loss : 0.5554, Train acc : 72.21%
    Valid  loss : 0.5587, Valid  acc : 73.83%
Epoch [2/2]
    Train loss : 0.5385, Train acc : 73.44%
    Valid  loss : 0.5823, Valid  acc : 72.88%


### Model evaluation

In [21]:
# Step 1 - evaluate the last-epoch model on the test subsequences
model_trainer.evaluate(
    torch.utils.data.DataLoader(dataset=test_dataset, batch_size=1),
    mask='test_metrics_lastmodel',
)

# Step 2 - restore the best weights
model_trainer.restore_best_weights()

# Step 3 - evaluate the restored model on the test subsequences
model_trainer.evaluate(
    torch.utils.data.DataLoader(dataset=test_dataset, batch_size=1),
)

# Step 4 - find the best quantile on the validation voter dataset
model_trainer.ADFFindBestQuantile(
    TSDataset(X_valid_voter, y_valid_voter),
    m=m,
    win=win,
)

# Step 5 - evaluate on the test voter dataset using the best quantile
quant_metric = model_trainer.ADFvoter_proba(
    TSDataset(X_test_voter, y_test_voter),
    m=m,
    win=win,
)
print(quant_metric)

Restored best model met during training.
{'ACCURACY': 0.7589670014347202, 'PRECISION': 0.5026178010471204, 'RECALL': 0.5680473372781065, 'PRECISION_MACRO': 0.6791745131717815, 'RECALL_MACRO': 0.694061547426932, 'F1_SCORE': 0.5333333333333334, 'F1_SCORE_MACRO': 0.6854287556415217, 'F1_SCORE_WEIGHTED': 0.763767717777303, 'CONFUSION_MATRIX': array([[ 96,  73],
       [ 95, 433]]), 'ROC_AUC_SCORE': 0.7570602474448629, 'ROC_AUC_SCORE_MACRO': 0.7570602474448629, 'ROC_AUC_SCORE_WEIGHTED': 0.7570602474448629}
