# ©2023 EDF
Adrien PETRALIA - EDF R&D and Université Paris Cité (LIPADE)

# ADF & TransApp
## A Transformer-Based Framework for Appliance Detection Using Smart Meter Consumption Series 

## Notebook example

In [5]:
import os, sys
import numpy as np
import pandas as pd
from pathlib import Path

import torch
import torch.nn as nn

# Put the parent of the current working directory on sys.path so that the
# `experiments` and `src` packages imported below can be resolved.
# NOTE(review): this presumes the notebook is launched from a direct
# sub-folder of the repository root — confirm.
root = Path(os.getcwd()).resolve().parents[0]
sys.path.append(str(root))
from experiments.data_utils import *
from src.TransAppModel.TransApp import *
from src.AD_Framework.Framework import *
from src.utils.losses import *

from IPython.display import display, HTML
# Inject a CSS rule that sets the notebook `.container` width to 80%.
display(HTML("<style>.container { width:80% !important; }</style>"))

from torch.utils.data import DataLoader, Dataset
from torchinfo import summary

## Instantiation of a TransApp Model

In [2]:
def get_model_inst(m, win, dim_model, mode="pretraining"):
    """Build a TransApp model with the hyper-parameters used in this notebook.

    Args:
        m: Forwarded to ``TransApp`` as ``c_in``.
        win: Forwarded to ``TransApp`` as ``max_len``.
        dim_model: Forwarded to ``TransApp`` as ``d_model``.
        mode: Forwarded to ``TransApp`` as ``mode``. This notebook uses
            ``"pretraining"`` (the default) and ``"classif"``.

    Returns:
        The newly constructed ``TransApp`` instance.
    """
    # `mode` was previously hard-coded to "pretraining" here, which silently
    # ignored the caller's argument; it is now forwarded.
    return TransApp(max_len=win, c_in=m,
                    mode=mode,
                    n_embed_blocks=1,
                    encoding_type='noencoding',
                    n_encoder_layers=3,
                    kernel_size=5,
                    d_model=dim_model, pffn_ratio=2, n_head=4,
                    prenorm=True, norm="LayerNorm",
                    activation='gelu',
                    store_att=False, attn_dp_rate=0.2, head_dp_rate=0., dp_rate=0.2,
                    att_param={'attenc_mask_diag': True, 'attenc_mask_flag': False, 'learnable_scale_enc': False},
                    c_reconstruct=1, apply_gap=True, nb_class=2)

In [10]:
# Build the model in pretraining mode with c_in=5, max_len=1024 and d_model=64.
# NOTE(review): this rebinds the name `TransApp` (presumably the class brought
# in by the wildcard import above) to a model instance, so `get_model_inst`
# cannot be called a second time in the same session without re-importing.
TransApp = get_model_inst(m=5, win=1024, dim_model=64, mode="pretraining")

# Print a per-layer summary for a dummy input of size (1, 5, 1024), on CPU.
summary(TransApp, input_size=(1, 5, 1024), mode="train", device='cpu')

Layer (type:depth-idx)                                  Output Shape              Param #
TransApp                                                [1, 1, 1024]              --
├─Sequential: 1-1                                       [1, 1024, 64]             --
│    └─DilatedBlock: 2-1                                [1, 64, 1024]             --
│    │    └─Sequential: 3-1                             [1, 64, 1024]             64,192
│    └─Transpose: 2-2                                   [1, 1024, 64]             --
├─Sequential: 1-2                                       [1, 1024, 64]             37,888
│    └─EncoderLayer: 2-3                                [1, 1024, 64]             16,832
│    │    └─LayerNorm: 3-4                              [1, 1024, 64]             (recursive)
│    │    └─AttentionLayer: 3-3                         [1, 1024, 64]             16,640
│    │    └─LayerNorm: 3-4                              [1, 1024, 64]             (recursive)
│    │    └─LayerNorm: 3-1

In [4]:
# Dummy forward: call the model on a random tensor of size (1, 5, 1024),
# the same input size passed to `summary` above, to check that it runs.
TransApp(torch.rand(1, 5, 1024))

tensor([[[ 0.9202,  1.3643,  0.6505,  ..., -0.2121, -0.2282,  2.2120]]],
       grad_fn=<PermuteBackward>)

## Self-supervised pretrainer

In [None]:
# Fetch the pretraining data, requesting four exogenous variables
# (hour and day cosine/sine, judging by their names).
# NOTE(review): `data_pretraining` is not referenced again in the visible
# cells — confirm how it is meant to feed the pretraining dataset.
data_pretraining = CER_get_data_pretraining(exo_variable=['hours_cos', 'hours_sin', 'days_cos', 'days_sin'])

In [11]:
# Hyper-parameters and checkpoint path are defined first: the DataLoader below
# reads dict_params['batch_size'], and the pretrainer needs `save_path`.
dict_params = {'lr': 1e-4, 'wd': 1e-4, 'batch_size': 16, 'epochs': 20}
save_path = '' # To complete

# NOTE(review): `X_train` is not assigned before this cell in file order; the
# `data_pretraining` object loaded in the previous cell is presumably what
# should be wrapped here — confirm against CER_get_data_pretraining's return.
pretraining_dataset = TSDataset(X_train, scaler=True, scale_dim=[0])
train_loader = torch.utils.data.DataLoader(pretraining_dataset, batch_size=dict_params['batch_size'], shuffle=True)

# Self-supervised pretrainer: no validation loader, cosine-annealed learning
# rate over the full number of epochs, masked L1 reconstruction criterion.
model_pretrainer = self_pretrainer(TransApp,
                                   train_loader, valid_loader=None,
                                   learning_rate=dict_params['lr'], weight_decay=dict_params['wd'],
                                   name_scheduler='CosineAnnealingLR',
                                   dict_params_scheduler={'T_max': dict_params['epochs'], 'eta_min': 1e-6},
                                   warmup_duration=None,
                                   criterion=MaskedMSELoss(type_loss='L1'), mask=GeomMask,
                                   device="cuda", all_gpu=False,
                                   verbose=True, plotloss=False,
                                   save_fig=False, path_fig=None,
                                   save_only_core=False,
                                   # Was `path_checkpoint=)`, a syntax error; use the path defined above.
                                   save_checkpoint=True, path_checkpoint=save_path)

In [None]:
# Run the pretrainer for the number of epochs set in dict_params.
model_pretrainer.train(dict_params['epochs'])

## Fine-tuning on an Appliance Detection case

In [12]:
# Switch the existing model instance to classification by setting its `mode`
# attribute (the same object that was handed to the pretrainer above).
TransApp.mode = "classif"

# Print the per-layer summary again for the same dummy input size, on CPU.
summary(TransApp, input_size=(1, 5, 1024), mode="train", device='cpu')

Layer (type:depth-idx)                                  Output Shape              Param #
TransApp                                                [1, 2]                    65
├─Sequential: 1-1                                       [1, 1024, 64]             --
│    └─DilatedBlock: 2-1                                [1, 64, 1024]             --
│    │    └─Sequential: 3-1                             [1, 64, 1024]             64,192
│    └─Transpose: 2-2                                   [1, 1024, 64]             --
├─Sequential: 1-2                                       [1, 1024, 64]             --
│    └─EncoderLayer: 2-3                                [1, 1024, 64]             --
│    │    └─LayerNorm: 3-2                              [1, 1024, 64]             128
│    │    └─AttentionLayer: 3-3                         [1, 1024, 64]             16,640
│    │    └─LayerNorm: 3-4                              [1, 1024, 64]             128
│    │    └─PositionWiseFeedForward: 3-5          

In [2]:
# Load the 'cooker_case' data for the 'residential' group, with the same
# exogenous variables and window length (1024) used for the model above.
datas_tuple = CER_get_data_case(
    'cooker_case',
    seed=0,
    exo_variable=['hours_cos', 'hours_sin', 'days_cos', 'days_sin'],
    win=1024,
    ratio_resample=0.8,
    group='residential',
)

In [19]:
# Sliced data: entries 0-5 of the tuple, as (X, y) pairs for train / valid / test.
X_train, y_train, X_valid, y_valid, X_test, y_test = datas_tuple[:6]

# Entire curves data: entries 6-11, in the same train / valid / test order.
(X_train_voter, y_train_voter,
 X_valid_voter, y_valid_voter,
 X_test_voter, y_test_voter) = datas_tuple[6:12]

# Datasets built from the sliced data, each with scaling enabled on dim 0.
train_dataset = TSDataset(X_train, y_train, scaler=True, scale_dim=[0])
valid_dataset = TSDataset(X_valid, y_valid, scaler=True, scale_dim=[0])
test_dataset = TSDataset(X_test, y_test, scaler=True, scale_dim=[0])

# Fine-tuning hyper-parameters.
dict_params = {
    'lr': 1e-4,
    'wd': 1e-3,
    'batch_size': 16,
    'epochs': 15,
    'p_es': 5,
    'p_rlr': 3,
    'n_warmup_epochs': 0,
}
save_path = ''  # To complete

# Training batches use the configured batch size; validation runs one sample at a time.
train_loader = DataLoader(train_dataset, batch_size=dict_params['batch_size'], shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=True)

# Appliance-detection framework wrapping the model for supervised training.
model_trainer = AD_Framework(
    TransApp,
    train_loader=train_loader,
    valid_loader=valid_loader,
    learning_rate=dict_params['lr'],
    weight_decay=dict_params['wd'],
    criterion=nn.CrossEntropyLoss(),
    patience_es=dict_params['p_es'],
    patience_rlr=dict_params['p_rlr'],
    f_metrics=getmetrics(),
    n_warmup_epochs=dict_params['n_warmup_epochs'],
    scale_by_subseq_in_voter=True,
    scale_dim=[0],
    verbose=True,
    plotloss=False,
    save_fig=False,
    path_fig=None,
    device="cuda",
    all_gpu=True,
    save_checkpoint=True,
    path_checkpoint=save_path,
)

In [None]:
# Run the fine-tuning for the number of epochs set in dict_params.
model_trainer.train(dict_params['epochs'])

In [None]:
#============ eval last model ============#
# Evaluate the trainer's current weights on the test dataset, one sample per batch.
# NOTE(review): `mask` presumably names the entry under which these metrics
# are stored — confirm in AD_Framework.evaluate.
model_trainer.evaluate(torch.utils.data.DataLoader(test_dataset, batch_size=1), mask='test_metrics_lastmodel')


In [None]:
#============ restore best weight and evaluate ============#
model_trainer.restore_best_weights()
model_trainer.evaluate(torch.utils.data.DataLoader(test_dataset, batch_size=1))

#============ find best quantile on valid dataset ============#
# `m` and `win` were never defined at notebook scope (they are only parameters
# of get_model_inst), so `m=m, win=win` raised NameError. Bind them to the
# values the model was instantiated with (m=5, win=1024).
m, win = 5, 1024
model_trainer.ADFFindBestQuantile(TSDataset(X_valid_voter, y_valid_voter), m=m, win=win)
quant_metric = model_trainer.ADFvoter_proba(TSDataset(X_test_voter, y_test_voter), m=m, win=win)
print(quant_metric)