In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
import time

### Dataset:

In [2]:
try:
    from fddbenchmark import FDDDataset, FDDDataloader, FDDEvaluator
except:
    !git clone https://github.com/airi-industrial-ai/fddbenchmark
    from fddbenchmark import FDDDataset, FDDDataloader, FDDEvaluator

In [3]:
tep_dataset = FDDDataset(name='reinartz_tep', splitting_type='supervised')
tep_dataset.df = (tep_dataset.df - tep_dataset.df.mean())/tep_dataset.df.std()
tep_dataset.df.head()

Reading data/reinartz_tep/dataset.csv: 100%|██████████| 5600000/5600000 [00:42<00:00, 133171.63it/s]
Reading data/reinartz_tep/labels.csv: 100%|██████████| 5600000/5600000 [00:01<00:00, 3074981.19it/s]
Reading data/reinartz_tep/train_mask.csv: 100%|██████████| 5600000/5600000 [00:01<00:00, 3047439.37it/s]
Reading data/reinartz_tep/test_mask.csv: 100%|██████████| 5600000/5600000 [00:01<00:00, 3091692.67it/s]


Unnamed: 0_level_0,Unnamed: 1_level_0,xmeas_1,xmeas_2,xmeas_3,xmeas_4,xmeas_5,xmeas_6,xmeas_7,xmeas_8,xmeas_9,xmeas_10,...,xmv_2,xmv_3,xmv_4,xmv_5,xmv_6,xmv_7,xmv_8,xmv_9,xmv_10,xmv_11
run_id,sample,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1334009671,1,-0.175458,0.097509,-0.09293,0.022772,0.079,0.056226,0.057243,0.078469,-0.042816,-0.08158,...,-0.101671,-0.237457,-0.139339,,-0.082903,0.236431,0.025629,,-0.149639,-1.448923
1334009671,2,-0.158513,-0.887449,-0.243139,0.211698,-0.352012,-0.553442,-0.006126,-0.248531,-0.358203,-0.098747,...,-0.037165,-0.234886,-0.141768,,-0.105557,0.236835,0.066062,,-0.203725,-0.178945
1334009671,3,-0.16695,1.86423,-0.384321,-0.192391,-0.54046,-0.496256,0.047474,0.041677,0.116272,-0.115753,...,-0.131093,-0.232738,-0.140334,,-0.120353,0.108759,-0.071836,,0.04811,0.073242
1334009671,4,-0.165755,-0.425304,-0.28945,0.085419,1.186139,0.114638,-0.020781,0.570426,0.066785,-0.146494,...,-0.067351,-0.231545,-0.160266,,-0.144987,0.036609,-0.135977,,-0.178125,0.081174
1334009671,5,-0.179773,1.085111,-0.254781,0.393148,-0.116478,-0.816772,0.051283,-0.513587,-0.061174,-0.18499,...,-0.059039,-0.230173,-0.122039,,-0.166149,0.122855,0.056399,,-0.243887,1.057207


In [4]:
tep_dataset.df['xmv_5'] = 1.0
tep_dataset.df['xmv_9'] = 1.0

### Dataloader:

In [5]:
train_dl = FDDDataloader(
    dataframe=tep_dataset.df,
    mask=tep_dataset.train_mask,
    labels=tep_dataset.labels,
    window_size=100,
    step_size=1,
    minibatch_training=True,
    batch_size=512,
    shuffle=True
)

test_dl = FDDDataloader(
    dataframe=tep_dataset.df,
    mask=tep_dataset.test_mask,
    labels=tep_dataset.labels,
    window_size=100, 
    step_size=1, 
    minibatch_training=True,
    batch_size=512
)

Creating sequence of samples: 100%|██████████| 2240/2240 [01:37<00:00, 23.06it/s]
Creating sequence of samples: 100%|██████████| 560/560 [00:23<00:00, 23.37it/s]


### Model:

In [10]:
num_nodes = 52 # number of nodes
window_size = 100 # window size
ngnn = 4 # number of graph structure learning layers
gsllayer = 'relu' # type of graph structure learning layer ('relu', 'directed', 'unidirected', 'undirected')
nhidden = 256 # feature size in GCN layers
alpha = 0.1 # hyperparameter for weights of edges
k = None # max number of edges for each node
out_channels = 29 # number of process states

In [11]:
from models import *

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = GNNTEP(num_nodes, window_size, ngnn, gsllayer, nhidden,
               alpha, k, out_channels, device)

### Training:

In [12]:
model.to(device)
optimizer = Adam(model.parameters(), lr=0.001)
n_epochs = 50

In [13]:
%%time
for e in range(n_epochs):
    model.train()
    av_loss = []
    for train_ts, train_index, train_label in train_dl:
        m = torch.FloatTensor(train_ts)
        v_train_ts = torch.transpose(m, 1, 2)
        train_label = torch.LongTensor(train_label.values).to(device)
        weight = torch.ones(29) * 0.5
        weight[1:] /= 20
        logits = model(v_train_ts)
        loss = F.cross_entropy(logits, train_label, weight=weight.to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        av_loss.append(loss.item())
    print(f'Epoch: {e+1:2d}/{n_epochs}, average CE loss: {sum(av_loss)/len(av_loss):.4f}')
    
    preds = []
    test_labels = []
    model.eval()
    for test_ts, test_index, test_label in test_dl:
        m = torch.FloatTensor(test_ts)
        v_test_ts = torch.transpose(m, 1, 2)
        with torch.no_grad():
            logits = model(v_test_ts)
        pred = logits.argmax(axis=1).cpu().numpy()
        preds.append(pd.Series(pred, index=test_index))
        test_labels.append(test_label)
    pred = pd.concat(preds)
    test_label = pd.concat(test_labels)
    
    evaluator = FDDEvaluator(
    splitting_type=tep_dataset.splitting_type, 
    step_size=test_dl.step_size
    )
    evaluator.print_metrics(test_label, pred)
    
    torch.save(model, 'directed_epoch_'+str(e+1)+'.pth')

Epoch:  1/50, average CE loss: 0.1539
FDD metrics
-----------------
TPR/FPR:
    Fault 01: 0.9950/0.0000
    Fault 02: 0.9906/0.0000
    Fault 03: 0.9331/0.0003
    Fault 04: 0.9986/0.0000
    Fault 05: 0.9784/0.0000
    Fault 06: 0.9982/0.0000
    Fault 07: 0.9986/0.0000
    Fault 08: 0.9794/0.0000
    Fault 09: 0.3144/0.0004
    Fault 10: 0.9704/0.0000
    Fault 11: 0.9886/0.0000
    Fault 12: 0.9818/0.0000
    Fault 13: 0.9581/0.0000
    Fault 14: 0.9964/0.0000
    Fault 15: 0.0092/0.0000
    Fault 16: 0.4962/0.0006
    Fault 17: 0.9757/0.0000
    Fault 18: 0.9584/0.0000
    Fault 19: 0.9884/0.0000
    Fault 20: 0.9701/0.0000
    Fault 21: 0.0000/0.0000
    Fault 22: 0.7031/0.0000
    Fault 23: 0.0000/0.0000
    Fault 24: 0.9902/0.0000
    Fault 25: 0.9883/0.0000
    Fault 26: 0.9796/0.0000
    Fault 27: 0.9894/0.0000
    Fault 28: 0.9550/0.0000
Detection TPR: 0.8508
Detection FPR: 0.0013
Average Detection Delay (ADD): 55.85
Total Correct Diagnosis Rate (Total CDR): 0.9691

Clusteri

Epoch:  9/50, average CE loss: 0.0606
FDD metrics
-----------------
TPR/FPR:
    Fault 01: 0.9962/0.0000
    Fault 02: 0.9939/0.0000
    Fault 03: 0.9820/0.0000
    Fault 04: 0.9982/0.0000
    Fault 05: 0.9879/0.0000
    Fault 06: 0.9993/0.0000
    Fault 07: 0.9986/0.0000
    Fault 08: 0.9818/0.0000
    Fault 09: 0.6069/0.0000
    Fault 10: 0.9762/0.0000
    Fault 11: 0.9918/0.0000
    Fault 12: 0.9865/0.0000
    Fault 13: 0.9735/0.0000
    Fault 14: 0.9960/0.0000
    Fault 15: 0.2706/0.0018
    Fault 16: 0.8702/0.0008
    Fault 17: 0.9789/0.0000
    Fault 18: 0.9637/0.0000
    Fault 19: 0.9930/0.0000
    Fault 20: 0.9711/0.0000
    Fault 21: 0.0000/0.0000
    Fault 22: 0.6455/0.0000
    Fault 23: 0.9475/0.0003
    Fault 24: 0.9908/0.0000
    Fault 25: 0.9898/0.0000
    Fault 26: 0.9831/0.0000
    Fault 27: 0.9923/0.0000
    Fault 28: 0.9622/0.0000
Detection TPR: 0.9200
Detection FPR: 0.0029
Average Detection Delay (ADD): 36.50
Total Correct Diagnosis Rate (Total CDR): 0.9715

Clusteri

Epoch: 17/50, average CE loss: 0.0479
FDD metrics
-----------------
TPR/FPR:
    Fault 01: 0.9942/0.0000
    Fault 02: 0.9941/0.0001
    Fault 03: 0.9791/0.0000
    Fault 04: 0.9985/0.0000
    Fault 05: 0.9876/0.0000
    Fault 06: 0.9992/0.0000
    Fault 07: 0.9986/0.0000
    Fault 08: 0.9805/0.0000
    Fault 09: 0.5706/0.0000
    Fault 10: 0.9754/0.0000
    Fault 11: 0.9902/0.0000
    Fault 12: 0.9858/0.0000
    Fault 13: 0.9741/0.0001
    Fault 14: 0.9959/0.0000
    Fault 15: 0.3781/0.0051
    Fault 16: 0.9165/0.0009
    Fault 17: 0.9790/0.0000
    Fault 18: 0.9640/0.0000
    Fault 19: 0.9926/0.0000
    Fault 20: 0.9708/0.0000
    Fault 21: 0.0000/0.0000
    Fault 22: 0.6784/0.0000
    Fault 23: 0.9647/0.0014
    Fault 24: 0.9904/0.0000
    Fault 25: 0.9896/0.0000
    Fault 26: 0.9808/0.0000
    Fault 27: 0.9919/0.0000
    Fault 28: 0.9656/0.0001
Detection TPR: 0.9275
Detection FPR: 0.0077
Average Detection Delay (ADD): 32.75
Total Correct Diagnosis Rate (Total CDR): 0.9699

Clusteri

Epoch: 25/50, average CE loss: 0.0407
FDD metrics
-----------------
TPR/FPR:
    Fault 01: 0.9944/0.0000
    Fault 02: 0.9934/0.0001
    Fault 03: 0.9731/0.0000
    Fault 04: 0.9984/0.0000
    Fault 05: 0.9835/0.0000
    Fault 06: 0.9990/0.0000
    Fault 07: 0.9986/0.0000
    Fault 08: 0.9816/0.0000
    Fault 09: 0.5298/0.0000
    Fault 10: 0.9740/0.0000
    Fault 11: 0.9900/0.0000
    Fault 12: 0.9855/0.0000
    Fault 13: 0.9731/0.0001
    Fault 14: 0.9965/0.0000
    Fault 15: 0.3456/0.0048
    Fault 16: 0.9219/0.0019
    Fault 17: 0.9778/0.0001
    Fault 18: 0.9639/0.0001
    Fault 19: 0.9920/0.0000
    Fault 20: 0.9706/0.0000
    Fault 21: 0.0004/0.0003
    Fault 22: 0.6970/0.0000
    Fault 23: 0.9619/0.0008
    Fault 24: 0.9900/0.0000
    Fault 25: 0.9890/0.0000
    Fault 26: 0.9804/0.0000
    Fault 27: 0.9909/0.0000
    Fault 28: 0.9657/0.0001
Detection TPR: 0.9264
Detection FPR: 0.0084
Average Detection Delay (ADD): 30.75
Total Correct Diagnosis Rate (Total CDR): 0.9684

Clusteri

Epoch: 33/50, average CE loss: 0.0356
FDD metrics
-----------------
TPR/FPR:
    Fault 01: 0.9953/0.0000
    Fault 02: 0.9926/0.0000
    Fault 03: 0.9754/0.0000
    Fault 04: 0.9984/0.0000
    Fault 05: 0.9849/0.0000
    Fault 06: 0.9992/0.0000
    Fault 07: 0.9986/0.0000
    Fault 08: 0.9811/0.0000
    Fault 09: 0.5038/0.0002
    Fault 10: 0.9731/0.0000
    Fault 11: 0.9902/0.0000
    Fault 12: 0.9845/0.0000
    Fault 13: 0.9727/0.0001
    Fault 14: 0.9949/0.0000
    Fault 15: 0.5184/0.0197
    Fault 16: 0.9209/0.0022
    Fault 17: 0.9779/0.0001
    Fault 18: 0.9638/0.0001
    Fault 19: 0.9916/0.0000
    Fault 20: 0.9706/0.0001
    Fault 21: 0.0031/0.0029
    Fault 22: 0.7123/0.0001
    Fault 23: 0.9577/0.0004
    Fault 24: 0.9905/0.0000
    Fault 25: 0.9886/0.0000
    Fault 26: 0.9814/0.0001
    Fault 27: 0.9908/0.0000
    Fault 28: 0.9680/0.0005
Detection TPR: 0.9341
Detection FPR: 0.0266
Average Detection Delay (ADD): 23.32
Total Correct Diagnosis Rate (Total CDR): 0.9665

Clusteri

Epoch: 41/50, average CE loss: 0.0318
FDD metrics
-----------------
TPR/FPR:
    Fault 01: 0.9949/0.0000
    Fault 02: 0.9932/0.0000
    Fault 03: 0.9777/0.0000
    Fault 04: 0.9979/0.0000
    Fault 05: 0.9870/0.0000
    Fault 06: 0.9992/0.0000
    Fault 07: 0.9986/0.0000
    Fault 08: 0.9814/0.0000
    Fault 09: 0.5222/0.0001
    Fault 10: 0.9735/0.0001
    Fault 11: 0.9910/0.0000
    Fault 12: 0.9855/0.0001
    Fault 13: 0.9725/0.0000
    Fault 14: 0.9960/0.0000
    Fault 15: 0.3216/0.0047
    Fault 16: 0.9275/0.0034
    Fault 17: 0.9782/0.0002
    Fault 18: 0.9639/0.0002
    Fault 19: 0.9909/0.0000
    Fault 20: 0.9706/0.0001
    Fault 21: 0.0063/0.0046
    Fault 22: 0.7004/0.0001
    Fault 23: 0.9669/0.0021
    Fault 24: 0.9899/0.0000
    Fault 25: 0.9887/0.0000
    Fault 26: 0.9816/0.0001
    Fault 27: 0.9900/0.0000
    Fault 28: 0.9569/0.0001
Detection TPR: 0.9267
Detection FPR: 0.0160
Average Detection Delay (ADD): 24.53
Total Correct Diagnosis Rate (Total CDR): 0.9675

Clusteri

Epoch: 49/50, average CE loss: 0.0289
FDD metrics
-----------------
TPR/FPR:
    Fault 01: 0.9949/0.0000
    Fault 02: 0.9912/0.0000
    Fault 03: 0.9776/0.0000
    Fault 04: 0.9982/0.0000
    Fault 05: 0.9850/0.0000
    Fault 06: 0.9988/0.0000
    Fault 07: 0.9986/0.0000
    Fault 08: 0.9800/0.0000
    Fault 09: 0.5232/0.0001
    Fault 10: 0.9750/0.0001
    Fault 11: 0.9909/0.0000
    Fault 12: 0.9842/0.0001
    Fault 13: 0.9734/0.0002
    Fault 14: 0.9960/0.0000
    Fault 15: 0.3222/0.0054
    Fault 16: 0.9210/0.0023
    Fault 17: 0.9779/0.0002
    Fault 18: 0.9635/0.0002
    Fault 19: 0.9909/0.0001
    Fault 20: 0.9699/0.0001
    Fault 21: 0.0067/0.0057
    Fault 22: 0.6850/0.0000
    Fault 23: 0.9665/0.0022
    Fault 24: 0.9898/0.0000
    Fault 25: 0.9886/0.0000
    Fault 26: 0.9816/0.0002
    Fault 27: 0.9876/0.0000
    Fault 28: 0.9604/0.0002
Detection TPR: 0.9263
Detection FPR: 0.0172
Average Detection Delay (ADD): 22.23
Total Correct Diagnosis Rate (Total CDR): 0.9669

Clusteri