In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
import numpy as np

In [2]:
from fddbenchmark import FDDDataset, FDDDataloader, FDDEvaluator

#### Загрузка датасета:

In [3]:
# 'small_tep'    - 20 faults, 18.9 MB
# 'rieth_tep'    - 20 faults, 1.84 GB
# 'reinartz_tep' - 28 faults, 1.88 GB

data = FDDDataset(name='reinartz_tep')

Reading data/reinartz_tep/dataset.csv: 100%|██████████| 5600000/5600000 [00:46<00:00, 121219.36it/s]
Reading data/reinartz_tep/labels.csv: 100%|██████████| 5600000/5600000 [00:01<00:00, 3314591.80it/s]
Reading data/reinartz_tep/train_mask.csv: 100%|██████████| 5600000/5600000 [00:01<00:00, 3300176.09it/s]
Reading data/reinartz_tep/test_mask.csv: 100%|██████████| 5600000/5600000 [00:01<00:00, 3346967.24it/s]


#### Стандартизация данных:

In [4]:
data.df = (data.df - data.df.mean()) / data.df.std()
data.df['xmv_5'] = 0.0
data.df['xmv_9'] = 0.0
data.df.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,xmeas_1,xmeas_2,xmeas_3,xmeas_4,xmeas_5,xmeas_6,xmeas_7,xmeas_8,xmeas_9,xmeas_10,...,xmv_2,xmv_3,xmv_4,xmv_5,xmv_6,xmv_7,xmv_8,xmv_9,xmv_10,xmv_11
run_id,sample,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1334009671,1,-0.175458,0.097509,-0.09293,0.022772,0.079,0.056226,0.057243,0.078469,-0.042816,-0.08158,...,-0.101671,-0.237457,-0.139339,0.0,-0.082903,0.236431,0.025629,0.0,-0.149639,-1.448923
1334009671,2,-0.158513,-0.887449,-0.243139,0.211698,-0.352012,-0.553442,-0.006126,-0.248531,-0.358203,-0.098747,...,-0.037165,-0.234886,-0.141768,0.0,-0.105557,0.236835,0.066062,0.0,-0.203725,-0.178945


#### Создание загрузчиков данных:

In [5]:
train_dl = FDDDataloader(
    dataframe=data.df,
    mask=data.train_mask,
    labels=data.labels,
    window_size=60,
    step_size=10,
    minibatch_training=True,
    batch_size=512,
    shuffle=True
)

test_dl = FDDDataloader(
    dataframe=data.df,
    mask=data.test_mask,
    labels=data.labels,
    window_size=60, 
    step_size=1, 
    minibatch_training=True,
    batch_size=512
)

evaluator = FDDEvaluator(
    step_size=test_dl.step_size
    )

Creating sequence of samples: 100%|██████████| 2240/2240 [01:22<00:00, 27.31it/s]
Creating sequence of samples: 100%|██████████| 560/560 [00:21<00:00, 26.56it/s]


#### Модель графовой нейронной сети:

In [6]:
# Сверточный графовый слой:
class GCNLayer(nn.Module):
    def __init__(self, in_dim, out_dim):
        super().__init__()
        
        # Данные агрегируются после линейного слоя:
        self.dense = nn.Linear(in_dim, out_dim)

    def forward(self, adj, X):
        # adj - матрица смежности
        # Добавление self loops:
        #adj = adj + torch.eye(adj.size(0)).to(adj.device)
        h = self.dense(X)
        norm = adj.sum(1)**(-1/2)
        h = norm[None, :] * adj * norm[:, None] @ h
        
        return h
    

#Directed adjacency matrix   
class Graph_Directed_A(nn.Module):
      
    def __init__(self, num_nodes=52, window_size=10, alpha=1, device=None):
        super(Graph_Directed_A, self).__init__()
        
        self.alpha = alpha
        self.k = None
        self.device = device
        
        self.e1 = nn.Embedding(num_nodes, window_size)
        self.e2 = nn.Embedding(num_nodes, window_size)
        self.l1 = nn.Linear(window_size,window_size)
        self.l2 = nn.Linear(window_size,window_size)
        
    def forward(self, idx):
        
        m1 = torch.tanh(self.alpha*self.l1(self.e1(idx)))
        m2 = torch.tanh(self.alpha*self.l2(self.e2(idx)))
        adj = F.relu(torch.tanh(self.alpha*torch.mm(m1, m2.transpose(1,0))))
        
        if self.k:
            mask = torch.zeros(idx.size(0), idx.size(0)).to(self.device)
            mask.fill_(float('0'))
            s1,t1 = (adj + torch.rand_like(adj)*0.01).topk(self.k,1)
            mask.scatter_(1,t1,s1.fill_(1))
            adj = adj*mask
            
        adj = adj + torch.eye(52).to(self.device)
        
        return adj

    
# Графовая нейронная сеть:
class GNN(nn.Module):    
    def __init__(self, A = None, device=None):
        super(GNN, self).__init__()
        self.device = device
        self.pred = False
        if A != None:
            self.adj = A.to(device)
            self.pred = True
        else:
            self.graph_struct = Graph_Directed_A(device=device)
        self.idx = torch.arange(52).to(device)
        
        self.conv1 = GCNLayer(60, 256)
        self.bnorm1 = nn.BatchNorm1d(52)
        self.conv2 = GCNLayer(256, 256)
        self.bnorm2 = nn.BatchNorm1d(52)
        self.fc = nn.Linear(256, 29)
    
    
    def forward(self, X):
        if not self.pred:
            self.adj = self.graph_struct(self.idx)
        # first gcn layer:
        h = self.conv1(self.adj, X.to(self.device)).relu()
        h = self.bnorm1(h)
        skip, _ = torch.min(h,dim=1)
        h = self.conv2(self.adj, h).relu()
        h = self.bnorm2(h)
        h, _ = torch.min(h,dim=1)
        h = h + skip
        
        output = self.fc(h)
        
        return output

#### Код для тренировки модели и подсчета метрик:

In [7]:
import time

In [8]:
def train_and_evaluate(adj_matrix = None):
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = GNN(A=adj_matrix,device=device)
    model.to(device)
    optimizer = Adam(model.parameters())
    n_epochs = 35
    weight = torch.ones(29) * 0.5
    weight[1:] /= 28
    
    model.train()
    print(" Training:")
    for e in range(n_epochs):
        av_loss = []
        for train_ts, train_index, train_label in train_dl:
            m = torch.FloatTensor(train_ts)
            v_train_ts = torch.transpose(m, 1, 2)
            train_label = torch.LongTensor(train_label.values).to(device)
            logits = model(v_train_ts)
            loss = F.cross_entropy(logits, train_label, weight=weight.to(device))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            av_loss.append(loss.item())
        print(f'Epoch: {e+1:2d}/{n_epochs}, average CE loss: {sum(av_loss)/len(av_loss):.4f}')
    
    print("\n Evaluation:")
    preds = []
    test_labels = []
    model.eval()
    for test_ts, test_index, test_label in test_dl:
        m = torch.FloatTensor(test_ts)
        v_test_ts = torch.transpose(m, 1, 2)
        with torch.no_grad():
            logits = model(v_test_ts)
        pred = logits.argmax(axis=1).cpu().numpy()
        preds.append(pd.Series(pred, index=test_index))
        test_labels.append(test_label)
    pred = pd.concat(preds)
    test_label = pd.concat(test_labels)
    
    evaluator = FDDEvaluator(
    step_size=test_dl.step_size
    )
    evaluator.print_metrics(test_label, pred)

#### Результат для матрицы корреляции:

In [25]:
A = torch.load('corr_A.pt')

In [26]:
%%time
train_and_evaluate(A)

 Training:
Epoch:  1/35, average CE loss: 0.3960
Epoch:  2/35, average CE loss: 0.1733
Epoch:  3/35, average CE loss: 0.1413
Epoch:  4/35, average CE loss: 0.1286
Epoch:  5/35, average CE loss: 0.1230
Epoch:  6/35, average CE loss: 0.1191
Epoch:  7/35, average CE loss: 0.1144
Epoch:  8/35, average CE loss: 0.1105
Epoch:  9/35, average CE loss: 0.1070
Epoch: 10/35, average CE loss: 0.1034
Epoch: 11/35, average CE loss: 0.0977
Epoch: 12/35, average CE loss: 0.0948
Epoch: 13/35, average CE loss: 0.0926
Epoch: 14/35, average CE loss: 0.0911
Epoch: 15/35, average CE loss: 0.0896
Epoch: 16/35, average CE loss: 0.0886
Epoch: 17/35, average CE loss: 0.0875
Epoch: 18/35, average CE loss: 0.0868
Epoch: 19/35, average CE loss: 0.0863
Epoch: 20/35, average CE loss: 0.0856
Epoch: 21/35, average CE loss: 0.0849
Epoch: 22/35, average CE loss: 0.0848
Epoch: 23/35, average CE loss: 0.0855
Epoch: 24/35, average CE loss: 0.0833
Epoch: 25/35, average CE loss: 0.0816
Epoch: 26/35, average CE loss: 0.0806
E

#### Directed A:

In [27]:
A = torch.load('./Masters-thesis-Industrial-ML-GNN-main/direct_A.pt', map_location=torch.device('cpu'))

In [29]:
A = A + torch.eye(52)

In [30]:
train_and_evaluate(A)

 Training:
Epoch:  1/35, average CE loss: 0.2645
Epoch:  2/35, average CE loss: 0.1255
Epoch:  3/35, average CE loss: 0.1023
Epoch:  4/35, average CE loss: 0.0944
Epoch:  5/35, average CE loss: 0.0892
Epoch:  6/35, average CE loss: 0.0821
Epoch:  7/35, average CE loss: 0.0794
Epoch:  8/35, average CE loss: 0.0768
Epoch:  9/35, average CE loss: 0.0737
Epoch: 10/35, average CE loss: 0.0711
Epoch: 11/35, average CE loss: 0.0707
Epoch: 12/35, average CE loss: 0.0692
Epoch: 13/35, average CE loss: 0.0681
Epoch: 14/35, average CE loss: 0.0669
Epoch: 15/35, average CE loss: 0.0664
Epoch: 16/35, average CE loss: 0.0653
Epoch: 17/35, average CE loss: 0.0666
Epoch: 18/35, average CE loss: 0.0643
Epoch: 19/35, average CE loss: 0.0634
Epoch: 20/35, average CE loss: 0.0632
Epoch: 21/35, average CE loss: 0.0627
Epoch: 22/35, average CE loss: 0.0617
Epoch: 23/35, average CE loss: 0.0614
Epoch: 24/35, average CE loss: 0.0609
Epoch: 25/35, average CE loss: 0.0606
Epoch: 26/35, average CE loss: 0.0607
E

#### A.pt

In [9]:
A = torch.load('A.pt').to('cpu')

In [75]:
A[A>0]=1

In [11]:
A = A + torch.eye(52)

In [None]:
train_and_evaluate(A)

 Training:
Epoch:  1/35, average CE loss: 0.3356
Epoch:  2/35, average CE loss: 0.1940
Epoch:  3/35, average CE loss: 0.1664


In [10]:
A

tensor([[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.2285, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.9897, 0.0000, 0.0000]],
       grad_fn=<ToCopyBackward0>)

In [78]:
torch.save(A, 'A.pt')