In [1]:
from typing import Literal
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
import seaborn as sns
import itertools
import matplotlib.pyplot as plt

import torch
from torch import nn
import torch.nn.functional as F

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import IsolationForest
from sklearn.linear_model import SGDOneClassSVM
from sklearn.metrics import roc_auc_score

from fddbenchmark.dataset import FDDDataset
from fddbenchmark.dataloader import FDDDataloader
from fddbenchmark.models.detection.isolation_forest import FaultDetectionIsolationForest
from fddbenchmark.models.detection.pca import FaultDetectionPCA
from fddbenchmark.models.detection.mlp_vae import FaultDetectionMLPVAE
from fddbenchmark.models.detection.lstm_vae import FaultDetectionLSTMVAE


# Lessmeier bearing dataset

In [None]:
# Load the benchmark dataset by name. The cells below read its data frame,
# labels and train/test masks from this object.
dataset_name = 'lessmeier_bearing'
dataset = FDDDataset(name=dataset_name)

Reading data/lessmeier_bearing/dataset.csv: 100%|█| 87361169/87361169 [00:52<00:00, 16537


In [3]:
# Both loaders share the same windowing; only the row mask differs.
short_window_kwargs = dict(window_size=20, dilation=1, step_size=1)

# Training loader: training split restricted to rows labelled 0
# (presumably the fault-free class, since the evaluation below treats
# labels > 0 as positives).
train_dataloader = FDDDataloader(
    dataframe=dataset.df,
    mask=dataset.train_mask & (dataset.labels == 0),
    labels=dataset.labels,
    **short_window_kwargs,
)

# Test loader: the whole test split, all labels included.
test_dataloader = FDDDataloader(
    dataframe=dataset.df,
    mask=dataset.test_mask,
    labels=dataset.labels,
    **short_window_kwargs,
)

Creating sequence of samples: 100%|██████████████████████| 96/96 [00:07<00:00, 12.34it/s]
Creating sequence of samples: 100%|██████████████████████| 68/68 [00:04<00:00, 13.68it/s]


## Isolation Forest

In [4]:
# Fit the isolation-forest detector on the training loader.
forest = FaultDetectionIsolationForest()
forest.fit(train_dataloader)

# The test loader must yield exactly one batch here: the single-element
# unpacking raises if it yields zero or several.
[(x, y, _)] = iter(test_dataloader)

# Any label above 0 is the positive class for ROC AUC.
is_faulty = y > 0
forest_scores = forest.predict(x)
roc_auc_score(is_faulty, forest_scores)

0.6288998010169777

## PCA

In [5]:
# Sweep the variance ratio passed to FaultDetectionPCA and print the test
# ROC AUC for both scoring modes.
#
# The test batch does not depend on the swept parameter, so it is extracted
# once up front instead of re-iterating the loader on every pass.
# NOTE(review): assumes test_dataloader yields the same single batch on each
# iteration (it is built without shuffle) -- confirm.
(x, y, _), = iter(test_dataloader)
is_faulty = y > 0  # any label above 0 is the positive class

for normal_variance_ratio in [0.9, 0.95, 0.99, 0.995, 0.999, 0.9995, 0.9999]:
    pca = FaultDetectionPCA(normal_variance_ratio)
    pca.fit(train_dataloader)
    # Output per setting: 'spe' score first, 't2' score second, then a
    # blank separator line.
    for scoring in ('spe', 't2'):
        print(roc_auc_score(is_faulty, pca.predict(x, scoring=scoring)))
    print()

0.2981512347152495
0.5910378344190956

0.32731469854431106
0.5862504116389659

0.602961800397453
0.5817696378862426

0.602961800397453
0.5817696378862426

0.602961800397453
0.5817696378862426

0.602961800397453
0.5817696378862426

0.602961800397453
0.5817696378862426



## AE and VAE

In [None]:
# Rebuild both loaders with 1024-sample windows served in mini-batches of 32.
# These replace the window-20 loaders defined earlier under the same names.
long_window_kwargs = dict(
    window_size=1024,
    dilation=1,
    step_size=1,
    use_minibatches=True,
    batch_size=32,
)

# Training loader: label-0 rows of the training split, shuffled.
train_dataloader = FDDDataloader(
    dataframe=dataset.df,
    mask=dataset.train_mask & (dataset.labels == 0),
    labels=dataset.labels,
    shuffle=True,
    **long_window_kwargs,
)

# Test loader: the whole test split; no shuffle argument is passed.
test_dataloader = FDDDataloader(
    dataframe=dataset.df,
    mask=dataset.test_mask,
    labels=dataset.labels,
    **long_window_kwargs,
)

In [None]:
# Train FaultDetectionMLPVAE with beta=0.0 for several latent sizes and print
# the test ROC AUC of each (presumably beta=0.0 drops the KL term, making the
# model a plain autoencoder -- confirm against the model implementation).
for latent_dim in [32, 64, 128]:
    ae = FaultDetectionMLPVAE(
        input_dim=1,
        window_size=1024,  # same value as the dataloaders' window_size
        hidden_dim=1024,
        latent_dim=latent_dim,
        beta=0.0,
        device='cuda:1'
    )

    # NOTE(review): log_dir is an absolute, user-specific path and the device
    # is pinned to the second GPU; both need adjusting on another machine.
    ae.fit(
        train_dataloader,
        num_epochs=250,
        log_dir='/homes/mgoncharov/summerschool2023-benchmark/logs/',
        verbose=0
    )

    # The test loader yields mini-batches, so labels and scores are collected
    # per batch and concatenated before computing the metric.
    y_true = []
    y_score = []
    for (x, y, _) in tqdm(test_dataloader):
        y_true.append(y)
        y_score.append(ae.predict(x))

    is_faulty = np.concatenate(y_true) > 0  # any label above 0 is positive
    print(roc_auc_score(is_faulty, np.concatenate(y_score)))

In [None]:
# Train FaultDetectionMLPVAE for several beta values and print the test
# ROC AUC of each scoring variant.
#
# Scoring variants evaluated for every trained model, in print order.
scoring_variants = [
    dict(scoring='reconstruction_error'),
    dict(scoring='importance_sampling', std_x=1.0),
    dict(scoring='importance_sampling', std_x=0.1),
]

for beta in [0.1, 1.0]:
    vae = FaultDetectionMLPVAE(
        input_dim=1,
        # Must agree with the dataloaders, which produce 1024-sample windows
        # (the previous value 21024 was a typo).
        window_size=1024,
        hidden_dim=1024,
        latent_dim=64,
        beta=beta,
        device='cuda:1'
    )

    # NOTE(review): log_dir is an absolute, user-specific path and the device
    # is pinned to the second GPU; both need adjusting on another machine.
    vae.fit(
        train_dataloader,
        num_epochs=500,
        log_dir='/homes/mgoncharov/summerschool2023-benchmark/logs/',
        verbose=0
    )

    # One score list per variant. Every list is filled on every batch, so
    # none is empty when concatenated below; an unfilled list used to make
    # np.concatenate raise ValueError.
    y_true = []
    y_scores = [[] for variant in scoring_variants]
    for (x, y, _) in tqdm(test_dataloader):
        y_true.append(y)
        for collected, variant in zip(y_scores, scoring_variants):
            collected.append(vae.predict(x, **variant))

    is_faulty = np.concatenate(y_true) > 0  # any label above 0 is positive
    for collected in y_scores:
        print(roc_auc_score(is_faulty, np.concatenate(collected)))
    print()