In [1]:
from lib import *
import os
import re
from sklearn.metrics import roc_auc_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import average_precision_score

# Locations of the pre-processed SMD pickles and of the interpretation label text files.
preprocessed_path = '/workspace/TAIL_MIL/dataset/SMD/preprocessed_set/'
interpretation_path = '/workspace/TAIL_MIL/dataset/SMD/interpretation_label/'

# Each directory is listed (and sorted) a single time; the four path lists below are
# then selected from those listings by a substring of the file name.
_preprocessed_names = sorted(os.listdir(preprocessed_path))
_interpretation_names = sorted(os.listdir(interpretation_path))

# NOTE(review): the training cell pairs these lists purely by position (index `ii`),
# so it presumably expects all four to enumerate the same machines in the same
# order - confirm against the directory contents.
x_train_path = [preprocessed_path + name for name in _preprocessed_names if 'train' in name]
x_test_path = [preprocessed_path + name for name in _preprocessed_names if 'test.' in name]
y_test_path = [preprocessed_path + name for name in _preprocessed_names if 'test_' in name]
y_test_label = [interpretation_path + name for name in _interpretation_names if 'machine' in name]

In [2]:
# Train one MTAD_GAT_RECON model per machine, keep the checkpoint with the lowest
# validation loss, then report detection (AUROC / AUPR) and interpretation (ACC)
# scores on that machine's test split.
#
# NOTE: `tqdm_notebook` is deprecated (see the warning in the cell output) in favour of
# `tqdm.notebook.tqdm`; it is kept here because switching requires a new import.

# Matches one interpretation record of the form `<start>-<end>:<col>,<col>,...`.
_INTERPRETATION_PATTERN = re.compile(r'(\d+)-(\d+):([\d,]+)')


def _load_pickle(path):
    """Return the object pickled at `path`; the file is closed even if loading fails."""
    # NOTE(security): unpickling can execute arbitrary code - only load trusted files.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def _parse_interpretation_labels(text):
    """Parse interpretation records from `text`.

    Returns three parallel lists: start indexes, end indexes and, per record, the list
    of integer column labels. Lines that do not match the record pattern are skipped.
    """
    starts, ends, columns = [], [], []
    # splitlines() keeps the final record even when the file has no trailing newline.
    for line in text.splitlines():
        match = _INTERPRETATION_PATTERN.match(line)
        if match:
            starts.append(int(match.group(1)))
            ends.append(int(match.group(2)))
            # Skip empty tokens so a stray comma does not raise in int().
            columns.append([int(tok) for tok in match.group(3).split(',') if tok])
    return starts, ends, columns


def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over `loader` and return the epoch reconstruction loss.

    Each batch loss is sqrt(criterion(x, model(x))); the returned value is the square
    root of the mean of the squared batch losses. When `optimizer` is given the model
    is put in train mode and updated on every batch; otherwise it is put in eval mode
    and gradients are disabled.
    """
    training = optimizer is not None
    model.train(training)
    batch_losses = []
    with torch.set_grad_enabled(training):
        for x, _ in loader:
            x = x.to(device)
            recons = model(x)
            recon_loss = torch.sqrt(criterion(x, recons))
            if training:
                optimizer.zero_grad()
                recon_loss.backward()
                optimizer.step()
            batch_losses.append(recon_loss.item())
    batch_losses = np.array(batch_losses)
    return np.sqrt((batch_losses ** 2).mean())


for ii in tqdm_notebook(range(len(x_train_path))):
    x_train = _load_pickle(x_train_path[ii])
    x_test = _load_pickle(x_test_path[ii])
    y_test = _load_pickle(y_test_path[ii]).reshape((-1))

    with open(y_test_label[ii], 'r') as f:
        lines = f.read()

    # File name without its last four characters (the extension), e.g. "machine-1-1_train".
    run_name = x_train_path[ii].split('/')[-1][:-4]

    print('-------------------------------------------------------------')
    print(run_name)

    start_indexes, end_indexes, i_labels = _parse_interpretation_labels(lines)

    # The scaler returned for the training split is reused for the test split.
    x_train, scaler = normalize_data(x_train, scaler=None)
    x_test, _ = normalize_data(x_test, scaler=scaler)

    n_features = x_train.shape[1]
    window_size, target_dims = 12, x_train.shape[1]
    batch_size, val_split, shuffle_dataset = 128, 0.2, True

    train_dataset = SlidingWindowDataset(x_train, window_size, target_dims)
    test_dataset = SlidingWindowDataset(x_test, window_size, target_dims)

    train_loader, val_loader, test_loader = create_data_loaders(
        train_dataset, batch_size, val_split, shuffle_dataset, test_dataset=test_dataset
    )

    # Use the first GPU when present, otherwise fall back to the CPU.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = MTAD_GAT_RECON(
        n_features,
        window_size,
        n_features,
        kernel_size=7,
        use_gatv2=True,
        feat_gat_embed_dim=None,
        time_gat_embed_dim=None,
        gru_n_layers=1,
        gru_hid_dim=300,
        recon_n_layers=1,
        recon_hid_dim=300,
        dropout=0.3,
        alpha=0.2
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    recon_criterion = nn.MSELoss()
    epochs = 100

    # Make sure the checkpoint directory exists before the first torch.save().
    os.makedirs('Model', exist_ok=True)
    save_path = 'Model/' + run_name + '.p'

    train_reconstruct_losses = []
    valid_reconstruct_losses = []
    min_loss = float('inf')

    for e in tqdm_notebook(range(epochs)):
        train_loss = _run_epoch(model, train_loader, recon_criterion, device, optimizer)
        train_reconstruct_losses.append(train_loss)

        recon_epoch_loss = _run_epoch(model, val_loader, recon_criterion, device)
        valid_reconstruct_losses.append(recon_epoch_loss)

        # Checkpoint whenever the validation loss improves.
        if min_loss > recon_epoch_loss:
            min_loss = recon_epoch_loss
            torch.save(model.state_dict(), save_path)

    # Evaluate with the best checkpoint, not the weights of the final epoch.
    model.load_state_dict(torch.load(save_path, map_location=device))
    model.eval()

    # Squared error per test window.
    # NOTE(review): training minimises the error between `x` and its reconstruction,
    # whereas this score subtracts the dataset target `y`. If `y` has a different
    # window length than `y_hat` it broadcasts along that axis - confirm that `y`
    # (rather than `x`) is the intended reference.
    recons = []
    with torch.no_grad():
        for x, y in test_loader:
            x = x.to(device)
            y = y.to(device)
            y_hat = model(x)
            recons.append((y_hat - y).cpu().numpy() ** 2)

    recons = np.concatenate(recons, axis=0)

    # A window is labelled anomalous when any point inside it is anomalous.
    sliding_window_label = np.array([
        1 if sum(y_test[i:i + window_size]) > 0 else 0
        for i in range(len(y_test) - window_size)
    ])

    window_scores = recons.sum(2).sum(1)
    last_step_errors = recons[:, -1]
    one_step_scores = last_step_errors.sum(1)

    print('Slinding Window Performance')
    print(f'AUROC: {roc_auc_score(sliding_window_label, window_scores)}')
    print(f'AUPR: {average_precision_score(sliding_window_label, window_scores)}')

    print('One Step Performance')
    print(f'AUROC: {roc_auc_score(y_test[window_size:], one_step_scores)}')
    print(f'AUPR: {average_precision_score(y_test[window_size:], one_step_scores)}')

    # Interpretation: inside every labelled segment, the feature with the largest
    # last-step error counts as correct when it is one of the labelled columns.
    # NOTE(review): the one-step metric above pairs rows of `recons` with
    # y_test[window_size:], yet the segment indexes from the label file are applied
    # here without that offset, and argmax yields 0-based feature indexes - confirm
    # the index conventions of the label files.
    corrects = []
    for seg_start, seg_end, seg_columns in zip(start_indexes, end_indexes, i_labels):
        temp_preds = last_step_errors[seg_start:seg_end].argmax(1)
        corrects.extend(1 if pred in seg_columns else 0 for pred in temp_preds)

    # Avoid ZeroDivisionError when the label file yields no scored time steps.
    accuracy = sum(corrects) / len(corrects) if corrects else float('nan')

    print('Interpretation Performance')
    print(f'ACC: {accuracy}')
    print('-------------------------------------------------------------')

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for ii in tqdm_notebook(range(len(x_train_path))):


  0%|          | 0/11 [00:00<?, ?it/s]

<_io.TextIOWrapper name='/workspace/TAIL_MIL/dataset/SMD/interpretation_label/machine-1-1.txt' mode='r' encoding='UTF-8'>
-------------------------------------------------------------
machine-1-1_train
Data normalized
Data normalized
train_size: 22774
validation_size: 5693
test_size: 28467


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for e in tqdm_notebook(range(epochs)):


  0%|          | 0/100 [00:00<?, ?it/s]

Slinding Window Performance
AUROC: 0.914903631860145
AUPR: 0.6201934814205685
One Step Performance
AUROC: 0.8891126747025044
AUPR: 0.5964226980968581
Interpretation Performance
ACC: 0.2845894263217098
-------------------------------------------------------------
<_io.TextIOWrapper name='/workspace/TAIL_MIL/dataset/SMD/interpretation_label/machine-1-2.txt' mode='r' encoding='UTF-8'>
-------------------------------------------------------------
machine-1-2_train
Data normalized
Data normalized
train_size: 18946
validation_size: 4736
test_size: 23682


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for e in tqdm_notebook(range(epochs)):


  0%|          | 0/100 [00:00<?, ?it/s]

Slinding Window Performance
AUROC: 0.8299469350460457
AUPR: 0.22428688933885743
One Step Performance
AUROC: 0.7098728420300624
AUPR: 0.12134225051407585
Interpretation Performance
ACC: 0.23985239852398524
-------------------------------------------------------------
<_io.TextIOWrapper name='/workspace/TAIL_MIL/dataset/SMD/interpretation_label/machine-1-3.txt' mode='r' encoding='UTF-8'>
-------------------------------------------------------------
machine-1-3_train
Data normalized
Data normalized
train_size: 18952
validation_size: 4738
test_size: 23691


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for e in tqdm_notebook(range(epochs)):


  0%|          | 0/100 [00:00<?, ?it/s]

Slinding Window Performance
AUROC: 0.8459802768564664
AUPR: 0.1768443733516168
One Step Performance
AUROC: 0.7773164017363388
AUPR: 0.13871430117020972
Interpretation Performance
ACC: 0.1453416149068323
-------------------------------------------------------------
<_io.TextIOWrapper name='/workspace/TAIL_MIL/dataset/SMD/interpretation_label/machine-1-4.txt' mode='r' encoding='UTF-8'>
-------------------------------------------------------------
machine-1-4_train
Data normalized
Data normalized
train_size: 18956
validation_size: 4738
test_size: 23695


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for e in tqdm_notebook(range(epochs)):


  0%|          | 0/100 [00:00<?, ?it/s]

Slinding Window Performance
AUROC: 0.8459306268817212
AUPR: 0.18706277446265174
One Step Performance
AUROC: 0.7389225003022609
AUPR: 0.12346621829264669
Interpretation Performance
ACC: 0.12777777777777777
-------------------------------------------------------------
<_io.TextIOWrapper name='/workspace/TAIL_MIL/dataset/SMD/interpretation_label/machine-1-5.txt' mode='r' encoding='UTF-8'>
-------------------------------------------------------------
machine-1-5_train
Data normalized
Data normalized
train_size: 18955
validation_size: 4738
test_size: 23694


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for e in tqdm_notebook(range(epochs)):


  0%|          | 0/100 [00:00<?, ?it/s]

Slinding Window Performance
AUROC: 0.9441070277565766
AUPR: 0.6212197029536928
One Step Performance
AUROC: 0.8839997456980587
AUPR: 0.5083237944514905
Interpretation Performance
ACC: 0.8
-------------------------------------------------------------
<_io.TextIOWrapper name='/workspace/TAIL_MIL/dataset/SMD/interpretation_label/machine-1-6.txt' mode='r' encoding='UTF-8'>
-------------------------------------------------------------
machine-1-6_train
Data normalized
Data normalized
train_size: 18941
validation_size: 4735
test_size: 23677


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for e in tqdm_notebook(range(epochs)):


  0%|          | 0/100 [00:00<?, ?it/s]

Slinding Window Performance
AUROC: 0.9220365436646577
AUPR: 0.8730119216274529
One Step Performance
AUROC: 0.9251840757705188
AUPR: 0.8799309919383843
Interpretation Performance
ACC: 0.5585221143473571
-------------------------------------------------------------
<_io.TextIOWrapper name='/workspace/TAIL_MIL/dataset/SMD/interpretation_label/machine-1-7.txt' mode='r' encoding='UTF-8'>
-------------------------------------------------------------
machine-1-7_train
Data normalized
Data normalized
train_size: 18948
validation_size: 4737
test_size: 23685


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for e in tqdm_notebook(range(epochs)):


  0%|          | 0/100 [00:00<?, ?it/s]

Slinding Window Performance
AUROC: 0.7683780271426025
AUPR: 0.3374215009958204
One Step Performance
AUROC: 0.7953624426612851
AUPR: 0.37735989358651517
Interpretation Performance
ACC: 0.10133444537114263
-------------------------------------------------------------
<_io.TextIOWrapper name='/workspace/TAIL_MIL/dataset/SMD/interpretation_label/machine-1-8.txt' mode='r' encoding='UTF-8'>
-------------------------------------------------------------
machine-1-8_train
Data normalized
Data normalized
train_size: 18949
validation_size: 4737
test_size: 23687


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for e in tqdm_notebook(range(epochs)):


  0%|          | 0/100 [00:00<?, ?it/s]

Slinding Window Performance
AUROC: 0.751535148798066
AUPR: 0.2804426983062452
One Step Performance
AUROC: 0.6511415119948463
AUPR: 0.2610456073703935
Interpretation Performance
ACC: 0.2503276539973788
-------------------------------------------------------------
<_io.TextIOWrapper name='/workspace/TAIL_MIL/dataset/SMD/interpretation_label/machine-2-1.txt' mode='r' encoding='UTF-8'>
-------------------------------------------------------------
machine-2-1_train
Data normalized
Data normalized
train_size: 18945
validation_size: 4736
test_size: 23682


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for e in tqdm_notebook(range(epochs)):


  0%|          | 0/100 [00:00<?, ?it/s]

Slinding Window Performance
AUROC: 0.7299193132482572
AUPR: 0.1942471222269911
One Step Performance
AUROC: 0.6690452461441267
AUPR: 0.14658512129759513
Interpretation Performance
ACC: 0.34957264957264955
-------------------------------------------------------------
<_io.TextIOWrapper name='/workspace/TAIL_MIL/dataset/SMD/interpretation_label/machine-2-2.txt' mode='r' encoding='UTF-8'>
-------------------------------------------------------------
machine-2-2_train
Data normalized
Data normalized
train_size: 18950
validation_size: 4737
test_size: 23688


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for e in tqdm_notebook(range(epochs)):


  0%|          | 0/100 [00:00<?, ?it/s]

Slinding Window Performance
AUROC: 0.5460984133485902
AUPR: 0.16060735200944937
One Step Performance
AUROC: 0.5367904453142117
AUPR: 0.1371526266494741
Interpretation Performance
ACC: 0.1418990469466996
-------------------------------------------------------------
<_io.TextIOWrapper name='/workspace/TAIL_MIL/dataset/SMD/interpretation_label/machine-2-3.txt' mode='r' encoding='UTF-8'>
-------------------------------------------------------------
machine-2-3_train
Data normalized
Data normalized
train_size: 18941
validation_size: 4735
test_size: 23677


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for e in tqdm_notebook(range(epochs)):


  0%|          | 0/100 [00:00<?, ?it/s]

Slinding Window Performance
AUROC: 0.9489846026168689
AUPR: 0.4785420158095708
One Step Performance
AUROC: 0.917298632691902
AUPR: 0.3437402160698954
Interpretation Performance
ACC: 0.4460431654676259
-------------------------------------------------------------


In [None]:
# Re-prints the evaluation metrics using the variables left in the kernel by the
# training cell above (`sliding_window_label`, `recons`, `y_test`, `window_size`,
# `start_indexes`, `end_indexes`, `i_labels`), i.e. those of the last machine that
# the loop processed. It cannot run on a fresh kernel on its own.
window_scores = recons.sum(2).sum(1)
# Per-feature squared error at the last step of every window: (n_windows, n_features).
last_step_errors = recons[:, -1]
one_step_scores = last_step_errors.sum(1)

print()
print('Slinding Window Performance')
print(f'AUROC: {roc_auc_score(sliding_window_label, window_scores)}')
print(f'AUPR: {average_precision_score(sliding_window_label, window_scores)}')
print()
print('One Step Performance')
print(f'AUROC: {roc_auc_score(y_test[window_size:], one_step_scores)}')
print(f'AUPR: {average_precision_score(y_test[window_size:], one_step_scores)}')

# Inside every labelled segment, the feature with the largest last-step error counts
# as correct when it is one of the labelled columns.
corrects = []
for seg_start, seg_end, seg_columns in zip(start_indexes, end_indexes, i_labels):
    temp_preds = last_step_errors[seg_start:seg_end].argmax(1)
    corrects.extend(1 if pred in seg_columns else 0 for pred in temp_preds)

# Avoid ZeroDivisionError when no labelled time step was scored.
accuracy = sum(corrects) / len(corrects) if corrects else float('nan')

print()
print('Interpretation Performance')
print(f'ACC: {accuracy}')
print()