In [None]:
import pickle
import os
import pandas as pd
from tqdm import tqdm
from src.models import *
from src.constants import *
from src.plotting import *
from src.pot import *
from src.utils import *
from src.diagnosis import *
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn as nn
from time import time
from pprint import pprint
from datetime import datetime, timedelta
from main import  load_dataset, backprop
import matplotlib.pyplot as plt
import random

%matplotlib inline

# Do not route text rendering through an external LaTeX installation.
plt.rcParams["text.usetex"] = False
# Default (width, height) for figures that do not pass their own figsize.
plt.rcParams['figure.figsize'] = 8, 5

def normalize3(a, min_a=None, max_a=None):
    """Min-max scale ``a`` column-wise and return the bounds that were used.

    Args:
        a: Array-like (NumPy array or DataFrame) with samples along axis 0.
        min_a: Per-column minimum to scale with; computed from ``a`` when None.
        max_a: Per-column maximum to scale with; computed from ``a`` when None.

    Returns:
        Tuple ``(scaled, min_a, max_a)`` where
        ``scaled = (a - min_a) / (max_a - min_a + 0.0001)``.
    """
    # Resolve each missing bound on its own: passing only one of them used to
    # either subtract ``None`` or silently discard the supplied ``max_a``.
    if min_a is None: min_a = np.min(a, axis=0)
    if max_a is None: max_a = np.max(a, axis=0)
    # The small constant keeps constant columns from dividing by zero.
    return ((a - min_a) / (max_a - min_a + 0.0001)), min_a, max_a

def convert_to_windows(data, model):
    """Build one look-back window per row of ``data``.

    For row ``i`` the window holds the ``model.n_window`` rows that precede it
    (row ``i`` itself is not included).  Rows without enough history are
    left-padded with copies of the first row.  Windows stay 2-D when the model
    name contains 'DTAAD', 'Attention' or 'TranAD'; otherwise each window is
    flattened to 1-D before stacking.

    Args:
        data: Tensor indexed by time step along dim 0.
        model: Object exposing ``n_window`` and ``name``.

    Returns:
        Tensor stacking one window per row of ``data``.
    """
    span = model.n_window
    keep_matrix = any(tag in model.name for tag in ('DTAAD', 'Attention', 'TranAD'))

    def window_at(step):
        if step < span:
            # Not enough history yet: repeat the first row to fill the gap.
            return torch.cat([data[0].repeat(span - step, 1), data[0:step]])
        return data[step - span:step]

    return torch.stack([
        win if keep_matrix else win.view(-1)
        for win in map(window_at, range(len(data)))
    ])

def load_model(modelname, dims):
    """Instantiate ``src.models.<modelname>`` and restore its checkpoint when allowed.

    The model is built with ``dims`` and cast to double precision, together with
    an AdamW optimizer (lr taken from ``model.lr``) and a StepLR scheduler
    (step size 5, gamma 0.9).  A checkpoint at
    ``checkpoints/<modelname>_<args.dataset>/model.ckpt`` is restored when the
    file exists and either ``args.retrain`` is falsy or ``args.test`` is truthy.

    Args:
        modelname: Name of a class exported by ``src.models``.
        dims: Argument forwarded to the model constructor.

    Returns:
        ``(model, optimizer, scheduler, epoch, accuracy_list)``; ``epoch`` is -1
        and ``accuracy_list`` is empty when no checkpoint was restored.
    """
    import src.models
    model = getattr(src.models, modelname)(dims).double()
    optimizer = torch.optim.AdamW(model.parameters(), lr=model.lr, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 5, 0.9)

    ckpt_path = f'checkpoints/{modelname}_{args.dataset}/model.ckpt'
    can_restore = os.path.exists(ckpt_path) and (not args.retrain or args.test)
    if not can_restore:
        print(f"{color.GREEN}Creating new model: {model.name}{color.ENDC}")
        return model, optimizer, scheduler, -1, []

    # weights_only=False unpickles arbitrary objects: only load trusted checkpoints.
    state = torch.load(ckpt_path, weights_only=False)
    model.load_state_dict(state['model_state_dict'])
    optimizer.load_state_dict(state['optimizer_state_dict'])
    scheduler.load_state_dict(state['scheduler_state_dict'])
    return model, optimizer, scheduler, state['epoch'], state['accuracy_list']

def filter_noise_ma(df, WS = 100,reduction = False):
    """Smooth every column with a trailing moving average over ``WS`` rows.

    The input frame is not modified.  The first ``WS`` index labels are dropped
    from the result; with ``reduction=True`` only every ``WS``-th remaining row
    is kept.

    NOTE(review): ``rolling(WS)`` already yields a value at position ``WS - 1``,
    so dropping ``WS`` rows also discards that first averaged row -- confirm
    this is intended.
    """
    smoothed = df.copy(deep=True)
    for name in df.columns:
        smoothed[name] = smoothed[name].rolling(WS).mean()

    trimmed = smoothed.drop(df.index[:WS])
    return trimmed[::WS] if reduction else trimmed

def filter_noise_es(df, alpha=0.4, reduction=False):
    """Exponentially smooth every column of ``df`` with smoothing factor ``alpha``.

    Uses the recursive form (``adjust=False``).  The input frame is not
    modified.  With ``reduction=True`` the result is sliced with a step equal
    to the number of rows, which keeps only the first row.
    """
    result = df.copy(deep=True)
    for name in df.columns:
        result[name] = df[name].ewm(alpha=alpha, adjust=False).mean()

    if not reduction:
        return result
    return result[::len(df)]  # Adjust sparsity if needed

def wgn_pandas(df_withtime, snr, alpha=0.15, window_size=120):
    """Add chunk-wise white Gaussian noise to every non-timestamp column, then smooth.

    The frame is processed in consecutive chunks of ``window_size`` rows.  For
    each chunk the per-column mean power is measured, the noise power for the
    requested ``snr`` (dB) is derived from it, and Gaussian noise of that power
    -- additionally divided by 100 -- is added.  The noisy frame is then passed
    through ``filter_noise_es`` with ``alpha``.

    Args:
        df_withtime: DataFrame with a 'TimeStamp' column; every other column
            must support arithmetic.  The frame is not modified.
        snr: Signal-to-noise ratio in dB used to size the noise.
        alpha: Smoothing factor forwarded to ``filter_noise_es``.
        window_size: Number of rows per chunk.

    Returns:
        New DataFrame on a 0..n-1 index: 'TimeStamp' followed by the noisy,
        smoothed columns.  Noise comes from NumPy's global random state, so the
        result is only reproducible if the caller seeds ``np.random``.
    """
    df_no_timestamp = df_withtime.drop(columns=['TimeStamp'])

    # Collect the noisy chunks and concatenate once instead of writing into a
    # pre-allocated object-dtype frame chunk by chunk.
    noisy_windows = []
    for start in range(0, len(df_no_timestamp), window_size):
        window = df_no_timestamp.iloc[start:start + window_size]

        Ps = np.sum(np.power(window, 2), axis=0) / len(window)  # mean signal power per column
        Pn = Ps / (np.power(10, snr / 10))                      # noise power for the target SNR

        noise = np.random.randn(*window.shape) * np.sqrt(Pn.values)
        noisy_windows.append(window + (noise / 100))

    # pd.concat rejects an empty list, so an empty input keeps its (empty) columns.
    noisy_df = pd.concat(noisy_windows) if noisy_windows else df_no_timestamp.copy()
    noisy_df = filter_noise_es(noisy_df.reset_index(drop=True), alpha)

    # Non-inplace reset: the Series comes from the caller's frame and must not
    # have its index rewritten behind the caller's back.
    df_timestamp = df_withtime['TimeStamp'].reset_index(drop=True)

    return pd.concat([df_timestamp, noisy_df], axis=1)

# Ordered list of the sensor columns fed to the models.  Later cells select
# these columns with df_feature[feature_set] and label column i of each model's
# loss array with feature_set[i], so the order must not be changed casually.
feature_set = ['Active Power', 'Reactive Power', 'Governor speed actual', 'UGB X displacement', 'UGB Y displacement',
    'LGB X displacement', 'LGB Y displacement', 'TGB X displacement',
    'TGB Y displacement', 'Stator winding temperature 13',
    'Stator winding temperature 14', 'Stator winding temperature 15',
    'Surface Air Cooler Air Outlet Temperature',
    'Surface Air Cooler Water Inlet Temperature',
    'Surface Air Cooler Water Outlet Temperature',
    'Stator core temperature', 'UGB metal temperature',
    'LGB metal temperature 1', 'LGB metal temperature 2',
    'LGB oil temperature', 'Penstock Flow', 'Turbine flow',
    'UGB cooling water flow', 'LGB cooling water flow',
    'Generator cooling water flow', 'Governor Penstock Pressure',
    'Penstock pressure', 'Opening Wicked Gate', 'UGB Oil Contaminant',
    'Gen Thrust Bearing Oil Contaminant']

In [None]:
# Folder holding the train/test CSV export of the dataset.
dataset_folder = 'data/CustomAWGN30ES15'
df_train = pd.read_csv(os.path.join(dataset_folder, 'train.csv'))
df_test = pd.read_csv(os.path.join(dataset_folder, 'test.csv'))
# Keep only the raw value matrices, without their first column
# (presumably a timestamp/index column -- TODO confirm against the CSVs).
df_train, df_test = df_train.values[:, 1:], df_test.values[:, 1:]
# Column-wise min/max over train and test combined; later cells pass these to
# normalize3 so newly selected windows are scaled with the same bounds.
_, min_a, max_a = normalize3(np.concatenate((df_train, df_test), axis=0))

In [None]:
# Models to evaluate, in the order their columns appear in the comparison figures.
model_array = ["Attention", "DTAAD", "MTAD_GAT", "MAD_GAN", "TranAD", "DAGMM", "USAD"] # , CAE_M "GDN" MSCRED
# Per-model thresholds; the 0 placeholders are replaced once the loss percentiles are computed.
model_thr = dict.fromkeys(model_array, 0)

In [None]:
# Score the test split with every model and cache each model's loss array under
# loss_fold/, so the thresholds can be derived without re-running inference.
# NOTE(review): load_model builds the checkpoint path from args.dataset, not
# from dataset_name below -- confirm both refer to the same dataset.
dataset_name = "CustomAWGN30ES15"
os.makedirs('loss_fold', exist_ok=True)  # the pickle dump below needs the folder to exist

for model_now in model_array:
    train_loader, test_loader, labels = load_dataset(dataset_name)
    trainD, testD = next(iter(train_loader)), next(iter(test_loader))
    trainO, testO = trainD, testD
    model, optimizer, scheduler, epoch, accuracy_list = load_model(model_now, trainO.shape[1])

    # Only the test split is scored here, so only it is converted to windows
    # (windowing the unused training split was wasted work).
    if model.name in ['Attention', 'DAGMM', 'USAD', 'MSCRED', 'CAE_M', 'GDN', 'MTAD_GAT',
                        'MAD_GAN', 'TranAD'] or 'DTAAD' in model.name:
        testD = convert_to_windows(testD, model)

    # The former `torch.zero_grad = True` only set an unused attribute on the
    # torch module; inference mode is selected by eval() and training=False.
    model.eval()
    print(f'{color.HEADER}Testing {model_now} on {dataset_name}{color.ENDC}')
    loss, y_pred = backprop(0, model, testD, testO, optimizer, scheduler, training=False)

    with open(f'loss_fold/{model_now}.pickle', 'wb') as handle:
        pickle.dump(loss, handle, protocol=pickle.HIGHEST_PROTOCOL)

    # Release the large per-model objects before the next model is loaded.
    del loss, y_pred, trainD, testD, train_loader, test_loader, labels, model, optimizer

In [None]:
# Per-model, per-feature threshold: the 99th percentile of the cached loss column.
feature_count = len(feature_set)
for model_now in model_array:
    with open(f'loss_fold/{model_now}.pickle', 'rb') as handle:
        loss = pickle.load(handle)
    thresholds = []
    for index in range(feature_count):
        thresholds.append(np.percentile(loss[:, index], 99))
    model_thr[model_now] = thresholds

In [None]:
# Plant sensor history; only rows from 2020-01-01 onwards are kept.
# NOTE(review): absolute, machine-specific paths -- consider a configurable data directory.
# NOTE: pd.read_pickle can execute arbitrary code; only load trusted files.
df_data_withtime = pd.read_pickle("/run/media/fourier/Data2/Pras/Vale/time-series-autoencoder/my_data_5thn_olah.pickle")
mask = (df_data_withtime['TimeStamp'] >= '2020-01-01 00:00:00')
# .copy() makes the filtered frame independent, so the column assignments below
# do not write into a slice of the unpickled frame (SettingWithCopyWarning).
df_data_withtime = df_data_withtime.loc[mask].copy()

# Downcast every column except 'Load_Type' and 'TimeStamp' to a compact float dtype.
for column_name in df_data_withtime.columns:
    if column_name != 'Load_Type' and column_name != 'TimeStamp':
        df_data_withtime[column_name] = pd.to_numeric(df_data_withtime[column_name], downcast='float')

# Shutdown log, with start/end parsed to datetimes.
df_anomaly = pd.read_excel("/run/media/fourier/Data2/Pras/Vale/time-series-autoencoder/shutdown_list.xlsx", 'Sheet2')
df_anomaly['Start Time'] = pd.to_datetime(df_anomaly['Start Time'])
df_anomaly['End Time'] = pd.to_datetime(df_anomaly['End Time'])
df_anomaly_unplaned = df_anomaly.copy()

# Keep internal, unplanned shutdowns from 2020 onwards.
mask = (df_anomaly_unplaned['Interal/External'] == 'Internal') & (df_anomaly_unplaned['Shutdown Type'] == 'Unplanned') & (df_anomaly_unplaned['Start Time'] >= '2020-01-01 00:00:00')
df_anomaly_unplaned = df_anomaly_unplaned.loc[mask]
df_anomaly_unplaned = df_anomaly_unplaned.reset_index(drop=True)
# Discards the event at position 2.
# NOTE(review): the reason for excluding it is not recorded in the notebook.
df_anomaly_unplaned = df_anomaly_unplaned.drop(df_anomaly_unplaned.index[[2]])
df_anomaly_unplaned

In [8]:
# Length of each evaluation window.  Later cells use it both as minutes
# (timedelta(minutes=measured_horizon)) and as a row count
# (df_data_withtime.iloc[i:i + measured_horizon]).
measured_horizon = 60 * 3 * 1

In [9]:
# For each of the first three unplanned shutdowns, score the window ending
# 5 minutes (plus 100 * index_before minutes) before the shutdown with every
# model, and keep the five features whose loss exceeds the model's threshold
# most often (as a percentage of the window).
index_before = 0
windowed_models = ['Attention', 'DAGMM', 'USAD', 'MSCRED', 'CAE_M', 'GDN', 'MTAD_GAT', 'MAD_GAN', 'TranAD']

thr_array_fault = {}
for failure_index_list in range(3):
    threshold_percentage_all = {}
    end_date_filter = df_anomaly_unplaned.values[failure_index_list, 0] - timedelta(minutes=(100 * index_before) + 5)
    start_date_filter =  end_date_filter - timedelta(minutes=measured_horizon)

    mask = (df_data_withtime['TimeStamp'] > start_date_filter.strftime('%Y-%m-%d %H:%M:%S')) & (df_data_withtime['TimeStamp'] <= end_date_filter.strftime('%Y-%m-%d %H:%M:%S'))
    df_sel = df_data_withtime.loc[mask]
    df_sel = df_sel.reset_index(drop=True)
    df_sel = wgn_pandas(df_sel, 30, alpha=0.15)

    df_feature =  df_sel.iloc[:, 1:]
    df_feature = df_feature[feature_set]

    # Scale with the dataset-wide bounds so scores are comparable with the thresholds.
    df_feature, _, _ = normalize3(df_feature, min_a, max_a)
    df_feature = df_feature.astype(float)

    # A single batch spanning the whole window.
    test_loader = DataLoader(df_feature.values, batch_size=df_feature.shape[0])
    testD = next(iter(test_loader))
    testO = testD

    for model_now in model_array:
        model, _, _, _, _ = load_model(model_now, testO.shape[1])
        model.eval()

        # Always bind testD_now, so a non-windowed model can never reuse the
        # windows built for the previous model.
        if model.name in windowed_models or 'DTAAD' in model.name:
            testD_now = convert_to_windows(testD, model)
        else:
            testD_now = testD

        loss, y_pred = backprop(0, model, testD_now, testO, None, None, training=False)
        # The former `if 'TranAD' or 'DTAAD' in model.name` roll of testO was
        # always true and its result was never read, so it has been dropped.

        threshold_pass = {}
        for i in range(loss.shape[-1]):
            thres_bool = loss[:, i] > model_thr[model_now][i]
            threshold_pass[feature_set[i]] = (thres_bool.sum() / thres_bool.shape[0]) * 100

        # Keep the five features with the highest exceedance percentage.
        threshold_pass = dict(sorted(threshold_pass.items(), key=lambda item: item[1], reverse=True)[:5])
        threshold_percentage_all[model_now] = threshold_pass

    # NOTE(review): keyed by column 4 of the shutdown table; two shutdowns that
    # share this value would overwrite each other.
    thr_array_fault[df_anomaly_unplaned.values[failure_index_list, 4]] = threshold_percentage_all

  WeightNorm.apply(module, name, dim)
  WeightNorm.apply(module, name, dim)
  WeightNorm.apply(module, name, dim)


In [15]:
thr_array_fault

{'TGB Oil Level': {'Attention': {'Active Power': 0.0,
   'Reactive Power': 0.0,
   'Governor speed actual': 0.0,
   'UGB X displacement': 0.0,
   'UGB Y displacement': 0.0},
  'DTAAD': {'Active Power': 4.444444444444445,
   'Turbine flow': 4.444444444444445,
   'Opening Wicked Gate': 4.444444444444445,
   'Reactive Power': 0.0,
   'Governor speed actual': 0.0},
  'MTAD_GAT': {'UGB X displacement': 2.2222222222222223,
   'LGB cooling water flow': 1.6666666666666667,
   'Active Power': 0.0,
   'Reactive Power': 0.0,
   'Governor speed actual': 0.0},
  'MAD_GAN': {'Active Power': 0.0,
   'Reactive Power': 0.0,
   'Governor speed actual': 0.0,
   'UGB X displacement': 0.0,
   'UGB Y displacement': 0.0},
  'TranAD': {'Active Power': 0.0,
   'Reactive Power': 0.0,
   'Governor speed actual': 0.0,
   'UGB X displacement': 0.0,
   'UGB Y displacement': 0.0},
  'DAGMM': {'Turbine flow': 6.666666666666667,
   'Active Power': 6.111111111111111,
   'Opening Wicked Gate': 5.0,
   'Reactive Power': 

In [12]:
# Baseline: repeat the exceedance analysis on 15 randomly placed windows that
# do not overlap any logged shutdown.
# NOTE(review): neither `random` nor `np.random` is seeded, so the sampled
# windows and the injected noise differ on every run.
count_file = 0
thr_array_random = []
windowed_models = ['Attention', 'DAGMM', 'USAD', 'MSCRED', 'CAE_M', 'GDN', 'MTAD_GAT', 'MAD_GAN', 'TranAD']
# Largest start position that still leaves measured_horizon rows, so a window
# drawn near the end of the data is not shorter than the others.
max_start = max(1, len(df_data_withtime) - measured_horizon + 1)
while count_file < 15:
    random_index = random.randrange(max_start)
    df_sel = df_data_withtime.iloc[random_index:random_index + (measured_horizon), :]
    df_sel = df_sel.reset_index(drop=True)
    df_timestamp = df_sel.iloc[:, 0]

    firstTimestamp = df_timestamp.iloc[0]
    lastTimestamp = df_timestamp.iloc[-1]
    is_infailure = False
    failure_time = np.inf

    for _, anomaly in df_anomaly.iterrows():
        start = anomaly['Start Time']
        end = anomaly['End Time']
        anomaly_type = anomaly['Interal/External']

        # Hours from the window start to the nearest later internal shutdown.
        if anomaly_type == 'Internal':
            timedelta_hours = (firstTimestamp - start) / np.timedelta64(1, 'h')
            if timedelta_hours < 0 and np.abs(timedelta_hours) < failure_time:
                failure_time = np.abs(timedelta_hours)

        # Interval-overlap test.  Checking only the two window endpoints missed
        # shutdowns that start and end inside the window.
        if start <= lastTimestamp and end >= firstTimestamp:
            is_infailure = True
            break

    # Use the window only if it is shutdown-free and an internal shutdown
    # follows its start.
    if not is_infailure and failure_time != np.inf:
        threshold_percentage_all = {}
        count_file = count_file + 1

        df_sel = wgn_pandas(df_sel, 30, alpha=0.15)
        df_feature =  df_sel.iloc[:, 1:]
        df_feature = df_feature[feature_set]

        df_feature, _, _ = normalize3(df_feature, min_a, max_a)
        df_feature = df_feature.astype(float)

        test_loader = DataLoader(df_feature.values, batch_size=df_feature.shape[0])
        testD = next(iter(test_loader))
        testO = testD

        for model_now in model_array:
            model, _, _, _, _ = load_model(model_now, testO.shape[1])
            model.eval()

            # Always bind testD_now, so a non-windowed model can never reuse
            # the windows built for the previous model.
            if model.name in windowed_models or 'DTAAD' in model.name:
                testD_now = convert_to_windows(testD, model)
            else:
                testD_now = testD

            loss, y_pred = backprop(0, model, testD_now, testO, None, None, training=False)

            threshold_pass = {}
            for i in range(loss.shape[-1]):
                thres_bool = loss[:, i] > model_thr[model_now][i]
                threshold_pass[feature_set[i]] = (thres_bool.sum() / thres_bool.shape[0]) * 100

            # Keep the five features with the highest exceedance percentage.
            threshold_pass = dict(sorted(threshold_pass.items(), key=lambda item: item[1], reverse=True)[:5])
            threshold_percentage_all[model_now] = threshold_pass

        thr_array_random.append(threshold_percentage_all)

  WeightNorm.apply(module, name, dim)
  WeightNorm.apply(module, name, dim)
  WeightNorm.apply(module, name, dim)
  WeightNorm.apply(module, name, dim)
  WeightNorm.apply(module, name, dim)
  WeightNorm.apply(module, name, dim)
  WeightNorm.apply(module, name, dim)
  WeightNorm.apply(module, name, dim)
  WeightNorm.apply(module, name, dim)
  WeightNorm.apply(module, name, dim)
  WeightNorm.apply(module, name, dim)
  WeightNorm.apply(module, name, dim)
  WeightNorm.apply(module, name, dim)
  WeightNorm.apply(module, name, dim)
  WeightNorm.apply(module, name, dim)


In [14]:
thr_array_random

[{'Attention': {'Active Power': 0.0,
   'Reactive Power': 0.0,
   'Governor speed actual': 0.0,
   'UGB X displacement': 0.0,
   'UGB Y displacement': 0.0},
  'DTAAD': {'LGB Y displacement': 1.1111111111111112,
   'Active Power': 0.0,
   'Reactive Power': 0.0,
   'Governor speed actual': 0.0,
   'UGB X displacement': 0.0},
  'MTAD_GAT': {'Active Power': 0.0,
   'Reactive Power': 0.0,
   'Governor speed actual': 0.0,
   'UGB X displacement': 0.0,
   'UGB Y displacement': 0.0},
  'MAD_GAN': {'LGB X displacement': 2.7777777777777777,
   'LGB Y displacement': 2.2222222222222223,
   'Active Power': 0.0,
   'Reactive Power': 0.0,
   'Governor speed actual': 0.0},
  'TranAD': {'LGB X displacement': 0.5555555555555556,
   'Active Power': 0.0,
   'Reactive Power': 0.0,
   'Governor speed actual': 0.0,
   'UGB X displacement': 0.0},
  'DAGMM': {'LGB Y displacement': 2.2222222222222223,
   'LGB X displacement': 1.6666666666666667,
   'Active Power': 0.0,
   'Reactive Power': 0.0,
   'Governor spe

In [None]:
# For each unplanned shutdown, save one figure comparing all models: one column
# per model, raw Active Power on top, then one loss panel per feature with the
# model's threshold drawn in red.
index_before = 0
windowed_models = ['Attention', 'DAGMM', 'USAD', 'MSCRED', 'CAE_M', 'GDN', 'MTAD_GAT', 'MAD_GAN', 'TranAD']
feature_num = len(feature_set) + 1  # feature panels plus the raw Active Power panel
os.makedirs('result_colate', exist_ok=True)  # savefig does not create folders

for failure_index_list in range(4):
    end_date_filter = df_anomaly_unplaned.values[failure_index_list, 0] - timedelta(minutes=(100 * index_before) + 5)
    start_date_filter =  end_date_filter - timedelta(minutes=60 * 12 * 1)

    mask = (df_data_withtime['TimeStamp'] > start_date_filter.strftime('%Y-%m-%d %H:%M:%S')) & (df_data_withtime['TimeStamp'] <= end_date_filter.strftime('%Y-%m-%d %H:%M:%S'))
    df_sel = df_data_withtime.loc[mask]
    df_sel = df_sel.reset_index(drop=True)
    df_sel = wgn_pandas(df_sel, 30, alpha=0.15)

    df_timestamp = df_sel.iloc[:, 0]
    df_feature =  df_sel.iloc[:, 1:]
    df_feature = df_feature[feature_set]
    raw_active = df_feature['Active Power'].values  # captured before scaling

    df_feature, _, _ = normalize3(df_feature, min_a, max_a)
    df_feature = df_feature.astype(float)

    test_loader = DataLoader(df_feature.values, batch_size=df_feature.shape[0])
    testD = next(iter(test_loader))
    testO = testD

    fig, axes = plt.subplots(feature_num, len(model_array), figsize=(40, 60))
    # Transposing first makes each model's column contiguous in the flat array.
    axes_flat = axes.T.flatten()

    for i in range(len(model_array)):
        axes_flat[i * feature_num].plot(raw_active)
        axes_flat[i * feature_num].set_title(f"Active Power")
        axes_flat[i * feature_num].grid(True)

    for idx_model, model_now in enumerate(model_array):
        model, _, _, _, _ = load_model(model_now, testO.shape[1])
        model.eval()

        # Always bind testD_now, so a non-windowed model can never reuse the
        # windows built for the previous model.
        if model.name in windowed_models or 'DTAAD' in model.name:
            testD_now = convert_to_windows(testD, model)
        else:
            testD_now = testD

        loss, y_pred = backprop(0, model, testD_now, testO, None, None, training=False)

        for i in range(loss.shape[-1]):
            # +1 skips the Active Power panel at the top of the model's column.
            index_plot = i + 1 + (idx_model * feature_num)
            axes_flat[index_plot].plot(loss[:, i])
            axes_flat[index_plot].axhline(y=model_thr[model_now][i], c="red")
            axes_flat[index_plot].set_title(feature_set[i])
            axes_flat[index_plot].grid(True)

    fig.suptitle(f"{df_anomaly_unplaned.values[failure_index_list, 4]}_{df_anomaly_unplaned.values[failure_index_list, 1]}_{'-'.join(model_array)}", y=1)
    fig.tight_layout()
    fig.savefig(f'result_colate/{str(df_anomaly_unplaned.values[failure_index_list, 1])}.png')
    # Close this figure explicitly so large figures do not pile up across iterations.
    plt.close(fig)

In [None]:
# Baseline figures: the same model-comparison plot for 15 randomly placed
# windows that do not overlap any logged shutdown.
# NOTE(review): neither `random` nor `np.random` is seeded, so the sampled
# windows and the injected noise differ on every run.
count_file = 0
window_rows = 60 * 12 * 1
windowed_models = ['Attention', 'DAGMM', 'USAD', 'MSCRED', 'CAE_M', 'GDN', 'MTAD_GAT', 'MAD_GAN', 'TranAD']
feature_num = len(feature_set) + 1  # feature panels plus the raw Active Power panel
os.makedirs('result_colate', exist_ok=True)  # savefig does not create folders
# Largest start position that still leaves a full window of rows.
max_start = max(1, len(df_data_withtime) - window_rows + 1)
while count_file < 15:
    random_index = random.randrange(max_start)
    df_sel = df_data_withtime.iloc[random_index:random_index + window_rows, :]
    df_sel = df_sel.reset_index(drop=True)
    df_timestamp = df_sel.iloc[:, 0]

    firstTimestamp = df_timestamp.iloc[0]
    lastTimestamp = df_timestamp.iloc[-1]
    is_infailure = False
    failure_time = np.inf

    for _, anomaly in df_anomaly.iterrows():
        start = anomaly['Start Time']
        end = anomaly['End Time']
        anomaly_type = anomaly['Interal/External']

        # Hours from the window start to the nearest later internal shutdown.
        if anomaly_type == 'Internal':
            timedelta_hours = (firstTimestamp - start) / np.timedelta64(1, 'h')
            if timedelta_hours < 0 and np.abs(timedelta_hours) < failure_time:
                failure_time = np.abs(timedelta_hours)

        # Interval-overlap test.  Checking only the two window endpoints missed
        # shutdowns that start and end inside the window.
        if start <= lastTimestamp and end >= firstTimestamp:
            is_infailure = True
            break

    if not is_infailure and failure_time != np.inf:
        count_file = count_file + 1

        df_sel = wgn_pandas(df_sel, 30, alpha=0.15)
        df_timestamp = df_sel.iloc[:, 0]
        df_feature =  df_sel.iloc[:, 1:]
        df_feature = df_feature[feature_set]
        raw_active = df_feature['Active Power'].values  # captured before scaling

        df_feature, _, _ = normalize3(df_feature, min_a, max_a)
        df_feature = df_feature.astype(float)

        test_loader = DataLoader(df_feature.values, batch_size=df_feature.shape[0])
        testD = next(iter(test_loader))
        testO = testD

        fig, axes = plt.subplots(feature_num, len(model_array), figsize=(40, 60))
        # Transposing first makes each model's column contiguous in the flat array.
        axes_flat = axes.T.flatten()

        for i in range(len(model_array)):
            axes_flat[i * feature_num].plot(raw_active)
            axes_flat[i * feature_num].set_title(f"Active Power")
            axes_flat[i * feature_num].grid(True)

        for idx_model, model_now in enumerate(model_array):
            model, _, _, _, _ = load_model(model_now, testO.shape[1])
            model.eval()

            # Always bind testD_now, so a non-windowed model can never reuse
            # the windows built for the previous model.
            if model.name in windowed_models or 'DTAAD' in model.name:
                testD_now = convert_to_windows(testD, model)
            else:
                testD_now = testD

            loss, y_pred = backprop(0, model, testD_now, testO, None, None, training=False)

            for i in range(loss.shape[-1]):
                # +1 skips the Active Power panel at the top of the model's column.
                index_plot = i + 1 + (idx_model * feature_num)
                axes_flat[index_plot].plot(loss[:, i])
                axes_flat[index_plot].axhline(y=model_thr[model_now][i], c="red")
                axes_flat[index_plot].set_title(feature_set[i])
                axes_flat[index_plot].grid(True)

        fig.suptitle(f"{firstTimestamp}_{'-'.join(model_array)}", y=1)
        fig.tight_layout()
        fig.savefig(f'result_colate/random_{count_file}.png')
        # Close this figure explicitly so large figures do not pile up across iterations.
        plt.close(fig)

In [None]:
# Density histogram of one feature's loss, binned with the Freedman-Diaconis
# rule (width = 2 * IQR * n^(-1/3)).
# NOTE(review): `loss` is whatever the most recently executed cell left behind.
feature_idx = 4
loss_now = loss[:, feature_idx]
q25, q75 = np.percentile(loss_now, [25, 75])
bin_width = 2 * (q75 - q25) * len(loss_now) ** (-1/3)
# A zero IQR gives a zero width (division by zero), and a very small value
# range can round to zero bins; fall back to a single bin in both cases.
if bin_width > 0:
    bins = max(1, round((loss_now.max() - loss_now.min()) / bin_width))
else:
    bins = 1

plt.hist(loss_now, density=True, bins=bins)
plt.title(feature_set[feature_idx])
plt.xlabel('loss')
plt.ylabel('density')
plt.show()

In [None]:
# Reload the sensor history and shutdown log.  Unlike the earlier load cell,
# this one does NOT drop the event at position 2, so positional indices into
# df_anomaly_unplaned refer to different rows after this cell has run.
# NOTE(review): absolute, machine-specific paths -- consider a configurable data directory.
# NOTE: pd.read_pickle can execute arbitrary code; only load trusted files.
df_data_withtime = pd.read_pickle("/run/media/fourier/Data2/Pras/Vale/time-series-autoencoder/my_data_5thn_olah.pickle")
mask = (df_data_withtime['TimeStamp'] >= '2020-01-01 00:00:00')
# .copy() makes the filtered frame independent, so the column assignments below
# do not write into a slice of the unpickled frame (SettingWithCopyWarning).
df_data_withtime = df_data_withtime.loc[mask].copy()

# Downcast every column except 'Load_Type' and 'TimeStamp' to a compact float dtype.
for column_name in df_data_withtime.columns:
    if column_name != 'Load_Type' and column_name != 'TimeStamp':
        df_data_withtime[column_name] = pd.to_numeric(df_data_withtime[column_name], downcast='float')

df_anomaly = pd.read_excel("/run/media/fourier/Data2/Pras/Vale/time-series-autoencoder/shutdown_list.xlsx", 'Sheet2')
df_anomaly['Start Time'] = pd.to_datetime(df_anomaly['Start Time'])
df_anomaly['End Time'] = pd.to_datetime(df_anomaly['End Time'])
df_anomaly_unplaned = df_anomaly.copy()

# Keep internal, unplanned shutdowns from 2020 onwards.
mask = (df_anomaly_unplaned['Interal/External'] == 'Internal') & (df_anomaly_unplaned['Shutdown Type'] == 'Unplanned') & (df_anomaly_unplaned['Start Time'] >= '2020-01-01 00:00:00')
df_anomaly_unplaned = df_anomaly_unplaned.loc[mask]
df_anomaly_unplaned = df_anomaly_unplaned.reset_index(drop=True)
df_anomaly_unplaned

In [None]:
# Detailed look at one shutdown: score the 6 hours leading up to it with the
# model named by model_now, and plot raw Active Power with the summed and mean
# anomaly score, marking logged shutdowns inside the window.
failure_index_list = 3
index_before = 0

end_date_filter = df_anomaly_unplaned.values[failure_index_list, 0] + timedelta(minutes=1) - timedelta(minutes=(100 * index_before) + 1)
start_date_filter =  end_date_filter - timedelta(minutes=60 * 6 * 1)

mask = (df_data_withtime['TimeStamp'] > start_date_filter.strftime('%Y-%m-%d %H:%M:%S')) & (df_data_withtime['TimeStamp'] <= end_date_filter.strftime('%Y-%m-%d %H:%M:%S'))
df_sel = df_data_withtime.loc[mask]
df_sel = df_sel.reset_index(drop=True)
# NOTE(review): add_time_series_noise is not defined in this notebook; unless a
# star import provides it this raises NameError.  The other cells use
# wgn_pandas(df_sel, 30, alpha=0.15) at this step -- confirm which is intended.
df_sel = add_time_series_noise(df_sel)

df_timestamp = df_sel.iloc[:, 0]
df_feature =  df_sel.iloc[:, 1:]
df_feature = df_feature[feature_set]
raw_active = df_feature['Active Power'].values  # captured before scaling

df_feature, _, _ = normalize3(df_feature, min_a, max_a)
df_feature = df_feature.astype(float)

test_loader = DataLoader(df_feature.values, batch_size=df_feature.shape[0])
testD = next(iter(test_loader))
testO = testD

# Load the model named by model_now explicitly, rather than relying on whatever
# `model` object an earlier loop left behind; this also keeps the figure title
# (which prints model_now) in agreement with the model that was scored.
# NOTE(review): model_now itself is still the last value assigned by an earlier cell.
model, _, _, _, _ = load_model(model_now, testO.shape[1])
model.eval()

if model.name in ['Attention', 'DAGMM', 'USAD', 'MSCRED', 'CAE_M', 'GDN', 'MTAD_GAT', 'MAD_GAN', 'TranAD'] or 'DTAAD' in model.name:
    testD = convert_to_windows(testD, model)

# Aliases kept in case other cells reference them.
data = testD
dataO = testO

# optimizer/scheduler are not needed for inference, and `optimizer` was deleted
# by the scoring cell, so pass None as the other evaluation cells do.
loss, y_pred = backprop(0, model, testD, testO, None, None, training=False)
# `'TranAD' or 'DTAAD' in model.name` was always true (a non-empty string is
# truthy), which shifted the ground truth for every model.
if 'TranAD' in model.name or 'DTAAD' in model.name: testO = torch.roll(testO, 1, 0)

# Skip the first 5 steps of every series.
y_true = testO.cpu().detach().numpy()[5:, :]
y_pred = y_pred[5:, :]
ascore = loss[5:, :]

fig, (ax1, ax2, ax3) = plt.subplots(3, 1, sharex=True)
ax1.plot(df_timestamp.values[5:], raw_active[5:])
ax2.plot(df_timestamp.values[5:], smooth(np.sum(ascore, axis=-1)), color='y', label='sum')
ax3.plot(df_timestamp.values[5:], smooth(np.mean(ascore, axis=-1)), color='g', label='Score')

shutdown_str = ''
for _, anomaly in df_anomaly.iterrows():
    start = anomaly['Start Time']
    end = anomaly['End Time']
    anomaly_type = anomaly['Interal/External']

    # Shade only shutdowns that overlap the plotted window, clipped to its bounds.
    if start <= end_date_filter and end >= start_date_filter:
        if start <= start_date_filter:
            start = start_date_filter

        if end >= end_date_filter:
            end = end_date_filter

        if anomaly['Shutdown Type'] == 'Unplanned':
            if anomaly_type == 'Internal':
                ax1.axvspan(start, end, color='red', alpha=0.6, label=f'{anomaly["Shutdown Type"]} Shutdown Area')
            elif anomaly_type == 'External':
                ax1.axvspan(start, end, color='gray', alpha=0.8, label=f'{anomaly["Shutdown Type"]} Shutdown Area')
        elif anomaly['Shutdown Type'] == 'Planned':
            ax1.axvspan(start, end, color='purple', alpha=0.6, label=f'{anomaly_type} Shutdown Area')
        # The last overlapping event wins and is shown in the title.
        shutdown_str = anomaly['Event']

ax3.set_xlabel('Timestamp')
plt.gcf().autofmt_xdate()
fig.suptitle(f'{model_now} || {shutdown_str}')
# plt.show() renders under the inline backend; Figure.show() only warns there.
plt.show()

In [None]:
# Share of the total anomaly score carried by each sensor over the last 60
# steps, averaged over those steps and ranked from largest to smallest.
recent_scores = ascore[-60:, :]
recent_totals = np.sum(ascore, axis=-1)[-60:, np.newaxis]
contributions = recent_scores / recent_totals
overall_contributions = np.mean(contributions, axis=0)

ranked_indices = np.argsort(overall_contributions)[::-1]  # Sort descending
rankings = [(position, idx, overall_contributions[idx]) for position, idx in enumerate(ranked_indices, start=1)]

print("Sensor Rankings (Overall Contribution):")
# Only the four highest-ranked sensors are reported.
for rank, sensor_idx, contribution in rankings[:4]:
    print(f"Rank {rank}: Sensor {feature_set[sensor_idx]} with contribution {contribution:.2f}")

In [None]:
# One figure per feature: true vs. predicted signal on top, the smoothed
# anomaly score with the model's threshold below.
for dim in range(y_true.shape[1]):
    y_t, y_p, a_s = y_true[:, dim], y_pred[:, dim], ascore[:, dim]
    fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)

    ax1.set_ylabel('Value')
    ax1.set_title(f'{feature_set[dim]}')

    ax1.plot(y_t, label='True')
    ax1.plot(y_p, '-', alpha=0.6, label='Predicted')
    # The first figure gets the wide two-column legend; build each legend once.
    if dim == 0:
        ax1.legend(ncol=2, bbox_to_anchor=(0.6, 1.02))
    else:
        ax1.legend()

    ax2.plot(smooth(a_s), color='g', label='Score')
    # `array_percentile` is never defined in this notebook; use the per-feature
    # threshold of the scored model, as the comparison figures do.
    # NOTE(review): confirm model_thr[model_now] is the intended threshold here.
    ax2.axhline(y=model_thr[model_now][dim], c="red")
    ax2.set_xlabel('Timestamp')
    ax2.set_ylabel('Anomaly Score')
    ax2.legend()
    # Render now, so dozens of open figures do not accumulate until the cell ends.
    plt.show()