In [1]:
import json
import os
import random
import shutil
import time
from importlib import import_module

import numpy as np
import torch

from Preprocess.Normalization import minMaxNormalization
from Preprocess.Window import convertToSlidingWindow
from Utils.DataUtil import readData
from Utils.DistanceUtil import KLDivergence, Softmax, JSDivergence
from Utils.EvalUtil import findSegment, countResult
from Utils.LogUtil import wirteLog
from Utils.PlotUtil import plotAllResult
import pandas as pd








In [2]:


# def calculateSimilarity(origin_sample_list,new_sample_list,old_anomaly_scores,old_label_samples,threshold = 0.5):
#
#     '''
#     计算新数据列表和旧数据列表的相似性，返回列表
#     :param origin_sample_list: 需要比较的旧数据的样本列表,即窗口列表
#     :param new_sample_list: 需要比较的新数据的样本列表,即窗口列表
#     :return:返回列表格式，每个新数据样本对应的相似性最大的旧数据样本的Index以及相似性数值。 [(max_similarity_index,max_similarity)]
#     '''
#
#     total_similarity = 0
#
#     result = []
#     for new_index,new_sample in enumerate(new_sample_list):
#         max_similarity = 0
#         max_similarity_index = 0
#         for origin_index,origin_sample in enumerate(origin_sample_list):
#
#             similarity = getSimilarity(origin_sample,new_sample)
#             if similarity > max_similarity:
#                 max_similarity = similarity
#                 max_similarity_index = origin_index
#
#         total_similarity += max_similarity
#
#         result.append((max_similarity_index,max_similarity))
#
#     return result,total_similarity


def getMatrixKey(sample):
    """
    Build a coarse string key that summarises one window.

    Four statistics are used: the overall mean, the overall variance, the
    mean of the last row and the mean of the first row. Each one is multiplied
    by 100 and floored, the four formatted numbers are concatenated in that
    order, and every "." in the result is replaced by "-".

    The key is a summary rather than an identity: two different windows whose
    four floored statistics coincide receive the same key.

    :param sample: window to summarise; ``sample[0]`` and ``sample[-1]`` are
                   read as its first and last rows
    :return: the key as a ``str``
    """
    row_first = np.mean(sample[0])
    row_last = np.mean(sample[-1])
    overall_mean = np.mean(sample)
    overall_var = np.var(sample)

    # Scale by 100 and floor, i.e. keep two decimal places of each statistic.
    ordered_stats = (overall_mean, overall_var, row_last, row_first)
    joined = "".join(f"{np.floor(stat * 100)}" for stat in ordered_stats)
    return joined.replace(".", "-")


def getDistinctAndNum(sample_all) -> dict:
    """
    Count how often each window occurs in ``sample_all``.

    Windows are grouped by the key returned by ``getMatrixKey``. For the first
    window seen with a given key, ``countSame`` is called to count the windows
    in ``sample_all`` that are element-wise equal to it; later windows with
    the same key are skipped.

    :param sample_all: collection of windows
    :return: dict mapping key -> count for the first window carrying that key
    """
    counts_by_key = {}
    for window in sample_all:
        key = getMatrixKey(window)
        if key in counts_by_key:
            # A window with this key has already been counted.
            continue
        counts_by_key[key] = countSame(window, sample_all)

    return counts_by_key







def unique(array_list):
    """
    Remove duplicate 2-D arrays from a list.

    Each array is converted to a tuple of row tuples and used as a dict key.
    The output keeps the order in which each distinct content first appeared;
    when several arrays share the same content, the object kept is the last
    one encountered.

    :param array_list: iterable of 2-D arrays (each must be iterable row by row)
    :return: list containing one array per distinct content
    """
    by_content = {}
    for array in array_list:
        # Nested tuples are hashable, unlike the arrays themselves.
        content_key = tuple(tuple(row) for row in array)
        by_content[content_key] = array

    return list(by_content.values())




In [3]:
def convertToWindow(data, window_size):
    """
    Turn a 2-D series into stride-1 sliding windows, one window per time step.

    The window for time step ``i`` ends at ``i``. For the earliest time steps,
    where fewer than ``window_size`` points are available, the window is
    completed by repeating ``data[0]`` at the front.

    :param data: array of shape (time, channels)
    :param window_size: number of time steps per window
    :return: array of shape (time, window_size, channels)
    """
    stacked = []

    for end in range(len(data)):
        if end < window_size:
            # Not enough history yet: repeat the first row to fill the gap.
            pad_rows = window_size - end
            padding = np.tile(data[0], pad_rows).reshape(pad_rows, -1)
            stacked.append(np.concatenate([padding, data[1:end + 1]]))
        else:
            stacked.append(data[end - window_size + 1:end + 1])

    return np.stack(stacked)



In [4]:
def getConfigs():
    """
    Return a fresh dict holding the default hyper-parameters.

    A new dict is built on every call, so callers may add or overwrite keys
    without affecting each other.

    :return: dict of configuration values
    """
    return {
        # General training / network settings.
        "epoch": 2,
        "batch_size": 128,
        "window_size": 10,
        "identifier": "model-evaluation",
        "hidden_size": 64,
        "latent_size": 32,
        "num_layers": 2,
        "num_heads": 1,
        "drop_out_rate": 0.1,
        "learning_rate": 1e-3,
        "patience": 10,
        "mask": False,
        "lambda_energy": 0.1,
        "lambda_cov_diag": 0.005,
        # NOTE(review): presumably convolution settings -- confirm which model reads them.
        "num_filters": 3,
        "kernel_size": 3,
        # NOTE(review): presumably the explained-variance target of a PCA-based model -- confirm.
        "explained_var": 0.9,
        # NOTE(review): these look like one-class-SVM style options -- confirm.
        "kernel": "rbf",
        "gamma": "auto",
        "degree": 3,
        "coef0": 0.0,
        "tol": 0.001,
        "cache_size": 200,
        "shrinking": True,
        "nu": 0.48899475599830133,
        "step_max": 5,
        # NOTE(review): these look like isolation-forest style options -- confirm.
        "n_trees": 100,
        "max_samples": "auto",
        "max_features": 1,
        "bootstrap": False,
        "random_state": 42,
        "verbose": 0,
        "n_jobs": 1,
        "contamination": 0.5,
        # Remaining model-specific values (consumer not visible in this file).
        "nz": 10,
        "beta": 0.5,
    }
def getModel(config):
    """
    Instantiate the model named by ``config["model_name"]``.

    The class is looked up dynamically: module ``Models.<name>.Model`` is
    imported and the attribute called ``<name>`` is taken from it. The class
    is called with ``config``, converted with ``.float()`` and moved to CUDA
    when available, otherwise to the CPU.

    :param config: configuration dict; must contain ``"model_name"``
    :return: the constructed model
    """
    model_name = config["model_name"]
    model_module = import_module("Models." + model_name + ".Model")
    # The class carries the same name as the package it lives in.
    model_class = getattr(model_module, model_name)

    network = model_class(config).float()
    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    network.to(target_device)
    return network

def count_parameters(model):
    """
    Count the trainable scalar parameters of a model.

    :param model: object exposing ``parameters()``; every yielded item must
                  provide ``requires_grad`` and ``numel()``
    :return: sum of ``numel()`` over the parameters with ``requires_grad`` set
    """
    trainable_total = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total


def getDatasetSplitConfig():
    """
    Return the row index at which each single-file dataset is split.

    Callers in this file use the value as ``data[:index]`` for the "old" part
    and ``data[index:]`` for the "new" part.

    :return: new dict mapping dataset name -> split index
    """
    return {
        "SKAB": 26322,
        "PMS": 53122,
        "DMDS": 200000,
        "WADI": 130000,
        "SWAT": 155000,
    }

def checkHolderExist(path):
    """
    Make sure the directory ``path`` exists, creating parent directories too.

    ``exist_ok=True`` replaces the former "check, then create" sequence, which
    could fail if the directory appeared between the two steps.

    Note: if ``path`` already exists but is not a directory, ``os.makedirs``
    raises ``FileExistsError`` instead of silently returning.

    :param path: directory to create if missing
    """
    os.makedirs(path, exist_ok=True)

def splitFiles(files):
    """
    Randomly split a list of file names into two groups.

    The names are shuffled with the ``random`` module and cut at one third of
    the length (integer division). The shuffle is done on a copy, so the
    caller's list keeps its original order.

    :param files: sequence of file names
    :return: tuple ``(first_third, remainder)``; the caller in this file
             treats the first part as "new" and the rest as "old"
    """
    shuffled = list(files)
    random.shuffle(shuffled)
    cut = len(shuffled) // 3
    return shuffled[:cut], shuffled[cut:]

def convertRecToWindow(dataset = "WADI",window_size = 100):
    """
    Convert the already split "old" and "new" data of a dataset into windows.

    For each of the two modes, every file listed in
    ``./RecomData/<mode>/<dataset>/train`` is handed to ``writeWindowDataset``.
    The "old" mode is processed first.

    :param dataset: dataset folder name
    :param window_size: window length forwarded to ``writeWindowDataset``
    """
    for mode in ("old", "new"):
        recom_dataset_path = "./RecomData/" + mode + "/" + dataset
        for file in os.listdir(recom_dataset_path + "/train"):
            writeWindowDataset(base_path=recom_dataset_path, filename=file, window_size=window_size)




In [5]:
def processWADI(dataset,step):
    """
    Prepare one single-file dataset in three separately callable steps.

    * ``step == 1``: read ``./Data/<dataset>/train/<dataset>.csv``, replace NaN
      with 0, normalise with ``minMaxNormalization`` and save the same array
      under both the "old" and the "new" train folders.
    * ``step == 2``: read the test csv, replace NaN with 0, split the rows at
      the index from ``getDatasetSplitConfig``, normalise each part on its own
      and save them as the "old" and "new" test arrays.
    * ``step == 3``: read the label csv, squeeze it, split it at the same index
      and save the two parts.

    Any other ``step`` value does nothing.

    :param dataset: dataset name; also used as the file stem
    :param step: 1, 2 or 3 as described above
    """
    split_table = getDatasetSplitConfig()
    raw_root = "./Data/" + dataset
    npy_suffix = "/" + dataset + ".npy"

    if step == 1:
        train_dir_old = "./RecomData/old/" + dataset + "/train"
        train_dir_new = "./RecomData/new/" + dataset + "/train"
        checkHolderExist(train_dir_old)
        checkHolderExist(train_dir_new)

        train_values = pd.read_csv(raw_root + "/train/" + dataset + ".csv", header=None).to_numpy()
        train_values[np.isnan(train_values)] = 0
        train_values = minMaxNormalization(train_values)

        # The training data is not split: both sides receive the same array.
        np.save(train_dir_old + npy_suffix, train_values)
        np.save(train_dir_new + npy_suffix, train_values)

    elif step == 2:
        test_values = pd.read_csv(raw_root + "/test/" + dataset + ".csv", header=None).to_numpy()
        test_values[np.isnan(test_values)] = 0

        test_dir_old = "./RecomData/old/" + dataset + "/test"
        test_dir_new = "./RecomData/new/" + dataset + "/test"
        checkHolderExist(test_dir_new)
        checkHolderExist(test_dir_old)

        cut = split_table[dataset]
        # Each part is normalised independently of the other.
        old_part = minMaxNormalization(test_values[:cut, :])
        new_part = minMaxNormalization(test_values[cut:, :])

        np.save(test_dir_old + npy_suffix, old_part)
        np.save(test_dir_new + npy_suffix, new_part)

    elif step == 3:
        label_dir_old = "./RecomData/old/" + dataset + "/label"
        label_dir_new = "./RecomData/new/" + dataset + "/label"
        checkHolderExist(label_dir_old)
        checkHolderExist(label_dir_new)

        labels = pd.read_csv(raw_root + "/label/" + dataset + ".csv", header=None).to_numpy().squeeze()
        cut = split_table[dataset]

        np.save(label_dir_old + npy_suffix, labels[:cut])
        np.save(label_dir_new + npy_suffix, labels[cut:])




def _saveNormalizedFiles(files, source_dirs, target_dirs):
    """Convert each csv triple (train, test, label) in ``files`` to .npy; a file that fails is reported and skipped."""
    train_src, test_src, label_src = source_dirs
    train_dst, test_dst, label_dst = target_dirs

    for file in files:
        try:
            train_values = pd.read_csv(os.path.join(train_src, file), header=None).to_numpy()
            test_values = pd.read_csv(os.path.join(test_src, file), header=None).to_numpy()

            train_values[np.isnan(train_values)] = 0
            test_values[np.isnan(test_values)] = 0

            labels = pd.read_csv(os.path.join(label_src, file), header=None).to_numpy().squeeze()

            train_values = minMaxNormalization(train_values)
            test_values = minMaxNormalization(test_values)

            # Everything before the first "." becomes the output stem.
            stem = file.split(".")[0]
            np.save(train_dst + "/" + stem + ".npy", train_values)
            np.save(test_dst + "/" + stem + ".npy", test_values)
            np.save(label_dst + "/" + stem + ".npy", labels)
        except Exception as e:
            # Report the problem and move on to the next file.
            print(f"Error occurred while processing file {file}: {e}")


def datasetProcess():
    """
    Build the "old"/"new" data folders for every dataset in the built-in list.

    Each dataset is paired with a flag:

    * flag ``True``: the dataset consists of one csv per split. Training data
      is normalised and saved identically on both sides; test data and labels
      are cut at the index from ``getDatasetSplitConfig``, and each test part
      is normalised separately.
    * flag ``False``: the dataset consists of many files. ``splitFiles``
      randomly assigns them to "new" (first group) and "old" (second group),
      and every file is converted independently.

    Finally the saved arrays of both sides are converted to sliding windows
    with the ``window_size`` taken from ``getConfigs``.
    """
    dataset_pair = [ ("UCR", False),  ("SMD", False), ("SMAP", False), ("SKAB", True),
                   ("PMS", True), ("MSL", False), ("DMDS", True)]

    config = getConfigs()
    dataset_split_config = getDatasetSplitConfig()
    window_size = config["window_size"]

    for dataset, onlyone in dataset_pair:
        print("dataset:",dataset)
        raw_root = "./Data/" + dataset
        old_root = "./RecomData/old/" + dataset
        new_root = "./RecomData/new/" + dataset

        train_dir_old = old_root + "/train"
        train_dir_new = new_root + "/train"
        test_dir_old = old_root + "/test"
        label_dir_old = old_root + "/label"
        test_dir_new = new_root + "/test"
        label_dir_new = new_root + "/label"

        for folder in (train_dir_old, train_dir_new, test_dir_old,
                       label_dir_old, test_dir_new, label_dir_new):
            checkHolderExist(folder)

        # Separate the data into its "old" and "new" parts.
        if onlyone:
            train_values = pd.read_csv(raw_root + "/train/" + dataset + ".csv", header=None).to_numpy()
            test_values = pd.read_csv(raw_root + "/test/" + dataset + ".csv", header=None).to_numpy()

            train_values[np.isnan(train_values)] = 0
            test_values[np.isnan(test_values)] = 0

            labels = pd.read_csv(raw_root + "/label/" + dataset + ".csv", header=None).to_numpy().squeeze()

            cut = dataset_split_config[dataset]
            old_test = test_values[:cut, :]
            new_test = test_values[cut:, :]
            old_labels = labels[:cut]
            new_labels = labels[cut:]

            train_values = minMaxNormalization(train_values)
            old_test = minMaxNormalization(old_test)
            new_test = minMaxNormalization(new_test)

            npy_suffix = "/" + dataset + ".npy"
            np.save(train_dir_old + npy_suffix, train_values)
            np.save(train_dir_new + npy_suffix, train_values)
            np.save(test_dir_old + npy_suffix, old_test)
            np.save(test_dir_new + npy_suffix, new_test)
            np.save(label_dir_old + npy_suffix, old_labels)
            np.save(label_dir_new + npy_suffix, new_labels)

            # Drop the references before the next dataset is loaded.
            del train_values, old_test, new_test, old_labels, new_labels

        else:
            source_dirs = (raw_root + "/train/", raw_root + "/test/", raw_root + "/label/")

            # Random assignment of the files to the "new" and "old" groups.
            files_new, files_old = splitFiles(os.listdir(source_dirs[0]))

            _saveNormalizedFiles(files_new, source_dirs, (train_dir_new, test_dir_new, label_dir_new))
            _saveNormalizedFiles(files_old, source_dirs, (train_dir_old, test_dir_old, label_dir_old))

        # With both sides written, convert them to sliding windows ("old" first, then "new").
        convertRecToWindow(dataset=dataset, window_size=window_size)





In [6]:


def writeWindowDataset(base_path,filename,window_size):
    """
    Convert one saved file triple (train, test, label) to windows and store it.

    The arrays are loaded from ``<base_path>/train``, ``/test`` and ``/label``.
    Train and test go through ``convertToSlidingWindow``; the label array is
    shortened by dropping its first ``window_size - 1`` entries. The results
    are written under ``<base_path>/window/{train,test,label}/`` using the
    same file name.

    :param base_path: folder holding the ``train``, ``test`` and ``label`` subfolders
    :param filename: file name (including extension) present in all three subfolders
    :param window_size: window length
    """
    train_raw = np.load(base_path+"/train/"+filename)
    test_raw = np.load(base_path+"/test/"+filename)
    label_raw = np.load(base_path+"/label/"+filename)

    train_window = convertToSlidingWindow(train_raw, window_size=window_size)
    test_window = convertToSlidingWindow(test_raw, window_size=window_size)
    # NOTE(review): presumably keeps one label per window (the window's last step) -- confirm
    # against convertToSlidingWindow.
    label_tail = label_raw[window_size - 1:]

    print("test_window shape:",test_window.shape)
    print("label shape:",label_tail.shape)

    outputs = (
        (base_path + "/window/train/", train_window),
        (base_path + "/window/test/", test_window),
        (base_path + "/window/label/", label_tail),
    )
    for folder, _ in outputs:
        checkHolderExist(folder)
    for folder, payload in outputs:
        np.save(folder + "/" + filename , payload)


def evalOneDatasetFile(dataset_name,filename,mode = "old"):
    """
    Train and evaluate every model in the internal model list on one data file.

    For each model the function trains on the train split, scores the test
    split, asks the model for its best prediction under three protocols
    (``""``, ``"apa"``, ``"pa"``), plots the results, saves the anomaly scores
    and writes the accumulated ``config`` dict as a log.

    Outputs are written below the base path:

    * ``Plots/recommondation/<mode>/<dataset_name>/<filename>/<method>.pdf``
    * ``RecomData/scores/<mode>/<dataset_name>/<filename>/<method>.npy``
    * ``Logs/recommondation/<mode>/<dataset_name>/<filename>`` (via ``wirteLog``)

    :param dataset_name: dataset folder name under ``RecomData/<mode>``
    :param filename: file identifier passed to ``readData``
    :param mode: ``"old"`` or ``"new"`` side of the split data
    """
    config = getConfigs()
    # Other model names previously used here: "LSTMVAE", "LSTMAE", "NASALSTM", "DAGMM",
    # "TRANSFORMER", "TCNAE", "UAE", "TRANAD", "OmniAnomaly", "PCAAD", "IForestAD".
    model_list = ["LSTMV2"]

    # __file__ does not exist when this code runs in a notebook kernel; fall back to the
    # working directory so the function is usable both as a script and in a notebook.
    try:
        base_path = os.path.dirname(os.path.abspath(__file__))
    except NameError:
        base_path = os.getcwd()

    # Load the data.
    window_size = config["window_size"]
    data_train,data_test,label = readData(dataset_path = base_path + "/RecomData/" + mode + "/" + dataset_name ,filename = filename,file_type = "npy")
    label = label[window_size - 1:]
    print("data_train shape:",data_train.shape)
    print("data_test shape:", data_test.shape)
    print("label shape:", label.shape)
    input_dim = data_train.shape[-1]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    config["device"] = device
    config["base_path"] = base_path
    config["input_size"] = input_dim

    for method in model_list:
        config["model_name"] = method
        # The end of each iteration replaces "device" with a plain string for logging;
        # restore the torch.device so every model is built with the same kind of value.
        config["device"] = device

        if method in ["TRANSFORMER","TRANAD"]:
            config["epoch"] = 5
        else:
            config["epoch"] = 2

        print("training method:",method)

        model = getModel(config)

        config["model_param_num"] = count_parameters(model)
        config["identifier"] = dataset_name+"-"+method
        config["train_start_time"] = time.time()
        # Train the model.
        model.fit(train_data = data_train,write_log=True)
        config["train_end_time"] = time.time()

        print("finish training method:",method," cost time:",config["train_end_time"] - config["train_start_time"])

        config["test_start_time"] = time.time()
        anomaly_scores = model.test(data_test)
        config["test_end_time"] = time.time()

        # Best prediction under each of the three evaluation protocols.
        ori_predict_labels, ori_f1, ori_threshold = model.getBestPredict(anomaly_score=anomaly_scores, n_thresholds=25,
                                                             ground_truth_label=label,protocol="")

        apa_predict_labels, apa_f1, apa_threshold = model.getBestPredict(anomaly_score=anomaly_scores, n_thresholds=25,
                                                                         ground_truth_label=label,
                                                                         protocol="apa")

        pa_predict_labels, pa_f1, pa_threshold = model.getBestPredict(anomaly_score=anomaly_scores, n_thresholds=25,
                                                                      ground_truth_label=label,
                                                                      protocol="pa")

        # Confusion counts are stored as floats so they can be written to the log.
        (tp, fp, tn, fn) = countResult(predict_labels=ori_predict_labels, ground_truth=label)
        config["ori_tp"] = float(tp)
        config["ori_fp"] = float(fp)
        config["ori_tn"] = float(tn)
        config["ori_fn"] = float(fn)

        print("finish evaluating method:", method)

        # Visualisation: scores followed by the three label predictions.
        plot_yaxis = [anomaly_scores, ori_predict_labels, apa_predict_labels, pa_predict_labels]

        plot_path = base_path + "/Plots/recommondation/" + mode + "/" + dataset_name +"/" + filename
        checkHolderExist(plot_path)

        plotAllResult(x_axis=np.arange(len(anomaly_scores)), y_axises=plot_yaxis, title=config["model_name"],
                      save_path=plot_path + "/" + method + ".pdf",
                      segments=findSegment(label),
                      threshold=None)

        # Persist the raw anomaly scores; sampleAndMatch reads them back later.
        score_save_path = base_path + "/RecomData/scores/" + mode + "/"  + dataset_name + "/" + filename
        checkHolderExist(score_save_path)
        np.save(score_save_path + "/"  + method +".npy",anomaly_scores)

        config["ori_f1"] = ori_f1
        config["apa_f1"] = apa_f1
        config["pa_f1"] = pa_f1

        config["ori_threshold"] = ori_threshold
        config["apa_threshold"] = apa_threshold
        config["pa_threshold"] = pa_threshold
        # The log receives the device as a plain string.
        config["device"] = "cuda" if torch.cuda.is_available() else "cpu"
        wirteLog(base_path + "/Logs/recommondation/" + mode + "/"  + dataset_name + "/" + filename ,method,config)

    print("finish training model. start to test model.")





    

In [7]:
def sampleFromWindowData(data: np.ndarray,sample_num:int,indices:np.ndarray = np.array([])):
    """
    Pick samples from ``data``, either at given positions or at random ones.

    When ``indices`` is non-empty, exactly those positions are returned. When
    it is empty, positions are generated:

    * if ``len(data) // sample_num`` is at least ``sample_num``, the data is
      cut into ``sample_num`` consecutive intervals of that length and one
      position is drawn from each with ``random.randint`` (positions are
      therefore increasing);
    * otherwise positions are drawn without replacement with
      ``np.random.choice``. The number drawn is capped at ``len(data)``, so
      fewer than ``sample_num`` samples are returned for short inputs instead
      of raising ``ValueError``.

    ``sample_num == 0`` with empty ``indices`` still raises
    ``ZeroDivisionError``.

    :param data: indexable collection of samples
    :param sample_num: number of samples wanted when positions are generated
    :param indices: positions to reuse; empty means "generate new positions"
    :return: tuple ``(samples, indices)`` where ``samples`` is a list and
             ``indices`` are the positions actually used
    """
    length = len(data)

    if len(indices) == 0:
        # Width of each stratum for the evenly spread variant.
        interval = length // sample_num
        if interval < sample_num:
            draw_count = min(sample_num, length)
            indices = np.random.choice(length, draw_count, replace=False)
        else:
            indices = [
                random.randint(i * interval, (i + 1) * interval - 1)
                for i in range(sample_num)
            ]

    results = [data[sample_index] for sample_index in indices]

    return results,indices

In [8]:
def getEvaluationResult(mode = "old",dataset_list = [],method_list = []):
    """
    Collect the F1 scores and thresholds stored in the evaluation logs.

    For every dataset in ``dataset_list`` the entries of
    ``./Logs/recommondation/<mode>/<dataset>`` are listed; for each entry
    (name cut at the first ".") and each method, the file
    ``<entry>/<method>.json`` is read.

    All three F1 values are returned: ``ori_f1``, ``pa_f1`` and ``apa_f1``.
    (``apa_f1`` used to be missing because ``ori_f1`` was copied twice.)

    :param mode: ``"old"`` or ``"new"``
    :param dataset_list: iterable of ``(dataset_name, flag)`` pairs; the flag is not used here
    :param method_list: method names whose logs should be read
    :return: nested dict ``result[dataset][file_name][method][metric]``
    """
    path = "./Logs/recommondation/" + mode +"/"
    metric_keys = ("ori_f1", "pa_f1", "apa_f1",
                   "pa_threshold", "apa_threshold", "ori_threshold")

    result = {}
    for dataset, _isonly in dataset_list:
        result[dataset] = {}
        files_path = path + dataset

        for file_name in os.listdir(files_path):
            file_name = file_name.split(".")[0]
            result[dataset][file_name] = {}

            for method in method_list:
                eval_path = files_path + "/" + file_name + "/" + method + ".json"
                with open(eval_path, "r") as file:
                    data_dict = json.load(file)

                result[dataset][file_name][method] = {key: data_dict[key] for key in metric_keys}

    return result

def countSame(sample,all_sample):
    """
    Count the windows in ``all_sample`` that equal ``sample`` element-wise.

    The comparison is broadcast over the first axis and reduced over axes 1
    and 2, so ``all_sample`` has to form a 3-D stack of windows.

    :param sample: one 2-D window
    :param all_sample: 3-D array, or a sequence of equally shaped 2-D windows
    :return: number of matching windows
    """
    elementwise_equal = sample == all_sample
    window_matches = np.all(elementwise_equal, axis=(1, 2))
    return np.sum(window_matches)

def batchDiscretize(all_sample):
    """
    Apply ``discretize`` to every element of ``all_sample``.

    :param all_sample: iterable of samples
    :return: list with the discretized samples, in the same order
    """
    return [discretize(item) for item in all_sample]
def discretize(data):
    """
    Round values down to one decimal place.

    The input is multiplied by 10, floored and divided by 10 again, so every
    value moves to the nearest multiple of 0.1 that is not larger than it.

    :param data: numeric array (any shape)
    :return: array of the same shape with the discretized values
    """
    scaled = data * 10
    return np.floor(scaled) / 10

In [9]:
def getDatasetDetectability(origin_sample_list,new_sample_list,old_anomaly_scores,threshold,label_samples ,params):
    """
    Add up the match score of every new sample against the old samples.

    ``getMatchScore`` is called once per element of ``new_sample_list``; the
    other arguments are forwarded unchanged.

    :param origin_sample_list: old samples
    :param new_sample_list: new samples
    :param old_anomaly_scores: anomaly scores belonging to the old samples
    :param threshold: threshold forwarded to ``getMatchScore``
    :param label_samples: labels belonging to the old samples
    :param params: metric dict forwarded to ``getMatchScore``
    :return: sum of all match scores (0 when there are no new samples)
    """
    return sum(
        getMatchScore(sample = new_sample,ori_sample_list = origin_sample_list,threshold = threshold,anomaly_score = old_anomaly_scores,label_samples = label_samples,params = params)
        for new_sample in new_sample_list
    )



In [10]:
# def getMatchScore(sample,ori_sample_list,threshold,anomaly_score,label_samples,params):
#     m_dec = 0
#     recall = params["recall"]
#     precision = params["precision"]
#     # ratio = params["anomaly_ratio"]
#     for index,ori_sample in enumerate(ori_sample_list):
#         similarity = getSimilarity(sample, ori_sample)
#         if similarity < threshold:
#             m_dec +=  np.sum(  precision * np.multiply(label_samples[index],anomaly_score[index] ) +  recall * np.multiply((1 - label_samples[index]),(1-anomaly_score[index])) )

#     return m_dec


In [11]:
def getMatchScore(sample,ori_sample_list,threshold,anomaly_score,label_samples,params):
    """
    Score how well a model's results on matching old samples fit one new sample.

    An old sample "matches" when ``getSimilarity(sample, ori_sample)`` is below
    ``threshold``. Each match contributes

        w_a * sum(label * score) + ratio * recall * sum((1 - label) * (1 - score))

    with ``w_a = 1 / (1 - max(f1, precision))``.

    Fix: ``recall``, ``precision`` and ``f1`` are shifted by 1e-6, so a perfect
    precision or F1 of 1.0 used to make ``1 - max(...)`` negative and turned
    ``w_a`` into roughly -1e6. ``max(f1, precision)`` is now clamped to at most
    ``1 - 1e-6``, which keeps ``w_a`` positive and finite.

    :param sample: the new sample
    :param ori_sample_list: old samples to compare against
    :param threshold: upper bound on the divergence for a match
    :param anomaly_score: per old sample anomaly scores, indexed like ``ori_sample_list``
    :param label_samples: per old sample labels, indexed like ``ori_sample_list``
    :param params: dict with ``"recall"``, ``"precision"``, ``"f1"`` and ``"anomaly_ratio"``
    :return: accumulated score; ``0`` when nothing matches
    """
    eps = 1e-6
    recall = params["recall"] + eps
    precision = params["precision"] + eps
    f1 = params["f1"] + eps
    ratio = params["anomaly_ratio"]

    # Both weights are the same for every old sample, so compute them once.
    best_quality = min(max(f1, precision), 1 - eps)
    anomaly_weight = 1 / (1 - best_quality)
    normal_weight = ratio * recall

    m_dec = 0
    for index, ori_sample in enumerate(ori_sample_list):
        if getSimilarity(sample, ori_sample) < threshold:
            labels = label_samples[index]
            scores = anomaly_score[index]
            m_dec += np.sum(
                anomaly_weight * np.multiply(labels, scores)
                + normal_weight * np.multiply((1 - labels), (1 - scores))
            )

    return m_dec

In [12]:
def getDatasetSimilarity(origin_sample_list,new_sample_list,old_anomaly_scores,old_label_samples,threshold = 0.5,params = {}):
    """
    Compute a recommendation score from old samples, new samples and one model's results.

    Steps:

    1. Both sample lists are discretized; the new list is de-duplicated.
    2. ``p`` holds, per distinct new window, its occurrence count among the
       new windows, passed through ``Softmax``.
    3. ``q`` holds, per distinct new window, the number of old windows matched
       by ``calculateSimilarityCounts``, divided by
       ``len(old) - (number of distinct matched old windows) + (total matches)``
       and passed through ``Softmax``.
    4. The dataset similarity is ``1 / sum(p * log(p / q) + 1e-8)``.
    5. The result is that similarity multiplied by the total from
       ``getDatasetDetectability``.

    :param origin_sample_list: old windows
    :param new_sample_list: new windows
    :param old_anomaly_scores: anomaly scores aligned with ``origin_sample_list``
    :param old_label_samples: labels aligned with ``origin_sample_list``
    :param threshold: match threshold forwarded to the helper functions
    :param params: metric dict forwarded to ``getDatasetDetectability``
    :return: a single number, the combined score
    """
    origin_sample_list = batchDiscretize(origin_sample_list)
    new_sample_list = batchDiscretize(new_sample_list)

    # Occurrence counts must be taken before duplicates are removed.
    new_counts_map = getDistinctAndNum(new_sample_list)
    ori_len = len(origin_sample_list)
    distinct_new = unique(new_sample_list)

    p = np.array([new_counts_map[getMatrixKey(window)] for window in distinct_new], dtype=float)
    p = Softmax(p + 1e-7)

    q = np.zeros_like(p)
    total_c = 0
    matched_old_indices = set()

    for slot, window in enumerate(distinct_new):
        counts, similar_sample_index_list = calculateSimilarityCounts(window, origin_sample_list, threshold)
        q[slot] = counts
        total_c += counts
        matched_old_indices.update(similar_sample_index_list)

    q = q / (ori_len - len(matched_old_indices) + total_c)
    q = Softmax(q + 1e-8)
    dataset_similarity = 1 / (p * np.log(p/q) +1e-8).sum()

    m_dec_total = getDatasetDetectability(origin_sample_list, distinct_new, old_anomaly_scores, threshold,old_label_samples,params)

    return dataset_similarity * m_dec_total



In [13]:
def getEvalCount(mode="old", dataset="", method_list=[]):
    """
    Read confusion counts from the evaluation logs and derive precision and recall.

    The entries of ``./Logs/recommondation/<mode>/<dataset>`` are listed; for
    each entry (name cut at the first ".") and each method, the file
    ``<entry>/<method>.json`` is read.

    Precision is ``tp / (tp + fp)`` and recall is ``tp / (tp + fn)``; either
    one is ``0`` when its denominator is not positive. ``f1`` is copied from
    the log's ``ori_f1``.

    :param mode: ``"old"`` or ``"new"``
    :param dataset: dataset folder name
    :param method_list: method names whose logs should be read
    :return: nested dict ``result[dataset][file_name][method]`` with the keys
             ``tp``, ``fp``, ``tn``, ``fn``, ``precision``, ``recall``, ``f1``
    """
    files_path = "./Logs/recommondation/" + mode + "/" + dataset
    per_file = {}
    result = {dataset: per_file}

    for entry in os.listdir(files_path):
        stem = entry.split(".")[0]
        per_method = {}
        per_file[stem] = per_method

        for method in method_list:
            with open(files_path + "/" + stem + "/" + method + ".json", "r") as file:
                logged = json.load(file)

            tp = logged["ori_tp"]
            fp = logged["ori_fp"]
            tn = logged["ori_tn"]
            fn = logged["ori_fn"]

            # Guard against empty denominators.
            predicted_positive = tp + fp
            actual_positive = tp + fn

            per_method[method] = {
                "tp": tp,
                "fp": fp,
                "tn": tn,
                "fn": fn,
                "precision": 0 if predicted_positive <= 0 else tp / predicted_positive,
                "recall": 0 if actual_positive <= 0 else tp / actual_positive,
                "f1": logged["ori_f1"],
            }

    return result

In [14]:
def calculateSimilarityCounts(sample,ori_sample_list,threshold):
    """
    Find the old samples whose divergence from ``sample`` is below ``threshold``.

    Both samples are flattened before ``getSimilarity`` is called. A shape
    mismatch is only reported with a message; the comparison is still made.

    ``sample`` is flattened once up front instead of once per old sample.

    :param sample: array to compare (must provide ``shape`` and ``flatten()``)
    :param ori_sample_list: iterable of old sample arrays
    :param threshold: a match requires ``getSimilarity(...) < threshold``
    :return: tuple ``(count, indices)`` with the number of matches and their
             positions in ``ori_sample_list``
    """
    flat_sample = sample.flatten()
    similar_sample_index_list = []

    for index, ori_sample in enumerate(ori_sample_list):
        if ori_sample.shape != sample.shape:
            print("shape dont match!:",ori_sample.shape,sample.shape)

        similarity = getSimilarity(flat_sample, ori_sample.flatten())

        if similarity < threshold:
            similar_sample_index_list.append(index)

    return len(similar_sample_index_list), similar_sample_index_list


In [15]:
def getSimilarity(origin_sample,new_sample):
    """
    Compare two samples; this is the single place to change the comparison.

    Each sample is shifted by 1e-7, passed through ``Softmax``, and the two
    results are handed to ``KLDivergence`` (first argument first).

    Despite the function name, the value is a divergence: callers in this
    file treat *smaller* values as "more similar" (``value < threshold``).
    They also pass the new sample as the first argument, so the parameter
    names do not reflect how the function is actually called.

    :param origin_sample: first sample
    :param new_sample: second sample
    :return: the value returned by ``KLDivergence``
    """
    return KLDivergence(
        Softmax(origin_sample + 1e-7),
        Softmax(new_sample + 1e-7),
    )

In [16]:
def sampleAndMatch(dataset,old_filename,new_filename,method_list,sample_num = 100,threshold = 0.5):
    """
    Score every method for one (old file, new file) pair and pick the best one.

    Windows are sampled from the old and the new test data. For each method,
    the saved anomaly scores of the old file are windowed, sampled at the same
    positions as the old windows, and combined with the method's logged
    metrics by ``getDatasetSimilarity``.

    Fix: the old labels are now sampled at the *same* positions as the old
    windows (``old_indices``). Previously they were drawn at independent
    random positions, so labels did not correspond to the sampled windows and
    anomaly scores.

    :param dataset: dataset folder name
    :param old_filename: file stem (without ".npy") on the "old" side
    :param new_filename: file stem (without ".npy") on the "new" side
    :param method_list: method names to compare
    :param sample_num: number of windows to sample per side
    :param threshold: match threshold forwarded to ``getDatasetSimilarity``
    :return: tuple ``(best_method, best_score, scores_by_method)``; the dict is
             ordered from highest to lowest score
    """
    config = getConfigs()
    dataset_old_path = "./RecomData/old/" + dataset + "/window/test/" + old_filename + ".npy"
    dataset_new_path = "./RecomData/new/" + dataset + "/window/test/" + new_filename + ".npy"
    dataset_old_label_path = "./RecomData/old/" + dataset + "/window/label/" + old_filename + ".npy"

    old_window_data = np.load(dataset_old_path)
    new_window_data = np.load(dataset_new_path)
    old_label_data = np.load(dataset_old_label_path)

    # Only the last 95% of the old windows (by count) are used.
    # NOTE(review): the new data is cut with the length derived from the OLD data -- confirm
    # this is intended.
    old_data_length = int(len(old_window_data)*0.95)

    old_window_data = old_window_data[-old_data_length:]
    new_window_data = new_window_data[-old_data_length:]
    old_label_data = old_label_data[-old_data_length:]

    old_window_samples,old_indices = sampleFromWindowData(old_window_data,sample_num)
    new_window_samples,_ = sampleFromWindowData(new_window_data,sample_num)
    # Labels have to line up with the sampled old windows, so reuse their positions.
    old_label_samples,_ = sampleFromWindowData(old_label_data,sample_num,indices=old_indices)

    method_recommond_score = []
    method_score_map = {}
    all_params = getEvalCount(mode="old", dataset=dataset, method_list=method_list)
    anomaly_ratio = np.sum(old_label_data)/(len(old_label_data) * config["window_size"])

    for method in method_list:
        score_path = "./RecomData/scores/old/" + dataset + "/" + old_filename + "/" + method + ".npy"
        anomaly_scores = np.load(score_path)

        # Give the score series a channel axis, window it, then drop that axis again.
        anomaly_scores = anomaly_scores[:, np.newaxis]
        anomaly_scores = convertToWindow(anomaly_scores,config["window_size"]).squeeze()

        anomaly_scores = anomaly_scores[-old_data_length:]
        anomaly_scores_samples,_ = sampleFromWindowData(anomaly_scores,sample_num,indices=old_indices)

        params = all_params[dataset][old_filename][method]
        params["anomaly_ratio"] = anomaly_ratio
        total_dataset_recommond_score = getDatasetSimilarity(old_window_samples,new_window_samples,old_anomaly_scores=anomaly_scores_samples,old_label_samples = old_label_samples,threshold = threshold,params=params)

        method_score_map[method] = total_dataset_recommond_score
        method_recommond_score.append(total_dataset_recommond_score)

    score_array = np.array(method_recommond_score)
    max_score_index = score_array.argmax(axis=0)
    max_score = score_array.max(axis=0)
    method_score_map = dict(sorted(method_score_map.items(), key=lambda x: x[1], reverse=True))
    recommon_method = method_list[max_score_index]
    return recommon_method,max_score,method_score_map


In [17]:

def bordaAggregation(rank_list,method_list):
    """
    Combine several score tables into one ranking with a Borda count.

    Every dict in ``rank_list`` maps method -> score. Inside each dict the
    methods are ranked from 1 (highest score) upwards. A method then earns
    ``N - rank`` points per dict, where ``N`` is the number of distinct names
    in ``method_list``.

    :param rank_list: list of dicts ``{method: score}``; each must contain
                      every method of ``method_list``
    :param method_list: methods to aggregate
    :return: list of ``(method, points)`` tuples, highest points first
    """
    # Rank position (1 = best) of each method inside every score table.
    positions_per_table = []
    for score_table in rank_list:
        ordered = sorted(score_table.items(), key=lambda pair: pair[1], reverse=True)
        positions_per_table.append(
            {name: place for place, (name, _score) in enumerate(ordered, start=1)}
        )

    # All positions a method achieved, collected across the tables.
    positions_by_method = {}
    for method in method_list:
        collected = positions_by_method.setdefault(method, [])
        for positions in positions_per_table:
            collected.append(positions[method])

    num_competitors = len(positions_by_method)

    points = dict.fromkeys(method_list, 0)
    for competitor, positions in positions_by_method.items():
        for place in positions:
            points[competitor] += (num_competitors - place)

    return sorted(points.items(), key=lambda pair: pair[1], reverse=True)

In [18]:
def recommendAll(dataset_list=None, method_list=None, sample_num=100, threshold=0.5):
    """Recommend a detection method for every dataset in ``dataset_list``.

    :param dataset_list: iterable of ``(dataset, isonly)`` pairs. When
        ``isonly`` is truthy the dataset name is used as both the old and the
        new file name. Otherwise every file under
        ``./RecomData/new/<dataset>/window/test/`` is matched against every
        file under ``./RecomData/old/<dataset>/window/test/``.
    :param method_list: candidate method names; defaults to the eleven
        methods listed in the body
    :param sample_num: forwarded to ``sampleAndMatch`` (default 100)
    :param threshold: forwarded to ``sampleAndMatch`` (default 0.5)
    :return: ``(file_recommond_method_list, dataset_recommond_rank)``. The
        first is a list of ``(dataset, new_filename, method)`` tuples. The
        second maps each dataset to its ranking: whatever ``sampleAndMatch``
        returned as its third value for ``isonly`` datasets, and the list of
        ``(method, points)`` tuples from ``bordaAggregation`` otherwise.

    The same two objects are also printed, as before.
    """
    # None sentinels instead of list literals: a default list is created once
    # and shared by every call.
    if dataset_list is None:
        dataset_list = []
    if method_list is None:
        method_list = ["LSTMVAE","LSTMAE","NASALSTM","DAGMM","TRANSFORMER","TCNAE","UAE","TRANAD","OmniAnomaly","PCAAD","IForestAD"]

    file_recommond_method_list = []
    dataset_recommond_rank = {}

    for dataset,isonly in dataset_list:
        print("recommending dataset:",dataset)

        if isonly:
            recommond_method,_,rank_map = sampleAndMatch(dataset,old_filename=dataset,new_filename=dataset,
                                                         method_list=method_list,
                                                         sample_num=sample_num,threshold=threshold)
            file_recommond_method_list.append((dataset, dataset , recommond_method))
            dataset_recommond_rank[dataset] = rank_map
            print("recommond method:", recommond_method)
            print("method rank:",rank_map)
            continue

        old_data_path = "./RecomData/old/" + dataset + "/window/test/"
        new_data_path = "./RecomData/new/" + dataset + "/window/test/"

        # os.listdir returns entries in arbitrary order; sorting makes the
        # output order and the tie-breaking below reproducible.
        old_data_files = sorted(os.listdir(old_data_path))
        new_data_files = sorted(os.listdir(new_data_path))

        file_recommond_rank_list = []
        for new_filename in new_data_files:
            print("new_filename:",new_filename)
            # Strip only the final extension so stems that contain dots survive.
            new_stem = os.path.splitext(new_filename)[0]

            # Best score seen for each method across all old files.
            file_recommond_rank_map = {}
            total_rec_method = ""
            total_max_score = 0

            for old_filename in old_data_files:
                recommond_method, max_score,rank_map = sampleAndMatch(dataset,
                                                                      old_filename=os.path.splitext(old_filename)[0],
                                                                      new_filename=new_stem,
                                                                      method_list=method_list,
                                                                      sample_num=sample_num,threshold=threshold)

                for method,score in rank_map.items():
                    if method in file_recommond_rank_map:
                        file_recommond_rank_map[method] = max(file_recommond_rank_map[method],score)
                    else:
                        file_recommond_rank_map[method] = score

                # Strict '>' keeps the first old file that reaches the best score.
                if max_score > total_max_score:
                    total_max_score = max_score
                    total_rec_method = recommond_method

            file_recommond_rank_list.append(file_recommond_rank_map)
            file_recommond_method_list.append((dataset,new_filename,total_rec_method))

        dataset_recommond_rank[dataset] = bordaAggregation(file_recommond_rank_list,method_list)

    print("final result:")
    print(file_recommond_method_list)
    print("dataset_recommond_rank：\n",dataset_recommond_rank)

    return file_recommond_method_list, dataset_recommond_rank


In [33]:
def compareRuntime(window_size,sample_num,threshold):
    """Time the sample-and-score step of the recommendation on four datasets.

    For each of SWAT, SKAB, PMS and DMDS the old/new test arrays and the old
    labels are loaded from ``./RecomData``, passed through
    ``convertToSlidingWindow`` and cut to their trailing
    ``int(len(old windows) * 0.95)`` entries. The clock then starts and the
    function samples windows and calls ``getDatasetSimilarity`` once per
    method. Loading and windowing of each method's score file happens inside
    the timed region; loading and windowing of the data itself does not.

    :param window_size: window length passed to ``convertToSlidingWindow`` and
        ``convertToWindow``
    :param sample_num: sample count forwarded to ``sampleFromWindowData``
    :param threshold: forwarded unchanged to ``getDatasetSimilarity``
    :return: None. Per-dataset timings, the method scores sorted in
        descending order, and finally the list of all running times are
        printed.
    """
    dataset_list = [("SWAT", True), ("SKAB", True),
                    ("PMS", True),  ("DMDS", True)]
    method_list = ["LSTMVAE", "LSTMAE", "NASALSTM", "DAGMM", "TRANSFORMER", "TCNAE", "UAE", "TRANAD", "OmniAnomaly",
                   "PCAAD", "IForestAD"]

    running_time_list = []

    # The boolean in each pair is not used here: the dataset name always
    # doubles as the old and the new file name.
    for dataset, _ in dataset_list:
        print("recommending dataset:",dataset)

        old_filename = dataset
        new_filename = dataset

        print("sample - dataset:", dataset)
        dataset_old_path = f"./RecomData/old/{dataset}/test/{old_filename}.npy"
        dataset_new_path = f"./RecomData/new/{dataset}/test/{new_filename}.npy"
        dataset_old_label_path = f"./RecomData/old/{dataset}/label/{old_filename}.npy"

        old_window_data = np.load(dataset_old_path)
        new_window_data = np.load(dataset_new_path)

        print("old data length:", len(old_window_data))
        print("new data length:", len(new_window_data))

        old_label_data = np.load(dataset_old_label_path)

        all_params = getEvalCount(mode="old", dataset=dataset, method_list=method_list)
        # Computed on the raw labels, before they are windowed.
        anomaly_ratio = np.sum(old_label_data) / len(old_label_data)

        old_window_data = convertToSlidingWindow(old_window_data,window_size)
        new_window_data = convertToSlidingWindow(new_window_data,window_size)
        old_label_data = convertToSlidingWindow(old_label_data,window_size)

        # All three arrays are cut with the length derived from the OLD data.
        old_data_length = int(len(old_window_data) * 0.95)

        old_window_data = old_window_data[-old_data_length:]
        new_window_data = new_window_data[-old_data_length:]
        old_label_data = old_label_data[-old_data_length:]

        starttime =  time.time()
        print("start sample. start time:", starttime)

        old_window_samples, old_indices = sampleFromWindowData(old_window_data, sample_num)
        new_window_samples, _ = sampleFromWindowData(new_window_data, sample_num)
        # Labels (and the scores below) are sampled with the indices drawn for
        # the old windows.
        old_label_samples, _ = sampleFromWindowData(old_label_data, sample_num, old_indices)

        method_score_map = {}

        for method in method_list:
            score_path = f"./RecomData/scores/old/{dataset}/{old_filename}/{method}.npy"
            anomaly_scores = np.load(score_path)
            anomaly_scores = anomaly_scores[:, np.newaxis]

            anomaly_scores = convertToWindow(anomaly_scores, window_size).squeeze()[-old_data_length:]

            anomaly_scores, _ = sampleFromWindowData(anomaly_scores, sample_num,old_indices)

            params = all_params[dataset][old_filename][method]
            params["anomaly_ratio"] = anomaly_ratio
            method_score_map[method] = getDatasetSimilarity(old_window_samples, new_window_samples,
                                                            old_anomaly_scores=anomaly_scores,
                                                            old_label_samples=old_label_samples,
                                                            threshold=threshold, params=params)

        endtime =  time.time()
        print("start sample. end time:", endtime)
        print("dataset:",dataset," running time:",endtime-starttime," window_size:",window_size," sample_num:",sample_num)
        running_time_list.append(endtime-starttime)

        method_score_map = dict(sorted(method_score_map.items(), key=lambda x: x[1], reverse=True))
        print("rank :", method_score_map)

    print(running_time_list)


In [34]:
# Runtime benchmark: window size 10, 100 samples, threshold 0.5.
compareRuntime(window_size=10, sample_num=100, threshold=0.5)

recommending dataset: SWAT
sample - dataset: SWAT
old data length: 155000
new data length: 294918
start sample. start time: 1719657976.1575925
start sample. end time: 1719657984.74522
dataset: SWAT  running time: 8.587627410888672  window_size: 10  sample_num: 100
rank : {'DAGMM': 6130551267.078446, 'IForestAD': 3645521620.678625, 'TRANAD': 3146135839.906925, 'LSTMAE': 3019675099.327545, 'OmniAnomaly': 2776737554.3363147, 'NASALSTM': 2505363278.2724543, 'LSTMVAE': 2453550888.591881, 'TRANSFORMER': 2023477313.2056267, 'UAE': 84671888.59373264, 'PCAAD': 42572857.808493935, 'TCNAE': 2962774.7001567734}
recommending dataset: SKAB
sample - dataset: SKAB
old data length: 26322
new data length: 11137
start sample. start time: 1719657984.789423
start sample. end time: 1719657991.032423
dataset: SKAB  running time: 6.243000030517578  window_size: 10  sample_num: 100
rank : {'TCNAE': 35667811851.80256, 'PCAAD': 35571644469.48403, 'IForestAD': 32461934083.551968, 'NASALSTM': 32045133187.87235, 'T

In [35]:
# Runtime benchmark: window size 30, 100 samples, threshold 0.5.
compareRuntime(window_size=30, sample_num=100, threshold=0.5)

recommending dataset: SWAT
sample - dataset: SWAT
old data length: 155000
new data length: 294918
start sample. start time: 1719658010.3007371
start sample. end time: 1719658020.7727683
dataset: SWAT  running time: 10.472031116485596  window_size: 30  sample_num: 100
rank : {'DAGMM': 184057.25103749358, 'NASALSTM': 148194.09731545785, 'TRANAD': 146659.5924101111, 'LSTMAE': 140630.83357479918, 'OmniAnomaly': 127802.9426395609, 'IForestAD': 122202.52208014381, 'TRANSFORMER': 116132.77411133044, 'LSTMVAE': 107435.0775905765, 'PCAAD': 3306.244002493162, 'UAE': 1914.6809595673292, 'TCNAE': 146.54593218431563}
recommending dataset: SKAB
sample - dataset: SKAB
old data length: 26322
new data length: 11137
start sample. start time: 1719658020.8306487
start sample. end time: 1719658028.0086691
dataset: SKAB  running time: 7.178020477294922  window_size: 30  sample_num: 100
rank : {'TCNAE': 107338386380.27792, 'PCAAD': 99457481073.23006, 'IForestAD': 97794494817.04028, 'TRANAD': 97153174144.6930

In [36]:
# Runtime benchmark: window size 60, 100 samples, threshold 0.5.
compareRuntime(window_size=60, sample_num=100, threshold=0.5)

recommending dataset: SWAT
sample - dataset: SWAT
old data length: 155000
new data length: 294918
start sample. start time: 1719658052.513463
start sample. end time: 1719658066.3324385
dataset: SWAT  running time: 13.818975448608398  window_size: 60  sample_num: 100
rank : {'DAGMM': 19434120883.341087, 'TRANAD': 18908128086.169422, 'LSTMAE': 18636850865.105045, 'NASALSTM': 18400271754.795757, 'OmniAnomaly': 16515944062.22805, 'TRANSFORMER': 15455429295.29293, 'IForestAD': 15435178770.32758, 'LSTMVAE': 14492699861.03906, 'PCAAD': 2286746712.9351754, 'UAE': 392854504.079298, 'TCNAE': 18693411.396293372}
recommending dataset: SKAB
sample - dataset: SKAB
old data length: 26322
new data length: 11137
start sample. start time: 1719658066.428162
start sample. end time: 1719658074.3853722
dataset: SKAB  running time: 7.957210063934326  window_size: 60  sample_num: 100
rank : {'PCAAD': 211345368690.99628, 'TCNAE': 206344196436.81345, 'IForestAD': 203253059131.54358, 'NASALSTM': 195088929822.258

In [43]:
# Runtime benchmark: window size 120, 100 samples, threshold 0.5.
compareRuntime(window_size=120, sample_num=100, threshold=0.5)

recommending dataset: SWAT
sample - dataset: SWAT
old data length: 155000
new data length: 294918
start sample. start time: 1719658534.1834843
start sample. end time: 1719658553.032073
dataset: SWAT  running time: 18.848588705062866  window_size: 120  sample_num: 100
rank : {'NASALSTM': 48766277248.97195, 'TRANAD': 46969611672.5451, 'LSTMAE': 45312035194.34856, 'OmniAnomaly': 42675880028.56282, 'IForestAD': 42093330096.3602, 'TRANSFORMER': 40730435751.72938, 'LSTMVAE': 34700121882.01268, 'DAGMM': 33476935804.317604, 'PCAAD': 2906049113.2938895, 'UAE': 750895194.5930206, 'TCNAE': 150243101.08596668}
recommending dataset: SKAB
sample - dataset: SKAB
old data length: 26322
new data length: 11137
start sample. start time: 1719658553.1797392
start sample. end time: 1719658563.1212559
dataset: SKAB  running time: 9.941516637802124  window_size: 120  sample_num: 100
rank : {'PCAAD': 413476310228.239, 'TCNAE': 411801410912.4646, 'IForestAD': 381273847720.6032, 'NASALSTM': 376704181762.39307, '

In [37]:
# Runtime benchmark: window size 30, 50 samples, threshold 0.5.
compareRuntime(window_size=30, sample_num=50, threshold=0.5)

recommending dataset: SWAT
sample - dataset: SWAT
old data length: 155000
new data length: 294918
start sample. start time: 1719658101.7241843
start sample. end time: 1719658105.757632
dataset: SWAT  running time: 4.033447742462158  window_size: 30  sample_num: 50
rank : {'DAGMM': 7727765784.341623, 'IForestAD': 7386384687.628185, 'NASALSTM': 6999636751.964209, 'TRANAD': 6724650804.231507, 'LSTMAE': 6275101508.819317, 'OmniAnomaly': 6072891460.807126, 'TRANSFORMER': 5673535547.958933, 'LSTMVAE': 4984243524.597024, 'PCAAD': 776294714.4285936, 'UAE': 200957302.97558716, 'TCNAE': 4552483.418600458}
recommending dataset: SKAB
sample - dataset: SKAB
old data length: 26322
new data length: 11137
start sample. start time: 1719658105.8218186
start sample. end time: 1719658108.1277719
dataset: SKAB  running time: 2.30595326423645  window_size: 30  sample_num: 50
rank : {'PCAAD': 54315365236.022964, 'TCNAE': 53233267624.51654, 'TRANAD': 46218627757.615585, 'IForestAD': 46153907639.98751, 'NASALS

In [38]:
# Runtime benchmark: window size 30, 100 samples, threshold 0.5.
compareRuntime(window_size=30, sample_num=100, threshold=0.5)

recommending dataset: SWAT
sample - dataset: SWAT
old data length: 155000
new data length: 294918
start sample. start time: 1719658119.1463757
start sample. end time: 1719658129.7795334
dataset: SWAT  running time: 10.633157730102539  window_size: 30  sample_num: 100
rank : {'DAGMM': 14818442750.044659, 'NASALSTM': 10428434130.978504, 'TRANAD': 9599279706.075548, 'IForestAD': 9441574674.39712, 'LSTMAE': 9011207776.141657, 'OmniAnomaly': 8598374083.912382, 'TRANSFORMER': 7509514236.724902, 'LSTMVAE': 7284438910.755794, 'PCAAD': 222189958.9551606, 'UAE': 155377227.27359945, 'TCNAE': 9117944.518275091}
recommending dataset: SKAB
sample - dataset: SKAB
old data length: 26322
new data length: 11137
start sample. start time: 1719658129.8382168
start sample. end time: 1719658136.9369214
dataset: SKAB  running time: 7.09870457649231  window_size: 30  sample_num: 100
rank : {'PCAAD': 102030327752.93608, 'TCNAE': 101503622982.50868, 'IForestAD': 98155507740.95752, 'NASALSTM': 97411630565.18414, 

In [44]:
# Runtime benchmark: window size 30, 150 samples, threshold 0.5.
compareRuntime(window_size=30, sample_num=150, threshold=0.5)

recommending dataset: SWAT
sample - dataset: SWAT
old data length: 155000
new data length: 294918
start sample. start time: 1719658706.166537
start sample. end time: 1719658727.38955
dataset: SWAT  running time: 21.223012924194336  window_size: 30  sample_num: 150
rank : {'DAGMM': 16579825753.599308, 'TRANAD': 10572024974.221119, 'LSTMAE': 10293596999.407196, 'OmniAnomaly': 9029841332.268024, 'LSTMVAE': 8744236277.442081, 'IForestAD': 8631966310.509624, 'NASALSTM': 8379477862.634619, 'TRANSFORMER': 7102810975.324599, 'UAE': 134802049.49960035, 'PCAAD': 118107445.39896317, 'TCNAE': 45836673.64351945}
recommending dataset: SKAB
sample - dataset: SKAB
old data length: 26322
new data length: 11137
start sample. start time: 1719658727.4463546
start sample. end time: 1719658742.614192
dataset: SKAB  running time: 15.167837381362915  window_size: 30  sample_num: 150
rank : {'TCNAE': 158327738146.7699, 'PCAAD': 154466998708.05624, 'NASALSTM': 143363925041.32382, 'IForestAD': 143121576120.2277,

In [45]:
# Runtime benchmark: window size 30, 200 samples, threshold 0.5.
compareRuntime(window_size=30, sample_num=200, threshold=0.5)

recommending dataset: SWAT
sample - dataset: SWAT
old data length: 155000
new data length: 294918
start sample. start time: 1719658784.113476
start sample. end time: 1719658820.9181669
dataset: SWAT  running time: 36.80469083786011  window_size: 30  sample_num: 200
rank : {'DAGMM': 30269531554.825886, 'NASALSTM': 25083804983.90277, 'TRANAD': 22950909079.14804, 'IForestAD': 22495068800.30727, 'LSTMAE': 22026090256.043156, 'OmniAnomaly': 20764972678.948856, 'TRANSFORMER': 19058363876.516357, 'LSTMVAE': 17166651012.449955, 'PCAAD': 1177261395.9340146, 'UAE': 436986147.6231437, 'TCNAE': 31687486.904341646}
recommending dataset: SKAB
sample - dataset: SKAB
old data length: 26322
new data length: 11137
start sample. start time: 1719658820.976197
start sample. end time: 1719658847.6054153
dataset: SKAB  running time: 26.629218339920044  window_size: 30  sample_num: 200
rank : {'TCNAE': 204338036649.983, 'PCAAD': 197511861073.21292, 'IForestAD': 193846021422.64114, 'NASALSTM': 190608844682.527

In [46]:
# Runtime benchmark: window size 30, 250 samples, threshold 0.5.
compareRuntime(window_size=30, sample_num=250, threshold=0.5)

recommending dataset: SWAT
sample - dataset: SWAT
old data length: 155000
new data length: 294918
start sample. start time: 1719658980.2252874
start sample. end time: 1719659035.0363603
dataset: SWAT  running time: 54.8110728263855  window_size: 30  sample_num: 250
rank : {'TRANAD': 24286311832.59969, 'DAGMM': 24040545600.95054, 'NASALSTM': 23462782929.850517, 'LSTMAE': 23348654974.11821, 'OmniAnomaly': 21555806750.74443, 'IForestAD': 20795109559.562912, 'TRANSFORMER': 19344504737.931404, 'LSTMVAE': 18645378612.13691, 'PCAAD': 831355470.8690671, 'UAE': 400770599.8390877, 'TCNAE': 94211734.48910958}
recommending dataset: SKAB
sample - dataset: SKAB
old data length: 26322
new data length: 11137
start sample. start time: 1719659035.099544
start sample. end time: 1719659076.231439
dataset: SKAB  running time: 41.13189506530762  window_size: 30  sample_num: 250
rank : {'PCAAD': 270713771273.68445, 'TCNAE': 268755363867.7518, 'IForestAD': 246335050602.67975, 'NASALSTM': 244339042094.30017, '

In [40]:
# Runtime benchmark: window size 30, 100 samples, threshold 0.05.
compareRuntime(window_size=30, sample_num=100, threshold=0.05)

recommending dataset: SWAT
sample - dataset: SWAT
old data length: 155000
new data length: 294918
start sample. start time: 1719658236.756129
start sample. end time: 1719658247.078709
dataset: SWAT  running time: 10.322579860687256  window_size: 30  sample_num: 100
rank : {'DAGMM': 2043058665.4879045, 'TRANAD': 1693285308.3028655, 'LSTMAE': 1611009258.0884435, 'LSTMVAE': 1419634583.5046055, 'OmniAnomaly': 1403568962.9804344, 'IForestAD': 1355426398.5083768, 'NASALSTM': 1147002142.1886914, 'TRANSFORMER': 1046432643.5362458, 'UAE': 17607464.08379796, 'PCAAD': 16135169.79479244, 'TCNAE': 2443271.218714638}
recommending dataset: SKAB
sample - dataset: SKAB
old data length: 26322
new data length: 11137
start sample. start time: 1719658247.136587
start sample. end time: 1719658254.1930597
dataset: SKAB  running time: 7.0564727783203125  window_size: 30  sample_num: 100
rank : {'PCAAD': 51779725545.830185, 'IForestAD': 51552953288.72378, 'NASALSTM': 48682415431.81259, 'TCNAE': 48016469137.630

In [41]:
# Runtime benchmark: window size 30, 100 samples, threshold 0.1.
compareRuntime(window_size=30, sample_num=100, threshold=0.1)

recommending dataset: SWAT
sample - dataset: SWAT
old data length: 155000
new data length: 294918
start sample. start time: 1719658276.3482707
start sample. end time: 1719658286.8291264
dataset: SWAT  running time: 10.480855703353882  window_size: 30  sample_num: 100
rank : {'DAGMM': 871657.0459106455, 'NASALSTM': 606911.0673924898, 'IForestAD': 560303.7783216538, 'TRANAD': 544043.5722322428, 'LSTMAE': 514429.46145990636, 'OmniAnomaly': 494839.9926988435, 'TRANSFORMER': 434898.8201384912, 'LSTMVAE': 402025.90602118516, 'PCAAD': 23177.338022526044, 'UAE': 9639.063090292608, 'TCNAE': 3098.2679219316387}
recommending dataset: SKAB
sample - dataset: SKAB
old data length: 26322
new data length: 11137
start sample. start time: 1719658286.885687
start sample. end time: 1719658294.1203895
dataset: SKAB  running time: 7.2347023487091064  window_size: 30  sample_num: 100
rank : {'PCAAD': 108620678926.68321, 'TCNAE': 106893793648.73833, 'NASALSTM': 99187338627.00949, 'IForestAD': 98879244138.7576

In [42]:
# Runtime benchmark: window size 30, 100 samples, threshold 1.
compareRuntime(window_size=30, sample_num=100, threshold=1)

recommending dataset: SWAT
sample - dataset: SWAT
old data length: 155000
new data length: 294918
start sample. start time: 1719658316.6526926
start sample. end time: 1719658327.1589737
dataset: SWAT  running time: 10.50628113746643  window_size: 30  sample_num: 100
rank : {'DAGMM': 16250499655.0955, 'NASALSTM': 10987768043.059027, 'TRANAD': 10337945415.221573, 'LSTMAE': 9997987151.370258, 'IForestAD': 9622590786.386978, 'OmniAnomaly': 9272785595.95234, 'TRANSFORMER': 8240510046.486, 'LSTMVAE': 7831869817.751213, 'PCAAD': 470393577.04231524, 'UAE': 176558181.21021038, 'TCNAE': 9147733.679836221}
recommending dataset: SKAB
sample - dataset: SKAB
old data length: 26322
new data length: 11137
start sample. start time: 1719658327.2173345
start sample. end time: 1719658334.276241
dataset: SKAB  running time: 7.058906555175781  window_size: 30  sample_num: 100
rank : {'PCAAD': 104902823094.83214, 'TCNAE': 103915919226.75194, 'IForestAD': 99238161217.33371, 'NASALSTM': 96718901308.97609, 'TRA