In [3]:
import os

In [4]:
import matplotlib.pyplot as plt
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view   
import optuna
import jenkspy

In [5]:
from one.generator.univariate import UnivariateDataGenerator
from one.models import *
from one.utils import *
from one.scorer.pot import *

In [6]:
%load_ext autoreload
%autoreload 2

In [7]:
# Notebook-wide plotting defaults: a very wide canvas and a larger font.
plt.rcParams.update({"figure.figsize": (40, 10), "font.size": 15})

# Generating Univariate Anomalies

In [None]:
# Fresh generator for a 5000-sample stream; its train/test/label series are read in the cells below.
generator = UnivariateDataGenerator(stream_length=5000)

In [None]:
# Positional arguments follow the (ratio, factor, radius) order used by the
# collective-seasonal configs in the Save section.
generator.collective_seasonal_outliers(0.1, 1., 50)

## Visualization

### Train Set

In [None]:
# Plot the training series held by the generator.
plt.plot(generator.train)

### Test Set

In [None]:

# Two stacked panels: the test series on top, its labels underneath.
fig, axes = plt.subplots(2)
ax_test, ax_labels = axes

ax_test.plot(generator.test)
ax_labels.plot(generator.label)

## Save

In [None]:
# Root folder for the generated synthetic series; each outlier type is written
# to its own "<out_type>/" sub-folder below.
SAVE_DIR = "./data/univar-synth/"

In [None]:
# Point Global
# One synthetic stream is generated per config and its train / test / label
# series are written as text files under SAVE_DIR/<out_type>/.
out_type = "point_global"
config_1 = [0.05, 1.1, 50] #ratio, factor, radius
config_2 = [0.05, 1.25, 50] #ratio, factor, radius
config_3 = [0.05, 1.5, 50] #ratio, factor, radius
config_4 = [0.05, 2, 50] #ratio, factor, radius
config_5 = [0.05, 3, 50] #ratio, factor, radius

# np.savetxt does not create missing folders, so make sure the target exists.
os.makedirs(SAVE_DIR+out_type, exist_ok=True)

for idx, config in enumerate([config_1, config_2, config_3, config_4, config_5]):
    generator = UnivariateDataGenerator(stream_length=5000)
    generator.point_global_outliers(*config)

    # Shared prefix of the three files, e.g. "point_global/0-point_global-factor1.1"
    prefix = f"{out_type}/{idx}-{out_type}-factor{config[1]}"
    for split, series in (("train", generator.train),
                          ("test", generator.test),
                          ("labels", generator.label)):
        file_name = f"{prefix}-{split}.txt"
        np.savetxt(SAVE_DIR+file_name, series)

In [None]:
# Point Contextual
# One synthetic stream is generated per config and its train / test / label
# series are written as text files under SAVE_DIR/<out_type>/.
out_type = "point_contextual"
config_1 = [0.05, 1.1, 50] #ratio, factor, radius
config_2 = [0.05, 1.25, 50] #ratio, factor, radius
config_3 = [0.05, 1.5, 50] #ratio, factor, radius
config_4 = [0.05, 2, 50] #ratio, factor, radius
config_5 = [0.05, 3, 50] #ratio, factor, radius

# np.savetxt does not create missing folders, so make sure the target exists.
os.makedirs(SAVE_DIR+out_type, exist_ok=True)

for idx, config in enumerate([config_1, config_2, config_3, config_4, config_5]):
    generator = UnivariateDataGenerator(stream_length=5000)
    generator.point_contextual_outliers(*config)

    # Shared prefix of the three files written for this config.
    prefix = f"{out_type}/{idx}-{out_type}-factor{config[1]}"
    for split, series in (("train", generator.train),
                          ("test", generator.test),
                          ("labels", generator.label)):
        file_name = f"{prefix}-{split}.txt"
        np.savetxt(SAVE_DIR+file_name, series)
    

In [None]:
# Collective Global
# One synthetic stream is generated per config and its train / test / label
# series are written as text files under SAVE_DIR/<out_type>/.
out_type = "collective_global"
config_1 = [0.05, 50, 1.1] #ratio, radius, coef
config_2 = [0.05, 50, 1.25] #ratio, radius, coef
config_3 = [0.05, 50, 1.5] #ratio, radius, coef
config_4 = [0.05, 50, 2] #ratio, radius, coef
config_5 = [0.05, 50, 3] #ratio, radius, coef

# np.savetxt does not create missing folders, so make sure the target exists.
os.makedirs(SAVE_DIR+out_type, exist_ok=True)

for idx, config in enumerate([config_1, config_2, config_3, config_4, config_5]):
    # The last entry is passed by keyword; the leading ones stay positional.
    *args, coef = config
    generator = UnivariateDataGenerator(stream_length=5000)
    generator.collective_global_outliers(*args, "square", coef=coef)

    # Shared prefix of the three files written for this config.
    prefix = f"{out_type}/{idx}-{out_type}-factor{coef}"
    for split, series in (("train", generator.train),
                          ("test", generator.test),
                          ("labels", generator.label)):
        file_name = f"{prefix}-{split}.txt"
        np.savetxt(SAVE_DIR+file_name, series)

In [None]:
# Collective Trend
# One synthetic stream is generated per config and its train / test / label
# series are written as text files under SAVE_DIR/<out_type>/.
out_type = "collective_trend"
config_1 = [0.05, 0.01, 50] #ratio, factor, radius
config_2 = [0.05, 0.02, 50] #ratio, factor, radius
config_3 = [0.05, 0.03, 50] #ratio, factor, radius
config_4 = [0.05, 0.04, 50] #ratio, factor, radius
config_5 = [0.05, 0.05, 50] #ratio, factor, radius

# np.savetxt does not create missing folders, so make sure the target exists.
os.makedirs(SAVE_DIR+out_type, exist_ok=True)

for idx, config in enumerate([config_1, config_2, config_3, config_4, config_5]):
    generator = UnivariateDataGenerator(stream_length=5000)
    generator.collective_trend_outliers(*config)

    # Shared prefix of the three files written for this config.
    prefix = f"{out_type}/{idx}-{out_type}-factor{config[1]}"
    for split, series in (("train", generator.train),
                          ("test", generator.test),
                          ("labels", generator.label)):
        file_name = f"{prefix}-{split}.txt"
        np.savetxt(SAVE_DIR+file_name, series)

In [None]:
# Collective Seasonal
# One synthetic stream is generated per config and its train / test / label
# series are written as text files under SAVE_DIR/<out_type>/.
out_type = "collective_seasonal"
config_1 = [0.1, 1.1, 50] #ratio, factor, radius
config_2 = [0.1, 1.25, 50] #ratio, factor, radius
config_3 = [0.1, 1.5, 50] #ratio, factor, radius
config_4 = [0.1, 2, 50] #ratio, factor, radius
config_5 = [0.1, 3, 50] #ratio, factor, radius

# np.savetxt does not create missing folders, so make sure the target exists.
os.makedirs(SAVE_DIR+out_type, exist_ok=True)

for idx, config in enumerate([config_1, config_2, config_3, config_4, config_5]):
    generator = UnivariateDataGenerator(stream_length=5000)
    generator.collective_seasonal_outliers(*config)

    # Shared prefix of the three files written for this config.
    prefix = f"{out_type}/{idx}-{out_type}-factor{config[1]}"
    for split, series in (("train", generator.train),
                          ("test", generator.test),
                          ("labels", generator.label)):
        file_name = f"{prefix}-{split}.txt"
        np.savetxt(SAVE_DIR+file_name, series)

# Visualize Dataset

In [None]:
# Folders holding the generated series, one per outlier type, collected in
# PATHS for the visualisation loop in the next cell.
PATH0 = "./data/univar-synth/point_global/"
PATH1 = "./data/univar-synth/point_contextual/"
PATH2 = "./data/univar-synth/collective_global/"
PATH3 = "./data/univar-synth/collective_trend/"
PATH4 = "./data/univar-synth/collective_seasonal/"
PATHS = [PATH0, PATH1, PATH2, PATH3, PATH4]

In [None]:
# Draw every saved test series together with its labels, one figure per series.
for path in PATHS:
    # Keep the part of each "train" file name that precedes its last "-".
    file_list = [name.rpartition("-")[0]
                 for name in get_files_from_path(path)
                 if "train" in name]

    for f in file_list:
        test = np.loadtxt(f"{path}{f}-test.txt")
        labels = np.loadtxt(f"{path}{f}-labels.txt")

        fig, axes = plt.subplots(2)
        upper, lower = axes
        upper.set_title(f)
        upper.plot(test)
        lower.plot(labels)

# Scoring Helper

In [11]:
class ScoreCounter:
    """Accumulate segment-aware confusion counts across one or more series.

    A run of consecutive positive labels is treated as a single anomaly
    segment: if at least one prediction falls inside the segment, every point
    of the segment counts as a true positive, otherwise every point counts as
    a false negative. Predicted positives that fall outside the detected
    segments count as false positives.

    The ratio properties return 0.0 when their denominator is zero (for
    example on a counter that has not processed anything yet) instead of
    raising ZeroDivisionError.
    """

    def __init__(self):
        # Running totals over every call to `process`.
        self.tp = 0
        self.fp = 0
        self.tn = 0
        self.fn = 0

    def process(self, preds, labels):
        """Add the counts of one (preds, labels) pair to the running totals.

        Parameters
        ----------
        preds: array-like of 0/1 values
            Predicted anomaly flags; entries equal to 1 are predicted anomalies.
        labels: array-like of 0/1 values
            Ground-truth flags; entries equal to 1 are true anomalies.

        Neither input is modified; the method works on copies.
        """
        preds = np.asarray(preds).copy()
        labels = np.asarray(labels).copy()
        ground_truth_ones = np.where(labels == 1)[0]
        pred_ones = np.where(preds == 1)[0]

        tp, fn = 0, 0

        for segment in self._consecutive(ground_truth_ones):
            if segment.size == 0:
                # np.split yields one empty chunk when there are no positives.
                continue
            hits = np.intersect1d(segment, pred_ones, assume_unique=True)
            if hits.size != 0:
                tp += segment.size
                # Clear the matched predictions so they are not counted as FP.
                # Segments are disjoint, so this cannot affect later matches.
                preds[hits] = 0
            else:
                fn += segment.size

        # Whatever is still flagged did not fall into a detected segment.
        fp = int(np.count_nonzero(preds == 1))
        tn = preds.size - tp - fp - fn

        self.tp += tp
        self.fp += fp
        self.tn += tn
        self.fn += fn

        return

    def _consecutive(self, data, stepsize=1):
        """Split sorted indices into runs whose neighbours differ by `stepsize`."""
        return np.split(data, np.where(np.diff(data) != stepsize)[0]+1)

    @staticmethod
    def _ratio(numerator, denominator):
        """Return numerator/denominator, or 0.0 when the denominator is zero."""
        return numerator/denominator if denominator else 0.0

    @property
    def tpr(self):
        """True positive rate: tp / (fn + tp)."""
        return self._ratio(self.tp, self.fn+self.tp)

    @property
    def fpr(self):
        """False positive rate: fp / (tn + fp)."""
        return self._ratio(self.fp, self.tn+self.fp)

    @property
    def tnr(self):
        """True negative rate: tn / (tn + fp)."""
        return self._ratio(self.tn, self.tn+self.fp)

    @property
    def fnr(self):
        """False negative rate: fn / (fn + tp)."""
        return self._ratio(self.fn, self.fn+self.tp)

    @property
    def precision(self):
        """Precision: tp / (tp + fp)."""
        return self._ratio(self.tp, self.tp+self.fp)

    @property
    def recall(self):
        """Recall: tp / (tp + fn)."""
        return self._ratio(self.tp, self.tp+self.fn)

    @property
    def f1(self):
        """Harmonic mean of precision and recall (0.0 when both are zero)."""
        precision, recall = self.precision, self.recall
        return self._ratio(2*precision*recall, precision+recall)
    
    

# Run Experiments

## Metric 2

### -- Setup

In [8]:
# Input folders for the experiments, one per synthetic outlier type.
# NOTE(review): these use a "../" prefix whereas the generation cells above
# write under "./data/..." — confirm the working directory the notebook is run from.
PATH0 = "../data/univar-synth/point_global/"
PATH1 = "../data/univar-synth/point_contextual/"
PATH2 = "../data/univar-synth/collective_global/"
PATH3 = "../data/univar-synth/collective_trend/"
PATH4 = "../data/univar-synth/collective_seasonal/"

In [9]:
# Ordered list of dataset folders iterated by each experiment loop below.
PATHS = [PATH0, PATH1, PATH2, PATH3, PATH4]

In [10]:
# Output root for this experiment; each model writes its scores and
# predictions into its own sub-folder (e.g. "quantile/", "ma/").
SAVE_DIR = "../results/univar-synth/unsup1tuned-metric2/"

In [9]:
"""
UNSUPERVISED LEARNING METRICS
Author:     Bob Stienen
License:    MIT License
Source:     http://www.github.com/bstienen/AUMVC

Implementation of the Area under the Mass-Volume Curve algorithm as by
- Stephan Clémençon and Jeremie Jakubowicz, Scoring anomalies: a M-estimation
  formulation approach. 2013-04

Implementation is inspired by
   https://github.com/albertcthomas/anomaly_tuning
"""

import warnings
import numpy as np
from scipy.special import comb
from sklearn.metrics import auc


def aumvc(scoring_function,
          X_test,
          N_mc=100000,
          N_levelsets=100,
          normalise=True):
    """ Calculate the area under the mass-volume curve for an anomaly detection
    function or algorithm

    This function uses monte carlo sampling in the parameter space box spanned
    by the provided test data in order to estimate the level set of the
    scoring function. For higher dimensionalities the amount of sampled data
    points would yield this algorithm intractable. In these cases the use of
    the `aumvc_hd` function is advised instead.

    Parameters
    ----------
    scoring_function: function
        Function that takes datapoints as numpy.ndarray (nPoints, nFeatures)
        and returns an anomaly score. This score should be in range [0,1],
        where 1 indicates the point not being an anomaly (and 0 that the point
        *is* an anomaly).
    X_test: numpy.ndarray of shape (nPoints, nFeatures) or (nPoints,)
        Datapoints used for testing the algorithm. A 1-D array is treated as
        a single feature; it is passed to `scoring_function` unchanged, while
        the Monte Carlo sample is passed with shape (N_mc, 1).
    N_mc: int (default: 100,000)
        Number of datapoints to sample in the parameter space to estimate the
        level sets of the scoring function.
    N_levelsets: int (default: 100)
        Number of level sets to evaluate.
    normalise: bool (default: True)
        Indicates if output scores of the scoring_function should be normalised
        before calculating the mass-volume curve.

    Returns
    -------
    (area, alphas, volumes): tuple
        `area` is the area under the mass-volume curve, `alphas` the mass
        levels that were evaluated and `volumes` the estimated volume of the
        level set for each of them. """

    # Get ranges for the test data
    mins = np.amin(X_test, axis=0)
    maxs = np.amax(X_test, axis=0)

    # 1-D input: the reductions above are scalars, wrap them so that the
    # sampling below still produces an (N_mc, 1) array.
    if X_test.ndim==1:
        mins = np.array([mins])
        maxs = np.array([maxs])

    # Generate uniform MC data
    U = np.random.rand(N_mc, len(mins))*(maxs-mins)+mins

    # Calculate volume of total cube
    vol_tot_cube = np.prod(maxs - mins)

    # Score test and MC data
    score_U = scoring_function(U)
    score_test = scoring_function(X_test)

    # Do normalising if needed
    if normalise:
        minimum = min(np.amin(score_U), np.amin(score_test))
        maximum = max(np.amax(score_U), np.amax(score_test))
        span = maximum - minimum
        if span == 0:
            # Constant scorer: dividing would give 0/0 = NaN, every level-set
            # volume would then evaluate to 0 and the degenerate scorer would
            # obtain the best possible area. Map all scores to one level
            # instead, so each level set covers the whole sampling box.
            score_U = np.zeros_like(score_U, dtype=float)
            score_test = np.zeros_like(score_test, dtype=float)
        else:
            score_U = (score_U - minimum) / span
            score_test = (score_test - minimum) / span

    # Calculate alphas to use
    alphas = np.linspace(0, 1, N_levelsets)

    # Compute offsets
    offsets = np.percentile(score_test, 100 * (1 - alphas))

    # Compute volumes of associated level sets
    volumes = (np.array([np.mean(score_U >= offset)
                        for offset in offsets]) * vol_tot_cube)

    # Calculating area under the curve
    area = auc(alphas, volumes)

    # Return area and curve variables
    return (area, alphas, volumes)


def aumvc_hd(scoring_function_generator,
             X_train,
             X_test,
             N_selected_dim=5,
             N_iterations=100,
             N_mc=100000,
             N_levelsets=1000,
             normalise=True):
    """ Calculate the area under the mass-volume curve for an anomaly detection
    function or algorithm working in high-dimensional parameter spaces

    The curse of dimensionality is avoided by taking the average over multiple
    AUMVC values for randomly selected subspaces of the parameter space under
    consideration. The AUMVCs are calculated using the `aumvc` function above.
    As this requires a retraining of the scoring function for each random
    subspace, the `aumvc_hd` function does not take a scoring function as
    input, but rather a generator of scoring functions. This function should
    take the training data as input and return a scoring function (see
    description of `aumvc` for requirements of this function).

    Parameters
    ----------
    scoring_function_generator: function
        Function that takes training datapoints as numpy.ndarray of shape
        (nPoints, nFeatures) and returns a scoring function. See description of
        `aumvc` function for requirements on the scoring function.
    X_train: numpy.ndarray of shape (nPoints, nFeatures)
        Data points for which randomly selected subspaces are passed to the
        scoring function generator for creation of the scoring function.
    X_test: numpy.ndarray of shape (nPoints, nFeatures)
        Data points used for testing the algorithm. Number of data points does
        not have to match the number of training points, but the number of
        features *does* have to match.
    N_selected_dim: int (default=5)
        Number of dimensions selected for the random subspace generation. This
        number should be equal to or smaller than the number of features in
        the testing data.
    N_iterations: int (default=100)
        Number of random subspaces have to be evaluated. A warning will be
        raised if this number is higher than half the total number of unique
        combinations that can be randomly selected from the provided parameter
        space.
    N_mc: int (default=100,000)
        Number of datapoints to sample in the parameter space to estimate the
        level sets of the scoring function.
    N_levelsets: int (default=1000)
        Number of level sets to evaluate.
    normalise: bool (default: True)
        Indicates if output scores of the scoring_function should be normalised
        before calculating the mass-volume curve.

    Returns
    -------
    float
        Mean of the AUMVC values obtained over the `N_iterations` subspaces.

    Raises
    ------
    Exception
        If `N_selected_dim` exceeds the number of features in `X_test`, or if
        `X_train` and `X_test` have a different number of features. """

    # Check if N_selected_dim <= dim(X_test)
    data_dim = X_test.shape[1]
    if N_selected_dim > data_dim:
        raise Exception("""The number of dimensions to select in each iteration
is larger than the number of dimensions in the provided data.""")

    # Check if the dimensionality of training data matches the dimensionality
    # of the testing data
    if X_train.shape[1] != data_dim:
        raise Exception("""The number of features in the training data does not
match the number of features in the testing data.""")

    # Check if the number of unique random subspaces is significantly larger
    # (i.e. > a factor of 2) than the requested number of iterations
    N_unique = comb(data_dim, N_selected_dim)
    if N_unique < 2 * N_iterations:
        warnings.warn("""The number of unique combinations of the dimensions of
the input space is smaller than twice the requested number of iterations.""")

    # Initialise final AUMVC variable
    area_hd = 0

    # Run over each iteration
    for _ in range(N_iterations):

        # Make feature subselection (same columns for train and test)
        features = np.random.choice(data_dim, N_selected_dim, replace=False)
        X_selection = X_test[:, features]
        X_train_selection = X_train[:, features]

        # Train scoring function
        scoring_function = scoring_function_generator(X_train_selection)

        # Calculate area under curve and collect it in final variable
        area, _, _ = aumvc(scoring_function,
                           X_selection,
                           N_mc,
                           N_levelsets,
                           normalise)
        area_hd += area

    # Return mean area
    return area_hd / N_iterations

In [10]:
# Raise Optuna's log threshold to FATAL so the tuning loops below do not print per-trial messages.
optuna.logging.set_verbosity(optuna.logging.FATAL)

### Quantile

In [None]:
# Quantile Model
# For every series: tune (window, threshold) by minimising the mass-volume AUC,
# re-score the test split with the best parameters, save and accumulate counts.
for path in PATHS:
    file_list = ["-".join(f.split("-")[:-1]) for f in get_files_from_path(path) if "train" in f]
    scorer = ScoreCounter()
    for f in file_list:
        train = np.loadtxt(path+f+"-train.txt")
        test = np.loadtxt(path+f+"-test.txt")
        labels = np.loadtxt(path+f+"-labels.txt")

        def objective(trial):
            window = trial.suggest_int("window", 100, 1000)
            threshold = trial.suggest_float("threshold", 0.95, 0.999)

            # Prepend the tail of the training data as context for the window.
            test_extend = np.concatenate((train[-window:], test))
            # Pass the suggested threshold so that it actually influences the
            # objective; previously the model was built without it and the
            # "best" threshold was therefore arbitrary.
            model = QuantileModel(window, threshold)
            # Kept from the original flow: the model sees the extended stream
            # once before being evaluated (result itself is not used here).
            scores = model.get_scores(test_extend)[window:]

            auc, *_ = aumvc(lambda x: model.get_scores(x.flatten()), test)
            return auc


        study = optuna.create_study(direction="minimize")
        study.optimize(objective, n_trials=50)

        window = study.best_params["window"]
        threshold = study.best_params["threshold"]
        model = QuantileModel(window, threshold)

        test_extend = np.concatenate((train[-window:], test))

        scores = model.get_scores(test_extend)[window:]

        # The quantile model's output is fed to the scorer as-is (no separate
        # thresholding step), so the predictions are the scores themselves.
        # Defining `preds` here also stops the save below from failing with a
        # NameError or picking up a stale `preds` from another cell.
        preds = scores.copy()

        # Save results
        save = SAVE_DIR+"quantile/"+f
        os.makedirs(SAVE_DIR+"quantile/", exist_ok=True)
        np.savetxt(save+"-scores.txt", scores, header=study.best_params.__str__())
        np.savetxt(save+"-preds.txt", preds, header=study.best_params.__str__())

        scorer.process(preds, labels)

    print(f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}")
       
    

457, 384, 18644, 515, 0.47016460905349794, 0.02018078620979609, 0.9798192137902039, 0.529835390946502, 0.5434007134363853, 0.47016460905349794, 0.5041367898510756


### MA

In [28]:
import warnings
warnings.filterwarnings('ignore')

# MA Model: tune the window per series by minimising the mass-volume AUC, then
# threshold the absolute scores with Jenks natural breaks.
for path in PATHS:
    file_list = [name.rpartition("-")[0] for name in get_files_from_path(path) if "train" in name]
    scorer = ScoreCounter()
    for f in file_list:
        stem = path+f
        train = np.loadtxt(stem+"-train.txt")
        test = np.loadtxt(stem+"-test.txt")
        labels = np.loadtxt(stem+"-labels.txt")

        def objective(trial):
            window = trial.suggest_int("window", 10, 150)

            model = MovingAverageModel(window)
            test_extend = np.concatenate((train[-window:], test))
            # The model scores the extended stream once before it is handed to aumvc.
            scores = np.abs(model.get_scores(test_extend)[window:])

            return aumvc(lambda x: model.get_scores(x.flatten()), test)[0]

        study = optuna.create_study(direction="minimize")
        study.optimize(objective, n_trials=150)

        best_params = study.best_params
        window = best_params["window"]

        # Re-score the test split with the best window, using the tail of the
        # training data as leading context.
        model = MovingAverageModel(window)
        test_extend = np.concatenate((train[-window:], test))
        scores = np.abs(model.get_scores(test_extend)[window:])

        # Threshold: second-to-last of the Jenks breaks over 20 classes.
        thres = jenkspy.jenks_breaks(scores, nb_class=20)[-2]
        preds = scores.copy()
        preds[preds <= thres] = 0
        preds[preds > thres] = 1

        scorer.process(preds, labels)

        # Save results, with the tuned parameters in the file header.
        out_dir = SAVE_DIR+"ma/"
        save = out_dir+f
        os.makedirs(out_dir, exist_ok=True)
        header = str(best_params)
        np.savetxt(save+"-scores.txt", scores, header=header)
        np.savetxt(save+"-preds.txt", preds, header=header)

    print(f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}")

606, 86, 18942, 366, 0.6234567901234568, 0.0045196552449022495, 0.9954803447550977, 0.3765432098765432, 0.8757225433526011, 0.6234567901234568, 0.7283653846153846
139, 2101, 16920, 840, 0.14198161389172625, 0.1104568634666947, 0.8895431365333053, 0.8580183861082737, 0.06205357142857143, 0.14198161389172625, 0.08636222429325878
600, 1475, 17525, 400, 0.6, 0.07763157894736843, 0.9223684210526316, 0.4, 0.2891566265060241, 0.6, 0.3902439024390244
704, 358, 18738, 200, 0.7787610619469026, 0.018747381650607457, 0.9812526183493926, 0.22123893805309736, 0.6629001883239172, 0.7787610619469026, 0.7161749745676501
1871, 2142, 15987, 0, 1.0, 0.11815323514810525, 0.8818467648518947, 0.0, 0.46623473710441066, 1.0, 0.6359619306594153


### ARIMA

In [None]:
# ARIMA Model
# For every series: tune the ARIMA order (p, d, q) and the POT threshold
# parameters (q_risk, contam) by maximising the segment-aware F1 against the
# labels, then re-fit with the best parameters, save and accumulate counts.
for path in PATHS:
    file_list = ["-".join(f.split("-")[:-1]) for f in get_files_from_path(path) if "train" in f]
    scorer = ScoreCounter()
    for f in file_list:
        train = np.loadtxt(path+f+"-train.txt")
        test = np.loadtxt(path+f+"-test.txt")
        labels = np.loadtxt(path+f+"-labels.txt")

        def objective(trial):
            s = ScoreCounter()
            
            p = trial.suggest_int("p", 1, 20)
            d = trial.suggest_int("d", 0, 3)
            q = trial.suggest_int("q", 0, 20)
            q_risk = trial.suggest_float("q_risk", 1e-5, 1e-1, log=True)
            contam = trial.suggest_float("contam", 0.90, 0.999)
 
            # FIXME(review): `window` is never assigned in this cell; this line
            # only works if a previously executed cell left a global `window`
            # behind, and fails with NameError on a fresh kernel.
            test_extend = np.concatenate((train[-window:], test))
                
            model = ARIMAModel(p, d, q)
            model.fit(train)
            # NOTE(review): the scores are computed over the extended stream
            # and are not trimmed before being compared with `labels`, which
            # are loaded for the test split only — confirm that
            # ARIMAModel.get_scores returns one score per test point.
            scores = np.abs(model.get_scores(test_extend))

            # Get threshold (Not needed for Quantile)
            thres = pot(scores, q_risk, contam)
            preds = scores.copy()
            preds[preds <= thres] = 0
            preds[preds > thres] = 1
 
            s.process(preds, labels)
        
            # The guards below return -1 for the cases in which precision,
            # recall or F1 would divide by zero, so such trials rank last.
            if s.tp == 0 and s.fp == 0: return -1
            if s.tp == 0 and s.fn == 0: return -1

            if s.precision == 0 and s.recall == 0: return -1
            if np.isnan(s.f1): return -1
            return s.f1
 
       
        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=20)
       
        p = study.best_params["p"]
        d = study.best_params["d"]
        q = study.best_params["q"]
        q_risk = study.best_params["q_risk"]
        contam = study.best_params["contam"]
        
        model = ARIMAModel(p, d, q)
        model.fit(train)
        # FIXME(review): same undefined `window` as inside the objective above.
        test_extend = np.concatenate((train[-window:], test))
        scores = np.abs(model.get_scores(test_extend))
        
        # Get threshold (Not needed for Quantile)
        thres = pot(scores, q_risk, contam)
        
        # Binarise: scores above the threshold become 1, the rest 0.
        preds = scores.copy()
        preds[preds <= thres] = 0
        preds[preds > thres] = 1

        scorer.process(preds, labels)
        
        # Save results
        save = SAVE_DIR+"arima/"+f
        os.makedirs(SAVE_DIR+"arima/", exist_ok=True)
        np.savetxt(save+"-scores.txt", scores, header=study.best_params.__str__())
        np.savetxt(save+"-preds.txt", preds, header=study.best_params.__str__())


    print(f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}")

### IForest

In [None]:
# IForest Model
# For every series: tune the POT threshold parameters (q, contam) by maximising
# the segment-aware F1 against the labels, then threshold, save and accumulate.
import warnings
# Turns every warning into an exception from here on (until the warning
# filters are changed again).
warnings.filterwarnings('error')


for path in PATHS:
    file_list = ["-".join(f.split("-")[:-1]) for f in get_files_from_path(path) if "train" in f]
    scorer = ScoreCounter()
    for f in file_list:
        train = np.loadtxt(path+f+"-train.txt")
        test = np.loadtxt(path+f+"-test.txt")
        labels = np.loadtxt(path+f+"-labels.txt")


        def objective(trial):
            s = ScoreCounter()
            # "q" is suggested exactly once: the same name was previously
            # requested twice with conflicting (linear / log) distributions.
            # The log-scaled range matches the other cells of this notebook.
            q = trial.suggest_float("q", 1e-5, 1e-1, log=True)
            contam = trial.suggest_float("contam", 0.90, 0.999)

            model = IsolationForestModel()
            model.fit(train)
            scores = np.abs(model.get_scores(test))

            # Get threshold (Not needed for Quantile)
            thres = pot(scores, q, contam)

            preds = scores.copy()
            preds[preds <= thres] = 0
            preds[preds > thres] = 1

            s.process(preds, labels)

            # Return -1 for the cases in which precision, recall or F1 would
            # divide by zero, so such trials rank last.
            if s.tp == 0 and s.fp == 0: return -1
            if s.tp == 0 and s.fn == 0: return -1

            if s.precision == 0 and s.recall == 0: return -1
            if np.isnan(s.f1): return -1
            return s.f1


        # The objective is an F1 score (with -1 marking failed trials), so the
        # study has to maximise it; minimising would select the worst trial.
        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=150)

        q = study.best_params["q"]
        contam = study.best_params["contam"]
        model = IsolationForestModel()
        model.fit(train)
        scores = np.abs(model.get_scores(test))

        # Get threshold (Not needed for Quantile)
        thres = pot(scores, q, contam)

        preds = scores.copy()
        preds[preds <= thres] = 0
        preds[preds > thres] = 1

        scorer.process(preds, labels)

        # Save results once, with the tuned parameters in the header (a second
        # header-less write used to overwrite these files).
        save = SAVE_DIR+"iforest/"+f
        os.makedirs(SAVE_DIR+"iforest/", exist_ok=True)
        np.savetxt(save+"-scores.txt", scores, header=study.best_params.__str__())
        np.savetxt(save+"-preds.txt", preds, header=study.best_params.__str__())


    print(f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}")
   

### Regression

In [None]:
# Regression Model
# For every series: tune the model (window, n_steps, lags) and the POT
# threshold parameters (q, contam), then re-fit with the best parameters,
# threshold, save and accumulate counts.
for path in PATHS:
    file_list = ["-".join(f.split("-")[:-1]) for f in get_files_from_path(path) if "train" in f]
    scorer = ScoreCounter()
    for f in file_list:
        train = np.loadtxt(path+f+"-train.txt")
        test = np.loadtxt(path+f+"-test.txt")
        labels = np.loadtxt(path+f+"-labels.txt")

        def objective(trial):
            window = trial.suggest_int("window", 10, 150)
            n_steps = trial.suggest_int("n_steps", 1, 10, log=True)
            lags = trial.suggest_int("lags", 1, 5)
            q = trial.suggest_float("q", 1e-5, 1e-1, log=True)
            contam = trial.suggest_float("contam", 0.90, 0.999)

            # Prepend the tail of the training data as context for the window.
            test_extend = np.concatenate((train[-window:], test))

            model = RegressionModel(window, n_steps, lags)
            model.fit(train)
            scores = np.abs(model.get_scores(test_extend)[0])

            # Get threshold (Not needed for Quantile)
            thres = pot(scores, q, contam)
            preds = scores.copy()
            preds[preds <= thres] = 0
            preds[preds > thres] = 1

            # NOTE(review): compute_objective is not defined in this notebook;
            # presumably it comes from one of the `one.*` star imports — confirm.
            return compute_objective(labels, preds)


        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=50)

        window = study.best_params["window"]
        n_steps = study.best_params["n_steps"]
        lags = study.best_params["lags"]
        q = study.best_params["q"]
        contam = study.best_params["contam"]

        model = RegressionModel(window,n_steps, lags)
        model.fit(train)
        test_extend = np.concatenate((train[-window:], test))
        scores = np.abs(model.get_scores(test_extend)[0])

        # Get threshold (Not needed for Quantile)
        thres = pot(scores, q, contam)

        preds = scores.copy()
        preds[preds <= thres] = 0
        preds[preds > thres] = 1

        scorer.process(preds, labels)

        # Save results once, with the tuned parameters in the header (a second
        # header-less write used to overwrite the scores file).
        save = SAVE_DIR+"regression/"+f
        os.makedirs(SAVE_DIR+"regression/", exist_ok=True)
        np.savetxt(save+"-scores.txt", scores, header=study.best_params.__str__())
        np.savetxt(save+"-preds.txt", preds, header=study.best_params.__str__())


    print(f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}")

### NBEATS

In [None]:
%%capture
# NBEATSModel
# Tune (window, n_steps) per series by minimising the mass-volume AUC, then
# threshold the scores of the re-fitted model with Jenks natural breaks.
for path in PATHS:
    file_list = [name.rpartition("-")[0] for name in get_files_from_path(path) if "train" in name]
    scorer = ScoreCounter()
    for f in file_list:
        stem = path+f
        train = np.loadtxt(stem+"-train.txt")
        test = np.loadtxt(stem+"-test.txt")
        labels = np.loadtxt(stem+"-labels.txt")

        def objective(trial):
            window = trial.suggest_int("window", 10, 150)
            n_steps = trial.suggest_int("n_steps", 1, 10, log=True)

            test_extend = np.concatenate((train[-window:], test))

            model = NBEATSModel(window, n_steps, use_gpu=True)
            model.fit(train)
            # The model scores the extended stream once before it is handed to aumvc.
            scores = np.abs(model.get_scores(test_extend)[0])
            return aumvc(lambda x: model.get_scores(x)[0], test)[0]

        study = optuna.create_study(direction="minimize")
        study.optimize(objective, n_trials=35)

        best_params = study.best_params
        window = best_params["window"]
        n_steps = best_params["n_steps"]

        # Re-fit with the best parameters and score the test split, using the
        # tail of the training data as leading context.
        test_extend = np.concatenate((train[-window:], test))
        model = NBEATSModel(window, n_steps, use_gpu=True)

        model.fit(train)
        scores = model.get_scores(test_extend)[0]

        # Threshold: second-to-last of the Jenks breaks over 20 classes.
        thres = jenkspy.jenks_breaks(scores, nb_class=20)[-2]

        # Binarise the scores around the threshold.
        preds = scores.copy()
        preds[preds <= thres] = 0
        preds[preds > thres] = 1

        # Save results, with the tuned parameters in the file header.
        out_dir = SAVE_DIR+"nbeats/"
        save = out_dir+f
        os.makedirs(out_dir, exist_ok=True)
        header = str(best_params)
        np.savetxt(save+"-scores.txt", scores, header=header)
        np.savetxt(save+"-preds.txt", preds, header=header)

        scorer.process(preds, labels)

    print(f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}")


### NHiTS

In [None]:
%%capture
# Tune (window, n_steps) per series by minimising the mass-volume AUC, then
# threshold the scores of the re-fitted model with Jenks natural breaks.
# One summary row per dataset folder is appended to nhits/summary.txt.
for path in PATHS:
    file_list = ["-".join(f.split("-")[:-1]) for f in get_files_from_path(path) if "train" in f]
    scorer = ScoreCounter()
    for f in file_list:
        train = np.loadtxt(path+f+"-train.txt")
        test = np.loadtxt(path+f+"-test.txt")
        labels = np.loadtxt(path+f+"-labels.txt")

        def objective(trial):
            window = trial.suggest_int("window", 10, 150)
            n_steps = trial.suggest_int("n_steps", 1, 10, log=True)

            # Prepend the tail of the training data as context for the window.
            test_extend = np.concatenate((train[-window:], test))

            model = NHiTSModel(window, n_steps, use_gpu=True)
            model.fit(train)
            # Kept from the original flow: the model scores the extended
            # stream once before it is handed to aumvc.
            scores = np.abs(model.get_scores(test_extend)[0])

            auc, *_ = aumvc(lambda x: model.get_scores(x)[0], test)
            return auc


        study = optuna.create_study(direction="minimize")
        study.optimize(objective, n_trials=35)

        window = study.best_params["window"]
        n_steps = study.best_params["n_steps"]


        test_extend = np.concatenate((train[-window:], test))
        model = NHiTSModel(window, n_steps, use_gpu=True)

        model.fit(train)
        scores = model.get_scores(test_extend)[0]

        # Get threshold (Not needed for Quantile)
        thres = jenkspy.jenks_breaks(scores, nb_class=20)[-2]

        # Get predictions from threshold
        preds = scores.copy()
        preds[preds <= thres] = 0
        preds[preds > thres] = 1

        # Save results
        save = SAVE_DIR+"nhits/"+f
        os.makedirs(SAVE_DIR+"nhits/", exist_ok=True)
        np.savetxt(save+"-scores.txt", scores, header=study.best_params.__str__())
        np.savetxt(save+"-preds.txt", preds, header=study.best_params.__str__())

        scorer.process(preds, labels)

    summary_line = f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}"
    print(summary_line)
    # The folder is otherwise only created inside the per-file loop, so make
    # sure it exists even when no series was found for this path.
    os.makedirs(SAVE_DIR+"nhits/", exist_ok=True)
    with open(SAVE_DIR+"nhits/summary.txt", 'a+') as summary:
        # Terminate each row with a newline; the file is opened in append
        # mode, so rows would otherwise run together on a single line.
        summary.write(summary_line + "\n")


### RNN(GRU)

In [None]:
%%capture
# suppress output

for path in PATHS:
    # Dataset base names: drop the last "-"-separated piece of each train file name.
    file_list = ["-".join(f.split("-")[:-1]) for f in get_files_from_path(path) if "train" in f]
    scorer = ScoreCounter()
    # Create the output directory up front so the summary file below can be
    # opened even when no dataset files were found for this path.
    os.makedirs(SAVE_DIR+"rnn_gru/", exist_ok=True)

    for f in file_list:
        train = np.loadtxt(path+f+"-train.txt")
        test = np.loadtxt(path+f+"-test.txt")
        labels = np.loadtxt(path+f+"-labels.txt")

        def objective(trial):
            """Optuna objective for the GRU model: fit on ``train`` and return the AUMVC value.

            ``train`` and ``test`` are read from the enclosing loop iteration.

            Args:
                trial: Optuna trial used to sample ``window``, ``n_steps``,
                    ``q`` and ``contam``.

            Returns:
                The first value returned by ``aumvc`` for the fitted model on ``test``.
            """
            window = trial.suggest_int("window", 10, 150)
            n_steps = trial.suggest_int("n_steps", 1, 10, log=True)
            # NOTE(review): q and contam are sampled only so that they appear in
            # study.best_params (they are read back after the study); the value
            # returned here does not depend on them, so the "best" q/contam are
            # whatever the best trial happened to draw. Confirm this is intended.
            trial.suggest_float("q", 1e-5, 1e-1, log=True)
            trial.suggest_float("contam", 0.90, 0.999)

            # Same device setting as the final fit after the study.
            model = RNNModel(window, n_steps, use_gpu=True, rnn_model="GRU")
            model.fit(train)

            auc, *_ = aumvc(lambda x: model.get_scores(x)[0], test)
            return auc

        # Hyper-parameter search: 35 trials, lower objective value is better.
        study = optuna.create_study(direction="minimize")
        study.optimize(objective, n_trials=35)

        window = study.best_params["window"]
        n_steps = study.best_params["n_steps"]
        q = study.best_params["q"]
        contam = study.best_params["contam"]

        # Refit with the best hyper-parameters. The last `window` training points
        # are prepended to the test stream before scoring.
        test_extend = np.concatenate((train[-window:], test))
        model = RNNModel(window, n_steps, use_gpu=True, rnn_model="GRU")
        model.fit(train)
        scores = model.get_scores(test_extend)[0]

        # Threshold from POT using the selected q / contam.
        thres = pot(scores, q, contam)

        # Binarise with a single comparison against the raw scores. Masking in two
        # in-place steps would re-test the already-zeroed entries against the
        # threshold and flip every point to 1 whenever the threshold is negative.
        preds = (scores > thres).astype(scores.dtype)

        # Save results (best hyper-parameters are recorded in the file header)
        save = SAVE_DIR+"rnn_gru/"+f
        best_params_header = str(study.best_params)
        np.savetxt(save+"-scores.txt", scores, header=best_params_header)
        np.savetxt(save+"-preds.txt", preds, header=best_params_header)

        scorer.process(preds, labels)

    # Aggregate metrics over all files of this path, one record per line.
    summary_line = f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}"
    print(summary_line)
    with open(SAVE_DIR+"rnn_gru/summary.txt", 'a+') as summary:
        summary.write(summary_line + "\n")


### TCN

In [None]:
%%capture
for path in PATHS:
    # Dataset base names: drop the last "-"-separated piece of each train file name.
    file_list = ["-".join(f.split("-")[:-1]) for f in get_files_from_path(path) if "train" in f]
    scorer = ScoreCounter()
    # Create the output directory up front so the summary file below can be
    # opened even when no dataset files were found for this path.
    os.makedirs(SAVE_DIR+"tcn/", exist_ok=True)

    for f in file_list:
        train = np.loadtxt(path+f+"-train.txt")
        test = np.loadtxt(path+f+"-test.txt")
        labels = np.loadtxt(path+f+"-labels.txt")

        def objective(trial):
            """Optuna objective for TCN: fit on ``train`` and return the AUMVC value.

            ``train`` and ``test`` are read from the enclosing loop iteration.

            Args:
                trial: Optuna trial used to sample ``window`` (10..150) and
                    ``n_steps`` (1..9, log-scaled).

            Returns:
                The first value returned by ``aumvc`` for the fitted model on ``test``.
            """
            window = trial.suggest_int("window", 10, 150)
            n_steps = trial.suggest_int("n_steps", 1, 9, log=True)

            model = TCNModel(window, n_steps, use_gpu=True)
            model.fit(train)

            auc, *_ = aumvc(lambda x: model.get_scores(x)[0], test)
            return auc

        # Hyper-parameter search: 35 trials, lower objective value is better.
        study = optuna.create_study(direction="minimize")
        study.optimize(objective, n_trials=35)

        window = study.best_params["window"]
        n_steps = study.best_params["n_steps"]

        # Refit with the best hyper-parameters. The last `window` training points
        # are prepended to the test stream before scoring.
        test_extend = np.concatenate((train[-window:], test))
        model = TCNModel(window, n_steps, use_gpu=True)
        model.fit(train)
        scores = model.get_scores(test_extend)[0]

        # Threshold: second-to-last Jenks break computed with 20 classes.
        thres = jenkspy.jenks_breaks(scores, nb_class=20)[-2]

        # Binarise with a single comparison against the raw scores. Masking in two
        # in-place steps would re-test the already-zeroed entries against the
        # threshold and flip every point to 1 whenever the threshold is negative.
        preds = (scores > thres).astype(scores.dtype)

        # Save results (best hyper-parameters are recorded in the file header)
        save = SAVE_DIR+"tcn/"+f
        best_params_header = str(study.best_params)
        np.savetxt(save+"-scores.txt", scores, header=best_params_header)
        np.savetxt(save+"-preds.txt", preds, header=best_params_header)

        scorer.process(preds, labels)

    # Aggregate metrics over all files of this path, one record per line.
    summary_line = f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}"
    print(summary_line)
    # The file is opened in append mode and written once per path, so every
    # record must end with a newline to stay on its own line.
    with open(SAVE_DIR+"tcn/summary.txt", 'a+') as summary:
        summary.write(summary_line + "\n")


### Transformer

In [None]:
%capture
for path in PATHS:
    file_list = ["-".join(f.split("-")[:-1]) for f in get_files_from_path(path) if "train" in f]
    scorer = ScoreCounter()
    for f in file_list:
        train = np.loadtxt(path+f+"-train.txt")
        test = np.loadtxt(path+f+"-test.txt")
        labels = np.loadtxt(path+f+"-labels.txt")
        
        def objective(trial):
            window = trial.suggest_int("window", 10, 150)
            n_steps = trial.suggest_int("n_steps", 1, 10, log=True)
            q = trial.suggest_float("q", 1e-5, 1e-1, log=True)
            contam = trial.suggest_float("contam", 0.90, 0.999)
            
            test_extend = np.concatenate((train[-window:], test))
                
            model = TransformerModel(window, n_steps, use_gpu=True)
            model.fit(train)
            scores = np.abs(model.get_scores(test_extend)[0])

            # Get threshold (Not needed for Quantile)
            thres = pot(scores, q, contam)
            preds = scores.copy()
            preds[preds <= thres] = 0
            preds[preds > thres] = 1
 
            return compute_objective(test, preds)
        

        
        study = optuna.create_study(direction="minimize")
        study.optimize(objective, n_trials=35)
       
        window = study.best_params["window"]
        n_steps = study.best_params["n_steps"]
        q = study.best_params["q"]
        contam = study.best_params["contam"]
        

        test_extend = np.concatenate((train[-window:], test))
        model = TransformerModel(window, n_steps, use_gpu=True)
 
        model.fit(train)
        scores = model.get_scores(test_extend)[0]
        
        
        # Get threshold (Not needed for Quantile)
        thres = pot(scores, q, contam)
        
        # Get predictions from threshold
        preds = scores.copy()
        preds[preds <= thres] = 0
        preds[preds > thres] = 1
        
        # Save results
        save = SAVE_DIR+"transformer/"+f
        os.makedirs(SAVE_DIR+"transformer/", exist_ok=True)
        np.savetxt(save+"-scores.txt", scores, header=study.best_params.__str__())
        np.savetxt(save+"-preds.txt", preds, header=study.best_params.__str__())

        scorer.process(preds, labels)

    print(f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}")
    with open(SAVE_DIR+"transformer/summary.txt", 'a+') as summary:
        summary.write(f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}")


## Load Results

In [17]:
# Re-score the saved predictions of one model against the ground-truth labels.
model = "transformer"
metric_names = ("tp", "fp", "tn", "fn", "tpr", "fpr", "tnr", "fnr", "precision", "recall", "f1")

for path in PATHS:
    scorer = ScoreCounter()
    # Dataset base names: drop the last "-"-separated piece of each train file name.
    file_list = [
        "-".join(f.split("-")[:-1])
        for f in get_files_from_path(path)
        if "train" in f
    ]

    for f in file_list:
        labels = np.loadtxt("".join((path, f, "-labels.txt")))
        preds = np.loadtxt(SAVE_DIR + model + "/" + f + "-preds.txt")
        scorer.process(preds, labels)

    # One comma-separated line of aggregate metrics per dataset path.
    print(", ".join(format(getattr(scorer, name)) for name in metric_names))

86, 12, 19016, 886, 0.08847736625514403, 0.0006306495690561278, 0.9993693504309439, 0.911522633744856, 0.8775510204081632, 0.08847736625514403, 0.16074766355140185
61, 382, 18639, 918, 0.06230847803881512, 0.020083066084853583, 0.9799169339151464, 0.9376915219611849, 0.13769751693002258, 0.06230847803881512, 0.08579465541490858
400, 291, 18709, 600, 0.4, 0.01531578947368421, 0.9846842105263158, 0.6, 0.5788712011577424, 0.4, 0.47309284447072736
404, 688, 18408, 500, 0.4469026548672566, 0.03602848764139087, 0.9639715123586091, 0.5530973451327433, 0.36996336996337, 0.4469026548672566, 0.40480961923847697
1636, 607, 17522, 235, 0.8743987172634955, 0.033482265982679685, 0.9665177340173203, 0.12560128273650453, 0.7293802942487739, 0.8743987172634955, 0.7953330092367525
