In [1]:
import os

In [2]:
import jenkspy

In [3]:
import matplotlib.pyplot as plt
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view   
import optuna

In [4]:
from one.generator.univariate import UnivariateDataGenerator
from one.models import *
from one.utils import *
from one.scorer.pot import *

In [5]:
%load_ext autoreload
%autoreload 2

In [6]:
# Notebook-wide matplotlib defaults: figure size (width, height) and font size.
plt.rcParams["figure.figsize"] = 40,10
plt.rcParams["font.size"] = 15

# Generating Univariate Anomalies

In [None]:
# Build a data generator configured with a stream length of 5000.
generator = UnivariateDataGenerator(stream_length=5000)

In [None]:
# Inject collective seasonal outliers so the plots below have something to show.
# NOTE(review): the arguments look like (ratio, factor, radius), as annotated in
# the "Collective Seasonal" save cell — confirm against the generator's signature.
generator.collective_seasonal_outliers(0.1, 1., 50)

## Visualization

### Train Set

In [None]:
# Plot the generator's train series.
plt.plot(generator.train)

### Test Set

In [None]:

# Two stacked axes: the test series on top, its labels underneath.
fig, axes = plt.subplots(2)

axes[0].plot(generator.test)
axes[1].plot(generator.label)

## Save

In [None]:
# Root directory for the generated datasets; the save cells below append
# "<out_type>/<file name>" to it, so the trailing slash is required.
SAVE_DIR = "./data/univar-synth/"

In [None]:
# Point Global
# Generate five point-global-outlier datasets that differ only in `factor` and
# write each as <idx>-<out_type>-factor<factor>-{train,test,labels}.txt.
out_type = "point_global"
config_1 = [0.05, 1.1, 50] #ratio, factor, radius
config_2 = [0.05, 1.25, 50] #ratio, factor, radius
config_3 = [0.05, 1.5, 50] #ratio, factor, radius
config_4 = [0.05, 2, 50] #ratio, factor, radius
config_5 = [0.05, 3, 50] #ratio, factor, radius

# np.savetxt does not create missing directories, so make sure the target exists.
os.makedirs(SAVE_DIR+out_type, exist_ok=True)

for idx, config in enumerate([config_1, config_2, config_3, config_4, config_5]):
    # A fresh generator per configuration keeps the datasets independent.
    generator = UnivariateDataGenerator(stream_length=5000)
    generator.point_global_outliers(*config)

    # Shared file-name stem; config[1] is the factor.
    stem = f"{out_type}/{idx}-{out_type}-factor{config[1]}"
    for suffix, series in (("train", generator.train),
                           ("test", generator.test),
                           ("labels", generator.label)):
        np.savetxt(SAVE_DIR+f"{stem}-{suffix}.txt", series)

In [None]:
# Point Contextual
# Generate five point-contextual-outlier datasets that differ only in `factor`
# and write each as <idx>-<out_type>-factor<factor>-{train,test,labels}.txt.
out_type = "point_contextual"
config_1 = [0.05, 1.1, 50] #ratio, factor, radius
config_2 = [0.05, 1.25, 50] #ratio, factor, radius
config_3 = [0.05, 1.5, 50] #ratio, factor, radius
config_4 = [0.05, 2, 50] #ratio, factor, radius
config_5 = [0.05, 3, 50] #ratio, factor, radius

# np.savetxt does not create missing directories, so make sure the target exists.
os.makedirs(SAVE_DIR+out_type, exist_ok=True)

for idx, config in enumerate([config_1, config_2, config_3, config_4, config_5]):
    # A fresh generator per configuration keeps the datasets independent.
    generator = UnivariateDataGenerator(stream_length=5000)
    generator.point_contextual_outliers(*config)

    # Shared file-name stem; config[1] is the factor.
    stem = f"{out_type}/{idx}-{out_type}-factor{config[1]}"
    for suffix, series in (("train", generator.train),
                           ("test", generator.test),
                           ("labels", generator.label)):
        np.savetxt(SAVE_DIR+f"{stem}-{suffix}.txt", series)
    

In [None]:
# Collective Global
# Generate five collective-global-outlier datasets ("square" shape) that differ
# only in `coef` and write each as
# <idx>-<out_type>-factor<coef>-{train,test,labels}.txt.
out_type = "collective_global"
config_1 = [0.05, 50, 1.1] #ratio, radius, coef
config_2 = [0.05, 50, 1.25] #ratio, radius, coef
config_3 = [0.05, 50, 1.5] #ratio, radius, coef
config_4 = [0.05, 50, 2] #ratio, radius, coef
config_5 = [0.05, 50, 3] #ratio, radius, coef

# np.savetxt does not create missing directories, so make sure the target exists.
os.makedirs(SAVE_DIR+out_type, exist_ok=True)

for idx, config in enumerate([config_1, config_2, config_3, config_4, config_5]):
    # The last entry is passed by keyword; the leading ones stay positional.
    *args, coef = config
    generator = UnivariateDataGenerator(stream_length=5000)
    generator.collective_global_outliers(*args, "square", coef=coef)

    stem = f"{out_type}/{idx}-{out_type}-factor{coef}"
    for suffix, series in (("train", generator.train),
                           ("test", generator.test),
                           ("labels", generator.label)):
        np.savetxt(SAVE_DIR+f"{stem}-{suffix}.txt", series)

In [None]:
# Collective Trend
# Generate five collective-trend-outlier datasets that differ only in `factor`
# and write each as <idx>-<out_type>-factor<factor>-{train,test,labels}.txt.
out_type = "collective_trend"
config_1 = [0.05, 0.01, 50] #ratio, factor, radius
config_2 = [0.05, 0.02, 50] #ratio, factor, radius
config_3 = [0.05, 0.03, 50] #ratio, factor, radius
config_4 = [0.05, 0.04, 50] #ratio, factor, radius
config_5 = [0.05, 0.05, 50] #ratio, factor, radius

# np.savetxt does not create missing directories, so make sure the target exists.
os.makedirs(SAVE_DIR+out_type, exist_ok=True)

for idx, config in enumerate([config_1, config_2, config_3, config_4, config_5]):
    # A fresh generator per configuration keeps the datasets independent.
    generator = UnivariateDataGenerator(stream_length=5000)
    generator.collective_trend_outliers(*config)

    # Shared file-name stem; config[1] is the factor.
    stem = f"{out_type}/{idx}-{out_type}-factor{config[1]}"
    for suffix, series in (("train", generator.train),
                           ("test", generator.test),
                           ("labels", generator.label)):
        np.savetxt(SAVE_DIR+f"{stem}-{suffix}.txt", series)

In [None]:
# Collective Seasonal
# Generate five collective-seasonal-outlier datasets that differ only in
# `factor` and write each as
# <idx>-<out_type>-factor<factor>-{train,test,labels}.txt.
out_type = "collective_seasonal"
config_1 = [0.1, 1.1, 50] #ratio, factor, radius
config_2 = [0.1, 1.25, 50] #ratio, factor, radius
config_3 = [0.1, 1.5, 50] #ratio, factor, radius
config_4 = [0.1, 2, 50] #ratio, factor, radius
config_5 = [0.1, 3, 50] #ratio, factor, radius

# np.savetxt does not create missing directories, so make sure the target exists.
os.makedirs(SAVE_DIR+out_type, exist_ok=True)

for idx, config in enumerate([config_1, config_2, config_3, config_4, config_5]):
    # A fresh generator per configuration keeps the datasets independent.
    generator = UnivariateDataGenerator(stream_length=5000)
    generator.collective_seasonal_outliers(*config)

    # Shared file-name stem; config[1] is the factor.
    stem = f"{out_type}/{idx}-{out_type}-factor{config[1]}"
    for suffix, series in (("train", generator.train),
                           ("test", generator.test),
                           ("labels", generator.label)):
        np.savetxt(SAVE_DIR+f"{stem}-{suffix}.txt", series)

# Visualize Dataset

In [None]:
# One directory per outlier family, matching the "<out_type>/" sub-folders the
# save cells write under "./data/univar-synth/".
PATH0 = "./data/univar-synth/point_global/"
PATH1 = "./data/univar-synth/point_contextual/"
PATH2 = "./data/univar-synth/collective_global/"
PATH3 = "./data/univar-synth/collective_trend/"
PATH4 = "./data/univar-synth/collective_seasonal/"
# Iteration order used by the visualization loop below.
PATHS = [PATH0, PATH1, PATH2, PATH3, PATH4]

In [None]:
# Draw one figure per saved dataset: test series on top, labels underneath.
for path in PATHS:
    # Dataset stems are the "*train*" file names with their last "-" part removed.
    file_list = []
    for name in get_files_from_path(path):
        if "train" in name:
            file_list.append("-".join(name.split("-")[:-1]))

    for f in file_list:
        stem = path+f
        test = np.loadtxt(stem+"-test.txt")
        labels = np.loadtxt(stem+"-labels.txt")

        fig, axes = plt.subplots(2)
        upper, lower = axes
        upper.set_title(f)
        upper.plot(test)
        lower.plot(labels)

# Scoring Helper

In [7]:
class ScoreCounter:
    """Accumulate range-aware confusion counts across one or more series.

    A maximal run of consecutive positive labels is treated as one anomaly
    range. If at least one prediction falls inside a range, every point of the
    range counts as a true positive; otherwise every point counts as a false
    negative. Predictions outside all ranges are false positives.

    The rate properties return ``nan`` (instead of raising
    ``ZeroDivisionError``) when their denominator is zero, so summaries can be
    printed and checked with ``np.isnan`` even for degenerate results.
    """

    def __init__(self):
        # Running totals over every call to `process`.
        self.tp = 0
        self.fp = 0
        self.tn = 0
        self.fn = 0

    def process(self, preds, labels):
        """Add the counts for one (preds, labels) pair to the running totals.

        Parameters
        ----------
        preds : np.ndarray
            Predictions; entries equal to 1 are treated as detections.
        labels : np.ndarray
            Ground truth; entries equal to 1 are treated as anomalies.

        Returns
        -------
        None
        """
        # Work on a copy: matched detections are cleared below.
        preds = preds.copy()
        ground_truth_ones = np.where(labels == 1)[0]
        pred_ones = np.where(preds == 1)[0]

        ranges = self._consecutive(ground_truth_ones)

        tp, fp, fn = 0, 0, 0

        for r in ranges:
            intersect = np.intersect1d(r, pred_ones, assume_unique=True)
            if intersect.size != 0:
                # One hit credits the whole range.
                tp += r.size
                # Clear the hits so they are not counted as false positives.
                preds[intersect] = 0
                pred_ones = np.where(preds == 1)[0]
            else:
                fn += r.size

        # Whatever is still flagged lies outside every labelled range.
        fp += pred_ones.size
        tn = preds.size - tp - fp - fn

        self.tp += tp
        self.fp += fp
        self.tn += tn
        self.fn += fn

        return

    def _consecutive(self, data, stepsize=1):
        """Split sorted indices into runs whose neighbours differ by `stepsize`."""
        return np.split(data, np.where(np.diff(data) != stepsize)[0]+1)

    @staticmethod
    def _ratio(numerator, denominator):
        """Return numerator/denominator, or nan when the denominator is zero."""
        if denominator == 0:
            return float("nan")
        return numerator/denominator

    @property
    def tpr(self):
        """True positive rate: tp / (fn + tp)."""
        return self._ratio(self.tp, self.fn+self.tp)

    @property
    def fpr(self):
        """False positive rate: fp / (tn + fp)."""
        return self._ratio(self.fp, self.tn+self.fp)

    @property
    def tnr(self):
        """True negative rate: tn / (tn + fp)."""
        return self._ratio(self.tn, self.tn+self.fp)

    @property
    def fnr(self):
        """False negative rate: fn / (fn + tp)."""
        return self._ratio(self.fn, self.fn+self.tp)

    @property
    def precision(self):
        """Precision: tp / (tp + fp)."""
        return self._ratio(self.tp, self.tp+self.fp)

    @property
    def recall(self):
        """Recall: tp / (tp + fn)."""
        return self._ratio(self.tp, self.tp+self.fn)

    @property
    def f1(self):
        """Harmonic mean of precision and recall; nan if either is undefined."""
        precision, recall = self.precision, self.recall
        return self._ratio(2*precision*recall, precision+recall)
    
    

# Run Experiments

## Metric 1

### -- Setup

In [8]:
# Dataset directories for the experiments. These rebind PATH0..PATH4 from the
# visualization section, now rooted at "../data" instead of "./data".
PATH0 = "../data/univar-synth/point_global/"
PATH1 = "../data/univar-synth/point_contextual/"
PATH2 = "../data/univar-synth/collective_global/"
PATH3 = "../data/univar-synth/collective_trend/"
PATH4 = "../data/univar-synth/collective_seasonal/"

In [9]:
# Dataset families iterated by every experiment cell below.
PATHS = [PATH0, PATH1, PATH2, PATH3, PATH4]

In [10]:
# Results root; each experiment cell writes into "<SAVE_DIR><model name>/".
# This rebinds the SAVE_DIR used earlier for the generated datasets.
SAVE_DIR = "../results/univar-synth/unsup1tuned/"

In [11]:
def _eval_wo_label(data, preds):
    """
    Returns a few quantitative metrics for us to use for evaluation when labels
    are not provided.
    Parameters
    -----------
    df: pd.Dataframe
        the dataframe with 'timestamp', 'value' and 'predict' columns
        where 'predict' is 1 for those predicted as anomalous and 0 otherwise.
    Returns
    ----------
    tuple
        (number of anomalies,
        % of anomnalies,
        avg. distance between mean and all anomalies (yaxis),
        avg. time distance between consecutive anomalies,
        avg. cycle distance between consecutive anomalies,
        maximum range between non anomaly points (yaxis)
        )
    """
    num_anomalies = preds.sum()
    percent_anomalies = num_anomalies/len(preds)

    mean_val = data.mean(axis=0)

    pred_anomalies = data[preds == 1]
    pred_non_anomalies = data[preds == 0]

    avg_anom_dist_from_mean_val = np.linalg.norm(pred_anomalies - mean_val, axis=-1).mean()
    avg_cycles_delta_between_anomalies = np.diff(np.where(preds==1)[0]).mean()
    try:
        max_range_non_anomalies = (np.abs(pred_non_anomalies.max() - pred_non_anomalies.min())).mean() 
    except ValueError: max_range_non_anomalies = np.nan

    return (num_anomalies,
            percent_anomalies,
            avg_anom_dist_from_mean_val,
            avg_cycles_delta_between_anomalies,
            max_range_non_anomalies)


def compute_objective(data, preds):
    """Label-free tuning objective (lower is better) for a set of predictions.

    Combines three of the metrics returned by `_eval_wo_label`: the anomaly
    fraction (weighted by 1e4) plus the range of the non-anomalous points,
    minus the average gap between consecutive anomalies. A nan or infinite
    result is replaced by 1e10 so the caller always receives a finite number.
    """
    metrics = _eval_wo_label(data, preds.astype(int))
    anomaly_fraction = metrics[1]
    mean_gap = metrics[3]
    normal_range = metrics[4]

    score = 1e4 * anomaly_fraction + normal_range - mean_gap

    # Degenerate predictions produce nan/inf; map them to a large finite penalty.
    degenerate = np.isnan(score) or np.isinf(score)
    return 1e10 if degenerate else score

In [12]:
# Raise Optuna's log level to CRITICAL so per-trial messages do not flood the output.
optuna.logging.set_verbosity(optuna.logging.CRITICAL)

### Quantile

In [None]:
# Quantile Model
for path in PATHS:
    file_list = ["-".join(f.split("-")[:-1]) for f in get_files_from_path(path) if "train" in f]
    scorer = ScoreCounter()
    for f in file_list:
        train = np.loadtxt(path+f+"-train.txt")
        test = np.loadtxt(path+f+"-test.txt")
        labels = np.loadtxt(path+f+"-labels.txt")

        def objective(trial):
            window = trial.suggest_int("window", 100, 1000)
            threshold = trial.suggest_float("threshold", 0.95, 0.999)
            
            test_extend = np.concatenate((train[-window:], test))
            model = QuantileModel(window)
            scores = model.get_scores(test_extend)[window:] 
            
            return compute_objective(test, scores)
        
        
        study = optuna.create_study(direction="minimize")
        study.optimize(objective, n_trials=50)
       
        window = study.best_params["window"]
        threshold = study.best_params["threshold"]
        model = QuantileModel(window, threshold)
        
        test_extend = np.concatenate((train[-window:], test))
        
        scores = model.get_scores(test_extend)[window:] 
        
        # Save results
        save = SAVE_DIR+"quantile/"+f
        os.makedirs(SAVE_DIR+"quantile/", exist_ok=True)
        np.savetxt(save+"-scores.txt", scores, header=study.best_params.__str__())
        np.savetxt(save+"-preds.txt", preds, header=study.best_params.__str__())

        scorer.process(scores, labels)

    print(f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}")
       
    

### MA

In [None]:
import warnings
warnings.filterwarnings('ignore')


def _ma_scores_and_preds(train, test, window, q, contam):
    """Score `test` with a moving-average model and binarise the scores via POT."""
    # Prepend the last `window` training points, then drop as many leading scores.
    padded = np.concatenate((train[-window:], test))
    detector = MovingAverageModel(window)
    scores = np.abs(detector.get_scores(padded)[window:])

    # Get threshold (Not needed for Quantile)
    thres = pot(scores, q, contam)
    preds = scores.copy()
    preds[preds <= thres] = 0
    preds[preds > thres] = 1
    return scores, preds


# MA Model
# Per dataset family: tune (window, q, contam) for each file on the label-free
# objective, re-run the best configuration, score it and save the outputs.
for path in PATHS:
    file_list = ["-".join(f.split("-")[:-1]) for f in get_files_from_path(path) if "train" in f]
    scorer = ScoreCounter()
    for f in file_list:
        train = np.loadtxt(path+f+"-train.txt")
        test = np.loadtxt(path+f+"-test.txt")
        labels = np.loadtxt(path+f+"-labels.txt")

        def objective(trial):
            trial_window = trial.suggest_int("window", 10, 150)
            trial_q = trial.suggest_float("q", 1e-5, 1e-1)
            trial_contam = trial.suggest_float("contam", 0.90, 0.999)

            _, trial_preds = _ma_scores_and_preds(train, test, trial_window, trial_q, trial_contam)
            return compute_objective(test, trial_preds)

        study = optuna.create_study(direction="minimize")
        study.optimize(objective, n_trials=150)

        best = study.best_params
        window = best["window"]
        q = best["q"]
        contam = best["contam"]

        scores, preds = _ma_scores_and_preds(train, test, window, q, contam)

        scorer.process(preds, labels)

        # Save results
        out_dir = SAVE_DIR+"ma/"
        save = out_dir+f
        os.makedirs(out_dir, exist_ok=True)
        np.savetxt(save+"-scores.txt", scores, header=study.best_params.__str__())
        np.savetxt(save+"-preds.txt", preds, header=study.best_params.__str__())

    print(f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}")

### ARIMA

In [None]:
# ARIMA Model 
# Per dataset family: tune (p, d, q, q_risk, contam) for each file, re-fit the
# best configuration, score it and save the outputs.
# NOTE(review): unlike the Quantile/MA/Regression cells, the objective here is
# the label-based F1 (maximized) rather than compute_objective — confirm that
# is intended for this "unsup1tuned" run.
for path in PATHS:
    file_list = ["-".join(f.split("-")[:-1]) for f in get_files_from_path(path) if "train" in f]
    scorer = ScoreCounter()
    for f in file_list:
        train = np.loadtxt(path+f+"-train.txt")
        test = np.loadtxt(path+f+"-test.txt")
        labels = np.loadtxt(path+f+"-labels.txt")

        def objective(trial):
            # Fresh counter per trial so F1 reflects this trial only.
            s = ScoreCounter()
            
            p = trial.suggest_int("p", 1, 20)
            d = trial.suggest_int("d", 0, 3)
            q = trial.suggest_int("q", 0, 20)
            q_risk = trial.suggest_float("q_risk", 1e-5, 1e-1, log=True)
            contam = trial.suggest_float("contam", 0.90, 0.999)
 
            # NOTE(review): `window` is not assigned anywhere in this cell; it
            # resolves to whatever value an earlier cell left in the kernel.
            test_extend = np.concatenate((train[-window:], test))
                
            model = ARIMAModel(p, d, q)
            model.fit(train)
            # NOTE(review): no `[window:]` slice is applied here, unlike the
            # Quantile and MA cells — confirm get_scores yields one score per
            # `test` point, otherwise preds and labels are misaligned.
            scores = np.abs(model.get_scores(test_extend))

            # Get threshold (Not needed for Quantile)
            thres = pot(scores, q_risk, contam)
            preds = scores.copy()
            preds[preds <= thres] = 0
            preds[preds > thres] = 1
 
            s.process(preds, labels)
        
            # Degenerate outcomes get the sentinel -1 so a maximizing study
            # ranks them below any real F1; the first two checks also avoid a
            # zero denominator in precision / recall.
            if s.tp == 0 and s.fp == 0: return -1
            if s.tp == 0 and s.fn == 0: return -1

            if s.precision == 0 and s.recall == 0: return -1
            if np.isnan(s.f1): return -1
            return s.f1
 
       
        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=20)
       
        p = study.best_params["p"]
        d = study.best_params["d"]
        q = study.best_params["q"]
        q_risk = study.best_params["q_risk"]
        contam = study.best_params["contam"]
        
        # Re-fit with the best parameters and score the same way as in the objective.
        model = ARIMAModel(p, d, q)
        model.fit(train)
        # NOTE(review): same leaked `window` and missing slice as in the objective.
        test_extend = np.concatenate((train[-window:], test))
        scores = np.abs(model.get_scores(test_extend))
        
        # Get threshold (Not needed for Quantile)
        thres = pot(scores, q_risk, contam)
        
        preds = scores.copy()
        preds[preds <= thres] = 0
        preds[preds > thres] = 1

        scorer.process(preds, labels)
        
        # Save results
        save = SAVE_DIR+"arima/"+f
        os.makedirs(SAVE_DIR+"arima/", exist_ok=True)
        np.savetxt(save+"-scores.txt", scores, header=study.best_params.__str__())
        np.savetxt(save+"-preds.txt", preds, header=study.best_params.__str__())


    # Pooled counts and rates over every file of this dataset family.
    print(f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}")

### IForest

In [None]:
# IForest Model 
# Per dataset family: fit an isolation forest per file, tune the POT threshold
# parameters (q, contam) for the best F1, score the result and save the outputs.
import warnings
# Escalate every warning to an exception.
# NOTE(review): this filter stays active for all later cells — confirm intended.
warnings.filterwarnings('error')


for path in PATHS:
    file_list = ["-".join(f.split("-")[:-1]) for f in get_files_from_path(path) if "train" in f]
    scorer = ScoreCounter()
    for f in file_list:
        train = np.loadtxt(path+f+"-train.txt")
        test = np.loadtxt(path+f+"-test.txt")
        labels = np.loadtxt(path+f+"-labels.txt")

        # No tuned parameter reaches the model, so fit and score once per file.
        # The thresholds are then tuned on exactly the scores that get saved.
        model = IsolationForestModel()
        model.fit(train)
        scores = np.abs(model.get_scores(test))

        def objective(trial):
            # Fresh counter per trial so F1 reflects this trial only.
            s = ScoreCounter()
            # Each name is suggested once; a second suggestion of "q" with a
            # different `log` setting is rejected by Optuna.
            q = trial.suggest_float("q", 1e-5, 1e-1, log=True)
            contam = trial.suggest_float("contam", 0.90, 0.999)

            # Get threshold (Not needed for Quantile)
            thres = pot(scores, q, contam)

            preds = scores.copy()
            preds[preds <= thres] = 0
            preds[preds > thres] = 1

            s.process(preds, labels)

            # Degenerate outcomes get the sentinel -1, ranking below any real F1.
            if s.tp == 0 and s.fp == 0: return -1
            if s.tp == 0 and s.fn == 0: return -1

            if s.precision == 0 and s.recall == 0: return -1
            if np.isnan(s.f1): return -1
            return s.f1


        # The objective is F1, so it must be maximized (minimizing would pick
        # the worst or degenerate trials).
        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=150)

        q = study.best_params["q"]
        contam = study.best_params["contam"]

        # Get threshold (Not needed for Quantile)
        thres = pot(scores, q, contam)

        preds = scores.copy()
        preds[preds <= thres] = 0
        preds[preds > thres] = 1

        scorer.process(preds, labels)

        # Save results, with the tuned parameters in the header of each file.
        save = SAVE_DIR+"iforest/"+f
        os.makedirs(SAVE_DIR+"iforest/", exist_ok=True)
        np.savetxt(save+"-scores.txt", scores, header=study.best_params.__str__())
        np.savetxt(save+"-preds.txt", preds, header=study.best_params.__str__())


    print(f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}")
   

### Regression

In [None]:
# Regression Model 
# Per dataset family: tune (window, n_steps, lags, q, contam) for each file on
# the label-free objective, re-fit the best configuration, score it and save
# the outputs.
for path in PATHS:
    file_list = ["-".join(f.split("-")[:-1]) for f in get_files_from_path(path) if "train" in f]
    scorer = ScoreCounter()
    for f in file_list:
        train = np.loadtxt(path+f+"-train.txt")
        test = np.loadtxt(path+f+"-test.txt")
        labels = np.loadtxt(path+f+"-labels.txt")

        def objective(trial):
            window = trial.suggest_int("window", 10, 150)
            n_steps = trial.suggest_int("n_steps", 1, 10, log=True)
            lags = trial.suggest_int("lags", 1, 5)
            q = trial.suggest_float("q", 1e-5, 1e-1, log=True)
            contam = trial.suggest_float("contam", 0.90, 0.999)

            # Prepend the last `window` training points to the test series.
            test_extend = np.concatenate((train[-window:], test))

            model = RegressionModel(window, n_steps, lags)
            model.fit(train)
            # The first element of get_scores' result is used as the scores.
            scores = np.abs(model.get_scores(test_extend)[0])

            # Get threshold (Not needed for Quantile)
            thres = pot(scores, q, contam)
            preds = scores.copy()
            preds[preds <= thres] = 0
            preds[preds > thres] = 1

            return compute_objective(test, preds)


        study = optuna.create_study(direction="minimize")
        study.optimize(objective, n_trials=50)

        window = study.best_params["window"]
        n_steps = study.best_params["n_steps"]
        lags = study.best_params["lags"]
        q = study.best_params["q"]
        contam = study.best_params["contam"]

        model = RegressionModel(window,n_steps, lags)
        model.fit(train)
        test_extend = np.concatenate((train[-window:], test))
        scores = np.abs(model.get_scores(test_extend)[0])

        # Get threshold (Not needed for Quantile)
        thres = pot(scores, q, contam)

        preds = scores.copy()
        preds[preds <= thres] = 0
        preds[preds > thres] = 1

        scorer.process(preds, labels)

        # Save results, with the tuned parameters in the header of each file.
        save = SAVE_DIR+"regression/"+f
        os.makedirs(SAVE_DIR+"regression/", exist_ok=True)
        np.savetxt(save+"-scores.txt", scores, header=study.best_params.__str__())
        np.savetxt(save+"-preds.txt", preds, header=study.best_params.__str__())


    print(f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}")

### NBEATS

In [None]:
%%capture
# NBEATSModel
# Per dataset family: tune (window, n_steps, q, contam) for each file on the
# label-free objective, re-fit the best configuration, save its outputs and
# append the pooled metrics to nbeats/summary.txt.
for path in PATHS:
    file_list = ["-".join(f.split("-")[:-1]) for f in get_files_from_path(path) if "train" in f]
    scorer = ScoreCounter()
    for f in file_list:
        train = np.loadtxt(path+f+"-train.txt")
        test = np.loadtxt(path+f+"-test.txt")
        labels = np.loadtxt(path+f+"-labels.txt")

        def objective(trial):
            window = trial.suggest_int("window", 10, 150)
            n_steps = trial.suggest_int("n_steps", 1, 10, log=True)
            q = trial.suggest_float("q", 1e-5, 1e-1, log=True)
            contam = trial.suggest_float("contam", 0.90, 0.999)

            # Prepend the last `window` training points to the test series.
            test_extend = np.concatenate((train[-window:], test))

            model = NBEATSModel(window, n_steps, use_gpu=True)
            model.fit(train)
            # The first element of get_scores' result is used as the scores.
            scores = np.abs(model.get_scores(test_extend)[0])

            # Get threshold (Not needed for Quantile)
            thres = pot(scores, q, contam)
            preds = scores.copy()
            preds[preds <= thres] = 0
            preds[preds > thres] = 1

            return compute_objective(test, preds)


        study = optuna.create_study(direction="minimize")
        study.optimize(objective, n_trials=35)

        window = study.best_params["window"]
        n_steps = study.best_params["n_steps"]
        q = study.best_params["q"]
        contam = study.best_params["contam"]

        test_extend = np.concatenate((train[-window:], test))
        model = NBEATSModel(window, n_steps, use_gpu=True)

        model.fit(train)
        # Use absolute scores, exactly as in the tuning objective; otherwise
        # the tuned (q, contam) would be applied to a different distribution.
        scores = np.abs(model.get_scores(test_extend)[0])

        # Get threshold (Not needed for Quantile)
        thres = pot(scores, q, contam)

        # Get predictions from threshold
        preds = scores.copy()
        preds[preds <= thres] = 0
        preds[preds > thres] = 1

        # Save results
        save = SAVE_DIR+"nbeats/"+f
        os.makedirs(SAVE_DIR+"nbeats/", exist_ok=True)
        np.savetxt(save+"-scores.txt", scores, header=study.best_params.__str__())
        np.savetxt(save+"-preds.txt", preds, header=study.best_params.__str__())

        scorer.process(preds, labels)

    print(f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}")
    # The file is opened in append mode, so end each record with a newline to
    # keep one dataset family per line.
    with open(SAVE_DIR+"nbeats/summary.txt", 'a+') as summary:
        summary.write(f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}\n")


### NHiTS

In [None]:
%%capture
# NHiTSModel
# Per dataset family: tune (window, n_steps, q, contam) for each file on the
# label-free objective, re-fit the best configuration, save its outputs and
# append the pooled metrics to nhits/summary.txt.
for path in PATHS:
    file_list = ["-".join(f.split("-")[:-1]) for f in get_files_from_path(path) if "train" in f]
    scorer = ScoreCounter()
    for f in file_list:
        train = np.loadtxt(path+f+"-train.txt")
        test = np.loadtxt(path+f+"-test.txt")
        labels = np.loadtxt(path+f+"-labels.txt")

        def objective(trial):
            window = trial.suggest_int("window", 10, 150)
            n_steps = trial.suggest_int("n_steps", 1, 10, log=True)
            q = trial.suggest_float("q", 1e-5, 1e-1, log=True)
            contam = trial.suggest_float("contam", 0.90, 0.999)

            # Prepend the last `window` training points to the test series.
            test_extend = np.concatenate((train[-window:], test))

            model = NHiTSModel(window, n_steps, use_gpu=True)
            model.fit(train)
            # The first element of get_scores' result is used as the scores.
            scores = np.abs(model.get_scores(test_extend)[0])

            # Get threshold (Not needed for Quantile)
            thres = pot(scores, q, contam)
            preds = scores.copy()
            preds[preds <= thres] = 0
            preds[preds > thres] = 1

            return compute_objective(test, preds)


        study = optuna.create_study(direction="minimize")
        study.optimize(objective, n_trials=35)

        window = study.best_params["window"]
        n_steps = study.best_params["n_steps"]
        q = study.best_params["q"]
        contam = study.best_params["contam"]

        test_extend = np.concatenate((train[-window:], test))
        model = NHiTSModel(window, n_steps, use_gpu=True)

        model.fit(train)
        # Use absolute scores, exactly as in the tuning objective; otherwise
        # the tuned (q, contam) would be applied to a different distribution.
        scores = np.abs(model.get_scores(test_extend)[0])

        # Get threshold (Not needed for Quantile)
        thres = pot(scores, q, contam)

        # Get predictions from threshold
        preds = scores.copy()
        preds[preds <= thres] = 0
        preds[preds > thres] = 1

        # Save results
        save = SAVE_DIR+"nhits/"+f
        os.makedirs(SAVE_DIR+"nhits/", exist_ok=True)
        np.savetxt(save+"-scores.txt", scores, header=study.best_params.__str__())
        np.savetxt(save+"-preds.txt", preds, header=study.best_params.__str__())

        scorer.process(preds, labels)

    print(f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}")
    # The file is opened in append mode, so end each record with a newline to
    # keep one dataset family per line.
    with open(SAVE_DIR+"nhits/summary.txt", 'a+') as summary:
        summary.write(f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}\n")


### RNN(GRU)

In [None]:
%%capture
# suppress output

# Tune and evaluate the GRU forecaster on every dataset found under PATHS.
# For each dataset: run an Optuna study over (window, n_steps, q, contam),
# refit with the best parameters, save scores/predictions to disk, and
# accumulate the per-path confusion counts in a ScoreCounter.
for path in PATHS:
    # Drop the last dash-separated piece ("train.txt") to get each dataset's stem.
    file_list = ["-".join(f.split("-")[:-1]) for f in get_files_from_path(path) if "train" in f]
    scorer = ScoreCounter()

    # Create the output directory up front so the summary file below can be
    # written even when no dataset was found for this path.
    os.makedirs(SAVE_DIR+"rnn_gru/", exist_ok=True)

    for f in file_list:
        train = np.loadtxt(path+f+"-train.txt")
        test = np.loadtxt(path+f+"-test.txt")
        labels = np.loadtxt(path+f+"-labels.txt")

        def objective(trial):
            """Optuna objective: fit one GRU configuration and score its predictions."""
            window = trial.suggest_int("window", 10, 150)
            n_steps = trial.suggest_int("n_steps", 1, 10, log=True)
            q = trial.suggest_float("q", 1e-5, 1e-1, log=True)
            contam = trial.suggest_float("contam", 0.90, 0.999)

            # Prepend the last `window` training points to the test series.
            test_extend = np.concatenate((train[-window:], test))

            # Same constructor arguments as the final refit below, so the trials
            # and the final model are trained under identical settings.
            model = RNNModel(window, n_steps, use_gpu=True, rnn_model="GRU")
            model.fit(train)
            scores = np.abs(model.get_scores(test_extend)[0])

            # Binarise the scores against the threshold returned by pot().
            thres = pot(scores, q, contam)
            preds = scores.copy()
            preds[preds <= thres] = 0
            preds[preds > thres] = 1

            # NOTE(review): the objective receives `test`, not `labels` --
            # confirm this matches compute_objective's expected first argument.
            return compute_objective(test, preds)

        study = optuna.create_study(direction="minimize")
        study.optimize(objective, n_trials=35)

        best_params = study.best_params
        window = best_params["window"]
        n_steps = best_params["n_steps"]
        q = best_params["q"]
        contam = best_params["contam"]

        # Refit with the best hyper-parameters.
        test_extend = np.concatenate((train[-window:], test))
        model = RNNModel(window, n_steps, use_gpu=True, rnn_model="GRU")
        model.fit(train)

        # Use absolute scores, exactly as in `objective`: q and contam were
        # tuned on np.abs(scores), so thresholding the signed scores would
        # apply them to a different distribution.
        scores = np.abs(model.get_scores(test_extend)[0])

        # Get threshold
        thres = pot(scores, q, contam)

        # Get predictions from threshold
        preds = scores.copy()
        preds[preds <= thres] = 0
        preds[preds > thres] = 1

        # Save results; the chosen hyper-parameters go into the file header.
        save = SAVE_DIR+"rnn_gru/"+f
        header = str(best_params)
        np.savetxt(save+"-scores.txt", scores, header=header)
        np.savetxt(save+"-preds.txt", preds, header=header)

        scorer.process(preds, labels)

    # One summary row per path, printed and appended to the summary file.
    summary_line = f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}"
    print(summary_line)
    with open(SAVE_DIR+"rnn_gru/summary.txt", 'a+') as summary:
        summary.write(summary_line + "\n")


### TCN

In [None]:
%%capture
# Tune and evaluate the TCN forecaster on every dataset found under PATHS.
# For each dataset: run an Optuna study over (window, n_steps, q, contam),
# refit with the best parameters, save scores/predictions to disk, and
# accumulate the per-path confusion counts in a ScoreCounter.
for path in PATHS:
    # Drop the last dash-separated piece ("train.txt") to get each dataset's stem.
    file_list = ["-".join(f.split("-")[:-1]) for f in get_files_from_path(path) if "train" in f]
    scorer = ScoreCounter()

    # Create the output directory up front so the summary file below can be
    # written even when no dataset was found for this path.
    os.makedirs(SAVE_DIR+"tcn/", exist_ok=True)

    for f in file_list:
        train = np.loadtxt(path+f+"-train.txt")
        test = np.loadtxt(path+f+"-test.txt")
        labels = np.loadtxt(path+f+"-labels.txt")

        def objective(trial):
            """Optuna objective: fit one TCN configuration and score its predictions."""
            window = trial.suggest_int("window", 10, 150)
            n_steps = trial.suggest_int("n_steps", 1, 10, log=True)
            q = trial.suggest_float("q", 1e-5, 1e-1, log=True)
            contam = trial.suggest_float("contam", 0.90, 0.999)

            # Prepend the last `window` training points to the test series.
            test_extend = np.concatenate((train[-window:], test))

            model = TCNModel(window, n_steps, use_gpu=True)
            model.fit(train)
            scores = np.abs(model.get_scores(test_extend)[0])

            # Binarise the scores against the threshold returned by pot().
            thres = pot(scores, q, contam)
            preds = scores.copy()
            preds[preds <= thres] = 0
            preds[preds > thres] = 1

            # NOTE(review): the objective receives `test`, not `labels` --
            # confirm this matches compute_objective's expected first argument.
            return compute_objective(test, preds)

        study = optuna.create_study(direction="minimize")
        study.optimize(objective, n_trials=35)

        best_params = study.best_params
        window = best_params["window"]
        n_steps = best_params["n_steps"]
        q = best_params["q"]
        contam = best_params["contam"]

        # Refit with the best hyper-parameters.
        test_extend = np.concatenate((train[-window:], test))
        model = TCNModel(window, n_steps, use_gpu=True)
        model.fit(train)

        # Use absolute scores, exactly as in `objective`: q and contam were
        # tuned on np.abs(scores), so thresholding the signed scores would
        # apply them to a different distribution.
        scores = np.abs(model.get_scores(test_extend)[0])

        # Get threshold
        thres = pot(scores, q, contam)

        # Get predictions from threshold
        preds = scores.copy()
        preds[preds <= thres] = 0
        preds[preds > thres] = 1

        # Save results; the chosen hyper-parameters go into the file header.
        save = SAVE_DIR+"tcn/"+f
        header = str(best_params)
        np.savetxt(save+"-scores.txt", scores, header=header)
        np.savetxt(save+"-preds.txt", preds, header=header)

        scorer.process(preds, labels)

    # One summary row per path, printed and appended to the summary file.
    summary_line = f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}"
    print(summary_line)
    with open(SAVE_DIR+"tcn/summary.txt", 'a+') as summary:
        # The file is opened in append mode, so each row needs its own newline;
        # without it successive rows are fused onto a single line.
        summary.write(summary_line + "\n")


### Transformer

In [None]:
%%capture
# Tune and evaluate the Transformer forecaster on every dataset found under PATHS.
# For each dataset: run an Optuna study over (window, n_steps, q, contam),
# refit with the best parameters, save scores/predictions to disk, and
# accumulate the per-path confusion counts in a ScoreCounter.
for path in PATHS:
    # Drop the last dash-separated piece ("train.txt") to get each dataset's stem.
    file_list = ["-".join(f.split("-")[:-1]) for f in get_files_from_path(path) if "train" in f]
    scorer = ScoreCounter()

    # Create the output directory up front so the summary file below can be
    # written even when no dataset was found for this path.
    os.makedirs(SAVE_DIR+"transformer/", exist_ok=True)

    for f in file_list:
        train = np.loadtxt(path+f+"-train.txt")
        test = np.loadtxt(path+f+"-test.txt")
        labels = np.loadtxt(path+f+"-labels.txt")

        def objective(trial):
            """Optuna objective: fit one Transformer configuration and score its predictions."""
            window = trial.suggest_int("window", 10, 150)
            n_steps = trial.suggest_int("n_steps", 1, 10, log=True)
            q = trial.suggest_float("q", 1e-5, 1e-1, log=True)
            contam = trial.suggest_float("contam", 0.90, 0.999)

            # Prepend the last `window` training points to the test series.
            test_extend = np.concatenate((train[-window:], test))

            model = TransformerModel(window, n_steps, use_gpu=True)
            model.fit(train)
            scores = np.abs(model.get_scores(test_extend)[0])

            # Binarise the scores against the threshold returned by pot().
            thres = pot(scores, q, contam)
            preds = scores.copy()
            preds[preds <= thres] = 0
            preds[preds > thres] = 1

            # NOTE(review): the objective receives `test`, not `labels` --
            # confirm this matches compute_objective's expected first argument.
            return compute_objective(test, preds)

        study = optuna.create_study(direction="minimize")
        study.optimize(objective, n_trials=35)

        best_params = study.best_params
        window = best_params["window"]
        n_steps = best_params["n_steps"]
        q = best_params["q"]
        contam = best_params["contam"]

        # Refit with the best hyper-parameters.
        test_extend = np.concatenate((train[-window:], test))
        model = TransformerModel(window, n_steps, use_gpu=True)
        model.fit(train)

        # Use absolute scores, exactly as in `objective`: q and contam were
        # tuned on np.abs(scores), so thresholding the signed scores would
        # apply them to a different distribution.
        scores = np.abs(model.get_scores(test_extend)[0])

        # Get threshold
        thres = pot(scores, q, contam)

        # Get predictions from threshold
        preds = scores.copy()
        preds[preds <= thres] = 0
        preds[preds > thres] = 1

        # Save results; the chosen hyper-parameters go into the file header.
        save = SAVE_DIR+"transformer/"+f
        header = str(best_params)
        np.savetxt(save+"-scores.txt", scores, header=header)
        np.savetxt(save+"-preds.txt", preds, header=header)

        scorer.process(preds, labels)

    # One summary row per path, printed and appended to the summary file.
    summary_line = f"{scorer.tp}, {scorer.fp}, {scorer.tn}, {scorer.fn}, {scorer.tpr}, {scorer.fpr}, {scorer.tnr}, {scorer.fnr}, {scorer.precision}, {scorer.recall}, {scorer.f1}"
    print(summary_line)
    with open(SAVE_DIR+"transformer/summary.txt", 'a+') as summary:
        # The file is opened in append mode, so each row needs its own newline;
        # without it successive rows are fused onto a single line.
        summary.write(summary_line + "\n")


In [None]:
# Re-score the Transformer predictions stored on disk, one metrics row per path.
for path in PATHS:
    scorer = ScoreCounter()

    # Keep only the "train" files and drop their last dash-separated piece
    # to recover each dataset's file stem.
    train_files = (name for name in get_files_from_path(path) if "train" in name)
    file_list = ["-".join(name.split("-")[:-1]) for name in train_files]

    for f in file_list:
        labels = np.loadtxt(f"{path}{f}-labels.txt")
        preds = np.loadtxt(f"{SAVE_DIR}transformer/{f}-preds.txt")
        scorer.process(preds, labels)

    # Counts first, then rates, then precision / recall / F1.
    metrics = (
        scorer.tp, scorer.fp, scorer.tn, scorer.fn,
        scorer.tpr, scorer.fpr, scorer.tnr, scorer.fnr,
        scorer.precision, scorer.recall, scorer.f1,
    )
    print(", ".join(format(value) for value in metrics))

## Jenks Natural Breaks

In [31]:
# Threshold previously saved anomaly scores with Jenks natural breaks and
# print one metrics row per path.
model = "quantile"
for path in PATHS:
    scorer = ScoreCounter()

    # Keep only the "train" files and drop their last dash-separated piece
    # to recover each dataset's file stem.
    train_files = (name for name in get_files_from_path(path) if "train" in name)
    file_list = ["-".join(name.split("-")[:-1]) for name in train_files]

    for f in file_list:
        labels = np.loadtxt(f"{path}{f}-labels.txt")
        scores = np.loadtxt(f"{SAVE_DIR}{model}/{f}-scores.txt")

        # The threshold is the second-to-last break of a 20-class split
        # (presumably the lower edge of the highest class -- confirm against
        # the installed jenkspy version).
        breaks = jenkspy.jenks_breaks(scores, nb_class=20)
        thres = breaks[-2]

        # Binarise: 0 at or below the threshold, 1 above it.
        preds = scores.copy()
        preds[preds <= thres] = 0
        preds[preds > thres] = 1

        scorer.process(preds, labels)

    # Counts first, then rates, then precision / recall / F1.
    metrics = (
        scorer.tp, scorer.fp, scorer.tn, scorer.fn,
        scorer.tpr, scorer.fpr, scorer.tnr, scorer.fnr,
        scorer.precision, scorer.recall, scorer.f1,
    )
    print(", ".join(format(value) for value in metrics))

347, 137, 18891, 625, 0.35699588477366256, 0.007199915913390792, 0.9928000840866092, 0.6430041152263375, 0.7169421487603306, 0.35699588477366256, 0.4766483516483517
46, 410, 18611, 933, 0.04698672114402452, 0.021555123284790496, 0.9784448767152095, 0.9530132788559755, 0.10087719298245613, 0.04698672114402452, 0.06411149825783972
400, 341, 18659, 600, 0.4, 0.017947368421052632, 0.9820526315789474, 0.6, 0.5398110661268556, 0.4, 0.4595060310166571
604, 516, 18580, 300, 0.668141592920354, 0.02702136573104315, 0.9729786342689568, 0.33185840707964603, 0.5392857142857143, 0.668141592920354, 0.5968379446640316
1771, 501, 17628, 100, 0.9465526456440406, 0.027635280489822937, 0.9723647195101771, 0.05344735435595938, 0.7794894366197183, 0.9465526456440406, 0.8549360366883899


## Load Results

In [19]:
# Re-score the saved predictions of the selected model, one metrics row per path.
model = "transformer"
for path in PATHS:
    scorer = ScoreCounter()

    # Keep only the "train" files and drop their last dash-separated piece
    # to recover each dataset's file stem.
    train_files = (name for name in get_files_from_path(path) if "train" in name)
    file_list = ["-".join(name.split("-")[:-1]) for name in train_files]

    for f in file_list:
        labels = np.loadtxt(f"{path}{f}-labels.txt")
        preds = np.loadtxt(f"{SAVE_DIR}{model}/{f}-preds.txt")
        scorer.process(preds, labels)

    # Counts first, then rates, then precision / recall / F1.
    metrics = (
        scorer.tp, scorer.fp, scorer.tn, scorer.fn,
        scorer.tpr, scorer.fpr, scorer.tnr, scorer.fnr,
        scorer.precision, scorer.recall, scorer.f1,
    )
    print(", ".join(format(value) for value in metrics))

7, 0, 19028, 965, 0.00720164609053498, 0.0, 1.0, 0.992798353909465, 1.0, 0.00720164609053498, 0.014300306435137895
7, 32, 18989, 972, 0.007150153217568948, 0.0016823510856421849, 0.9983176489143578, 0.992849846782431, 0.1794871794871795, 0.007150153217568948, 0.0137524557956778
200, 6, 18994, 800, 0.2, 0.00031578947368421053, 0.9996842105263158, 0.8, 0.970873786407767, 0.2, 0.3316749585406302
304, 8, 19088, 600, 0.336283185840708, 0.00041893590280687055, 0.9995810640971932, 0.6637168141592921, 0.9743589743589743, 0.336283185840708, 0.5000000000000001
269, 6, 18123, 1602, 0.14377338321753072, 0.00033096144299189144, 0.9996690385570081, 0.8562266167824693, 0.9781818181818182, 0.14377338321753072, 0.25069897483690584
