In [None]:
import os

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from numpy.lib.stride_tricks import sliding_window_view   
import optuna

In [None]:
from one.generator.univariate import UnivariateDataGenerator
from one.models import *
from one.utils import *
from one.scorer.pot import *

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
plt.rcParams["figure.figsize"] = 40,10
plt.rcParams["font.size"] = 15

# Load NASA Dataset

In [None]:
# Locations of the NASA dataset, relative to the notebook's working directory.
# TRAIN/TEST are walked for per-channel .npy files; TEST is also joined to a
# file name by plain string formatting, so the trailing slash is required.
TRAIN = "../data/nasa/train/"
TEST = "../data/nasa/test/"
# CSV read with pandas; its anomaly_sequences and class columns are parsed by custom converters.
LABELS = "../data/nasa/labeled_anomalies.csv"

In [None]:
import ast
def convert_class(x):
    """Wrap every occurrence of ``contextual`` and ``point`` in double quotes.

    Turns text such as ``[point, contextual]`` into a string that can be
    parsed as a list of string literals.
    """
    for word in ("contextual", "point"):
        x = x.replace(word, "\"" + word + "\"")
    return x

def make_array(row):
    """Return ``row`` converted to a new NumPy array."""
    arr = np.array(row)
    return arr

def _parse_sequences(text):
    """Parse the Python literal in an ``anomaly_sequences`` cell and wrap it with ``make_array``."""
    return make_array(ast.literal_eval(text))


def _parse_classes(text):
    """Quote the class names with ``convert_class`` and parse the resulting literal."""
    return ast.literal_eval(convert_class(text))


# Per-column parsers handed to pandas while reading the label CSV.
converters = {
    "anomaly_sequences": _parse_sequences,
    "class": _parse_classes,
}

label_df = pd.read_csv(LABELS, converters=converters)

# get only those with multiple anomalies
n_sequences = label_df["anomaly_sequences"].apply(lambda x: x.shape[0])
label_df = label_df[n_sequences > 1].reset_index(drop=True)

In [None]:
label_df

In [None]:
# Pair each channel's train file with its test file BY CHANNEL ID.
# os.walk gives no ordering guarantee, so zipping the results of two
# independent walks could silently pair a train file with another channel's
# test file.
wanted_channels = set(label_df["chan_id"].values)


def _paths_by_channel(directory):
    """Map channel id (file name up to the first '.') to file path for wanted channels under ``directory``."""
    found = {}
    for root, _, files in os.walk(directory):
        for file in files:
            chan = file.split(".")[0]
            if chan in wanted_channels:
                found[chan] = os.path.join(root, file)
    return found


train_by_channel = _paths_by_channel(TRAIN)
test_by_channel = _paths_by_channel(TEST)

# Only channels present in both splits can be paired; sorting makes the order reproducible.
paired_channels = sorted(train_by_channel.keys() & test_by_channel.keys())

train = [train_by_channel[chan] for chan in paired_channels]
test = [test_by_channel[chan] for chan in paired_channels]

# List of (train_path, test_path) tuples, one per channel.
file_list = list(zip(train, test))

## Get Labels

In [None]:
# Build one 0/1 label vector per channel, as long as that channel's test array.
data_labels = {}

# First column of label_df; used below as the stem of the test .npy file name.
chan_names = label_df.iloc[:, 0]
for name, rng in zip(chan_names, label_df["anomaly_sequences"].values):
    test_arr = np.load(f"{TEST}{name}.npy")

    label = np.zeros(len(test_arr))

    # NOTE(review): the slice leaves index `end` unmarked; confirm whether the
    # annotated ranges are meant to be end-inclusive.
    for start, end in rng:
        label[start:end] = 1

    data_labels[name] = label

# Visualize

In [None]:
# Plot every column of each channel (train followed by test) as a stacked strip
# chart and shade the labelled anomaly ranges in red.
for train, test in file_list:
    train_arr = np.load(train)
    test_arr = np.load(test)
    data = np.vstack((train_arr, test_arr))
    name = train.split("/")[-1].split(".")[0]
    print(f"{name} {'#'*30}")

    n_channels = data.shape[1]

    # figsize is passed per figure instead of mutating the global rcParams on
    # every iteration. squeeze=False always returns a 2-D array of Axes, so a
    # single-column array no longer breaks the indexing below.
    fig, axes = plt.subplots(n_channels, figsize=(40, 1 * n_channels), squeeze=False)
    axes = axes.ravel()
    axes[0].set_title(name)

    for idx, d in enumerate(data.T):
        axes[idx].plot(d)

    # Hide tick axes on every strip; the bottom x-axis is re-enabled below.
    for ax in axes:
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

    labels = label_df[label_df["chan_id"] == name]["anomaly_sequences"].values[0]
    # Label indices refer to the test array, which is plotted after the train array.
    offset = len(train_arr)
    for start, end in labels:
        for ax in axes:
            ax.axvspan(start + offset, end + offset, alpha=0.5, color='red')

    axes[-1].get_xaxis().set_visible(True)
    plt.tight_layout()
    plt.subplots_adjust(wspace=0, hspace=0)
    plt.show()

# Scoring Helper

In [None]:
class ScoreCounter:
    """Accumulate range-level detection counts across one or more ``process`` calls.

    Every run of consecutive 1s in the labels is treated as one anomaly. If at
    least one prediction falls inside the run (and, when ``delay`` is set, the
    first such prediction comes earlier than ``run_start + delay``), every step
    of the run counts as a true positive; otherwise every step counts as a
    false negative. Predictions outside all runs are false positives.
    """

    def __init__(self, delay: int=None):
        """Create an empty counter.

        Args:
            delay: maximum allowed offset (in steps) between the start of an
                anomaly run and the first prediction inside it. ``None``
                disables the check.
        """
        # Confusion-matrix totals, summed over every process() call.
        self.tp = 0
        self.fp = 0
        self.tn = 0
        self.fn = 0

        self.delay = delay

        # Per-anomaly (not per-step) tallies, split by anomaly type.
        self.point_anom_total = 0
        self.point_anom_correct = 0

        self.contextual_anom_total = 0
        self.contextual_anom_correct = 0

    def process(self, preds, labels, types):
        """Score one prediction vector against its labels and add to the totals.

        Args:
            preds: 1-D 0/1 (or boolean) predictions; the input is not modified.
            labels: 1-D 0/1 ground truth, indexed like ``preds``.
            types: one entry per ground-truth run, in order of appearance.
                ``"point"`` feeds the point tallies; any other value feeds the
                contextual tallies.
        """
        preds = np.array(preds)  # private copy: matched predictions are zeroed below
        labels = np.asarray(labels)

        ground_truth_ones = np.where(labels == 1)[0]
        pred_ones = np.where(preds == 1)[0]
        ranges = self._consecutive(ground_truth_ones)

        tp, fp, tn, fn = 0, 0, 0, 0

        for idx, r in enumerate(ranges):
            # With no positive label at all, np.split yields a single empty
            # run; it is not an anomaly and must not be counted.
            if r.size == 0:
                continue

            intersect = np.intersect1d(r, pred_ones, assume_unique=True)

            is_point = types[idx] == "point"
            if is_point:
                self.point_anom_total += 1
            else:
                self.contextual_anom_total += 1

            if intersect.size != 0:
                # A detection only counts if the first alert inside the run is
                # earlier than run_start + delay (when a delay is configured).
                in_time = intersect[0] < r[0] + self.delay if self.delay is not None else True
                if in_time:
                    tp += r.size
                    if is_point:
                        self.point_anom_correct += 1
                    else:
                        self.contextual_anom_correct += 1
                else:
                    fn += r.size

                # Predictions inside a run are consumed so they are not
                # counted as false positives afterwards.
                preds[intersect] = 0
                pred_ones = np.where(preds == 1)[0]
            else:
                fn += r.size

        # Whatever predictions remain lie outside every ground-truth run.
        fp += pred_ones.size
        tn += preds.size - tp - fp - fn

        self.tp += tp
        self.fp += fp
        self.tn += tn
        self.fn += fn

        return

    def _consecutive(self, data, stepsize=1):
        """Split sorted indices ``data`` into runs whose neighbours differ by ``stepsize``."""
        return np.split(data, np.where(np.diff(data) != stepsize)[0]+1)

    @staticmethod
    def _ratio(numerator, denominator):
        """Return ``numerator / denominator``, or 0 when the denominator is 0."""
        if denominator == 0:
            return 0
        return numerator / denominator

    @property
    def tpr(self):
        """True-positive rate, tp / (tp + fn); 0 when undefined."""
        return self._ratio(self.tp, self.fn + self.tp)

    @property
    def fpr(self):
        """False-positive rate, fp / (fp + tn); 0 when undefined."""
        return self._ratio(self.fp, self.tn + self.fp)

    @property
    def tnr(self):
        """True-negative rate, tn / (tn + fp); 0 when undefined."""
        return self._ratio(self.tn, self.tn + self.fp)

    @property
    def fnr(self):
        """False-negative rate, fn / (fn + tp); 0 when undefined."""
        return self._ratio(self.fn, self.fn + self.tp)

    @property
    def precision(self):
        """tp / (tp + fp); 0 when undefined."""
        return self._ratio(self.tp, self.tp + self.fp)

    @property
    def recall(self):
        """tp / (tp + fn); 0 when undefined."""
        return self._ratio(self.tp, self.tp + self.fn)

    @property
    def f1(self):
        """Harmonic mean of precision and recall; 0 when both are 0."""
        precision, recall = self.precision, self.recall
        if recall + precision == 0:
            return 0
        return (2 * precision * recall) / (precision + recall)

    @property
    def point_correct(self):
        """Fraction of "point" anomalies detected; 0 when none were seen."""
        return self._ratio(self.point_anom_correct, self.point_anom_total)

    @property
    def contextual_correct(self):
        """Fraction of non-"point" anomalies detected; 0 when none were seen."""
        return self._ratio(self.contextual_anom_correct, self.contextual_anom_total)

    def __str__(self):
        """Comma-separated: tp, fp, tn, fn, tpr, fpr, tnr, fnr, precision, recall, f1, point_correct, contextual_correct."""
        # Formats this instance (self), not whichever object a global named `scorer` points at.
        fields = (
            self.tp, self.fp, self.tn, self.fn,
            self.tpr, self.fpr, self.tnr, self.fnr,
            self.precision, self.recall, self.f1,
            self.point_correct, self.contextual_correct,
        )
        return ", ".join(f"{value}" for value in fields)
    
    

In [None]:
# Toy ground truth: 20 normal steps followed by one 20-step anomaly.
labels = np.concatenate((np.zeros(20), np.ones(20)))

In [None]:
# Toy predictions: alerts on the last 10 steps plus one stray alert at index 10.
pred = np.zeros(40)
pred[30:] = 1
pred[10] = 1

In [None]:
# Expected counts for the toy example: the first alert inside the anomaly is at
# index 30, which is not earlier than start (20) + delay (5), so all 20 anomaly
# steps are false negatives; the stray alert at index 10 is the single false
# positive.
tp, fp, tn, fn = 0, 1, 19, 20

scorer = ScoreCounter(delay=5)
scorer.process(pred, labels, ["point"])

for expected, actual in ((tp, scorer.tp), (fp, scorer.fp), (tn, scorer.tn), (fn, scorer.fn)):
    assert expected == actual

# Run Experiments

## F1 Tuned (50% Train)

### Setup

### Single-variate Ensemble

In [None]:
# Root output directory; result paths are built by string concatenation, so the
# trailing slash is required.
SAVE_DIR = "../results/multivar-nasa/f1tuned-1anom/"
# Passed as `delay` to every ScoreCounter in the experiment cells.
DELAY = 50
# Only log Optuna messages at FATAL level.
optuna.logging.set_verbosity(optuna.logging.FATAL)

#### MA

In [None]:
def train_and_score(train_set, test_set, params):
    """Score each column of ``test_set`` with a moving-average model and threshold it.

    Args:
        train_set: accepted for signature parity; not used by this variant.
        test_set: 2-D array; each column is scored independently.
        params: dict with keys ``"window"``, ``"q"`` and ``"contam"``.

    Returns:
        ``(preds, scores)``: ``preds`` is the element-wise OR of the per-column
        flags; ``scores`` stacks the per-column scores, one column per series.
    """
    window, q, contam = params["window"], params["q"], params["contam"]

    model = MovingAverageModel(window)
    per_series_scores = []
    per_series_flags = []
    for series in test_set.T:
        # Absolute score, with the first `window` entries dropped.
        score = np.abs(model.get_scores(series)[window:])

        # Get threshold (Not needed for Quantile)
        thres = pot(score, q, contam)

        flags = score.copy()
        flags[flags <= thres] = 0
        flags[flags > thres] = 1

        per_series_scores.append(score)
        per_series_flags.append(flags)

    scores = np.array(per_series_scores).T
    preds = np.any(per_series_flags, axis=0)

    return preds, scores

In [None]:
# Moving Average Model
model_name = "ma"

# One counter shared by all channels, so the final print reports aggregate metrics.
scorer = ScoreCounter(delay=DELAY)
for train, test in file_list:

    # Prepare Data
    # Channel id = file name without directory or extension.
    name = train.split("/")[-1].split(".")[0]

    # The loop variables start as file paths and are rebound to the loaded arrays.
    train = np.load(train)
    test = np.load(test)
    labels = data_labels[name]
    types = label_df.loc[label_df["chan_id"]==name]["class"].values[0]

    # Tune on the test prefix that ends one step past the end index of the first anomaly sequence.
    tune_len = label_df.loc[label_df["chan_id"]==name]["anomaly_sequences"].values[0][0][1] + 1
    tune_data = test[:tune_len]
    tune_labels = labels[:tune_len]

    def objective(trial):
        """Optuna objective: F1 on the tuning prefix, or -1 when F1 is not meaningful."""
        s = ScoreCounter(delay=DELAY)

        params = {
            "window": trial.suggest_int("window", 1, 100),
            "q": trial.suggest_float("q", 1e-7, 1e-1, log=True),
            "contam": trial.suggest_float("contam", 0.90, 0.999)
        }

        # Prepend the last `window` training rows; train_and_score drops the
        # first `window` scores again. The window must come from this trial's
        # params: no `window` variable is defined in this cell.
        test_extend = np.concatenate((train[-params["window"]:], tune_data))
        preds, _ = train_and_score(train, test_extend, params)

        s.process(preds, tune_labels, types)

        if s.tp == 0 and s.fp == 0: return -1
        if s.tp == 0 and s.fn == 0: return -1

        if s.precision == 0 and s.recall == 0: return -1
        if np.isnan(s.f1): return -1
        return s.f1


    # Tune
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=150)

    # Get Tune Results
    params = {
        "window": study.best_params["window"],
        "q": study.best_params["q"],
        "contam": study.best_params["contam"]
    }

    # Same realignment as in the objective, using the tuned window.
    test_extend = np.concatenate((train[-params["window"]:], test))
    preds, scores = train_and_score(train, test_extend, params)

    # save
    save = f"{SAVE_DIR}{model_name}/{name}"
    os.makedirs(SAVE_DIR+model_name, exist_ok=True)
    np.savetxt(save+"-scores.txt", scores, header=str(params))
    np.savetxt(save+"-preds.txt", preds, header=str(params))

    scorer.process(preds, labels, types)

print(scorer)

#### Quantile

In [None]:
def train_and_score(train_set, test_set, params):
    """Score each column of ``test_set`` with a quantile model.

    Args:
        train_set: accepted for signature parity; not used by this variant.
        test_set: 2-D array; each column is scored independently.
        params: dict with keys ``"window"``, ``"thres"``, ``"q"`` and ``"contam"``.

    Returns:
        ``(preds, scores)``: ``scores`` has one column per series; ``preds`` is
        True for every row of ``scores`` that has any truthy entry.
    """
    window = params["window"]
    thres = params["thres"]
    # Looked up (so a missing key still fails) but not used: no POT threshold is applied here.
    q = params["q"]
    contam = params["contam"]

    model = QuantileModel(window, thres)
    # The first `window` entries of each column's scores are dropped.
    per_series = [model.get_scores(series)[window:] for series in test_set.T]

    scores = np.array(per_series).T
    preds = np.any(scores, axis=1)

    return preds, scores

In [None]:
model_name = "quantile"

# One counter shared by all channels, so the final print reports aggregate metrics.
scorer = ScoreCounter(delay=DELAY)
for train, test in file_list:
    
    # Prepare Data
    # Channel id = file name without directory or extension.
    name = train.split("/")[-1].split(".")[0]
    
    # The loop variables start as file paths and are rebound to the loaded arrays.
    train = np.load(train)
    test = np.load(test)
    labels = data_labels[name]
    types = label_df.loc[label_df["chan_id"]==name]["class"].values[0]
    
    # Tune on the test prefix that ends one step past the end index of the first anomaly sequence.
    tune_len = label_df.loc[label_df["chan_id"]==name]["anomaly_sequences"].values[0][0][1] + 1
    tune_data = test[:tune_len]
    tune_labels = labels[:tune_len]
    
    # Optuna objective: F1 on the tuning prefix, or -1 when F1 is not meaningful.
    def objective(trial):
        s = ScoreCounter(delay=DELAY)

        params = {
            "window": trial.suggest_int("window", 1, 100),
            "thres": trial.suggest_float("thres", 0.9, 0.999),
            "q": trial.suggest_float("q", 1e-7, 1e-1, log=True),
            "contam": trial.suggest_float("contam", 0.90, 0.999)
        }

        # Prepend the last `window` training rows; train_and_score drops the first `window` scores again.
        test_extend = np.concatenate((train[-params["window"]:], tune_data))
        preds, _ = train_and_score(train, test_extend, params)

        s.process(preds, tune_labels, types)

        # Degenerate trials (nothing predicted / nothing detected) get the worst score.
        if s.tp == 0 and s.fp == 0: return -1
        if s.tp == 0 and s.fn == 0: return -1

        if s.precision == 0 and s.recall == 0: return -1
        if np.isnan(s.f1): return -1
        return s.f1

        
    # Tune
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=50)
    
    # Get Tune Results
    params = {
        "window": study.best_params["window"],
        "thres": study.best_params["thres"],
        "q": study.best_params["q"],
        "contam": study.best_params["contam"]
    }

    # Re-score the full test array with the best parameters.
    test_extend = np.concatenate((train[-params["window"]:], test))
    preds, scores = train_and_score(train, test_extend, params)
    
    # save
    # The tuned parameters are written as the header line of both files.
    save = f"{SAVE_DIR}{model_name}/{name}"
    os.makedirs(SAVE_DIR+model_name, exist_ok=True)
    np.savetxt(save+"-scores.txt", scores, header=str(params))
    np.savetxt(save+"-preds.txt", preds, header=str(params))

    scorer.process(preds, labels, types)

print(scorer)

#### Regression

In [None]:
def train_and_score(train_set, test_set, params):
    """Fit one regression model per column and flag threshold exceedances.

    Args:
        train_set: 2-D array; column ``i`` is used to fit the model for column ``i``.
        test_set: 2-D array with the same column layout; each column is scored
            by its own model.
        params: dict with keys ``"window"``, ``"q"`` and ``"contam"``.

    Returns:
        ``(preds, scores)``: ``preds`` is the element-wise OR of the per-column
        flags (one value per time step); ``scores`` stacks the per-column
        scores, one column per series.
    """
    window = params["window"]
    q = params["q"]
    contam = params["contam"]

    preds = []
    scores = []
    for idx, series in enumerate(test_set.T):
        # A fresh model per column, fitted on the matching training column.
        model = RegressionModel(window)
        model.fit(train_set.T[idx])

        score = model.get_scores(series)[0].flatten()
        scores.append(score)

        # Get threshold (Not needed for Quantile)
        thres = pot(score, q, contam)

        pred = score.copy()
        pred[pred <= thres] = 0
        pred[pred > thres] = 1

        preds.append(pred)

    scores = np.array(scores).T
    # `preds` holds one row per series, so OR-ing across series means reducing
    # over axis 0. Reducing over axis 1 collapses time and returns one flag per
    # series, which cannot be scored against per-step labels.
    preds = np.any(preds, axis=0)

    return preds, scores

In [None]:
model_name = "regression"
# One counter shared by all channels, so the final print reports aggregate metrics.
scorer = ScoreCounter(delay=DELAY)
for train, test in file_list:
    # Prepare Data
    # Channel id = file name without directory or extension.
    name = train.split("/")[-1].split(".")[0]
    
    
    # The loop variables start as file paths and are rebound to the loaded arrays.
    train = np.load(train)
    test = np.load(test)
    labels = data_labels[name]
    types = label_df.loc[label_df["chan_id"]==name]["class"].values[0]
    
    # Tune on the test prefix that ends one step past the end index of the first anomaly sequence.
    tune_len = label_df.loc[label_df["chan_id"]==name]["anomaly_sequences"].values[0][0][1] + 1
    tune_data = test[:tune_len]
    tune_labels = labels[:tune_len]
    
    # Optuna objective: F1 on the tuning prefix, or -1 when F1 is not meaningful.
    def objective(trial):
        s = ScoreCounter(delay=DELAY)

        params = {
            "window": trial.suggest_int("window", 1, 100),
            "q": trial.suggest_float("q", 1e-7, 1e-1, log=True),
            "contam": trial.suggest_float("contam", 0.90, 0.999)
        }

        # Prepend the last `window` training rows to the tuning data before scoring.
        window = params["window"]
        test_extend = np.concatenate((train[-window:], tune_data))
        preds, _ = train_and_score(train, test_extend, params)

        s.process(preds, tune_labels, types)

        # Degenerate trials (nothing predicted / nothing detected) get the worst score.
        if s.tp == 0 and s.fp == 0: return -1
        if s.tp == 0 and s.fn == 0: return -1

        if s.precision == 0 and s.recall == 0: return -1
        if np.isnan(s.f1): return -1
        return s.f1

        
    # Tune
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=15)
    
    # Get Tune Results
    params = {
        "window": study.best_params["window"],
        "q": study.best_params["q"],
        "contam": study.best_params["contam"]
    }

    # Re-score the full test array with the best parameters.
    window = params["window"]
    test_extend = np.concatenate((train[-window:], test))
    preds, scores = train_and_score(train, test_extend, params)
    
    # save
    # The tuned parameters are written as the header line of both files.
    save = f"{SAVE_DIR}{model_name}/{name}"
    os.makedirs(SAVE_DIR+model_name, exist_ok=True)
    np.savetxt(save+"-scores.txt", scores, header=str(params))
    np.savetxt(save+"-preds.txt", preds, header=str(params))

    scorer.process(preds, labels, types)

print(scorer)

In [None]:
import json

In [None]:
# Rebuild the "-preds.txt" files of one method from its saved "-scores.txt"
# files, using the parameters stored in each score file's header line.
method = "regression"

#######################
# NOTE(review): this rebinds the notebook-wide SAVE_DIR to a "./"-relative path
# while the experiment cells use "../"; cells run afterwards will use this
# value. Confirm that is intended.
SAVE_DIR = "./results/multivar-nasa/f1tuned-1anom/"
method += "/"
l = [i for i in get_files_from_path(SAVE_DIR+method) if "preds" not in i]
scorer = ScoreCounter(delay=DELAY)
for f in l:
    d = SAVE_DIR + method + f
    with open(d) as file:
        # Header looks like "# {'window': ..., ...}"; strip the comment marker
        # and switch to double quotes so it parses as JSON.
        params = file.readline().strip().replace("# ", "").replace("'", '"')
        params = json.loads(params)

    # Channel id = first two "-"-separated parts of the file name.
    name = "-".join(f.split("-")[:2])

    # Score files are written with one column per series; ndmin=2 keeps that
    # layout even when there is a single series.
    scores = np.loadtxt(d, ndmin=2)

    preds = []
    # Threshold each series (column) on its own. Iterating the array directly
    # would walk over time steps and compute one threshold per time step.
    for score in scores.T:
        thres = pot(score, params["q"], params["contam"])

        pred = score.copy()
        pred[pred <= thres] = 0
        pred[pred > thres] = 1

        preds.append(pred)

    # One row per series: OR across series gives one flag per time step.
    preds = np.any(np.array(preds), axis=0)

    np.savetxt(d.replace("scores", "preds"), preds, header=str(params))
    
#print(scorer)

### Multivariate Native

#### IForest

In [None]:
# This also "realigns" test data
def train_and_score(train_set, test_set, params):
    """Fit an isolation-forest model on ``train_set`` and threshold its scores on ``test_set``.

    Args:
        train_set: array used to fit the model; its last ``window`` rows are
            also prepended to ``test_set`` before scoring.
        test_set: array to score.
        params: dict with keys ``"window"``, ``"q"`` and ``"contam"``.

    Returns:
        ``(pred, score)``: ``score`` is the flattened score vector with the
        first ``window`` entries dropped; ``pred`` is its thresholded copy.
    """
    window = params["window"]
    q = params["q"]
    contam = params["contam"]

    # Prepend the tail of the training data; the first `window` scores are dropped again below.
    test_extend = np.concatenate((train_set[-window:], test_set))

    # The model is built once (a second, discarded construction was removed).
    model = IsolationForestModel(window)
    model.fit(train_set)
    score = model.get_scores(test_extend).flatten()[window:]

    # Get threshold (Not needed for Quantile)
    thres = pot(score, q, contam)
    pred = score.copy()
    pred[pred <= thres] = 0
    pred[pred > thres] = 1

    return pred, score

In [None]:
model_name = "iforest"

# One counter shared by all channels, so the final print reports aggregate metrics.
scorer = ScoreCounter(delay=DELAY)
for train, test in file_list:
    
    # Prepare Data
    # Channel id = file name without directory or extension.
    name = train.split("/")[-1].split(".")[0]
    
    # The loop variables start as file paths and are rebound to the loaded arrays.
    train = np.load(train)
    test = np.load(test)
    labels = data_labels[name]
    types = label_df.loc[label_df["chan_id"]==name]["class"].values[0]
    
    # Tune on the test prefix that ends one step past the end index of the first anomaly sequence.
    tune_len = label_df.loc[label_df["chan_id"]==name]["anomaly_sequences"].values[0][0][1] + 1
    tune_data = test[:tune_len]
    tune_labels = labels[:tune_len]
    
    # NOTE(review): `i` is not read anywhere in this cell.
    i = 0
    # Optuna objective: F1 of the thresholded predictions on the tuning prefix.
    def objective(trial):
        s = ScoreCounter(delay=DELAY)

        params = {
            "window": trial.suggest_int("window", 1, 100),
            "q": trial.suggest_float("q", 1e-7, 1e-2, log=True),
            "contam": trial.suggest_float("contam", 0.90, 0.999)
        }

        # Unlike the single-variate cells, the raw tuning data is passed straight to train_and_score.
        preds, _ = train_and_score(train, tune_data, params)
        
        s.process(preds, tune_labels, types)
        
        return s.f1

        
    # Tune
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=150)
    
    # Get Tune Results
    params = {
        "window": study.best_params["window"],
        "q": study.best_params["q"],
        "contam": study.best_params["contam"]
    }
    
    # Re-score the full test array with the best parameters.
    preds, scores = train_and_score(train, test, params)
    
    # save
    # The tuned parameters are written as the header line of both files.
    save = f"{SAVE_DIR}{model_name}/{name}"
    os.makedirs(SAVE_DIR+model_name, exist_ok=True)
    np.savetxt(save+"-scores.txt", scores, header=str(params))
    np.savetxt(save+"-preds.txt", preds, header=str(params))

    scorer.process(preds, labels, types)

print(scorer)

#### NBEATS

In [None]:
# This also "realigns" test data
def train_and_score(train, test, params):
    """Fit an N-BEATS model on ``train`` and threshold its scores on ``test``.

    Args:
        train: array used to fit the model; its last ``window`` rows are also
            prepended to ``test`` before scoring.
        test: array to score.
        params: dict with keys ``"window"``, ``"n_steps"``, ``"q"`` and ``"contam"``.

    Returns:
        ``(pred, res)``: ``pred`` is the thresholded copy of the flattened
        first output of ``model.get_scores``; ``res`` is its second output,
        returned unchanged.
    """
    window = params["window"]
    n_steps = params["n_steps"]
    q = params["q"]
    contam = params["contam"]

    test_extend = np.concatenate((train[-window:], test))
    model = NBEATSModel(window, n_steps, use_gpu=True)
    model.fit(train)

    # Run the model once and reuse both outputs, rather than scoring the same input twice.
    outputs = model.get_scores(test_extend)
    score = outputs[0].flatten()
    res = outputs[1]

    # Get threshold (Not needed for Quantile)
    thres = pot(score, q, contam)

    pred = score.copy()
    pred[pred <= thres] = 0
    pred[pred > thres] = 1

    return pred, res

In [None]:
%%capture
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)


model_name = "nbeats"
# One counter shared by all channels, so the final print reports aggregate metrics.
scorer = ScoreCounter(delay=DELAY)

for train, test in file_list:
    # Channel id = file name without directory or extension.
    name = train.split("/")[-1].split(".")[0]

    # The loop variables start as file paths and are rebound to the loaded arrays.
    train = np.load(train)
    test = np.load(test)
    labels = data_labels[name]
    types = label_df.loc[label_df["chan_id"]==name]["class"].values[0]

    # Tune on the test prefix that ends one step past the end index of the first anomaly sequence.
    tune_len = label_df.loc[label_df["chan_id"]==name]["anomaly_sequences"].values[0][0][1] + 1
    tune_data = test[:tune_len]
    tune_labels = labels[:tune_len]


    def objective(trial):
        """Optuna objective: F1 of the thresholded predictions on the tuning prefix."""
        s = ScoreCounter(delay=DELAY)

        params = {
            "window": trial.suggest_int("window", 11, 100),
            "n_steps": trial.suggest_int("n_steps", 1, 10),
            "q": trial.suggest_float("q", 1e-7, 1e-2, log=True),
            "contam": trial.suggest_float("contam", 0.90, 0.999)
        }

        preds, _ = train_and_score(train, tune_data, params)
        s.process(preds, tune_labels, types)
        return s.f1

    # Tune
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=35)
    params = {
        "window": study.best_params["window"],
        "n_steps": study.best_params["n_steps"],
        "q": study.best_params["q"],
        "contam": study.best_params["contam"]
    }

    # Re-score the full test array with the best parameters.
    preds, scores = train_and_score(train, test, params)

    save = f"{SAVE_DIR}{model_name}/{name}"
    os.makedirs(SAVE_DIR+model_name, exist_ok=True)
    # Save the second return value of train_and_score, bound here as `scores`.
    # `res` exists only inside train_and_score and is undefined at this level.
    np.savetxt(save+"-scores.txt", scores, header=str(params))
    np.savetxt(save+"-preds.txt", preds, header=str(params))

    scorer.process(preds, labels, types)
# NOTE(review): the cell starts with %%capture, so this output is captured rather than displayed.
print(scorer)

#### NHiTS

In [None]:
# This also "realigns" test data
def train_and_score(train, test, params):
    """Fit an N-HiTS model on ``train`` and threshold its scores on ``test``.

    Args:
        train: array used to fit the model; its last ``window`` rows are also
            prepended to ``test`` before scoring.
        test: array to score.
        params: dict with keys ``"window"``, ``"n_steps"``, ``"q"`` and ``"contam"``.

    Returns:
        ``(pred, res)``: ``pred`` is the thresholded copy of the flattened
        first output of ``model.get_scores``; ``res`` is its second output,
        returned unchanged.
    """
    window = params["window"]
    n_steps = params["n_steps"]
    q = params["q"]
    contam = params["contam"]

    test_extend = np.concatenate((train[-window:], test))
    model = NHiTSModel(window, n_steps, use_gpu=True)
    model.fit(train)

    # Run the model once and reuse both outputs, rather than scoring the same input twice.
    outputs = model.get_scores(test_extend)
    score = outputs[0].flatten()
    res = outputs[1]

    # Get threshold (Not needed for Quantile)
    thres = pot(score, q, contam)

    pred = score.copy()
    pred[pred <= thres] = 0
    pred[pred > thres] = 1

    return pred, res

In [None]:
%%capture
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)


model_name = "nhits"
# One counter shared by all channels, so the final print reports aggregate metrics.
scorer = ScoreCounter(delay=DELAY)

for train, test in file_list:
    # Channel id = file name without directory or extension.
    name = train.split("/")[-1].split(".")[0]

    # The loop variables start as file paths and are rebound to the loaded arrays.
    train = np.load(train)
    test = np.load(test)
    labels = data_labels[name]
    types = label_df.loc[label_df["chan_id"]==name]["class"].values[0]

    # Tune on the test prefix that ends one step past the end index of the first anomaly sequence.
    tune_len = label_df.loc[label_df["chan_id"]==name]["anomaly_sequences"].values[0][0][1] + 1
    tune_data = test[:tune_len]
    tune_labels = labels[:tune_len]


    def objective(trial):
        """Optuna objective: F1 of the thresholded predictions on the tuning prefix."""
        s = ScoreCounter(delay=DELAY)

        params = {
            "window": trial.suggest_int("window", 11, 100),
            "n_steps": trial.suggest_int("n_steps", 1, 10),
            "q": trial.suggest_float("q", 1e-7, 1e-2, log=True),
            "contam": trial.suggest_float("contam", 0.90, 0.999)
        }

        preds, _ = train_and_score(train, tune_data, params)
        s.process(preds, tune_labels, types)
        return s.f1

    # Tune
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=35)
    params = {
        "window": study.best_params["window"],
        "n_steps": study.best_params["n_steps"],
        "q": study.best_params["q"],
        "contam": study.best_params["contam"]
    }

    # Re-score the full test array with the best parameters.
    preds, scores = train_and_score(train, test, params)

    save = f"{SAVE_DIR}{model_name}/{name}"
    os.makedirs(SAVE_DIR+model_name, exist_ok=True)
    # Save the second return value of train_and_score, bound here as `scores`.
    # `res` exists only inside train_and_score and is undefined at this level.
    np.savetxt(save+"-scores.txt", scores, header=str(params))
    np.savetxt(save+"-preds.txt", preds, header=str(params))

    scorer.process(preds, labels, types)
# NOTE(review): the cell starts with %%capture, so this output is captured rather than displayed.
print(scorer)

#### RNN-GRU

In [None]:
# This also "realigns" test data
def train_and_score(train, test, params):
    """Fit a GRU-based RNN model on ``train`` and threshold its scores on ``test``.

    Args:
        train: array used to fit the model; its last ``window`` rows are also
            prepended to ``test`` before scoring.
        test: array to score.
        params: dict with keys ``"window"``, ``"n_steps"``, ``"q"`` and ``"contam"``.

    Returns:
        ``(pred, res)``: ``pred`` is the thresholded copy of the flattened
        first output of ``model.get_scores``; ``res`` is its second output,
        returned unchanged.
    """
    window = params["window"]
    n_steps = params["n_steps"]
    q = params["q"]
    contam = params["contam"]

    test_extend = np.concatenate((train[-window:], test))
    model = RNNModel(window, n_steps, use_gpu=True, rnn_model="GRU")
    model.fit(train)

    # Run the model once and reuse both outputs, rather than scoring the same input twice.
    outputs = model.get_scores(test_extend)
    score = outputs[0].flatten()
    res = outputs[1]

    # Get threshold (Not needed for Quantile)
    thres = pot(score, q, contam)

    pred = score.copy()
    pred[pred <= thres] = 0
    pred[pred > thres] = 1

    return pred, res

In [None]:
%%capture
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning) 


model_name = "rnn_gru"
# One counter shared by all channels, so the final print reports aggregate metrics.
scorer = ScoreCounter(delay=DELAY)

for train, test in file_list:
    # Channel id = file name without directory or extension.
    name = train.split("/")[-1].split(".")[0]

    # The loop variables start as file paths and are rebound to the loaded arrays.
    train = np.load(train)
    test = np.load(test)
    labels = data_labels[name]
    types = label_df.loc[label_df["chan_id"]==name]["class"].values[0]
    
        
    # Tune on the test prefix that ends one step past the end index of the first anomaly sequence.
    tune_len = label_df.loc[label_df["chan_id"]==name]["anomaly_sequences"].values[0][0][1] + 1
    tune_data = test[:tune_len]
    tune_labels = labels[:tune_len]
    
    
    # Optuna objective: F1 of the thresholded predictions on the tuning prefix.
    def objective(trial):
        s = ScoreCounter(delay=DELAY)

        params = {
            "window": trial.suggest_int("window", 11, 100),
            "n_steps": trial.suggest_int("n_steps", 1, 10),
            "q": trial.suggest_float("q", 1e-7, 1e-2, log=True),
            "contam": trial.suggest_float("contam", 0.90, 0.999)
        }

        preds, _ = train_and_score(train, tune_data, params)
        s.process(preds, tune_labels, types)
        return s.f1

    # Tune
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=35)
    params = {
        "window": study.best_params["window"],
        "n_steps": study.best_params["n_steps"],
        "q": study.best_params["q"],
        "contam": study.best_params["contam"]
    }
    
    # Re-score the full test array with the best parameters.
    preds, scores = train_and_score(train, test, params)

    # The tuned parameters are written as the header line of both files.
    save = f"{SAVE_DIR}{model_name}/{name}"
    os.makedirs(SAVE_DIR+model_name, exist_ok=True)
    np.savetxt(save+"-scores.txt", scores, header=str(params))
    np.savetxt(save+"-preds.txt", preds, header=str(params))

    scorer.process(preds, labels, types)
# NOTE(review): the cell starts with %%capture, so this output is captured rather than displayed.
print(scorer)

#### TCN

In [None]:
# This also "realigns" test data
def train_and_score(train, test, params):
    """Fit a TCN model on ``train`` and threshold its scores on ``test``.

    Args:
        train: array used to fit the model; its last ``window`` rows are also
            prepended to ``test`` before scoring.
        test: array to score.
        params: dict with keys ``"window"``, ``"n_steps"``, ``"q"`` and ``"contam"``.

    Returns:
        ``(pred, res)``: ``pred`` is the thresholded copy of the flattened
        first output of ``model.get_scores``; ``res`` is its second output,
        returned unchanged.
    """
    window = params["window"]
    n_steps = params["n_steps"]
    q = params["q"]
    contam = params["contam"]

    test_extend = np.concatenate((train[-window:], test))
    model = TCNModel(window, n_steps, use_gpu=True)
    # Fit once, matching the N-BEATS / N-HiTS / RNN variants (a duplicated fit call was removed).
    model.fit(train)

    # Run the model once and reuse both outputs, rather than scoring the same input twice.
    outputs = model.get_scores(test_extend)
    score = outputs[0].flatten()
    res = outputs[1]

    # Get threshold (Not needed for Quantile)
    thres = pot(score, q, contam)

    pred = score.copy()
    pred[pred <= thres] = 0
    pred[pred > thres] = 1

    return pred, res

In [None]:
%%capture
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning) 


model_name = "tcn"
# One counter shared by all channels, so the final print reports aggregate metrics.
scorer = ScoreCounter(delay=DELAY)

for train, test in file_list:
    # Channel id = file name without directory or extension.
    name = train.split("/")[-1].split(".")[0]

    # The loop variables start as file paths and are rebound to the loaded arrays.
    train = np.load(train)
    test = np.load(test)
    labels = data_labels[name]
    types = label_df.loc[label_df["chan_id"]==name]["class"].values[0]
    
        
    # Tune on the test prefix that ends one step past the end index of the first anomaly sequence.
    tune_len = label_df.loc[label_df["chan_id"]==name]["anomaly_sequences"].values[0][0][1] + 1
    tune_data = test[:tune_len]
    tune_labels = labels[:tune_len]
    
    
    # Optuna objective: F1 of the thresholded predictions on the tuning prefix.
    def objective(trial):
        s = ScoreCounter(delay=DELAY)

        params = {
            "window": trial.suggest_int("window", 11, 100),
            "n_steps": trial.suggest_int("n_steps", 1, 10),
            "q": trial.suggest_float("q", 1e-7, 1e-2, log=True),
            "contam": trial.suggest_float("contam", 0.90, 0.999)
        }

        preds, _ = train_and_score(train, tune_data, params)
        s.process(preds, tune_labels, types)
        return s.f1

    # Tune
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=35)
    params = {
        "window": study.best_params["window"],
        "n_steps": study.best_params["n_steps"],
        "q": study.best_params["q"],
        "contam": study.best_params["contam"]
    }
    
    # Re-score the full test array with the best parameters.
    preds, scores = train_and_score(train, test, params)

    # The tuned parameters are written as the header line of both files.
    save = f"{SAVE_DIR}{model_name}/{name}"
    os.makedirs(SAVE_DIR+model_name, exist_ok=True)
    np.savetxt(save+"-scores.txt", scores, header=str(params))
    np.savetxt(save+"-preds.txt", preds, header=str(params))

    scorer.process(preds, labels, types)
# NOTE(review): the cell starts with %%capture, so this output is captured rather than displayed.
print(scorer)

#### Transformer

In [None]:
# This also "realigns" test data
def train_and_score(train, test, params):
    """Fit a Transformer model on ``train`` and threshold its scores on ``test``.

    Args:
        train: array used to fit the model; its last ``window`` rows are also
            prepended to ``test`` before scoring.
        test: array to score.
        params: dict with keys ``"window"``, ``"n_steps"``, ``"q"`` and ``"contam"``.

    Returns:
        ``(pred, res)``: ``pred`` is the thresholded copy of the flattened
        first output of ``model.get_scores``; ``res`` is its second output,
        returned unchanged.
    """
    window = params["window"]
    n_steps = params["n_steps"]
    q = params["q"]
    contam = params["contam"]

    test_extend = np.concatenate((train[-window:], test))
    model = TransformerModel(window, n_steps, use_gpu=True)
    # Fit once, matching the N-BEATS / N-HiTS / RNN variants (a duplicated fit call was removed).
    model.fit(train)

    # Run the model once and reuse both outputs, rather than scoring the same input twice.
    outputs = model.get_scores(test_extend)
    score = outputs[0].flatten()
    res = outputs[1]

    # Get threshold (Not needed for Quantile)
    thres = pot(score, q, contam)

    pred = score.copy()
    pred[pred <= thres] = 0
    pred[pred > thres] = 1

    return pred, res

In [None]:
%%capture
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning) 


model_name = "transformer"
# One counter shared by all channels, so the final print reports aggregate metrics.
scorer = ScoreCounter(delay=DELAY)

for train, test in file_list:
    # Channel id = file name without directory or extension.
    name = train.split("/")[-1].split(".")[0]

    # The loop variables start as file paths and are rebound to the loaded arrays.
    train = np.load(train)
    test = np.load(test)
    labels = data_labels[name]
    types = label_df.loc[label_df["chan_id"]==name]["class"].values[0]
    
        
    # Tune on the test prefix that ends one step past the end index of the first anomaly sequence.
    tune_len = label_df.loc[label_df["chan_id"]==name]["anomaly_sequences"].values[0][0][1] + 1
    tune_data = test[:tune_len]
    tune_labels = labels[:tune_len]
    
    
    # Optuna objective: F1 of the thresholded predictions on the tuning prefix.
    def objective(trial):
        s = ScoreCounter(delay=DELAY)

        params = {
            "window": trial.suggest_int("window", 11, 100),
            "n_steps": trial.suggest_int("n_steps", 1, 10),
            "q": trial.suggest_float("q", 1e-7, 1e-2, log=True),
            "contam": trial.suggest_float("contam", 0.90, 0.999)
        }

        preds, _ = train_and_score(train, tune_data, params)
        s.process(preds, tune_labels, types)
        return s.f1

    # Tune
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=35)
    params = {
        "window": study.best_params["window"],
        "n_steps": study.best_params["n_steps"],
        "q": study.best_params["q"],
        "contam": study.best_params["contam"]
    }
    
    # Re-score the full test array with the best parameters.
    preds, scores = train_and_score(train, test, params)

    # The tuned parameters are written as the header line of both files.
    save = f"{SAVE_DIR}{model_name}/{name}"
    os.makedirs(SAVE_DIR+model_name, exist_ok=True)
    np.savetxt(save+"-scores.txt", scores, header=str(params))
    np.savetxt(save+"-preds.txt", preds, header=str(params))

    scorer.process(preds, labels, types)
# NOTE(review): the cell starts with %%capture, so this output is captured rather than displayed.
print(scorer)

In [None]:
# Re-run one method with the parameters stored in the header line of its saved
# result files, then overwrite the saved scores/preds and re-score.
method = "transformer"

#######################
# NOTE(review): this rebinds the notebook-wide SAVE_DIR to a "./"-relative path
# while the experiment cells use "../"; confirm that is intended.
SAVE_DIR = "./results/multivar-nasa/f1tuned-1anom/"
method += "/"
l = [i for i in get_files_from_path(SAVE_DIR+method) if "preds" not in i]

# Channel id -> parameter dict recovered from the file header.
params_file = {}
for f in l:
    # Channel id = first two "-"-separated parts of the file name.
    name = "-".join(f.split("-")[:2])
    with open(SAVE_DIR+method+f, 'r') as file:
        params = file.readline()

    # Header looks like "# {'window': ..., ...}"; strip the comment marker and
    # switch to double quotes so it parses as JSON.
    params = params.strip()
    params = params.replace("# ", "")
    params = params.replace("'", '"')
    params = json.loads(params)

    params_file.update({name: params})

print(params)

#######################
print("here")
#######################
scorer = ScoreCounter(delay=DELAY)
for f in l:
    name = "-".join(f.split("-")[:2])
    # Position of the (train, test) pair whose train path contains the channel id.
    idx = [name in i[0] for i in file_list].index(True)

    test_path = file_list[idx][1]
    test = np.load(test_path)

    train_path = file_list[idx][0]
    train = np.load(train_path)

    label = data_labels[name]
    types = label_df.loc[label_df["chan_id"]==name]["class"].values[0]

    params = params_file[name]

    preds, scores = train_and_score(train, test, params)
    save = f"{SAVE_DIR}{method}/{name}"
    np.savetxt(save+"-scores.txt", scores, header=str(params))
    np.savetxt(save+"-preds.txt", preds, header=str(params))

    # Score against this channel's own labels. The global `labels` is left
    # over from an earlier cell and belongs to whichever channel ran last.
    scorer.process(preds, label, types)

print(scorer)

# Load Results

## Untuned

In [123]:
# Aggregate the saved predictions of one untuned method into a single ScoreCounter.
method = "tranad"

#######################
SAVE_DIR = "../results/multivar-nasa/untuned/"
method += "/"
result_dir = SAVE_DIR + method
# Keep every listed file whose name does not contain "scores".
l = [fname for fname in get_files_from_path(result_dir) if "scores" not in fname]
scorer = ScoreCounter(delay=DELAY)
for f in l:
    # Channel id = first two "-"-separated parts of the file name.
    name = "-".join(f.split("-")[:2])
    matches = [name in pair[0] for pair in file_list]
    idx = matches.index(True)

    path = file_list[idx][1]
    data = np.load(path)
    label = data_labels[name]
    chan_rows = label_df.loc[label_df["chan_id"]==name]
    types = chan_rows["class"].values[0]

    preds = np.loadtxt(result_dir + f)

    scorer.process(preds, label, types)

print(scorer)

2436, 914, 96954, 4465, 0.3529923199536299, 0.00933910982139208, 0.9906608901786079, 0.6470076800463701, 0.7271641791044776, 0.3529923199536299, 0.4752707052970442, 0.26666666666666666, 0.4230769230769231


## F1 - 50%

In [None]:
# Aggregate the saved predictions of one F1-tuned method into a single ScoreCounter.
method = "regression"

#######################
SAVE_DIR = "../results/multivar-nasa/f1tuned-1anom/"
method += "/"
result_dir = SAVE_DIR + method
# Keep every listed file whose name does not contain "scores".
l = [fname for fname in get_files_from_path(result_dir) if "scores" not in fname]
scorer = ScoreCounter(delay=DELAY)
for f in l:
    # Channel id = first two "-"-separated parts of the file name.
    name = "-".join(f.split("-")[:2])
    matches = [name in pair[0] for pair in file_list]
    idx = matches.index(True)

    path = file_list[idx][1]
    data = np.load(path)
    label = data_labels[name]
    chan_rows = label_df.loc[label_df["chan_id"]==name]
    types = chan_rows["class"].values[0]

    preds = np.loadtxt(result_dir + f)

    scorer.process(preds, label, types)

print(scorer)