In [None]:
import pandas as pd
# Load the TSB-AD-U evaluation file list; the next cell iterates over its `file_name` column.
eval_files = pd.read_csv("benchmark_exp/TSB-AD/File-List/TSB-AD-U-Eval-List.csv")
# Bare expression so the notebook renders the frame.
eval_files

In [None]:
# Read the `Data` column of every listed file, keyed by the file name without its extension.
# rsplit(".", 1) strips only the final extension, so a file name that itself contains dots
# keeps its full stem (split(".")[0] would cut it at the first dot) and `key + ".csv"`
# still points at the original file.
files = {
    f.rsplit(".", 1)[0]: pd.read_csv(f"benchmark_exp/TSB-AD/TSB-AD-U/{f}")['Data']
    for f in eval_files.file_name
}

In [None]:
# Wrap the {dataset name: Data series} dict in a Series with one entry per dataset name.
asdf = pd.Series(files)

In [None]:
# One single-column frame per dataset, each tagged with a constant `name` column
# holding the dataset key, ready to be stacked into a long table.
files_with_names = [
    pd.DataFrame(series.copy()).assign(name=stem)
    for stem, series in files.items()
]

In [None]:
# Stack the per-dataset frames into one long frame; each frame keeps its own row index,
# so index values repeat across datasets.
df_long = pd.concat(files_with_names)

In [None]:
# reset_index() turns the repeated row index into a regular column before the long table is written.
df_long.reset_index().to_parquet("series_data_long.parquet")

In [None]:
# Single-column frame indexed by dataset name; later cells select the column as `bla[0]`.
bla = pd.DataFrame(asdf)

In [None]:
# Convert each dataset's series to a plain Python list and write one row per dataset to CSV.
bla[0].apply(list).to_csv("combined_files.csv")

In [None]:
# Same list-per-dataset table as the CSV export, written as parquet.
pd.DataFrame(bla[0].apply(list)).to_parquet("combined_files.parquet")

In [None]:
# Load a results file from the working directory. Later cells read its `DataSetName` column
# and unpack each row as (name, values, ad_flag, ad_score, baseline).
# NOTE(review): the file name looks machine-generated; its origin is not recorded in this notebook.
df = pd.read_parquet("dd2ab5d8-43f0-4a58-acf4-70285684f4be_0_49abff8e02de4e998b1ab9a30f3d7878.parquet")

In [None]:
# Display the loaded results frame.
df

In [None]:
# NOTE(review): this writes the full list-per-dataset table (all of `bla`), not a subset,
# to "missing.parquet"; the filtered subset is written to "actually_missing.parquet"
# further down. Confirm whether this file is still needed.
pd.DataFrame(bla[0].apply(list)).to_parquet("missing.parquet")

In [None]:
# Display the dataset names present in the results frame.
df['DataSetName']

In [None]:
# Dataset names that are in `bla` but have no row in the results frame.
# The lookup set is built once; previously df['DataSetName'].tolist() was rebuilt and
# scanned linearly for every index entry.
known_names = set(df['DataSetName'])
missing = [x for x in bla.index if x not in known_names]

In [None]:
# Write only the datasets listed in `missing`, in the same list-per-dataset layout as the full export.
pd.DataFrame(bla.loc[missing][0].apply(list)).to_parquet("actually_missing.parquet")

In [None]:
from TSB_AD.evaluation.metrics import get_metrics


In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.ticker import MultipleLocator
import numpy as np

def plot_detection(signal, label, scores=None, train=None, ax=None, linewidth=1, window_length=None):
    """Plot a signal with its labelled anomaly regions and, optionally, anomaly scores.

    Parameters
    ----------
    signal : pandas.Series or array-like
        Values to plot. A Series' index is used as the x axis; anything else
        is wrapped in a Series with a default integer index.
    label : array-like
        One label per point of ``signal``; contiguous anomalous runs (as found
        by ``get_anomaly_regions``) are shaded red.
    scores : array-like, optional
        One score per point of ``signal``, drawn on a twin y axis. Nothing is
        drawn for scores when this is ``None``.
    train : pandas.Series, optional
        Extra series drawn in grey on the signal axis. If the signal's index
        starts before the end of ``train``'s index, the signal is shifted
        right by ``train.index.max()``.
    ax : matplotlib.axes.Axes, optional
        Axes to draw on. When omitted a new 40x5 inch, 300 dpi figure is created.
    linewidth : float
        Line width used for every plotted line.
    window_length : float, optional
        If given, minor x ticks are placed at multiples of this value.

    Returns
    -------
    matplotlib.axes.Axes
        The axes the signal was drawn on.
    """
    anomaly_alpha = 0.4  # shared by the shaded regions and their legend entry

    if not isinstance(signal, pd.Series):
        signal = pd.Series(np.asarray(signal))
    if train is not None and signal.index.min() < train.index.max():
        # Shift a copy so the caller's series keeps its original index.
        signal = signal.copy()
        signal.index = signal.index + train.index.max()

    # Go through plain arrays so values are attached by position; handing a
    # Series to pd.Series(..., index=...) would align on its own index instead.
    label = pd.Series(np.asarray(label), index=signal.index)
    if scores is not None:
        # Only wrap real scores: pd.Series(None, index=...) is an all-NaN
        # series, which would make the "scores is not None" check below useless.
        scores = pd.Series(np.asarray(scores), index=signal.index)

    if ax is None:
        plt.figure(figsize=(40, 5), dpi=300)
        signal_ax = plt.gca()
    else:
        signal_ax = ax
    signal_ax.set_ylabel("signal")
    signal_line, = signal_ax.plot(signal, label='signal', c='k', linewidth=linewidth)
    if train is not None:
        signal_ax.plot(train, label='train', c='grey', linewidth=linewidth)

    legend_handles = [signal_line]
    legend_ax = signal_ax
    if scores is not None:
        scores_ax = signal_ax.twinx()
        scores_line, = scores_ax.plot(scores, label='scores', c='b', alpha=0.5, linewidth=linewidth)
        scores_ax.set_ylabel("scores")
        legend_handles.append(scores_line)
        # The twin axes is drawn on top, so the legend goes there to stay unobscured.
        legend_ax = scores_ax

    # Pad the y range by 10% on each side; the shaded regions span the unpadded range.
    ylims = signal_ax.get_ylim()
    yrange = ylims[1] - ylims[0]
    signal_ax.set_ylim(ylims[0] - 0.1 * yrange, ylims[1] + 0.1 * yrange)

    # get_anomaly_regions returns positions; translate them to x coordinates so
    # the shading follows the signal when its index is shifted or does not start at 0.
    if pd.api.types.is_numeric_dtype(signal.index.dtype):
        x_values = signal.index.to_numpy()
    else:
        # Non-numeric index: fall back to positional coordinates.
        x_values = np.arange(len(signal))
    # Regions narrower than 1/1000 of the series length are widened to that size.
    thin_thresh = len(label) / 1e3
    for start, end in get_anomaly_regions(label):
        x_start = x_values[start]
        width = np.maximum(x_values[end] - x_start, thin_thresh)
        signal_ax.add_patch(
            patches.Rectangle((x_start, ylims[0]), width, yrange, facecolor='red', alpha=anomaly_alpha)
        )

    legend_handles.append(patches.Patch(color='red', label='anomaly', alpha=anomaly_alpha))
    # Attach the legend to the axes we drew on instead of whatever pyplot considers current.
    legend_ax.legend(handles=legend_handles)

    if window_length is not None:
        locator = MultipleLocator(window_length)
        locator.MAXTICKS = 2000  # raise the locator's tick limit for long series with short windows
        signal_ax.xaxis.set_minor_locator(locator)
    return signal_ax

In [None]:
def get_anomaly_regions(labels):
    """Return the contiguous anomalous runs in a label sequence.

    Parameters
    ----------
    labels : array-like
        One-dimensional sequence of labels; entries greater than zero are
        treated as anomalous. Boolean, integer and float labels are accepted.

    Returns
    -------
    list of (int, int)
        One ``(start, end)`` tuple per run, as positions into ``labels``,
        both inclusive. Empty when there is no anomalous entry.
    """
    # Normalise to small signed ints first: np.diff on booleans is an XOR
    # (never -1) and on unsigned ints wraps around, so edges would be missed.
    flags = (np.asarray(labels) > 0).astype(np.int8)
    # Pad with a normal point on both sides so runs touching either end of the
    # series (including a fully anomalous series) still have a start and an end edge.
    edges = np.diff(np.concatenate([[0], flags, [0]]))
    starts = np.flatnonzero(edges == 1)
    ends = np.flatnonzero(edges == -1) - 1  # falling edge sits one past the last anomalous point
    return [(int(start), int(end)) for start, end in zip(starts, ends)]

In [None]:
# NOTE(review): `ad_flag` is not assigned by any earlier cell; the only visible binding is the
# row unpacking in the sequential evaluation loop further down, so this cell fails on a fresh
# kernel run from the top. Confirm what `ad_flag` is expected to hold here.
ad_flag.value_counts()

In [None]:
# NOTE(review): `this_data` is only assigned in later cells, so this display depends on
# out-of-order execution.
this_data

In [None]:
from joblib.parallel import Parallel, delayed
from time import time

def eval_one(row):
    """Score one results row against the ground-truth labels of its dataset.

    Parameters
    ----------
    row : tuple
        An ``(index, row)`` pair as produced by ``DataFrame.iterrows()``. The
        row must unpack into ``(name, values, ad_flag, ad_score, baseline)``;
        ``values`` and ``ad_score`` are bytes holding comma-separated floats
        with one enclosing character on each side (presumably brackets —
        TODO confirm against the producer of the parquet file).

    Returns
    -------
    tuple
        ``(name, evaluation_result, seconds)`` where ``evaluation_result`` is
        whatever ``get_metrics`` returns and ``seconds`` is the wall-clock time
        spent on window estimation plus metric computation.

    Raises
    ------
    AssertionError
        If the values stored in the row differ from the ``Data`` column of the
        dataset's CSV file.
    """
    # Imported here so the function does not depend on the order in which the
    # notebook's import cells were executed.
    from TSB_AD.evaluation.metrics import get_metrics
    from TSB_AD.utils.slidingWindows import find_length_rank

    # ad_flag and baseline are part of the row layout but not needed for scoring.
    _, (name, values, _ad_flag, ad_score, _baseline) = row
    values = pd.Series(values[1:-1].split(b",")).astype(float)
    # Scores are used by magnitude only.
    ad_score = np.abs(pd.Series(ad_score[1:-1].split(b",")).astype(float))

    this_data = pd.read_csv("benchmark_exp/TSB-AD/TSB-AD-U/" + name + ".csv")
    label = this_data['Label']
    # Guard against scoring a result against the wrong dataset.
    assert np.allclose(this_data['Data'], values), f"values for {name} do not match the dataset file"

    tick = time()
    slidingWindow = find_length_rank(this_data.Data, rank=1)
    evaluation_result = get_metrics(ad_score, label, slidingWindow=slidingWindow)
    elapsed = time() - tick  # was `time - tick()`, which raises TypeError
    return name, evaluation_result, elapsed

In [None]:
# Run eval_one over every (index, row) pair of the results frame in parallel;
# `res` collects the return values as a list.
res = Parallel(n_jobs=-1, verbose=1)(delayed(eval_one)(row) for row in df.iterrows())

In [None]:
# Flatten the evaluation results into one record per dataset: the metrics plus a `name` field.
# eval_one returns (name, metrics, elapsed_seconds); the starred target absorbs anything after
# the metrics, so both 2- and 3-element results unpack (a plain two-name unpack raises
# ValueError on 3-tuples).
res_new = []
for dataset_name, metrics, *_ in res:
    record = metrics.copy()  # leave the entries of `res` untouched
    record['name'] = dataset_name
    res_new.append(record)

In [None]:
# One row per dataset: the metric fields plus the `name` field added above.
pd.DataFrame(res_new).to_csv("results_seasonal_decompose.csv")

In [None]:
from TSB_AD.utils.slidingWindows import find_length_rank
from tqdm import tqdm



# Sequential evaluation: stores the get_metrics() result of every results row under its dataset name.
# NOTE(review): the loop variables (name, values, baseline, ad_score, this_data, ...) stay bound to
# the last row after the loop, and later cells appear to read them.
scores = {}
for row in tqdm(list(df.iterrows())):
    # iterrows() yields (index, row); the row is unpacked positionally into its five fields.
    i, (name, values, ad_flag, ad_score, baseline) = row
    # Drop the first and last byte (presumably enclosing brackets — TODO confirm) and parse
    # the comma-separated floats.
    values = pd.Series(values[1:-1].split(b",")).astype(float)
    # Scores are used by magnitude only.
    ad_score = np.abs(pd.Series(ad_score[1:-1].split(b",")).astype(float))
    #ad_flag = pd.Series(ad_flag[1:-1].split(b",")).astype(float)

    baseline = pd.Series(baseline[1:-1].split(b",")).astype(float)

    # Ground truth comes from the dataset's own CSV file.
    this_data = pd.read_csv("benchmark_exp/TSB-AD/TSB-AD-U/" + name + ".csv")
    label = this_data['Label']
    # Guard against scoring a result against the wrong dataset.
    assert np.allclose(this_data['Data'], values)
    # Window length passed to get_metrics is estimated from the data itself.
    slidingWindow = find_length_rank(this_data.Data, rank=1)
    evaluation_result = get_metrics(ad_score, label, slidingWindow=slidingWindow)
    scores[name] = evaluation_result
    #plot_detection(values, label, scores=ad_score)
    #plot_detection(values, label, scores=ad_flag)

In [None]:
import matplotlib.pyplot as plt
# Overlay signal, baseline and score of the currently bound row on one set of axes.
for series, series_label in ((values, "signal"), (baseline, "baseline"), (ad_score, "score")):
    plt.plot(series, label=series_label)
plt.legend()

In [None]:
# NOTE(review): `score` is not assigned anywhere in the visible notebook and raises NameError on a
# fresh kernel; possibly `scores` (the metrics dict built by the sequential loop) was meant — confirm.
score

In [None]:
# Reload the raw dataset file for whatever `name` is currently bound to
# (presumably the last row of the evaluation loop — verify before relying on it).
this_data = pd.read_csv("benchmark_exp/TSB-AD/TSB-AD-U/" + name + ".csv")


In [None]:
# Display the dataset frame loaded in the previous cell.
this_data