## Collecting Anomaly Patterns for Evaluation of AD

In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats

In [3]:
import sys
sys.path.append('../')
from tsdr import tsdr
from eval import groundtruth

INFO: Pandarallel will run on 10 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [4]:
from meltria import loader

# Capture the paths of all JSON metrics files under the dataset directory.
# `!find` is an IPython shell capture; each output line becomes one list entry.
metrics_files = !find /datasets/argowf-chaos-kwzdh/ -type f -name "*.json" 
# Load the files lazily. The flags below presumably select which metric
# families are loaded (containers/services/nodes on, middlewares off) and
# `num_datapoints` presumably fixes the series length -- TODO confirm against
# meltria.loader.
dataset_generator = loader.load_dataset_as_generator(metrics_files, target_metric_types={
        "containers": True,
        "services": True,
        "nodes": True,
        "middlewares": False,
    },
    num_datapoints=120,
)

# Flatten the generator's batches into one list of records.
# NOTE: the name `rerords` (sic) is read by the route-collection cell below,
# so it must be renamed in both places at once.
rerords = [r for rec in dataset_generator for r in rec]

In [5]:
import pandas as pd
from collections import defaultdict

In [6]:
# Ground-truth routes grouped by fault case:
#   (chaos type, chaos component) -> [(metrics on the route, filtered frame), ...]
route_by_case: dict[tuple[str, str], list[tuple[list[str], pd.DataFrame]]] = defaultdict(list)

# NOTE: the loop variable has to stay named `record`; gen_time_series reads it
# as a notebook global after this cell has run.
for record in rerords:
    # Drop the metrics reported as "no change" before looking for ground truth.
    filtered_df: pd.DataFrame = tsdr.filter_out_no_change_metrics(record.data_df, parallel=True)
    case_key = (record.chaos_type(), record.chaos_comp())
    gt_candidates = groundtruth.select_ground_truth_metrics_in_routes(
        record.pk,
        filtered_df.columns.to_list(),
        *case_key,
    )
    # Only the first element of each candidate (the route's metrics) is kept.
    for routes, _ in gt_candidates:
        route_by_case[case_key].append((routes, filtered_df))

In [64]:
import ipywidgets as widgets
from IPython.display import display

In [61]:
# Directory (relative to the kernel's working directory) in which
# gen_time_series creates its timestamped `anomaly_patterns_*.jsonl` files.
SAVE_DIR = "../samples/anomaly_patterns"

In [113]:
def create_widget(yield_on_click) -> widgets.Box:
    """Build the labelling UI: pattern selector, Save button and plot area.

    Args:
        yield_on_click: A generator that yields
            ``(target_app, chaos_type, chaos_comp, metric, ts)`` tuples and
            receives, via ``send()``, the pattern name chosen for the tuple it
            yielded last.

    Returns:
        An ``HBox`` holding the selector, the button and the plot output.

    The generator is primed once while the widget is built, so the first time
    series is already plotted when the box is displayed. Every later click
    sends the currently selected pattern back to the generator as the label of
    the plotted series and then plots the next one. When the generator is
    exhausted the button is disabled instead of raising ``StopIteration``.
    """
    save_button = widgets.Button(description='Save')
    select = widgets.Select(
        options=[
            'Sudden increase', 'Sudden decrease', 'Level shift up', 'Level shift down',
            'Steady increase', 'Steady decrease', 'Single spike', 'Single dip',
            'Transient level shift up', 'Transient level shift down', 'Multiple spikes', 'Multiple dips', 'Fluctuations',
            'White noise', 'Other normal',
        ],
        rows=15,
        description='Pattern:',
        layout=widgets.Layout(width='30%'),
    )
    output = widgets.Output(layout={'border': '1px solid black'})

    fig = plt.figure(figsize=(6, 4), clear=True)
    ax = fig.add_subplot(1, 1, 1)

    # The item currently plotted and waiting for a label (None until primed).
    shown_item = None

    def on_save_click_callback(clicked_button: widgets.Button) -> None:
        nonlocal shown_item
        labelled_item, label = shown_item, select.value
        exhausted = False
        try:
            if labelled_item is None:
                # Priming call: nothing is displayed yet, so there is no label to send.
                shown_item = next(yield_on_click)
            else:
                # Hand the label to the generator; it answers with the next item.
                shown_item = yield_on_click.send(label)
        except StopIteration:
            exhausted = True
            shown_item = None
            save_button.disabled = True

        if labelled_item is not None:
            # Report the series that was just labelled, not the one shown next.
            _, done_type, done_comp, done_metric, _ = labelled_item
            print(f"{label} selected for {done_type}/{done_comp}:{done_metric}")

        if exhausted:
            with output:
                output.clear_output(wait=True)
                print("No more time series to label.")
            return

        _, chaos_type, chaos_comp, metric, ts = shown_item
        ax.clear()
        ax.set_title(f"{chaos_type}/{chaos_comp}:{metric}\n")
        ax.plot(ts)
        with output:
            output.clear_output(wait=True)
            display(ax.get_figure())

    save_button.on_click(on_save_click_callback)
    save_button.click()  # trigger the first click to prime the generator
    # Detach the figure from pyplot so the notebook does not render it a second
    # time; the callback keeps drawing on it through `ax`.
    plt.close(fig=fig)

    return widgets.HBox([select, save_button, output])

In [69]:
import datetime
import jsonlines

In [73]:
def gen_time_series(routes_by_case):
    """Yield time series to be labelled and persist each label as a JSON line.

    Args:
        routes_by_case: Mapping of ``(chaos_type, chaos_comp)`` to a list of
            ``(metrics, filtered_df)`` pairs, where ``filtered_df`` has a
            column for every name in ``metrics``.

    Yields:
        ``(target_app, chaos_type, chaos_comp, metric, ts)`` where ``ts`` is
        the metric's column as a NumPy array. The value passed back through
        ``send()`` is stored as ``anomaly_pattern`` for that item; advancing
        with plain ``next()`` stores ``None``.

    A metric that appears in several routes of the same case is yielded only
    once. Records are appended to a timestamped file under ``SAVE_DIR``, which
    is created on first use if it does not exist.
    """
    import os  # local import: only needed to create the output directory

    os.makedirs(SAVE_DIR, exist_ok=True)
    now = datetime.datetime.today().strftime('%Y%m%d-%H%M%S')
    save_file_name = f"{SAVE_DIR}/anomaly_patterns_{now}.jsonl"

    # Append mode. The `with` block closes the file even when the generator is
    # abandoned before exhaustion or an error is raised while it is suspended.
    with jsonlines.open(save_file_name, mode='a', flush=True) as writer:
        for (chaos_type, chaos_comp), routes in routes_by_case.items():
            seen_metrics = set()
            for metrics, filtered_df in routes:
                for metric in metrics:
                    # skip duplicated metric
                    if metric in seen_metrics:
                        continue
                    seen_metrics.add(metric)

                    ts = filtered_df.loc[:, metric].to_numpy()
                    # NOTE(review): `record` is not a parameter. It is the
                    # notebook global left behind by the loop that built
                    # `route_by_case`, so this is the app of the LAST record
                    # processed there, not necessarily of this series.
                    # Fixing it needs the target app stored next to each route.
                    target_app = record.target_app()
                    pattern_name = (yield (target_app, chaos_type, chaos_comp, metric, ts))
                    writer.write({
                        'target_app': target_app,
                        'chaos_type': chaos_type,
                        'chaos_comp': chaos_comp,
                        'metric': metric,
                        'anomaly_pattern': pattern_name,
                        'time_series': ts.tolist(),
                    })

In [114]:
# Connect the labelling generator to the UI and show it. create_widget
# triggers one click while building the box, so the generator has already been
# advanced to its first time series by the time the box is displayed.
box = create_widget(gen_time_series(route_by_case))
display(box)

HBox(children=(Select(description='Pattern:', layout=Layout(width='30%'), options=('Sudden increase', 'Sudden …