## Collecting Anomaly Patterns for Evaluation of AD

In [19]:
%reload_ext autoreload
%autoreload 2

In [20]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats

In [21]:
import sys
sys.path.append('../')
from tsdr import tsdr
from eval import groundtruth

In [22]:
import pandas as pd
from collections import defaultdict

In [25]:
from meltria import loader

# IPython shell capture: list JSON files under the dataset directory and keep
# only the first 10 paths returned by `find`.
metrics_files = !find /datasets/argowf-chaos-rq54b/ -type f -name "*.json" | head -n 10
# Load the selected files lazily, with all four metric types enabled.
# NOTE(review): `num_datapoints=120` presumably limits each series to 120
# samples -- confirm against `meltria.loader`.
dataset_generator = loader.load_dataset_as_generator(metrics_files, target_metric_types={
        "containers": True,
        "services": True,
        "nodes": True,
        "middlewares": True,
    },
    num_datapoints=120,
)
# Flatten the generator: each yielded item is itself an iterable of records.
# NOTE: the name `rerords` (sic) is read by a later cell, so keep both in sync
# if it is ever renamed.
rerords = [r for rec in dataset_generator for r in rec]

In [27]:
from meltria.loader import DatasetRecord

# Maps each dataset record to a list of (metric names in one route, filtered
# dataframe) pairs. A record gets an entry only if at least one ground-truth
# candidate route is returned for it.
route_by_record: dict[DatasetRecord, list[tuple[list[str], pd.DataFrame]]] = defaultdict(list)

for record in rerords:
    # NOTE(review): presumably drops metrics whose values never change --
    # confirm against `tsdr.filter_out_no_change_metrics`.
    filtered_df: pd.DataFrame = tsdr.filter_out_no_change_metrics(record.data_df, parallel=True)
    # Candidate routes are selected among the columns that survived filtering.
    gt_candidates = groundtruth.select_ground_truth_metrics_in_routes(record.pk, filtered_df.columns.to_list(), record.chaos_type(), record.chaos_comp())
    # Only the first element of each candidate is used; the second is ignored.
    for routes, _ in gt_candidates:
        route_by_record[record].append((routes, filtered_df))

In [28]:
import ipywidgets as widgets
from IPython.display import display

In [29]:
# Directory (relative to the notebook) that receives the labelled
# `anomaly_patterns_<timestamp>.jsonl` files.
SAVE_DIR = "../samples/anomaly_patterns"

In [30]:
import datetime
import jsonlines
import glob

In [77]:
def create_widget(yield_on_click) -> widgets.Box:
    """Build the interactive widget used to label time series one by one.

    The widget plots the current metric above the SLI series and offers two
    selectors (anomaly position and anomaly pattern) plus Save/Skip buttons.

    Args:
        yield_on_click: A not-yet-started generator that yields
            ``(target_app, chaos_type, chaos_comp, metric, ts, sli_ts)``
            tuples and accepts an ``(anomaly_position, anomaly_pattern)``
            tuple through ``send()``. A pair containing ``None`` means
            "skip this series".

    Returns:
        An ``HBox`` holding the selectors, the buttons and the output area.
    """
    save_button = widgets.Button(description='Save')
    skip_button = widgets.Button(description='Skip')
    select_pattern = widgets.Select(
        options=[
            'Sudden increase', 'Sudden decrease', 'Level shift up', 'Level shift down',
            'Steady increase', 'Steady decrease', 'Single spike', 'Single dip',
            'Transient level shift up', 'Transient level shift down', 'Multiple spikes', 'Multiple dips', 'Fluctuations',
            'White noise', 'Other normal',
        ],
        rows=15,
        description='Pattern:',
        layout=widgets.Layout(width='30%'),
    )
    select_position = widgets.Select(
        options=["no_anomaly", "anomaly_during_the_chaos", "anomaly_in_before_chaos"],
        layout=widgets.Layout(width='30%'),
    )
    output = widgets.Output(layout={'border': '1px solid black'})

    fig = plt.figure(figsize=(6, 8), clear=True)
    ax = fig.add_subplot(2, 1, 1)
    ax_sli = fig.add_subplot(2, 1, 2)
    # Close the figure right away so it is only ever shown through
    # `display(fig)` inside `output`.
    plt.close(fig=fig)

    def render(item) -> None:
        """Redraw both axes for ``item`` and show the figure in ``output``."""
        _target_app, chaos_type, chaos_comp, metric, ts, sli_ts = item
        ax.clear()
        ax.set_title(f"{chaos_type}/{chaos_comp}\n{metric}\n")
        ax.plot(ts)
        ax_sli.clear()
        ax_sli.plot(sli_ts)
        ax_sli.set_title(f"SLI: {chaos_type}/{chaos_comp}")
        with output:
            display(fig)

    def finish() -> None:
        """Disable the buttons once the generator has no more series."""
        save_button.disabled = True
        skip_button.disabled = True
        with output:
            print("No more time series to label.")

    def advance(answer, message: str) -> None:
        """Send ``answer`` to the generator, then show ``message`` and the next series.

        ``generator.send()`` takes exactly one argument and returns the next
        yielded item, so that item is rendered directly; calling ``next()``
        afterwards would silently drop one series.
        """
        try:
            item = yield_on_click.send(answer)
        except StopIteration:
            item = None
        with output:
            output.clear_output(wait=True)
            print(message)
        if item is None:
            finish()
        else:
            render(item)

    def on_save_click_callback(clicked_button: widgets.Button) -> None:
        position, pattern = select_position.value, select_pattern.value
        advance((position, pattern), f"Selected {pattern} and {position}!")

    def on_skip_click_callback(clicked_button: widgets.Button) -> None:
        # A (None, None) answer tells the generator not to record anything.
        advance((None, None), "Skipped")

    save_button.on_click(on_save_click_callback)
    skip_button.on_click(on_skip_click_callback)

    # A fresh generator must be started with next(); it cannot receive a
    # non-None value before reaching its first `yield`.
    try:
        first_item = next(yield_on_click)
    except StopIteration:
        finish()
    else:
        render(first_item)

    return widgets.HBox([widgets.VBox([select_position, select_pattern]), widgets.VBox([save_button, skip_button]), output])

In [78]:
def gen_time_series(routes_by_record):
    """Yield time series for labelling and persist the labels sent back.

    For every record, each distinct metric found in its routes is yielded
    once as ``(target_app, chaos_type, chaos_comp, metric, ts, sli_ts)``.
    The consumer answers through ``send((anomaly_position, anomaly_pattern))``.
    The answer is appended as one JSON line to a timestamped file under
    ``SAVE_DIR``. Resuming with ``next()``, or sending a pair that contains
    ``None``, skips the series without writing anything.

    Args:
        routes_by_record: Mapping from a dataset record to a list of
            ``(metric names, filtered dataframe)`` pairs.

    Yields:
        The tuple described above, with ``ts`` and ``sli_ts`` as NumPy arrays.
    """
    import os  # local import so this cell does not depend on a file-level one

    now = datetime.datetime.today().strftime('%Y%m%d-%H%M%S')
    save_file_name = f"{SAVE_DIR}/anomaly_patterns_{now}.jsonl"
    # Opening in append mode fails if the directory does not exist yet.
    os.makedirs(SAVE_DIR, exist_ok=True)

    # NOTE(review): the SLI metric is hard-coded; an earlier variant derived
    # it from `record.pk.get_root_metrics()[0]` -- confirm which is intended.
    sli_metric: str = "m-ts-ui-dashboard_nginx_http_response_count_total"

    # The context manager closes the file even when the generator is closed
    # or garbage-collected before it is exhausted.
    with jsonlines.open(save_file_name, mode='a', flush=True) as writer:
        for record, routes in routes_by_record.items():
            # Metrics already presented for this record, across its routes.
            seen_metrics: set[str] = set()
            sli_ts = record.data_df.loc[:, sli_metric].to_numpy()
            for metrics, filtered_df in routes:
                for metric in metrics:
                    if metric in seen_metrics:
                        continue
                    seen_metrics.add(metric)

                    ts = filtered_df.loc[:, metric].to_numpy()
                    answer = yield (record.target_app(), record.chaos_type(), record.chaos_comp(), metric, ts, sli_ts)
                    # `next()` resumes the generator with None; treat that as
                    # a skip instead of failing on tuple unpacking.
                    if answer is None:
                        continue
                    position, pattern_name = answer
                    if position is None or pattern_name is None:
                        continue
                    writer.write({
                        'target_app': record.target_app(),
                        'chaos_type': record.chaos_type(),
                        'chaos_comp': record.chaos_comp(),
                        'metric': metric,
                        'anomaly_position': position,
                        'anomaly_pattern': pattern_name,
                        'time_series': ts.tolist(),
                    })

In [79]:
# Start a fresh labelling session: the generator supplies the series and
# writes the labels, and the widget drives it from the Save/Skip buttons.
box = create_widget(gen_time_series(route_by_record))
display(box)

HBox(children=(VBox(children=(Select(layout=Layout(width='30%'), options=('no_anomaly', 'anomaly_during_the_ch…