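## Dataset trainticket m9dgg

Loads the metrics of the `m9dgg` chaos dataset, narrows the records with the validation checks, and then runs and saves TSDR once per configuration listed below.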

In [5]:
# Load (or reload) IPython's autoreload extension and switch it to mode 2 so
# that edits to imported project modules are picked up without a kernel restart.
%reload_ext autoreload
%autoreload 2

In [4]:
# Identifier of the dataset analysed in this notebook. It is interpolated into
# the input directory path and passed to every run_and_save_tsdr_to_each_set call.
DATASET_ID = "m9dgg"

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Notebook-wide figure defaults, applied in one call: DejaVu Sans at size 7,
# size-8 tick labels, inward ticks, a 1.0-wide axes frame and a grid by default.
plt.rcParams.update({
    "font.family": "DejaVu Sans",
    "font.size": 7,
    "xtick.labelsize": 8,
    "ytick.labelsize": 8,
    "xtick.direction": "in",
    "ytick.direction": "in",
    "axes.linewidth": 1.0,
    "axes.grid": True,
})

In [2]:
import sys
# Extend the import path two directories up (resolved against the kernel's
# working directory) before importing the project packages below; the order
# matters. Presumably '../../' is the repository root -- confirm.
sys.path.append('../../')

# Project-local modules. Only `loader` and `validation` are referenced in the
# cells of this notebook that follow.
from tsdr import tsdr
from eval import groundtruth
from meltria import loader
from eval import validation

In [6]:
from pathlib import Path

# Discover the metrics files in pure Python instead of capturing `!find` output.
# The shell capture also returns whatever find prints on failure (e.g. a missing
# directory), which would then be handed to the loader as if it were a file path,
# and its ordering depends on the filesystem. Sorting makes the record order
# reproducible across runs and machines.
dataset_dir = Path(f"/datasets/argowf-chaos-{DATASET_ID}")
metrics_files = sorted(
    str(path) for path in dataset_dir.rglob("*.json") if path.is_file()
)
if not metrics_files:
    # Fail here with a clear message rather than deep inside the loader.
    raise FileNotFoundError(f"no *.json metrics files found under {dataset_dir}")

# Load every metric family, keeping 4*45 datapoints per series (45min).
records = loader.load_dataset(
    metrics_files,
    target_metric_types={
        "containers": True,
        "services": True,
        "nodes": True,
        "middlewares": True,
    },
    num_datapoints=4*45, # 45min
)

In [7]:
# Number of records returned by loader.load_dataset.
len(records)

93

In [27]:
# Narrow `records` to those returned by the validation module's SLI anomaly
# check, using the SLI at index 1.
# NOTE(review): faulty_datapoints=4*5 presumably denotes a 5-minute window at
# 4 datapoints per minute (cf. num_datapoints=4*45 for 45min) -- confirm.
well_injected_records = validation.find_records_detected_anomalies_of_sli(
    records,
    faulty_datapoints=4*5,
    sli_index=1,
)

In [28]:
# Number of records left after the SLI anomaly check.
len(well_injected_records)

42

In [30]:
# Apply the validation module's cause-metric anomaly check on top of the
# SLI-filtered records. The result is rebound to the same name, so this cell
# always consumes whatever `well_injected_records` currently holds.
well_injected_records = validation.find_records_detected_anomalies_of_cause_metrics(
    well_injected_records,
    faulty_datapoints=4*5,
)

In [31]:
# Number of records left after the cause-metric anomaly check as well.
len(well_injected_records)

42

In [32]:
import sys
# Extend the import path with the parent directory before importing
# notebooklib; presumably that is where the package lives -- confirm.
sys.path.append("../")
# Only run_and_save_tsdr_to_each_set is called in the cells that follow.
from notebooklib.save import run_and_save_tsdr_to_each_set, load_tsdr_by_chaos

In [33]:
# Step-2 settings: hierarchical clustering with single linkage over SBD
# distances (threshold 0.02) on the raw series, with the 'medoid' choice method.
hierarchy_sbd_options = {
    "step2_clustering_method_name": "hierarchy",
    "step2_hierarchy_dist_threshold": 0.02,  # should be <1.0 if 'sbd' is specified
    "step2_hierarchy_dist_type": "sbd",  # 'sbd' or 'hamming'
    "step2_hierarchy_linkage_method": "single",  # 'single','complete','average','weighted', 'centroid', 'median', 'ward'
    "step2_clustering_series_type": "raw",  # 'raw', 'anomaly_score' or 'binary_anomaly_score'
    "step2_clustering_choice_method": "medoid",  # 'medoid' or 'maxsum'
}

# Run TSDR with these options and save the results under the given suffix.
run_and_save_tsdr_to_each_set(
    DATASET_ID,
    well_injected_records,
    tsdr_options=hierarchy_sbd_options,
    suffix="hierarchy-single_sbd_medoid",
)

In [34]:
# Run TSDR without passing tsdr_options and save under the "hdbscan_sbd" suffix.
# NOTE(review): the suffix implies the callee's defaults are HDBSCAN with SBD
# distance -- confirm against run_and_save_tsdr_to_each_set.
run_and_save_tsdr_to_each_set(
    DATASET_ID,
    well_injected_records,
    suffix="hdbscan_sbd",
)

In [35]:
# Step-2 settings: DBSCAN with min_pts 1 over SBD distances on the raw series,
# with the 'medoid' choice method.
# NOTE(review): unlike the hierarchy runs, step2_clustering_method_name is not
# set here; presumably the default already selects DBSCAN -- confirm.
dbscan_sbd_options = {
    "step2_dbscan_min_pts": 1,
    "step2_dbscan_dist_type": "sbd",  # 'pearsonr' or 'sbd'
    "step2_dbscan_algorithm": "dbscan",  # 'dbscan' or 'hdbscan'
    "step2_clustering_series_type": "raw",  # 'raw', 'anomaly_score' or 'binary_anomaly_score'
    "step2_clustering_choice_method": "medoid",  # 'medoid' or 'maxsum'
}

# Run TSDR with these options and save the results under the given suffix.
run_and_save_tsdr_to_each_set(
    DATASET_ID,
    well_injected_records,
    tsdr_options=dbscan_sbd_options,
    suffix="fluxrank_sbd_medoid",
)



In [36]:
# Step-2 settings: DBSCAN with min_pts 1 using the 'pearsonr' distance type on
# the raw series, with the 'medoid' choice method.
# NOTE(review): step2_clustering_method_name is not set here; presumably the
# default already selects DBSCAN -- confirm.
dbscan_pearsonr_options = {
    "step2_dbscan_min_pts": 1,
    "step2_dbscan_dist_type": "pearsonr",  # 'pearsonr' or 'sbd'
    "step2_dbscan_algorithm": "dbscan",  # 'dbscan' or 'hdbscan'
    "step2_clustering_series_type": "raw",  # 'raw', 'anomaly_score' or 'binary_anomaly_score'
    "step2_clustering_choice_method": "medoid",  # 'medoid' or 'maxsum'
}

# Run TSDR with these options and save the results under the given suffix.
run_and_save_tsdr_to_each_set(
    DATASET_ID,
    well_injected_records,
    tsdr_options=dbscan_pearsonr_options,
    suffix="fluxrank_pearsonr_medoid",
)



In [None]:
# Step-2 settings: hierarchical clustering with single linkage over SBD
# distances (threshold 0.02) on the raw series, with the 'medoid' choice method.
phase2only_hierarchy_options = {
    "step2_clustering_method_name": "hierarchy",
    "step2_hierarchy_dist_threshold": 0.02,  # should be <1.0 if 'sbd' is specified
    "step2_hierarchy_dist_type": "sbd",  # 'sbd' or 'hamming'
    "step2_hierarchy_linkage_method": "single",  # 'single','complete','average','weighted', 'centroid', 'median', 'ward'
    "step2_clustering_series_type": "raw",  # 'raw', 'anomaly_score' or 'binary_anomaly_score'
    "step2_clustering_choice_method": "medoid",  # 'medoid' or 'maxsum'
}

# Same clustering options with the unireducer switched off; judging by the
# "phase2only" suffix this isolates the second phase -- confirm.
run_and_save_tsdr_to_each_set(
    DATASET_ID,
    well_injected_records,
    tsdr_options=phase2only_hierarchy_options,
    enable_unireducer=False,
    suffix="phase2only_hierarchy-single_sbd_medoid",
)

In [None]:
# Step-1 settings: two-sample test using the 'ks' method at alpha 0.05.
# NOTE(review): seg_idx -(4 * 10) presumably places the split 10 minutes
# (4 datapoints per minute) before the end of the series -- confirm.
kstest_options = {
    "step1_model_name": "two_samp_test",
    "step1_two_samp_test_method": "ks",  # 'ks' or 'ad' or 'es' or 'e-diagnosis'
    "step1_two_samp_test_alpha": 0.05,
    "step1_two_samp_test_seg_idx": -(4 * 10),  # 0 means division at midpoint
}

# Multireducer switched off; judging by the "phase1only" suffix this isolates
# the first phase -- confirm.
run_and_save_tsdr_to_each_set(
    DATASET_ID,
    well_injected_records,
    tsdr_options=kstest_options,
    enable_multireducer=False,
    suffix="phase1only_kstest",
)

In [None]:
# Step-1 settings: two-sample test using the 'e-diagnosis' method with 999
# resamples at alpha 0.05.
# NOTE(review): seg_idx -(4 * 10) presumably places the split 10 minutes
# (4 datapoints per minute) before the end of the series -- confirm.
ediagnosis_options = {
    "step1_model_name": "two_samp_test",
    "step1_two_samp_test_method": "e-diagnosis",  # 'ks' or 'ad' or 'es' or 'e-diagnosis'
    "step1_two_samp_test_n_resamples": 999,
    "step1_two_samp_test_alpha": 0.05,
    "step1_two_samp_test_seg_idx": -(4 * 10),  # 0 means division at midpoint
}

# Multireducer switched off; judging by the "phase1only" suffix this isolates
# the first phase -- confirm.
run_and_save_tsdr_to_each_set(
    DATASET_ID,
    well_injected_records,
    tsdr_options=ediagnosis_options,
    enable_multireducer=False,
    suffix="phase1only_ediagnosis",
)

In [None]:
# Step-1 settings: the 'fluxinfer' model with a sigma threshold of 3.
fluxinfer_options = {
    "step1_model_name": "fluxinfer",
    "step1_fluxinfer_sigma_threshold": 3,
}

# Multireducer switched off; judging by the "phase1only" suffix this isolates
# the first phase -- confirm.
run_and_save_tsdr_to_each_set(
    DATASET_ID,
    well_injected_records,
    tsdr_options=fluxinfer_options,
    enable_multireducer=False,
    suffix="phase1only_fluxinfer",
)

In [None]:
# Combined settings: 'ks' two-sample test in step 1 (alpha 0.05), then
# single-linkage hierarchical clustering over SBD distances (threshold 0.02)
# on the raw series with the 'medoid' choice method in step 2.
kstest_hierarchy_options = {
    # step 1
    "step1_model_name": "two_samp_test",
    "step1_two_samp_test_method": "ks",  # 'ks' or 'ad' or 'es' or 'e-diagnosis'
    "step1_two_samp_test_alpha": 0.05,
    "step1_two_samp_test_seg_idx": -(4 * 10),  # 0 means division at midpoint
    # step 2
    "step2_clustering_method_name": "hierarchy",
    "step2_hierarchy_dist_threshold": 0.02,  # should be <1.0 if 'sbd' is specified
    "step2_hierarchy_dist_type": "sbd",  # 'sbd' or 'hamming'
    "step2_hierarchy_linkage_method": "single",  # 'single','complete','average','weighted', 'centroid', 'median', 'ward'
    "step2_clustering_series_type": "raw",  # 'raw', 'anomaly_score' or 'binary_anomaly_score'
    "step2_clustering_choice_method": "medoid",  # 'medoid' or 'maxsum'
}

# Run TSDR with these options and save the results under the given suffix.
run_and_save_tsdr_to_each_set(
    DATASET_ID,
    well_injected_records,
    tsdr_options=kstest_hierarchy_options,
    suffix="kstest_hierarchy-single_sbd_medoid",
)

In [None]:
# Combined settings: 'e-diagnosis' two-sample test in step 1 (999 resamples,
# alpha 0.05), then single-linkage hierarchical clustering over SBD distances
# (threshold 0.02) on the raw series with the 'medoid' choice method in step 2.
ediagnosis_hierarchy_options = {
    # step 1
    "step1_model_name": "two_samp_test",
    "step1_two_samp_test_method": "e-diagnosis",  # 'ks' or 'ad' or 'es' or 'e-diagnosis'
    "step1_two_samp_test_n_resamples": 999,
    "step1_two_samp_test_alpha": 0.05,
    "step1_two_samp_test_seg_idx": -(4 * 10),  # 0 means division at midpoint
    # step 2
    "step2_clustering_method_name": "hierarchy",
    "step2_hierarchy_dist_threshold": 0.02,  # should be <1.0 if 'sbd' is specified
    "step2_hierarchy_dist_type": "sbd",  # 'sbd' or 'hamming'
    "step2_hierarchy_linkage_method": "single",  # 'single','complete','average','weighted', 'centroid', 'median', 'ward'
    "step2_clustering_series_type": "raw",  # 'raw', 'anomaly_score' or 'binary_anomaly_score'
    "step2_clustering_choice_method": "medoid",  # 'medoid' or 'maxsum'
}

# Run TSDR with these options and save the results under the given suffix.
run_and_save_tsdr_to_each_set(
    DATASET_ID,
    well_injected_records,
    tsdr_options=ediagnosis_hierarchy_options,
    suffix="ediagnosis_hierarchy-single_sbd_medoid",
)

In [None]:
# Combined settings: 'fluxinfer' model in step 1 (sigma threshold 3), then
# single-linkage hierarchical clustering over SBD distances (threshold 0.02)
# on the raw series with the 'medoid' choice method in step 2.
fluxinfer_hierarchy_options = {
    # step 1
    "step1_model_name": "fluxinfer",
    "step1_fluxinfer_sigma_threshold": 3,
    # step 2
    "step2_clustering_method_name": "hierarchy",
    "step2_hierarchy_dist_threshold": 0.02,  # should be <1.0 if 'sbd' is specified
    "step2_hierarchy_dist_type": "sbd",  # 'sbd' or 'hamming'
    "step2_hierarchy_linkage_method": "single",  # 'single','complete','average','weighted', 'centroid', 'median', 'ward'
    "step2_clustering_series_type": "raw",  # 'raw', 'anomaly_score' or 'binary_anomaly_score'
    "step2_clustering_choice_method": "medoid",  # 'medoid' or 'maxsum'
}

# Run TSDR with these options and save the results under the given suffix.
run_and_save_tsdr_to_each_set(
    DATASET_ID,
    well_injected_records,
    tsdr_options=fluxinfer_hierarchy_options,
    suffix="fluxinfer_hierarchy-single_sbd_medoid",
)