# Dataset sockshop (9n6mf)

In [2]:
# Load IPython's autoreload extension and switch it to mode 2, so imported
# modules are reloaded before each cell executes and edits to the project
# packages are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [3]:
# Identifier of the dataset analysed in this notebook. It is interpolated into
# the dataset directory path when the metrics files are located, and passed as
# `dataset_id` to the TSDR sweep.
DATASET_ID = "9n6mf"

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Notebook-wide matplotlib style, applied in one call: DejaVu Sans at size 7,
# tick labels at size 8, ticks pointing inward, axes line width 1.0, grid on.
plt.rcParams.update({
    "font.family": "DejaVu Sans",
    "font.size": 7,
    "xtick.labelsize": 8,
    "ytick.labelsize": 8,
    "xtick.direction": "in",
    "ytick.direction": "in",
    "axes.linewidth": 1.0,
    "axes.grid": True,
})

In [1]:
import sys
# Add the directory two levels above the working directory to the import path;
# presumably that is where the `tsdr`, `eval` and `meltria` packages live — confirm.
sys.path.append('../../')

from tsdr import tsdr
from eval import groundtruth
from meltria import loader
from eval import validation

In [5]:
metrics_files = !find "/datasets/argowf-chaos-{DATASET_ID}/" -type f -name "*.json"
records = loader.load_dataset(metrics_files, target_metric_types={
        "containers": True,
        "services": True,
        "nodes": True,
        "middlewares": True,
    },
    num_datapoints=4*45, # 45min
)

In [None]:
# Number of records loaded from the dataset.
len(records)

135

In [None]:
# First validation pass, on the SLI at index 0. Presumably keeps only the
# records whose SLI shows an anomaly within the last `faulty_datapoints`
# samples (4*5, i.e. 5 min at 4 datapoints per minute) — confirm against
# `eval.validation`. In the recorded run this narrows 135 records to 111.
well_injected_records = validation.find_records_detected_anomalies_of_sli(records, faulty_datapoints=4*5, sli_index=0)

In [None]:
# Number of records left after the SLI validation pass.
len(well_injected_records)

111

In [None]:
# Second validation pass, applied to the output of the SLI pass and stored
# under the same name. Presumably keeps only the records whose cause metrics
# also show an anomaly within the last 4*5 samples — confirm against
# `eval.validation`. In the recorded run this narrows 111 records to 105.
well_injected_records = validation.find_records_detected_anomalies_of_cause_metrics(well_injected_records, faulty_datapoints=4*5)

In [None]:
# Number of records left after the cause-metric validation pass.
len(well_injected_records)

105

In [None]:
# This cell must be executed before the sweep cell that follows: it is the
# only place `sweep_tsdr_and_save_as_cache` is imported, and the NameError
# recorded under the sweep cell shows it had not been run in that session.
import sys
# Also make the parent of the working directory importable.
sys.path.append("../")
from eval.tsdr import sweep_tsdr_and_save_as_cache

In [None]:
import os

# Project identifier exported through the environment before the sweep starts;
# presumably read by the Neptune logging inside `eval.tsdr` — confirm.
os.environ["TSDR_NEPTUNE_PROJECT"] = "yuuk1/tsdr"

# Run the TSDR sweep over the validated records, one entry of
# `list_of_tsdr_options` per configuration.
# NOTE: this cell needs `sweep_tsdr_and_save_as_cache`, which is imported in
# the preceding cell. The output recorded for this cell is a NameError, so it
# was executed in a session where that import cell had not been run.
sweep_tsdr_and_save_as_cache(
    dataset_id=DATASET_ID,
    records=well_injected_records,
    list_of_tsdr_options=[
        # Active configuration: unireducer enabled, multireducer disabled,
        # step 1 using the "residual_integral" method.
        {
            "enable_unireducer": True,
            "enable_multireducer": False,
            "step1_method_name": "residual_integral",
            "step1_residual_integral_threshold": 20,
            "step1_residual_integral_change_start_point": False,
            "step1_residual_integral_change_start_point_n_sigma": 3,
        },
        # Everything below is a disabled alternative configuration kept for
        # reference; uncomment an entry to include it in the sweep.
        # NOTE(review): several disabled entries use the key `step1_model_name`
        # while the active entry uses `step1_method_name`, and the plain dbscan
        # entries carry no `step2_clustering_method_name` — verify the expected
        # keys before re-enabling any of them.
        # {
        #     "enable_unireducer": False,
        #     "enable_multireducer": True,
        #     "step2_clustering_method_name": "dbscan",
        #     "step2_dbscan_min_pts": 2,
        #     "step2_dbscan_dist_type": "sbd",
        #     "step2_dbscan_algorithm": "hdbscan",
        #     "step2_clustering_series_type": "raw",
        #     "step2_clustering_choice_method": "medoid",
        # },
        # {
        #     "enable_unireducer": True,
        #     "enable_multireducer": True,
        #     "step1_method_name": "residual_integral",
        #     "step1_residual_integral_threshold": 20,
        #     "step1_residual_integral_change_start_point": False,
        #     "step1_residual_integral_change_start_point_n_sigma": 3,
        #     "step2_clustering_method_name": "dbscan",
        #     "step2_dbscan_min_pts": 2,
        #     "step2_dbscan_dist_type": "sbd",
        #     "step2_dbscan_algorithm": "hdbscan",
        #     "step2_clustering_series_type": "raw",
        #     "step2_clustering_choice_method": "medoid",
        # }, {
        #     "enable_unireducer": True,
        #     "enable_multireducer": True,
        #     "step1_method_name": "residual_integral",
        #     "step1_residual_integral_threshold": 20,
        #     "step1_residual_integral_change_start_point": False,
        #     "step1_residual_integral_change_start_point_n_sigma": 3,
        #     "step2_clustering_method_name": "hierarchy",
        #     "step2_hierarchy_dist_threshold": 0.02,  # should be <1.0 if 'sbd' is specified
        #     "step2_hierarchy_dist_type": "sbd", # 'sbd' or 'hamming'
        #     "step2_hierarchy_linkage_method": "single",  # 'single','complete','average','weighted', 'centroid', 'median', 'ward'
        #     "step2_clustering_series_type": "raw",  # 'raw', 'anomaly_score' or 'binary_anomaly_score'
        #     "step2_clustering_choice_method": "medoid",  # 'medoid' or 'maxsum'
        # },
        # {
        #     "enable_unireducer": True,
        #     "enable_multireducer": True,
        #     "step2_clustering_method_name": "hierarchy",
        #     "step2_hierarchy_dist_threshold": 0.02,  # should be <1.0 if 'sbd' is specified
        #     "step2_hierarchy_dist_type": "sbd", # 'sbd' or 'hamming'
        #     "step2_hierarchy_linkage_method": "single",  # 'single','complete','average','weighted', 'centroid', 'median', 'ward'
        #     "step2_clustering_series_type": "raw",  # 'raw', 'anomaly_score' or 'binary_anomaly_score'
        #     "step2_clustering_choice_method": "medoid",  # 'medoid' or 'maxsum'
        # }, {
        #     "enable_unireducer": True,
        #     "enable_multireducer": True,
        #     "step2_dbscan_min_pts": 1,
        #     "step2_dbscan_dist_type": "pearsonr",  # 'pearsonr' or 'sbd'
        #     "step2_dbscan_algorithm": "dbscan",  # 'dbscan' or 'hdbscan'
        #     "step2_clustering_series_type": "raw",  # 'raw', 'anomaly_score' or 'binary_anomaly_score'
        #     "step2_clustering_choice_method": "medoid",  # 'medoid' or 'maxsum'
        # }, {
        #     "enable_unireducer": True,
        #     "enable_multireducer": True,
        #     "step2_dbscan_min_pts": 1,
        #     "step2_dbscan_dist_type": "sbd",  # 'pearsonr' or 'sbd'
        #     "step2_dbscan_algorithm": "dbscan",  # 'dbscan' or 'hdbscan'
        #     "step2_clustering_series_type": "raw",  # 'raw', 'anomaly_score' or 'binary_anomaly_score'
        #     "step2_clustering_choice_method": "medoid",  # 'medoid' or 'maxsum'
        # }, {
        #     "enable_unireducer": False,
        #     "enable_multireducer": True,
        #     "step2_clustering_method_name": "hierarchy",
        #     "step2_hierarchy_dist_threshold": 0.02,  # should be <1.0 if 'sbd' is specified
        #     "step2_hierarchy_dist_type": "sbd", # 'sbd' or 'hamming'
        #     "step2_hierarchy_linkage_method": "single",  # 'single','complete','average','weighted', 'centroid', 'median', 'ward'
        #     "step2_clustering_series_type": "raw",  # 'raw', 'anomaly_score' or 'binary_anomaly_score'
        #     "step2_clustering_choice_method": "medoid",  # 'medoid' or 'maxsum'
        # }, {
        #     "step1_model_name": "two_samp_test",
        #     "step1_two_samp_test_method": "ks", # 'ks' or 'ad' or 'es' or 'e-diagnosis'
        #     "step1_two_samp_test_alpha": 0.05,
        #     "step1_two_samp_test_seg_idx": -(4 * 10),  # 0 means division at midpoint
        # }, {
        #     "step1_model_name": "two_samp_test",
        #     "step1_two_samp_test_method": "e-diagnosis", # 'ks' or 'ad' or 'es' or 'e-diagnosis'
        #     "step1_two_samp_test_n_resamples": 999,
        #     "step1_two_samp_test_alpha": 0.05,
        #     "step1_two_samp_test_seg_idx": -(4 * 10),  # 0 means division at midpoint
        # }, {
        #     "step1_model_name": "fluxinfer",
        #     "step1_fluxinfer_sigma_threshold": 3,
        # }, {
        #     "step1_model_name": "two_samp_test",
        #     "step1_two_samp_test_method": "ks", # 'ks' or 'ad' or 'es' or 'e-diagnosis'
        #     "step1_two_samp_test_alpha": 0.05,
        #     "step1_two_samp_test_seg_idx": -(4 * 10),  # 0 means division at midpoint
        #     "step2_clustering_method_name": "hierarchy",
        #     "step2_hierarchy_dist_threshold": 0.02,  # should be <1.0 if 'sbd' is specified
        #     "step2_hierarchy_dist_type": "sbd", # 'sbd' or 'hamming'
        #     "step2_hierarchy_linkage_method": "single",  # 'single','complete','average','weighted', 'centroid', 'median', 'ward'
        #     "step2_clustering_series_type": "raw",  # 'raw', 'anomaly_score' or 'binary_anomaly_score'
        #     "step2_clustering_choice_method": "medoid",  # 'medoid' or 'maxsum'
        # }, {
        #     "step1_model_name": "two_samp_test",
        #     "step1_two_samp_test_method": "e-diagnosis", # 'ks' or 'ad' or 'es' or 'e-diagnosis'
        #     "step1_two_samp_test_n_resamples": 999,
        #     "step1_two_samp_test_alpha": 0.05,
        #     "step1_two_samp_test_seg_idx": -(4 * 10),  # 0 means division at midpoint
        #     "step2_clustering_method_name": "hierarchy",
        #     "step2_hierarchy_dist_threshold": 0.02,  # should be <1.0 if 'sbd' is specified
        #     "step2_hierarchy_dist_type": "sbd", # 'sbd' or 'hamming'
        #     "step2_hierarchy_linkage_method": "single",  # 'single','complete','average','weighted', 'centroid', 'median', 'ward'
        #     "step2_clustering_series_type": "raw",  # 'raw', 'anomaly_score' or 'binary_anomaly_score'
        #     "step2_clustering_choice_method": "medoid",  # 'medoid' or 'maxsum'
        # }, {
        #     "step1_model_name": "fluxinfer",
        #     "step1_fluxinfer_sigma_threshold": 3,
        #     "step2_clustering_method_name": "hierarchy",
        #     "step2_hierarchy_dist_threshold": 0.02,  # should be <1.0 if 'sbd' is specified
        #     "step2_hierarchy_dist_type": "sbd", # 'sbd' or 'hamming'
        #     "step2_hierarchy_linkage_method": "single",  # 'single','complete','average','weighted', 'centroid', 'median', 'ward'
        #     "step2_clustering_series_type": "raw",  # 'raw', 'anomaly_score' or 'binary_anomaly_score'
        #     "step2_clustering_choice_method": "medoid",  # 'medoid' or 'maxsum'
        # },
    ],
    # Both settings are passed; presumably the sweep is run once with and once
    # without manually selected metrics — confirm in `eval.tsdr`.
    use_manually_selected_metrics=[True, False],
)

NameError: name 'sweep_tsdr_and_save_as_cache' is not defined