# Evaluation of tsdr on localization

In [2]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [3]:
# --- Standard library ---
import logging
import sys
import warnings

# --- Third-party ---
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats

# One shared matplotlib look for every figure produced by this notebook.
plt.rcParams.update({
    "font.family": "DejaVu Sans",
    "font.size": 7,
    "xtick.labelsize": 9,
    "ytick.labelsize": 9,
    "xtick.direction": "in",
    "ytick.direction": "in",
    "axes.linewidth": 1.0,
    "axes.grid": True,
})

# Suppress every warning category; the FutureWarning filter is kept explicitly
# even though the blanket filter above it already covers that category.
warnings.filterwarnings("ignore")
warnings.simplefilter("ignore", FutureWarning)

# Put the parent directory on the import path for the imports made in later cells.
sys.path.append("../")

# Route root-logger records (WARNING and above) to stdout. The handler is only
# attached when the root logger has none yet, so re-running this cell does not
# stack duplicate handlers.
logger = logging.getLogger()
if len(logger.handlers) == 0:
    handler = logging.StreamHandler(stream=sys.stdout)
    handler.setLevel(logging.WARNING)
    logger.setLevel(logging.WARNING)
    logger.addHandler(handler)

In [4]:
from meltria.priorknowledge.priorknowledge import SockShopKnowledge

# Identifiers of the two datasets evaluated in the cells below.
SOCKSHOP_DATASET_ID = "9n6mf"
TRAINTICKET_DATASET_ID = "m9dgg"

# Sock Shop prior knowledge: container, service and middleware metrics are
# targeted, node metrics are not, and the nodes-containers mapping is left empty.
pk = SockShopKnowledge(
    target_metric_types=dict(
        containers=True,
        services=True,
        middlewares=True,
        nodes=False,
    ),
    mappings={"nodes-containers": dict()},
)

In [20]:
# `localization` is re-imported in this cell on purpose: other cells in this
# notebook bind the same name to `eval.localization`.
from notebooklib import localization

# Sock Shop grid: each reduction method in the first list is passed together
# with the list of diagnosis configurations built below.
results_all = localization.grid_dataset_with_multi_diag_options(
    [
        ("hierarchy-single_sbd_medoid_only_ctnrs", localization.DiagTargetPhaseOption.FIRST),  # skip phase2
        ("hierarchy-single_sbd_medoid_only_ctnrs", localization.DiagTargetPhaseOption.RAW),  # no reduction
        "hierarchy-single_sbd_medoid_only_ctnrs",
        "hdbscan_sbd_only_ctnrs",
        "fluxrank_pearsonr_medoid_only_ctnrs",
        # "fluxrank_pearsonr_max_cluster_only_ctnrs",
        "fluxrank_sbd_medoid_only_ctnrs",
        # "hdbscan_sbd_max_cluster_only_ctnrs",
        "phase2only_hierarchy-single_sbd_medoid_only_ctnrs",
        "phase1only_kstest_only_ctnrs",
        "phase1only_ediagnosis_only_ctnrs",
        "phase1only_fluxinfer_only_ctnrs",
        "kstest_hierarchy-single_sbd_medoid_only_services",
        "ediagnosis_hierarchy-single_sbd_medoid_only_services",
        "fluxinfer_hierarchy-single_sbd_medoid_only_services",
    ],
    SOCKSHOP_DATASET_ID,
    n=10,
    list_of_diag_options=[
        {
            # Defaults shared by every variant. Keys overridden below keep the
            # position they have here, so all variants share one key order.
            "enable_prior_knowledge": False,
            "use_call_graph": False,
            "use_complete_graph": False,
            "pc_library": "pcalg",
            "cg_algo": "pc",
            "pc_citest_alpha": 0.05,
            "pc_citest": "fisher-z",
            "pc_variant": "stable",
            "disable_orientation": False,
            "disable_ci_edge_cut": False,
            "use_indep_test_instead_of_ci": False,
            "walk_method": "monitorrank",
            "root_metric_type": "latency",
            **overrides,
        }
        # One row per variant: only the settings that differ from the defaults.
        for overrides in (
            {"enable_prior_knowledge": True, "use_call_graph": True},
            {"enable_prior_knowledge": True},
            {"use_complete_graph": True},
            {"pc_library": "cdt", "pc_citest": "gaussian"},
            {"use_indep_test_instead_of_ci": True},
            {"enable_prior_knowledge": True, "use_indep_test_instead_of_ci": True},
        )
    ],
    use_manually_selected_metrics=True,
)

Loading 9n6mf with hierarchy-single_sbd_medoid_only_ctnrs...


KeyboardInterrupt: 

## TrainTicket Services

In [17]:
# `localization` is re-imported in this cell on purpose: other cells in this
# notebook bind the same name to `eval.localization`.
from notebooklib import localization

# TrainTicket grid: same reduction methods as the Sock Shop grid, but the
# diagnosis configurations use "throughput" as the root metric type.
results_all_trainticket = localization.grid_dataset_with_multi_diag_options(
    [
        ("hierarchy-single_sbd_medoid_only_ctnrs", localization.DiagTargetPhaseOption.FIRST),  # skip phase2
        ("hierarchy-single_sbd_medoid_only_ctnrs", localization.DiagTargetPhaseOption.RAW),  # no reduction
        "hierarchy-single_sbd_medoid_only_ctnrs",
        "hdbscan_sbd_only_ctnrs",
        "fluxrank_pearsonr_medoid_only_ctnrs",
        # "fluxrank_pearsonr_max_cluster_only_ctnrs",
        "fluxrank_sbd_medoid_only_ctnrs",
        # "hdbscan_sbd_max_cluster_only_ctnrs",
        "phase2only_hierarchy-single_sbd_medoid_only_ctnrs",
        "phase1only_kstest_only_ctnrs",
        "phase1only_ediagnosis_only_ctnrs",
        "phase1only_fluxinfer_only_ctnrs",
        "kstest_hierarchy-single_sbd_medoid_only_services",
        "ediagnosis_hierarchy-single_sbd_medoid_only_services",
        "fluxinfer_hierarchy-single_sbd_medoid_only_services",
    ],
    TRAINTICKET_DATASET_ID,
    n=10,
    list_of_diag_options=[
        {
            # Defaults shared by every variant. Keys overridden below keep the
            # position they have here, so all variants share one key order.
            "enable_prior_knowledge": False,
            "use_call_graph": False,
            "use_complete_graph": False,
            "pc_library": "pcalg",
            "cg_algo": "pc",
            "pc_citest_alpha": 0.05,
            "pc_citest": "fisher-z",
            "pc_variant": "stable",
            "disable_orientation": False,
            "disable_ci_edge_cut": False,
            "use_indep_test_instead_of_ci": False,
            "walk_method": "monitorrank",
            "root_metric_type": "throughput",
            **overrides,
        }
        # One row per variant: only the settings that differ from the defaults.
        for overrides in (
            {"enable_prior_knowledge": True, "use_call_graph": True},
            # {"enable_prior_knowledge": True},                  # disabled
            {"use_complete_graph": True},
            # {"pc_library": "cdt", "pc_citest": "gaussian"},    # disabled
            {"use_indep_test_instead_of_ci": True},
            {"enable_prior_knowledge": True, "use_indep_test_instead_of_ci": True},
        )
    ],
)

Loading m9dgg with hierarchy-single_sbd_medoid_only_ctnrs...
Processing m9dgg with hierarchy-single_sbd_medoid_only_ctnrs...
https://app.neptune.ai/yuuk1/tsdr-localization/e/TSDRLOC-318
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 67 operations to synchronize with Neptune. Do not kill this process.
All 67 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/yuuk1/tsdr-localization/e/TSDRLOC-318/metadata
Loading m9dgg with hierarchy-single_sbd_medoid_only_ctnrs...
Processing m9dgg with hierarchy-single_sbd_medoid_only_ctnrs...
https://app.neptune.ai/yuuk1/tsdr-localization/e/TSDRLOC-319
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 2 operations to synchronize with Neptune. Do not kill this process.
All 2 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/yuuk1/tsdr-localization/e/TSDRLOC-319/metadata
Load

### xcorr (run on the Sock Shop dataset)

In [27]:
# From here on `localization` refers to `eval.localization`, not the
# `notebooklib.localization` module used by the grid cells.
from eval import localization

# Sweep on the Sock Shop dataset with corr_method="xcorr": one reduction
# setting combined with two diagnosis variants.
localization.sweep_localization_and_save_as_cache(
    dataset_id=SOCKSHOP_DATASET_ID,
    n=10,
    list_of_tsdr_options=[
        {
            "enable_unireducer": True,
            "enable_multireducer": True,
            "step1_method_name": "residual_integral",
            "step1_residual_integral_threshold": 20,
            "step1_residual_integral_change_start_point": False,
            "step1_residual_integral_change_start_point_n_sigma": 3,
            "step2_dbscan_min_pts": 1,
            "step2_dbscan_dist_type": "pearsonr",  # 'pearsonr' or 'sbd'
            "step2_dbscan_algorithm": "dbscan",  # 'dbscan' or 'hdbscan'
            "step2_clustering_series_type": "raw",  # 'raw', 'anomaly_score' or 'binary_anomaly_score'
            "step2_clustering_choice_method": "medoid",  # 'medoid' or 'maxsum'
        },
    ],
    list_of_diag_options=[
        {
            # Defaults shared by both variants. Keys overridden below keep the
            # position they have here.
            "enable_prior_knowledge": False,
            "use_call_graph": False,
            "use_complete_graph": False,
            "pc_library": "pcalg",
            "cg_algo": "pc",
            "pc_citest_alpha": 0.05,
            "pc_citest": "fisher-z",
            "pc_variant": "stable",
            "disable_orientation": False,
            "disable_ci_edge_cut": False,
            "use_indep_test_instead_of_ci": False,
            "walk_method": "monitorrank",
            "root_metric_type": "latency",
            "corr_method": "xcorr",
            **overrides,
        }
        for overrides in (
            {"use_complete_graph": True},
            {"use_indep_test_instead_of_ci": True},
        )
    ],
    pair_of_use_manually_selected_metrics=[True],
)

Processing {'services': True, 'containers': True, 'middlewares': False, 'nodes': False}, manually_selected?=True, {'enable_unireducer': True, 'enable_multireducer': True, 'step1_method_name': 'residual_integral', 'step1_residual_integral_threshold': 20, 'step1_residual_integral_change_start_point': False, 'step1_residual_integral_change_start_point_n_sigma': 3, 'step2_dbscan_min_pts': 1, 'step2_dbscan_dist_type': 'pearsonr', 'step2_dbscan_algorithm': 'dbscan', 'step2_clustering_series_type': 'raw', 'step2_clustering_choice_method': 'medoid'}, {'enable_prior_knowledge': False, 'use_call_graph': False, 'use_complete_graph': True, 'pc_library': 'pcalg', 'cg_algo': 'pc', 'pc_citest_alpha': 0.05, 'pc_citest': 'fisher-z', 'pc_variant': 'stable', 'disable_orientation': False, 'disable_ci_edge_cut': False, 'use_indep_test_instead_of_ci': False, 'walk_method': 'monitorrank', 'root_metric_type': 'latency', 'corr_method': 'xcorr'}
https://app.neptune.ai/yuuk1/tsdr-localization/e/TSDRLOC-374
no ca

## CausalRCA

In [43]:
from eval import localization
from diagnoser.daggnn import config

# CausalRCA sweep on the Sock Shop dataset: three reduction settings, with and
# without manually selected metrics, using the default CausalRCA configuration.
localization.sweep_localization_and_save_as_cache(
    dataset_id=SOCKSHOP_DATASET_ID,
    n=10,
    list_of_tsdr_options=[
        # Disabled: no reduction at all.
        # {
        #     "enable_unireducer": False,
        #     "enable_multireducer": False,
        #     "step1_method_name": "residual_integral",
        #     "step1_residual_integral_threshold": 20,
        #     "step1_residual_integral_change_start_point": False,
        #     "step1_residual_integral_change_start_point_n_sigma": 3,
        # },
        # Step 1 + step 2, once per clustering distance type.
        *(
            {
                "enable_unireducer": True,
                "enable_multireducer": True,
                "step1_method_name": "residual_integral",
                "step1_residual_integral_threshold": 20,
                "step1_residual_integral_change_start_point": False,
                "step1_residual_integral_change_start_point_n_sigma": 3,
                "step2_dbscan_min_pts": 1,
                "step2_dbscan_dist_type": dist_type,  # 'pearsonr' or 'sbd'
                "step2_dbscan_algorithm": "dbscan",  # 'dbscan' or 'hdbscan'
                "step2_clustering_series_type": "raw",  # 'raw', 'anomaly_score' or 'binary_anomaly_score'
                "step2_clustering_choice_method": "medoid",  # 'medoid' or 'maxsum'
            }
            for dist_type in ("pearsonr", "sbd")
        ),
        # Step 1 only (multireducer switched off).
        {
            "enable_unireducer": True,
            "enable_multireducer": False,
            "step1_method_name": "residual_integral",
            "step1_residual_integral_threshold": 20,
            "step1_residual_integral_change_start_point": False,
            "step1_residual_integral_change_start_point_n_sigma": 3,
        },
    ],
    list_of_diag_options=[
        # Default Config values with "causalrca"-prefixed keys, plus the
        # use_causalrca switch.
        dict(config.Config().to_prefixed_dict("causalrca"), use_causalrca=True),
    ],
    metric_types_pairs=[
        dict(services=True, containers=True, middlewares=False, nodes=False),
    ],
    pair_of_use_manually_selected_metrics=[True, False],
)

Processing Pattern:1/16 {'services': True, 'containers': True, 'middlewares': False, 'nodes': False}, manually_selected?=True, {'enable_unireducer': False, 'enable_multireducer': False, 'step1_method_name': 'residual_integral', 'step1_residual_integral_threshold': 20, 'step1_residual_integral_change_start_point': False, 'step1_residual_integral_change_start_point_n_sigma': 3}, {'causalrca_epochs': 1000, 'causalrca_sample_to_batch_size_factor': 5, 'causalrca_lr': 0.001, 'causalrca_x_dims': 1, 'causalrca_z_dims': 1, 'causalrca_optimizer': 'Adam', 'causalrca_graph_threshold': 0.3, 'causalrca_tau_A': 0.0, 'causalrca_lambda_A': 0.0, 'causalrca_c_A': 1, 'causalrca_c_A_ul': 1e+20, 'causalrca_use_A_connect_loss': 1, 'causalrca_use_A_positiver_loss': 1, 'causalrca_cuda': False, 'causalrca_seed': 42, 'causalrca_encoder_hidden': 64, 'causalrca_decoder_hidden': 64, 'causalrca_temp': 0.5, 'causalrca_k_max_iter': 5, 'causalrca_encoder': 'mlp', 'causalrca_decoder': 'mlp', 'causalrca_encoder_dropout':

ValueError: Dataset 9n6mf is not cached, /home/ubuntu/src/github.com/ai4sre/meltria-analyzer/notebooks/../dataset/data/tsdr_9n6mf_084ffdc13b077dd30e32296f4cf15278 does not exist. {'services': True, 'containers': True, 'middlewares': False, 'nodes': False}, {'enable_unireducer': False, 'enable_multireducer': False, 'step1_method_name': 'residual_integral', 'step1_residual_integral_threshold': 20, 'step1_residual_integral_change_start_point': False, 'step1_residual_integral_change_start_point_n_sigma': 3}, True

## RCD

In [31]:
from eval import localization

import os
# Set before the sweep starts; presumably selects Neptune's asynchronous
# logging mode -- TODO confirm against the Neptune client documentation.
os.environ["NEPTUNE_MODE"] = "async"

# RCD sweep on the Sock Shop dataset: three reduction settings crossed with
# two values of rcd_boundary_index, without manually selected metrics.
localization.sweep_localization_and_save_as_cache(
    dataset_id=SOCKSHOP_DATASET_ID,
    n=10,
    list_of_tsdr_options=[
        # Disabled: no reduction at all.
        # {
        #     "enable_unireducer": False,
        #     "enable_multireducer": False,
        #     "step1_method_name": "residual_integral",
        #     "step1_residual_integral_threshold": 20,
        #     "step1_residual_integral_change_start_point": False,
        #     "step1_residual_integral_change_start_point_n_sigma": 3,
        # },
        # Step 1 + step 2, once per clustering distance type.
        *(
            {
                "enable_unireducer": True,
                "enable_multireducer": True,
                "step1_method_name": "residual_integral",
                "step1_residual_integral_threshold": 20,
                "step1_residual_integral_change_start_point": False,
                "step1_residual_integral_change_start_point_n_sigma": 3,
                "step2_dbscan_min_pts": 1,
                "step2_dbscan_dist_type": dist_type,  # 'pearsonr' or 'sbd'
                "step2_dbscan_algorithm": "dbscan",  # 'dbscan' or 'hdbscan'
                "step2_clustering_series_type": "raw",  # 'raw', 'anomaly_score' or 'binary_anomaly_score'
                "step2_clustering_choice_method": "medoid",  # 'medoid' or 'maxsum'
            }
            for dist_type in ("pearsonr", "sbd")
        ),
        # Step 1 only (multireducer switched off).
        {
            "enable_unireducer": True,
            "enable_multireducer": False,
            "step1_method_name": "residual_integral",
            "step1_residual_integral_threshold": 20,
            "step1_residual_integral_change_start_point": False,
            "step1_residual_integral_change_start_point_n_sigma": 3,
        },
    ],
    list_of_diag_options=[
        dict(
            use_rcd=True,
            rcd_boundary_index=boundary_index,
            rcd_localized=True,
            rcd_gamma=5,
            rcd_bins=5,
            rcd_n_iters=10,
            rcd_topk=5,
        )
        # Boundary indices 152, 156 and 160 are currently disabled.
        for boundary_index in (164, 168)
    ],
    metric_types_pairs=[
        dict(services=True, containers=True, middlewares=False, nodes=False),
    ],
    pair_of_use_manually_selected_metrics=[False],
)

{'mean_by_chaos_type-df': chaos_type
pod-cpu-hog            10.445869
pod-memory-hog         10.263355
pod-network-latency     6.518415
Name: elapsed_time, dtype: float64, 'mean_by_chaos_comp-df':               elapsed_time
chaos_comp                
carts             7.447336
carts-db          8.896932
catalogue         8.005323
catalogue-db      9.672845
orders           11.468583
orders-db         9.188487
payment          13.802282
user              7.361781
user-db           8.233333, 'mean_by_chaos_type_and_chaos_comp':                                   elapsed_time
chaos_type          chaos_comp                
pod-cpu-hog         carts            12.863306
                    carts-db         15.930052
                    catalogue         7.633510
                    catalogue-db      8.429133
                    orders            8.708138
                    orders-db         8.588025
                    payment          15.133177
                    user             10.14331

Traceback (most recent call last):
  File "<__array_function__ internals>", line 177, in where
KeyboardInterrupt
Exception ignored in: 'sklearn.cluster._k_means_common._relocate_empty_clusters_dense'
Traceback (most recent call last):
  File "<__array_function__ internals>", line 177, in where
KeyboardInterrupt: 


KeyboardInterrupt: 