# Evaluation of Standard Localization Methods

In [1]:
# Enable IPython's autoreload extension in mode 2 so edited modules are
# re-imported before each cell executes (no kernel restart needed while iterating).
%reload_ext autoreload
%autoreload 2

In [2]:
# Dataset identifier; passed as the first argument to load_tsdr_by_chaos
# when the data is loaded later in this notebook.
SOCKSHOP_DATASET_ID = "9n6mf"

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy

# Notebook-wide matplotlib defaults, applied in one batch:
# font, tick label size/direction, axes border width, and grid on.
plt.rcParams.update({
    "font.family": "DejaVu Sans",
    "font.size": 7,
    "xtick.labelsize": 8,
    "ytick.labelsize": 8,
    "xtick.direction": "in",
    "ytick.direction": "in",
    "axes.linewidth": 1.0,
    "axes.grid": True,
})

In [4]:
import sys

# Put the parent directory on the import path (presumably where the `eval`
# and `diagnoser` packages imported later live — confirm against the repo layout).
# The membership check keeps the cell idempotent: re-running it does not
# stack duplicate '../' entries onto sys.path.
if '../' not in sys.path:
    sys.path.append('../')

In [5]:
from eval.tsdr import load_tsdr_by_chaos

# Flags passed as `metric_types`; only "nodes" is set to False.
metric_type_flags = dict(services=True, containers=True, middlewares=True, nodes=False)
# Flags passed as `tsdr_options`; both reducer switches are set to False.
tsdr_flags = dict(enable_unireducer=False, enable_multireducer=False)
# Chaos types requested from the loader.
chaos_types = {"pod-cpu-hog", "pod-memory-hog", "pod-network-latency"}

# Load the dataset; the result is later indexed by (chaos type, component) tuples.
dataset_by_chaos = load_tsdr_by_chaos(
    SOCKSHOP_DATASET_ID,
    metric_types=metric_type_flags,
    revert_normalized_time_series=False,
    tsdr_options=tsdr_flags,
    target_chaos_types=chaos_types,
    use_manually_selected_metrics=False,
    validation_filtering=(True, 4 * 5),
)

In [31]:
import pandas as pd

# Take the first entry stored under the ("pod-cpu-hog", "carts") key and
# print which chaos case it is.
cpu_hog_carts_cases = dataset_by_chaos[("pod-cpu-hog", "carts")]
record, data_df_by_metric_type = cpu_hog_carts_cases[0]
print(record.chaos_case_full())

# Column-wise concatenation of the first "containers" and "services" entries;
# "middlewares" is currently not included.
selected_frames = [
    data_df_by_metric_type[metric_type][0]
    for metric_type in ("containers", "services")
]
dataset = pd.concat(selected_frames, axis=1)
dataset.shape

carts/pod-cpu-hog/3


(180, 183)

In [41]:
from joblib import Parallel, delayed
from collections import defaultdict

from diagnoser import diag

# Keyword arguments shared by every build_and_walk_causal_graph call below.
diag_options = dict(
    root_metric_type="latency",
    enable_prior_knowledge=True,
    use_call_graph=False,
    use_complete_graph=False,
    pc_library="causallearn",
    cg_algo="pc",
    pc_citest="gsq",
    pc_citest_alpha=0.10,
    pc_citest_bins=5,
    pc_stable=False,
    disable_orientation=False,
    disable_ci_edge_cut=False,
    use_indep_test_instead_of_ci=False,
    walk_method="monitorrank",
    corr_method="left_shift",
    corr_left_shift_lp=5,
)
n_workers = 10
n_repeats = 10

# Submit the same call n_repeats times to a joblib pool of n_workers;
# `results` collects one return value per repetition.
run_diagnosis = delayed(diag.build_and_walk_causal_graph)
results = Parallel(n_jobs=n_workers)(
    run_diagnosis(dataset, record.pk, **diag_options) for _ in range(n_repeats)
)

In [42]:
# For each metric, count in how many runs it appears among the first k ranked
# entries, then report that count as a fraction of all runs, highest first.
k = 5
scores: dict[str, int] = defaultdict(int)
for g, ranks in results:
    # Each result unpacks to a pair; only the ranking (second element) is used here.
    for m, r in ranks[:k]:
        scores[m] += 1
# Normalize by the actual number of runs instead of a hard-coded 10 so the
# fractions stay correct if the repetition count in the previous cell changes.
n_runs = len(results)
sorted([(metric, n / n_runs) for (metric, n) in scores.items()], key=lambda x: x[1], reverse=True)

[('s-orders_throughput', 0.9),
 ('s-shipping_throughput', 0.9),
 ('s-front-end_throughput', 0.9),
 ('s-front-end_errors', 0.6),
 ('c-carts-db_cpu_cfs_throttled_periods_total', 0.4),
 ('c-carts-db_memory_working_set_bytes', 0.3),
 ('c-carts-db_memory_rss', 0.3),
 ('s-user_latency', 0.2),
 ('c-orders-db_network_receive_bytes_total', 0.2),
 ('s-orders_latency', 0.1),
 ('c-orders-db_memory_working_set_bytes', 0.1),
 ('c-carts_threads', 0.1)]