# Investigating the cause metrics for tsdr

In [21]:
# (Re)load IPython's autoreload extension; `%reload_ext` is safe to run again
# when the extension is already loaded.
%reload_ext autoreload
# Mode 2: reload all imported modules before executing each cell, so edits to
# local project modules are picked up without restarting the kernel.
%autoreload 2

In [30]:
from collections import defaultdict, OrderedDict
import pathlib

import numpy as np
import pandas as pd
import torch
import torchinfo
import scipy.stats
import matplotlib.pyplot as plt
# Global matplotlib style for every figure in this notebook: small body font,
# slightly larger tick labels, inward-pointing ticks, and a grid on all axes.
plt.rcParams.update(
    {
        "font.family": "DejaVu Sans",
        "font.size": 7,
        "xtick.labelsize": 9,
        "ytick.labelsize": 9,
        "xtick.direction": "in",
        "ytick.direction": "in",
        "axes.linewidth": 1.0,
        "axes.grid": True,
    }
)

import warnings
# Silence every warning for the rest of the session to keep cell output clean.
# NOTE(review): the blanket 'ignore' filter already covers FutureWarning, so
# the second call is redundant; it also hides warnings that may matter.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore', FutureWarning)

import sys
# Put the parent directory on the import path. The path is relative to the
# kernel's working directory.
# NOTE(review): presumably this is where `notebooklib` and `eval` live -- confirm.
sys.path.append("../")

from notebooklib.save import run_tsdr, save_tsdr, load_tsdr

In [31]:
# Identifier of the dataset to load; it is also recorded in the `dataset_id`
# index level of the results table built below.
DATASET_ID = "9n6mf"
# Load the stored TSDR results for this dataset. The returned collection is
# iterated below as (record, filtered_df, anomalous_df, reduced_df) tuples.
# NOTE(review): `revert_normalized_time_series=True` presumably restores the
# un-normalized series -- confirm against notebooklib.save.load_tsdr.
datasets_hdbscan_sbd = load_tsdr(DATASET_ID, suffix="", revert_normalized_time_series=True)

In [34]:
from eval.groundtruth import check_cause_metrics

# Columns identifying one chaos-injection case (used as the table index),
# followed by the columns holding the outcome of the cause-metric checks.
_index_cols = ["dataset_id", "target_app", "chaos_type", "chaos_comp", "chaos_case_num"]
_value_cols = ["anomalous_ok", "anomalous_cause_metrics", "reduced_ok", "reduced_cause_metrics"]

check_results = []
dataset_by_chaos_case = {}
for record, filtered_df, anomalous_df, reduced_df in datasets_hdbscan_sbd:
    chaos_type = record.chaos_type()
    chaos_comp = record.chaos_comp()
    chaos_case_num = record.chaos_case_num()
    check_kwargs = dict(chaos_type=chaos_type, chaos_comp=chaos_comp, optional_cause=True)

    # Check the metric names remaining at each stage: first the anomalous
    # frame, then the reduced frame. Each call yields an (ok, cause_metrics) pair.
    anomalous_ok, anomalous_cause_metrics = check_cause_metrics(
        record.pk, anomalous_df.columns.tolist(), **check_kwargs
    )
    reduced_ok, reduced_cause_metrics = check_cause_metrics(
        record.pk, reduced_df.columns.tolist(), **check_kwargs
    )

    check_results.append(
        (
            DATASET_ID,
            record.target_app(),
            chaos_type,
            chaos_comp,
            chaos_case_num,
            anomalous_ok,
            anomalous_cause_metrics,
            reduced_ok,
            reduced_cause_metrics,
        )
    )
    # Keep the frames reachable by chaos case so later cells can look them up.
    dataset_by_chaos_case[(chaos_type, chaos_comp, chaos_case_num)] = (
        record,
        filtered_df,
        anomalous_df,
        reduced_df,
    )

check_results_df = (
    pd.DataFrame(check_results, columns=_index_cols + _value_cols)
    .set_index(_index_cols)
    .sort_index()
)
# Lift the row limit only for this display so every case is shown.
with pd.option_context("display.max_rows", None):
    display(check_results_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,anomalous_ok,anomalous_cause_metrics,reduced_ok,reduced_cause_metrics
dataset_id,target_app,chaos_type,chaos_comp,chaos_case_num,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
9n6mf,sock-shop,pod-cpu-hog,carts,2,True,"c-carts_file_descriptors,c-carts_cpu_cfs_throt...",True,"c-carts_file_descriptors,c-carts_memory_usage_..."
9n6mf,sock-shop,pod-cpu-hog,carts,4,True,"c-carts_memory_rss,c-carts_cpu_usage_seconds_t...",True,"c-carts_cpu_cfs_throttled_periods_total,c-cart..."
9n6mf,sock-shop,pod-cpu-hog,carts-db,0,True,"c-carts-db_memory_mapped_file,c-carts-db_proce...",True,"c-carts-db_processes,c-carts-db_memory_working..."
9n6mf,sock-shop,pod-cpu-hog,carts-db,1,True,"c-carts-db_cpu_usage_seconds_total,c-carts-db_...",True,"c-carts-db_memory_rss,c-carts-db_threads,c-car..."
9n6mf,sock-shop,pod-cpu-hog,carts-db,2,True,"c-carts-db_memory_failures_total,c-carts-db_cp...",True,"c-carts-db_threads,c-carts-db_cpu_cfs_throttle..."
9n6mf,sock-shop,pod-cpu-hog,catalogue,0,True,"c-catalogue_cpu_cfs_throttled_periods_total,c-...",True,"c-catalogue_file_descriptors,c-catalogue_cpu_s..."
9n6mf,sock-shop,pod-cpu-hog,catalogue,1,True,"c-catalogue_memory_failures_total,c-catalogue_...",True,"c-catalogue_memory_working_set_bytes,c-catalog..."
9n6mf,sock-shop,pod-cpu-hog,catalogue,4,True,"c-catalogue_cpu_user_seconds_total,c-catalogue...",True,"c-catalogue_cpu_user_seconds_total,c-catalogue..."
9n6mf,sock-shop,pod-cpu-hog,catalogue-db,1,True,"c-catalogue-db_memory_rss,c-catalogue-db_cpu_c...",True,"c-catalogue-db_cpu_usage_seconds_total,c-catal..."
9n6mf,sock-shop,pod-cpu-hog,catalogue-db,2,True,"c-catalogue-db_memory_usage_bytes,c-catalogue-...",True,"c-catalogue-db_cpu_usage_seconds_total,c-catal..."


In [35]:
import math
import re


def plot_component_metrics(df, chaos_comp, title, series_per_ax=6, ncols=3):
    """Plot the z-scored time series of one component's metrics on a grid of axes.

    Args:
        df: DataFrame with one metric time series per column.
        chaos_comp: Name of the component whose columns are plotted.
        title: Figure title (set via ``fig.suptitle``).
        series_per_ax: Maximum number of series drawn on a single axes.
        ncols: Number of axes per grid row.

    Returns:
        None. The figure is shown and then closed. If no column belongs to
        ``chaos_comp``, a message is printed and no figure is created.
    """
    # Match the component as a whole name token rather than as a bare substring,
    # so that e.g. "carts" does not also select "carts-db" columns.
    # NOTE(review): assumes columns are named "<prefix>-<component>_<metric>"
    # (e.g. "c-carts_file_descriptors") -- confirm this holds for every metric type.
    pattern = rf"(?:^|-){re.escape(chaos_comp)}_"
    target_df = df.filter(regex=pattern)
    if target_df.shape[1] == 0:
        # Zero matching columns would give nrows == 0, which plt.subplots rejects.
        print(f"{title}: no metrics found for component {chaos_comp!r}")
        return

    # z-score is computed per column, so normalizing after filtering gives the
    # same values as normalizing the whole frame first, with less work.
    target_df = target_df.apply(lambda x: scipy.stats.zscore(x))

    n_axes = math.ceil(target_df.shape[1] / series_per_ax)
    nrows = math.ceil(n_axes / ncols)
    # squeeze=False always returns a 2-D array of axes, whatever the grid shape.
    fig, axs = plt.subplots(
        figsize=(20, 3.0 * nrows), ncols=ncols, nrows=nrows, squeeze=False
    )
    fig.suptitle(title)
    for ax_idx, ax in enumerate(axs.flatten()):
        if ax_idx >= n_axes:
            # Hide trailing axes of the grid that have nothing to show.
            ax.set_visible(False)
            continue
        start = ax_idx * series_per_ax
        for col, ts in target_df.iloc[:, start : start + series_per_ax].items():
            ax.plot(ts, label=col)
        ax.legend(loc='upper left', fontsize=8)
    # Lay out after drawing so the tick labels and legends are accounted for.
    fig.tight_layout()
    plt.show()
    # Close explicitly: this runs in a loop and open figures accumulate memory.
    plt.close(fig=fig)


# For every case whose cause metrics were lost by the reduction step, show the
# component's metrics before (anomalous) and after (reduced) reduction.
for row in check_results_df.reset_index().itertuples():
    if row.reduced_ok:
        continue
    record, filtered_df, anomalous_df, reduced_df = dataset_by_chaos_case[
        row.chaos_type, row.chaos_comp, row.chaos_case_num,
    ]

    case_label = f"{row.chaos_type}/{row.chaos_comp}/{row.chaos_case_num}"
    plot_component_metrics(anomalous_df, row.chaos_comp, f"anomalous: {case_label}")
    plot_component_metrics(reduced_df, row.chaos_comp, f"reduced: {case_label}")