## Evaluation of clustering (shape-based)

- Uses the labeled data collected in `anomaly_patterns_clustering_shape.ipynb`.

In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
import jsonlines
import glob
from collections import defaultdict

In [3]:
# Summarize the label distribution of every labeled sample file.
# glob.glob() returns paths in arbitrary order, so sort them to keep the
# displayed output stable across runs and machines.
for f in sorted(glob.glob("../samples/clustering_anomaly_patterns/*.jsonl")):
    aggr = defaultdict(int)   # anomaly_pattern label -> number of records
    aggr2 = defaultdict(int)  # anomaly_position label -> number of records
    with jsonlines.open(f) as reader:
        for obj in reader:
            aggr[obj["anomaly_pattern"]] += 1
            aggr2[obj["anomaly_position"]] += 1
    if not aggr:
        # Nothing was read from this file, so there is nothing to show.
        continue
    display(f, aggr, aggr2)

'../samples/clustering_anomaly_patterns/clustering_anomaly_patterns_20221030-162851.jsonl'

defaultdict(int,
            {'Level shift down': 982,
             'Single spike': 458,
             'Level shift up': 550,
             'Multiple spikes': 321,
             'Single dip': 376,
             'Fluctuations': 86,
             'Other normal': 364,
             'Steady increase': 711,
             'Transient level shift up': 51,
             'Transient level shift down': 17,
             'White noise': 112,
             'Steady decrease': 52,
             'Sudden increase': 67,
             'Multiple dips': 12,
             'Sudden decrease': 49})

defaultdict(int,
            {'anomaly_during_fault': 3045,
             'anomaly_outside_fault': 695,
             'no_anomaly': 468})

'../samples/clustering_anomaly_patterns/clustering_anomaly_patterns_20221028-172414.jsonl'

defaultdict(int,
            {'Level shift up': 34,
             'Single dip': 49,
             'Single spike': 65,
             'Other normal': 12,
             'Multiple spikes': 34,
             'Steady increase': 8,
             'Transient level shift down': 4,
             'Transient level shift up': 10,
             'Level shift down': 10,
             'White noise': 8,
             'Multiple dips': 2,
             'Sudden increase': 2,
             'Fluctuations': 3,
             'Steady decrease': 3})

defaultdict(int,
            {'anomaly_during_fault': 149,
             'anomaly_outside_fault': 75,
             'no_anomaly': 20})

In [12]:
import numpy as np
import pandas as pd
import random
import scipy.interpolate
import scipy.stats

In [5]:
import sys
sys.path.append('../')

from tsdr import tsdr

INFO: Pandarallel will run on 12 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [10]:
fpath = "../samples/clustering_anomaly_patterns/clustering_anomaly_patterns_20221030-162851.jsonl"

samples: dict = {}
time_series_by_case: dict[tuple[str, str], list[tuple[str, np.ndarray]]] = defaultdict(list)
with jsonlines.open(fpath) as reader:
    for obj in reader:
        time_series_by_case[(obj["chaos_type"], obj["chaos_comp"])].append((obj["metric"], np.array(obj["time_series"])))
        
        key = (obj["chaos_type"], obj["chaos_comp"], obj["metric"])
        samples[key] = {"series": np.array(obj["time_series"], dtype=np.float64)}
        apos, apattern = obj["anomaly_position"], obj["anomaly_pattern"]
        if apos == "no_anomaly" or apattern in ["White noise", "Other normal"]:
            samples[key].update({
                "anomaly_type": "type0",
                "anomaly_pattern": "normal",
                "anomaly_position": apos,
            })
        else:
            match apattern:
                # Type 1
                case "Level shift down" | "Level shift up" | "Steady decrease" | "Steady increase" | "Sudden decrease" | "Sudden increase":
                    samples[key].update({
                        "anomaly_type": "type1",
                        "anomaly_pattern": apattern,
                        "anomaly_position": apos,
                    })
                # Type 2
                case "Fluctuations" | "Multiple dips" | "Multiple spikes" | "Single dip" | "Single spike" | "Transient level shift down" | "Transient level shift up":
                    samples[key].update({
                        "anomaly_type": "type2",
                        "anomaly_pattern": apattern,
                        "anomaly_position": apos,
                    })

In [20]:
from meltria.priorknowledge import priorknowledge
from joblib import Parallel, delayed


def _clustering(time_series):
    """Cluster the metrics of a single case with Tsdr and return the clustering info.

    Args:
        time_series: iterable of ``(metric_name, values)`` pairs.

    Returns:
        The second element of the pair returned by
        ``Tsdr.reduce_multivariate_series``.
    """
    # NOTE(review): "middlewalres" looks like a typo of "middlewares"; it is kept
    # verbatim because the key is consumed by priorknowledge — confirm there.
    enabled_metric_types = {
        "containers": True,
        "services": True,
        "middlewalres": True,
        "nodes": False,
    }
    knowledge = priorknowledge.new_knowledge(
        target_app="train-ticket",
        target_metric_types=enabled_metric_types,
        mappings={"nodes-containers": {}},
    )

    # Standardize every series; a repeated metric name keeps its last series.
    zscored_by_metric = {}
    for name, series in time_series:
        zscored_by_metric[name] = scipy.stats.zscore(series)

    tsdr_options = {
        "step2_clustering_method_name": "dbscan",
        "step2_dbscan_min_pts": 2,
        "step2_dbscan_dist_type": "sbd",  # 'pearsonr' or 'sbd'
        "step2_dbscan_algorithm": "hdbscan",  # 'dbscan' or 'hdbscan'
        "step2_clustering_series_type": "raw",  # 'raw', 'anomaly_score' or 'binary_anomaly_score'
        "step2_clustering_choice_method": 'medoid',  # 'medoid' or 'maxsum'
    }
    reducer = tsdr.Tsdr("residual_integral", **tsdr_options)
    # n_workers=1: the caller already fans the cases out with joblib.
    _reduced, info = reducer.reduce_multivariate_series(
        pd.DataFrame(zscored_by_metric),
        knowledge,
        n_workers=1,
    )
    return info

# Cluster every case on all available cores; the result list follows the
# iteration order of time_series_by_case.values().
run_in_parallel = Parallel(n_jobs=-1)
clustering_infos = run_in_parallel(
    delayed(_clustering)(case_series) for case_series in time_series_by_case.values()
)

INFO: Pandarallel will run on 12 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.
INFO: Pandarallel will run on 12 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.
INFO: Pandarallel will run on 12 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.
INFO: Pandarallel will run on 12 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.
INFO: Pandarallel will run on 12 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.
INFO: Pandarallel will run on 12 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.
INFO: Pandarallel will run on 12 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.

In [26]:
from itertools import combinations

# One row per cluster:
# (chaos_type, chaos_comp, cluster_no, representative metric,
#  same-type pairs, mixed-type pairs, number of metrics in the cluster).
eval_stat: list[tuple[str, str, int, str, int, int, int]] = []
# clustering_infos was computed over time_series_by_case.values(), so the two are
# paired positionally; strict=True raises if their lengths ever drift apart
# (e.g. after re-running only one of the cells).
for (chaos_type, chaos_comp), clustering_info in zip(time_series_by_case, clustering_infos, strict=True):
    # cluster_no is 1-based and restarts for every case.
    for cluster_no, (representative_metric, sub_metrics) in enumerate(clustering_info.items(), start=1):
        members = [representative_metric] + sub_metrics
        positives, negatives = 0, 0
        # Count metric pairs in the cluster whose anomaly types agree / disagree.
        for u, v in combinations(members, 2):
            u_atype: str = samples[chaos_type, chaos_comp, u]["anomaly_type"]
            v_atype: str = samples[chaos_type, chaos_comp, v]["anomaly_type"]
            if u_atype == v_atype:
                positives += 1
            else:
                negatives += 1

        eval_stat.append((chaos_type, chaos_comp, cluster_no, representative_metric, positives, negatives, len(members)))

eval_df = pd.DataFrame(eval_stat, columns=["chaos_type", "chaos_comp", "cluster_no", "rep", "positives", "negatives", "total_metrics"])
eval_df

Unnamed: 0,chaos_type,chaos_comp,cluster_no,rep,positives,negatives,total_metrics
0,pod-memory-hog,ts-preserve-service,1,s-ts-preserve_request_duration_seconds,1,2,3
1,pod-memory-hog,ts-preserve-service,2,c-ts-preserve-service_fs_reads_bytes_total,3,0,3
2,pod-memory-hog,ts-preserve-service,3,m-ts-preserve-service_java_lang_GarbageCollect...,10,0,5
3,pod-memory-hog,ts-preserve-service,4,m-ts-preserve-service_java_lang_Threading_Curr...,141,135,24
4,pod-memory-hog,ts-preserve-service,5,m-ts-preserve-service_Tomcat_RequestProcessor_...,28,0,8
...,...,...,...,...,...,...,...
686,pod-network-loss,ts-price-mongo,52,m-ts-price-mongo_mongodb_top_remove_count,10,0,5
687,pod-network-loss,ts-price-mongo,53,m-ts-price-mongo_mongodb_ss_tcmalloc_tcmalloc_...,21,7,8
688,pod-network-loss,ts-price-mongo,54,m-ts-price-mongo_mongodb_sys_netstat_TcpExt_TC...,3,0,3
689,pod-network-loss,ts-price-mongo,55,m-ts-price-mongo_mongodb_top_writeLock_time,1,0,2
