# RCA - Benchmarks

In [None]:
import numpy as np
import rca_task
import baseline_anomaly_traversal as baseline
from dowhy.gcm.anomaly_scorers import MeanDeviationScorer, ITAnomalyScorer, MedianDeviationScorer, \
    RescaledMedianCDFQuantileScorer

In [None]:
# Baseline setup: anomaly detection driven by dowhy's MedianDeviationScorer.
# The scorer is handed over as a class (not an instance), and scores are not
# converted to p-values, so the threshold of 5 presumably applies to the raw
# score -- TODO confirm against baseline.AnomalyDetectionConfig.
anomaly_detection_config_mad_score = baseline.AnomalyDetectionConfig(
    anomaly_scorer=MedianDeviationScorer,
    convert_to_p_value=False,
    anomaly_score_threshold=5,
    description='MADScore',
)

# Wrap the configuration into the root-cause analysis callable that the
# benchmark harness is given.
baseline_method = baseline.make_baseline_analyze_root_causes(
    anomaly_detection_config_mad_score
)

# Run the benchmark on the dataset directory; the reporting cell below reads
# the 'scenario', 'metric', 'topk', 'intopk' and 'empty' columns of the result.
df = rca_task.evaluate(baseline_method, '../dataset')

In [3]:
# Summarise top-k results per traffic scenario and issue metric.
# For every (scenario, metric, k) combination this prints the number of
# evaluated rows, the fraction of rows whose 'empty' column is True, and the
# mean of the 'intopk' column.
for scenario in ["low_traffic", "high_traffic", "temporal_traffic"]:
    # Scenarios are matched by name prefix. The filter does not depend on the
    # metric or on k, so it is applied once per scenario.
    df_scenario = df[df['scenario'].str.startswith(scenario)]
    for issue_metric in ['latency', 'availability']:
        df_sel = df_scenario[df_scenario['metric'] == issue_metric]
        for k in [1, 3]:
            df_sel_k = df_sel[df_sel['topk'] == k]
            n_issues = len(df_sel_k)
            if n_issues == 0:
                # No rows for this combination: the ratio below would divide
                # by zero, so report the gap explicitly and move on.
                print(f"for {scenario} with {issue_metric} no issues found at top{k}")
                continue
            hit_rate = np.mean(df_sel_k['intopk'])
            # .eq(True) keeps the exact semantics of the former `== True` mask.
            empty_fraction = df_sel_k['empty'].eq(True).sum() / n_issues
            print(f"for {scenario} with {issue_metric} with {n_issues} ({empty_fraction:.2f} empty) many issues at top{k} got {hit_rate:.3f}")
        

for low_traffic with latency with 14 (0.21 empty) many issues at top1 got 0.571
for low_traffic with latency with 14 (0.21 empty) many issues at top3 got 0.571
for low_traffic with availability with 12 (0.00 empty) many issues at top1 got 0.833
for low_traffic with availability with 12 (0.00 empty) many issues at top3 got 1.000
for high_traffic with latency with 14 (0.14 empty) many issues at top1 got 0.571
for high_traffic with latency with 14 (0.14 empty) many issues at top3 got 0.786
for high_traffic with availability with 12 (0.00 empty) many issues at top1 got 0.667
for high_traffic with availability with 12 (0.00 empty) many issues at top3 got 1.000
for temporal_traffic with latency with 8 (0.00 empty) many issues at top1 got 1.000
for temporal_traffic with latency with 8 (0.00 empty) many issues at top3 got 1.000
for temporal_traffic with availability with 8 (0.00 empty) many issues at top1 got 0.750
for temporal_traffic with availability with 8 (0.00 empty) many issues at top3 

### Evaluate Specificity on normal operations

In [None]:
# Run the same baseline method through rca_task.evaluate_specificity on the
# same dataset directory (per the section heading: normal operations).
# NOTE: the variable name is misspelled ("specifcity"); it is kept as-is
# because the reporting cell below refers to it by this name.
df_specifcity = rca_task.evaluate_specificity(baseline_method, '../dataset')

In [5]:
# Print the mean of the 'specificity' column for every combination of traffic
# scenario (matched by name prefix) and issue metric.
for scenario in ["low_traffic", "high_traffic", "temporal_traffic"]:
    # Narrow down to the scenario first; both metrics reuse this subset.
    scenario_rows = df_specifcity[df_specifcity['scenario'].str.startswith(scenario)]
    for issue_metric in ['latency', 'availability']:
        df_sel = scenario_rows[scenario_rows['metric'] == issue_metric]
        mean_specificity = np.mean(df_sel['specificity'])
        print(f"for {scenario} with {issue_metric} got specificity of {mean_specificity:.3f}")


for low_traffic with latency got specificity of 0.167
for low_traffic with availability got specificity of 1.000
for high_traffic with latency got specificity of 0.333
for high_traffic with availability got specificity of 1.000
for temporal_traffic with latency got specificity of 0.600
for temporal_traffic with availability got specificity of 1.000
