In [1]:
import pandas as pd
import logging
import networkx as nx
import time
import json


from causal_reasoning.causal_model import CausalModel

In [None]:
class Query:
    """Record of one interventional query and the result obtained for it.

    Attributes:
        intervention_label: name of the variable that was intervened on.
        intervention_value: value the intervention forced on that variable.
        target_label: name of the variable whose outcome was queried.
        target_value: outcome value of the target that was queried.
        query: result of the query. Annotated as ``float``, but note that
            ``run_queries`` in this notebook stores whatever
            ``inference_intervention_query()`` returned, which it indexes
            as a ``[lower, upper]`` pair.
    """

    def __init__(self, inter_label: str, inter_value: int, target_label: str, target_value: int, query_value: float) -> None:
        # Intervention side of the query.
        self.intervention_label, self.intervention_value = inter_label, inter_value
        # Target side of the query.
        self.target_label, self.target_value = target_label, target_value
        # Result as returned by the inference call.
        self.query = query_value

def _timed_intervention_query(model: CausalModel, intervention: str, target: str, value: int):
    """Query ``target == value`` under ``do(intervention = value)`` and time it.

    Returns:
        ``(bounds, seconds)`` where ``bounds`` is whatever
        ``model.inference_intervention_query()`` returned and ``seconds`` is
        the wall-clock duration of that call alone (setup is not timed).
    """
    model.set_interventions([(intervention, value)])
    model.set_target((target, value))
    start = time.perf_counter()
    bounds = model.inference_intervention_query()
    return bounds, time.perf_counter() - start


def run_queries(nodes_set: set, model: CausalModel, empirical_probs, output_path: str):
    """Run the PN/PS queries for every ordered pair of nodes and write a text report.

    For each ``(intervention, target)`` pair with ``intervention != target``
    two queries are issued on ``model``:

    * "PN": ``P(target=0 | do(intervention=0))``
    * "PS": ``P(target=1 | do(intervention=1))``

    Each result is indexed as a ``[lower, upper]`` pair. Both bounds and their
    inference times are written to ``output_path``, followed by a combined
    "PNS" interval. If any of the four bounds is ``None`` (or the string
    ``'None'``) the PNS entry is reported as ``None`` for that pair.

    Args:
        nodes_set: labels of the nodes to pair up.
        model: object exposing ``set_interventions``, ``set_target`` and
            ``inference_intervention_query``.
        empirical_probs: nested mapping read as
            ``empirical_probs[intervention][value][target]`` for
            ``value`` in ``(0, 1)``; used as weights on the PN/PS bounds.
        output_path: path of the text report (overwritten).

    Returns:
        List of ``Query`` records, two per pair (PN first, then PS).
    """
    queries = []
    with open(output_path, 'w') as file:
        for intervention in nodes_set:
            for target in nodes_set:
                if intervention == target:
                    continue

                # PN is evaluated before PS, each leaves the model configured
                # for its own (intervention, target) values.
                pn, pn_time = _timed_intervention_query(model, intervention, target, 0)
                queries.append(Query(intervention, 0, target, 0, pn))

                ps, ps_time = _timed_intervention_query(model, intervention, target, 1)
                queries.append(Query(intervention, 1, target, 1, ps))

                file.write(f"PS = P({target}=1|do({intervention}=1))\n")
                file.write(f"{ps[0]} <= PS <= {ps[1]}\n")
                file.write(f"PS Inference time taken: {ps_time:.6f} seconds\n")
                file.write("----\n")
                file.write(f"PN = P({target}=0|do({intervention}=0))\n")
                file.write(f"{pn[0]} <= PN <= {pn[1]}\n")
                file.write(f"PN Inference time taken: {pn_time:.6f} seconds\n")
                file.write("----\n")

                empirical_ps = empirical_probs[intervention][1][target]
                empirical_pn = empirical_probs[intervention][0][target]

                # A missing bound may come back as None or as the string 'None'.
                bounds = (ps[0], ps[1], pn[0], pn[1])
                if any(bound is None or bound == 'None' for bound in bounds):
                    file.write(f"PNS = P({target}|do({intervention}))\n")
                    file.write("PNS == None\n")
                    file.write("---------------------------------------------------------\n")
                    continue

                ps_lower = empirical_ps * float(ps[0])
                ps_upper = empirical_ps * float(ps[1])
                pn_lower = empirical_pn * float(pn[0])
                pn_upper = empirical_pn * float(pn[1])

                pns = [
                    min(ps_lower + pn_lower, ps_lower + pn_upper),
                    max(ps_upper + pn_lower, ps_upper + pn_upper),
                ]
                file.write(f"PNS = P({target}|do({intervention}))\n")
                # Report the combined interval, not the PN bounds again.
                file.write(f"{pns[0]} <= PNS <= {pns[1]}\n")
                file.write("---------------------------------------------------------\n")

    return queries


def run_pn_ps(nodes_set: set, model: CausalModel, target: str, output_path: str):
    """Compute the PN and PS query results of every node on one target and save them as JSON.

    For each node in ``nodes_set`` other than ``target`` two queries are run
    on ``model``:

    * ``"PN"``: intervention set to 0, target value 0;
    * ``"PS"``: intervention set to 1, target value 1.

    Args:
        nodes_set: labels of the candidate intervention nodes.
        model: object exposing ``set_interventions``, ``set_target`` and
            ``inference_intervention_query``.
        target: label of the outcome node; it is skipped as an intervention.
        output_path: output file path *without* extension; ``.json`` is
            appended and the file is overwritten.

    Returns:
        ``{intervention: {"PN": result, "PS": result}}`` — the same mapping
        that is written to disk, with each result exactly as returned by
        ``model.inference_intervention_query()``.
    """
    # (JSON key, value used for both the intervention and the target),
    # evaluated in this order for every intervention.
    query_specs = (("PN", 0), ("PS", 1))

    pn_ps = {}
    for intervention in nodes_set:
        if intervention == target:
            continue

        results = {}
        for label, value in query_specs:
            model.set_interventions([(intervention, value)])
            model.set_target((target, value))
            results[label] = model.inference_intervention_query()
        pn_ps[intervention] = results

    with open(f'{output_path}.json', 'w') as f:
        json.dump(pn_ps, f, indent=4)

    return pn_ps



## First Model

In [3]:
# Directed edges (parent, child) of the small-scale outage graph.
edges_list_1 = [
    ('NewDeploy', 'MemoryLeak'),
    ('MemoryLeak', 'MemoryUsageHigh'), 
    ("MemoryUsageHigh", "ServiceCrash"), 
    ("ServiceCrash", "OutageIncident"),
    ("HeavyTraffic", "MemoryUsageHigh"), 
    ("HeavyTraffic", "ServiceCrash"),
    # Unobserved helper parents
    ('Unob_helper_1', 'MemoryLeak'),
    ('Unob_helper_2', 'OutageIncident'),
    ('Unob_helper_3', 'NewDeploy'),
]
# Nodes treated as unobserved; they are excluded from the set of queryable nodes.
latent_nodes_1 = ['HeavyTraffic', 'Unob_helper_1', 'Unob_helper_2', 'Unob_helper_3']

# Observed nodes = every edge endpoint that is not latent. The comprehension
# keeps its loop variables local, so no builtin name (previously `tuple`)
# gets rebound in the notebook namespace.
nodes_set_1 = {
    node
    for edge in edges_list_1
    for node in edge
    if node not in latent_nodes_1
}

edges_1 = nx.DiGraph(edges_list_1)

df_small_scale_model = pd.read_csv("small_scale_outage_incident_seed42.csv", index_col=0)

In [4]:
# Empirical joint probabilities for model 1, laid out as
#   empirical_probs_model_1[intervention][v][target]
#     = fraction of rows where intervention == v and target == v, for v in (0, 1).
# Fractions (not raw row counts) are stored because run_queries multiplies
# these values by probability bounds to build the PNS interval.
empirical_probs_model_1 = {}
for intervention in nodes_set_1:
    empirical_probs_model_1[intervention] = [{}, {}]
    for target in nodes_set_1:
        if target == intervention:
            continue
        for value in (0, 1):
            both_equal_value = (
                (df_small_scale_model[intervention] == value)
                & (df_small_scale_model[target] == value)
            )
            # Mean of a boolean mask is the share of rows where it is True.
            empirical_probs_model_1[intervention][value][target] = float(both_equal_value.mean())

In [5]:
# Build the causal model for the small-scale graph from the loaded data.
model_1 = CausalModel(data=df_small_scale_model, edges=edges_1, unobservables_labels=latent_nodes_1)

# Alternative (disabled): full pairwise text report.
# run_queries(nodes_set_1, model_1, empirical_probs_model_1, "output_small_scale_outage_incident_seed42.txt")

# PN/PS of every observed node on the outage; written to "<output_path>.json".
run_pn_ps(
    nodes_set=nodes_set_1,
    model=model_1,
    target="OutageIncident",
    output_path="output_small_scale_outage_incident_seed42",
)

## Second Model


In [6]:
# Directed edges (parent, child) of the medium-scale outage graph.
edges_list_2 = [
    ("DB_Change", "DB_Latency"),
    ("DB_Latency", "MS-B_Latency"), 
    ("MS-B_Latency", "MS-B_Error"), 
    ("MS-B_Latency", "MS-A_Latency"),
    ("MS-B_Error", "MS-A_Error"), 
    ("MS-A_Latency", "MS-A_Threads"), 
    ("MS-A_Threads", "MS-A_Crash"), 
    ("MS-A_Error", "Outage"), 
    ("MS-A_Crash", "Outage"), 
    ("HeavyTraffic", "DB_Latency"), 
    ("HeavyTraffic", "MS-A_Latency"),
    # Unobserved helper parents
    ('Unob_helper_1', 'DB_Change'),
    ('Unob_helper_2', 'MS-B_Latency'),
    ('Unob_helper_3', 'MS-B_Error'),
    ('Unob_helper_4', 'MS-A_Error'),
    ('Unob_helper_5', 'MS-A_Threads'),
    ('Unob_helper_6', 'MS-A_Crash'),
    ('Unob_helper_7', 'Outage'),
]

# Nodes treated as unobserved; they are excluded from the set of queryable nodes.
latent_nodes_2 = ['HeavyTraffic', 'Unob_helper_1', 'Unob_helper_2', 'Unob_helper_3', 'Unob_helper_4', 'Unob_helper_5', 'Unob_helper_6', 'Unob_helper_7']

# Observed nodes = every edge endpoint that is not latent. The comprehension
# keeps its loop variables local, so no builtin name (previously `tuple`)
# gets rebound in the notebook namespace.
nodes_set_2 = {
    node
    for edge in edges_list_2
    for node in edge
    if node not in latent_nodes_2
}

edges_2 = nx.DiGraph(edges_list_2)

df_medium_scale_incident = pd.read_csv("medium_scale_outage_incident_seed42.csv", index_col=0)

In [7]:
# Empirical joint probabilities for model 2, laid out as
#   empirical_probs_model_2[intervention][v][target]
#     = fraction of rows where intervention == v and target == v, for v in (0, 1).
# Fractions (not raw row counts) are stored because run_queries multiplies
# these values by probability bounds to build the PNS interval.
empirical_probs_model_2 = {}
for intervention in nodes_set_2:
    empirical_probs_model_2[intervention] = [{}, {}]
    for target in nodes_set_2:
        if target == intervention:
            continue
        for value in (0, 1):
            both_equal_value = (
                (df_medium_scale_incident[intervention] == value)
                & (df_medium_scale_incident[target] == value)
            )
            # Mean of a boolean mask is the share of rows where it is True.
            empirical_probs_model_2[intervention][value][target] = float(both_equal_value.mean())

In [8]:
# Build the causal model for the medium-scale graph from the loaded data.
model_2 = CausalModel(data=df_medium_scale_incident, edges=edges_2, unobservables_labels=latent_nodes_2)

# Alternative (disabled): full pairwise text report.
# run_queries(nodes_set_2, model_2, empirical_probs_model_2, "output_medium_scale_outage_incident_seed42.txt")

# PN/PS of every observed node on the outage; written to "<output_path>.json".
run_pn_ps(
    nodes_set=nodes_set_2,
    model=model_2,
    target="Outage",
    output_path="output_medium_scale_outage_incident_seed42",
)