In [1]:
import os
import pandas as pd
import numpy as np
import networkx as nx
import datetime
import random

import json
import networkx as nx
import matplotlib.pyplot as plt

import scipy.stats as stats

from causal_gen.random_dist import  RandomCausalDataGeneratorTS
from causal_gen.basic_ts_with_outlier import merge_node_data_with_outliers

In [2]:
def generate_dataset(possible_root_causes, 
                     num_nodes,
                     time_propagation,
                     n_data,
                     init_n_data,
                     outlier_fraction,
                     target_node,
                     random_seed,
                     noise_dists,
                     folder,
                    ):
    """Generate and save one outlier dataset and one "init" dataset per root cause.

    For every entry of ``possible_root_causes`` three files are written into
    ``<cwd>/<folder>`` (the directory is created if it does not exist), where
    ``bname = f'{num_nodes}_nodes_{root_cause}_{target_node}'``:

    * ``<bname>.csv``      -- data generated with ``int(outlier_fraction * n_data)``
      outliers and ``outlier_root_cause_node=root_cause``.
    * ``<bname>.json``     -- metadata (target node, nodes, edges, root cause).
    * ``init_<bname>.csv`` -- data generated with ``n_outliers=0``.

    NOTE: this function reads the module-level names ``causal_graph`` and
    ``nodes``; both must be defined in the notebook before it is called.

    Parameters
    ----------
    possible_root_causes : iterable
        Node names; one dataset triple is produced for each.
    num_nodes : int
        Only used to build the output file names.
    time_propagation
        Forwarded unchanged to the generator and to the merge helper.
    n_data : int
        Forwarded to the generator for the outlier dataset
        (presumably the number of samples -- confirm against ``causal_gen``).
    init_n_data : int
        Forwarded to the generator for the init dataset. Previously this
        argument was overwritten with ``120 + num_nodes - 1`` inside the loop,
        so the caller's value was ignored; it is now honoured.
    outlier_fraction : float
        Fraction of ``n_data`` used as the outlier count (truncated to int).
    target_node
        Forwarded to the merge helper and stored in the metadata.
    random_seed
        Seed forwarded to both generators.
    noise_dists
        Forwarded unchanged to both generators.
    folder : str
        Output directory, relative to the current working directory.

    Returns
    -------
    None
    """
    result_folder = os.path.join(os.getcwd(), folder)
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(result_folder, exist_ok=True)

    # Both values are independent of the root cause, so compute them once.
    n_outliers = int(outlier_fraction * n_data)
    l_features = list(nodes) + ['label', 'root_cause_gt']

    for root_cause in possible_root_causes:
        bname = f'{num_nodes}_nodes_{root_cause}_{target_node}'

        # --- dataset with injected outliers ---
        basic_time = datetime.datetime.now().timestamp()
        outgen =  RandomCausalDataGeneratorTS(causal_graph, 
                                              noise_dists,
                                              basic_time, 
                                              n_data, 
                                              time_propagation, 
                                              n_outliers=n_outliers, 
                                              outlier_root_cause_node=root_cause, 
                                              outlier_multiplier=3, 
                                              outlier_position=None,
                                              seed=random_seed)

        node_data = outgen.generate_data_with_outliers()
        all_df = merge_node_data_with_outliers(node_data = node_data, 
                                          causal_graph = causal_graph, 
                                          target_node = target_node,
                                          time_propagation = time_propagation)
        # Keep only the node columns plus the two label columns, in that order.
        all_df = all_df.loc[:, l_features]
        all_df.to_csv(f'{result_folder}/{bname}.csv', sep=',', index=False)

        metadata = {"target_node" : target_node,
                    "nodes" : nodes,
                    "edges" : list(causal_graph.edges),
                    "root_causes" : root_cause,
                   }

        with open(f'{result_folder}/{bname}.json', 'w') as fp:
            json.dump(metadata, fp)

        print(f'root_cause {root_cause} : {outgen.node_noise_dists}')
        
        # --- outlier-free "init" dataset, sized by the caller's init_n_data ---
        exp_name = f'init_{bname}'
        basic_time = datetime.datetime.now().timestamp()
        outgen_init =  RandomCausalDataGeneratorTS(causal_graph, 
                                              noise_dists,
                                              basic_time, 
                                              init_n_data, 
                                              time_propagation, 
                                              n_outliers=0, 
                                              outlier_root_cause_node=root_cause, 
                                              outlier_multiplier=3, 
                                              outlier_position=None,
                                              seed=random_seed)

        init_node_data = outgen_init.generate_data_with_outliers()
        df = merge_node_data_with_outliers(node_data = init_node_data, 
                                          causal_graph = causal_graph, 
                                          target_node = target_node,
                                          time_propagation = time_propagation)
        df = df.loc[:, l_features]
        df.to_csv(f'{result_folder}/{exp_name}.csv', index=False, sep=',')
        print(f'root_cause init {root_cause} : {outgen_init.node_noise_dists}')

In [3]:
## Branch topology with 3 nodes: X1 -> X3 and X1 -> X2 (X3 is the target).
random_seed = 46
time_propagation = 1.0

# Draw the two beta arguments from a dedicated, seeded RNG so that a fresh
# kernel reproduces the same noise_dists (the global `random` module was
# previously used unseeded, giving different values on every run).
_beta_rng = random.Random(random_seed)

# Candidate noise distributions mapped to their extra positional arguments
# (presumably shape parameters handed to scipy -- confirm against causal_gen).
noise_dists = { stats.norm: (),
                stats.uniform: (),
                stats.expon: (),
                stats.halfnorm: (),
                stats.beta: (_beta_rng.uniform(0.5, 2.0), _beta_rng.uniform(0.5, 2.0))
              }

num_nodes = 3
nodes = [f'X{i}' for i in range(1, num_nodes +1)]
possible_root_causes = ('X1','X3')
target_node = nodes[-1]

n_data = 2880 + num_nodes - 1 
init_n_data = 960 + num_nodes - 1 
outlier_fraction = 0.02

edges = [('X1', 'X3'),
         ('X1', 'X2'),
        ]

# `causal_graph` and `nodes` are read as globals by generate_dataset.
causal_graph = nx.DiGraph(edges)
folder = f'{num_nodes}_nodes/branch'

generate_dataset(possible_root_causes, 
                 num_nodes,
                 time_propagation,
                 n_data,
                 init_n_data,
                 outlier_fraction,
                 target_node,
                 random_seed,
                 noise_dists,
                 folder
                )

root_cause X1 : {'X1': 'norm', 'X3': 'halfnorm', 'X2': 'norm'}
root_cause init X1 : {'X1': 'norm', 'X3': 'halfnorm', 'X2': 'norm'}
root_cause X3 : {'X1': 'norm', 'X3': 'halfnorm', 'X2': 'norm'}
root_cause init X3 : {'X1': 'norm', 'X3': 'halfnorm', 'X2': 'norm'}


In [4]:
# Branch topology, 4 nodes: X1 -> X2 and X1 -> X3 -> X4 (X4 is the target).
num_nodes = 4
nodes = ['X' + str(idx) for idx in range(1, num_nodes + 1)]
target_node = nodes[-1]
possible_root_causes = ('X1', 'X3', 'X4')

n_data = 2880 + (num_nodes - 1)
init_n_data = 960 + (num_nodes - 1)
outlier_fraction = 0.02

edges = [
    ('X1', 'X2'),
    ('X1', 'X3'),
    ('X3', 'X4'),
]

# `causal_graph` and `nodes` are read as globals by generate_dataset.
causal_graph = nx.DiGraph()
causal_graph.add_edges_from(edges)
folder = f'{num_nodes}_nodes/branch'

generate_dataset(
    possible_root_causes=possible_root_causes,
    num_nodes=num_nodes,
    time_propagation=time_propagation,
    n_data=n_data,
    init_n_data=init_n_data,
    outlier_fraction=outlier_fraction,
    target_node=target_node,
    random_seed=random_seed,
    noise_dists=noise_dists,
    folder=folder,
)

root_cause X1 : {'X1': 'norm', 'X2': 'halfnorm', 'X3': 'norm', 'X4': 'beta'}
root_cause init X1 : {'X1': 'norm', 'X2': 'halfnorm', 'X3': 'norm', 'X4': 'beta'}
root_cause X3 : {'X1': 'norm', 'X2': 'halfnorm', 'X3': 'norm', 'X4': 'beta'}
root_cause init X3 : {'X1': 'norm', 'X2': 'halfnorm', 'X3': 'norm', 'X4': 'beta'}
root_cause X4 : {'X1': 'norm', 'X2': 'halfnorm', 'X3': 'norm', 'X4': 'beta'}
root_cause init X4 : {'X1': 'norm', 'X2': 'halfnorm', 'X3': 'norm', 'X4': 'beta'}


In [5]:
# Branch topology, 5 nodes: X1 -> X2 and X1 -> X3 -> X4 -> X5 (X5 is the target).
num_nodes = 5
nodes = ['X' + str(idx) for idx in range(1, num_nodes + 1)]
target_node = nodes[-1]
possible_root_causes = ('X1', 'X3', 'X4')

n_data = 2880 + (num_nodes - 1)
init_n_data = 960 + (num_nodes - 1)
outlier_fraction = 0.02

edges = [
    ('X1', 'X2'),
    ('X1', 'X3'),
    ('X3', 'X4'),
    ('X4', 'X5'),
]

# `causal_graph` and `nodes` are read as globals by generate_dataset.
causal_graph = nx.DiGraph()
causal_graph.add_edges_from(edges)
folder = f'{num_nodes}_nodes/branch'

generate_dataset(
    possible_root_causes=possible_root_causes,
    num_nodes=num_nodes,
    time_propagation=time_propagation,
    n_data=n_data,
    init_n_data=init_n_data,
    outlier_fraction=outlier_fraction,
    target_node=target_node,
    random_seed=random_seed,
    noise_dists=noise_dists,
    folder=folder,
)

root_cause X1 : {'X1': 'norm', 'X2': 'halfnorm', 'X3': 'norm', 'X4': 'beta', 'X5': 'beta'}
root_cause init X1 : {'X1': 'norm', 'X2': 'halfnorm', 'X3': 'norm', 'X4': 'beta', 'X5': 'beta'}
root_cause X3 : {'X1': 'norm', 'X2': 'halfnorm', 'X3': 'norm', 'X4': 'beta', 'X5': 'beta'}
root_cause init X3 : {'X1': 'norm', 'X2': 'halfnorm', 'X3': 'norm', 'X4': 'beta', 'X5': 'beta'}
root_cause X4 : {'X1': 'norm', 'X2': 'halfnorm', 'X3': 'norm', 'X4': 'beta', 'X5': 'beta'}
root_cause init X4 : {'X1': 'norm', 'X2': 'halfnorm', 'X3': 'norm', 'X4': 'beta', 'X5': 'beta'}


In [6]:
# Branch topology, 6 nodes: X1 -> X2 -> X3 and X1 -> X4 -> X5 -> X6 (X6 is the target).
num_nodes = 6
nodes = ['X' + str(idx) for idx in range(1, num_nodes + 1)]
target_node = nodes[-1]
possible_root_causes = ('X1', 'X4', 'X5')

n_data = 2880 + (num_nodes - 1)
init_n_data = 960 + (num_nodes - 1)
outlier_fraction = 0.02

edges = [
    ('X1', 'X2'),
    ('X2', 'X3'),
    ('X1', 'X4'),
    ('X4', 'X5'),
    ('X5', 'X6'),
]

# `causal_graph` and `nodes` are read as globals by generate_dataset.
causal_graph = nx.DiGraph()
causal_graph.add_edges_from(edges)
folder = f'{num_nodes}_nodes/branch'

generate_dataset(
    possible_root_causes=possible_root_causes,
    num_nodes=num_nodes,
    time_propagation=time_propagation,
    n_data=n_data,
    init_n_data=init_n_data,
    outlier_fraction=outlier_fraction,
    target_node=target_node,
    random_seed=random_seed,
    noise_dists=noise_dists,
    folder=folder,
)

root_cause X1 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform'}
root_cause init X1 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform'}
root_cause X4 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform'}
root_cause init X4 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform'}
root_cause X5 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform'}
root_cause init X5 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform'}


In [7]:
# Branch topology, 7 nodes: X1 -> X2 -> X3 and X1 -> X4 -> ... -> X7 (X7 is the target).
num_nodes = 7
nodes = ['X' + str(idx) for idx in range(1, num_nodes + 1)]
target_node = nodes[-1]
possible_root_causes = ('X1', 'X4', 'X5')

n_data = 2880 + (num_nodes - 1)
init_n_data = 960 + (num_nodes - 1)
outlier_fraction = 0.02

edges = [
    ('X1', 'X2'),
    ('X2', 'X3'),
    ('X1', 'X4'),
    ('X4', 'X5'),
    ('X5', 'X6'),
    ('X6', 'X7'),
]

# `causal_graph` and `nodes` are read as globals by generate_dataset.
causal_graph = nx.DiGraph()
causal_graph.add_edges_from(edges)
folder = f'{num_nodes}_nodes/branch'

generate_dataset(
    possible_root_causes=possible_root_causes,
    num_nodes=num_nodes,
    time_propagation=time_propagation,
    n_data=n_data,
    init_n_data=init_n_data,
    outlier_fraction=outlier_fraction,
    target_node=target_node,
    random_seed=random_seed,
    noise_dists=noise_dists,
    folder=folder,
)

root_cause X1 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta'}
root_cause init X1 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta'}
root_cause X4 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta'}
root_cause init X4 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta'}
root_cause X5 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta'}
root_cause init X5 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta'}


In [8]:
# Branch topology, 8 nodes: X1 -> X2 -> X3 and X1 -> X4 -> ... -> X8 (X8 is the target).
num_nodes = 8
nodes = ['X' + str(idx) for idx in range(1, num_nodes + 1)]
target_node = nodes[-1]
possible_root_causes = ('X1', 'X4', 'X5')

n_data = 2880 + (num_nodes - 1)
init_n_data = 960 + (num_nodes - 1)
outlier_fraction = 0.02

edges = [
    ('X1', 'X2'),
    ('X2', 'X3'),
    ('X1', 'X4'),
    ('X4', 'X5'),
    ('X5', 'X6'),
    ('X6', 'X7'),
    ('X7', 'X8'),
]

# `causal_graph` and `nodes` are read as globals by generate_dataset.
causal_graph = nx.DiGraph()
causal_graph.add_edges_from(edges)
folder = f'{num_nodes}_nodes/branch'

generate_dataset(
    possible_root_causes=possible_root_causes,
    num_nodes=num_nodes,
    time_propagation=time_propagation,
    n_data=n_data,
    init_n_data=init_n_data,
    outlier_fraction=outlier_fraction,
    target_node=target_node,
    random_seed=random_seed,
    noise_dists=noise_dists,
    folder=folder,
)

root_cause X1 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform'}
root_cause init X1 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform'}
root_cause X4 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform'}
root_cause init X4 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform'}
root_cause X5 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform'}
root_cause init X5 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform'}


In [9]:
# Branch topology, 9 nodes: X1 -> X2 -> X3 and X1 -> X4 -> ... -> X9 (X9 is the target).
num_nodes = 9
nodes = ['X' + str(idx) for idx in range(1, num_nodes + 1)]
target_node = nodes[-1]
possible_root_causes = ('X1', 'X4', 'X5')

n_data = 2880 + (num_nodes - 1)
init_n_data = 960 + (num_nodes - 1)
outlier_fraction = 0.02

edges = [
    ('X1', 'X2'),
    ('X2', 'X3'),
    ('X1', 'X4'),
    ('X4', 'X5'),
    ('X5', 'X6'),
    ('X6', 'X7'),
    ('X7', 'X8'),
    ('X8', 'X9'),
]

# `causal_graph` and `nodes` are read as globals by generate_dataset.
causal_graph = nx.DiGraph()
causal_graph.add_edges_from(edges)
folder = f'{num_nodes}_nodes/branch'

generate_dataset(
    possible_root_causes=possible_root_causes,
    num_nodes=num_nodes,
    time_propagation=time_propagation,
    n_data=n_data,
    init_n_data=init_n_data,
    outlier_fraction=outlier_fraction,
    target_node=target_node,
    random_seed=random_seed,
    noise_dists=noise_dists,
    folder=folder,
)

root_cause X1 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 'beta'}
root_cause init X1 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 'beta'}
root_cause X4 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 'beta'}
root_cause init X4 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 'beta'}
root_cause X5 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 'beta'}
root_cause init X5 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 'beta'}


In [10]:
# Branch topology, 10 nodes: X1 -> X2 -> X3 and X1 -> X4 -> ... -> X10 (X10 is the target).
num_nodes = 10
nodes = ['X' + str(idx) for idx in range(1, num_nodes + 1)]
target_node = nodes[-1]
possible_root_causes = ('X1', 'X4', 'X5')

n_data = 2880 + (num_nodes - 1)
init_n_data = 960 + (num_nodes - 1)
outlier_fraction = 0.02

edges = [
    ('X1', 'X2'),
    ('X2', 'X3'),
    ('X1', 'X4'),
    ('X4', 'X5'),
    ('X5', 'X6'),
    ('X6', 'X7'),
    ('X7', 'X8'),
    ('X8', 'X9'),
    ('X9', 'X10'),
]

# `causal_graph` and `nodes` are read as globals by generate_dataset.
causal_graph = nx.DiGraph()
causal_graph.add_edges_from(edges)
folder = f'{num_nodes}_nodes/branch'

generate_dataset(
    possible_root_causes=possible_root_causes,
    num_nodes=num_nodes,
    time_propagation=time_propagation,
    n_data=n_data,
    init_n_data=init_n_data,
    outlier_fraction=outlier_fraction,
    target_node=target_node,
    random_seed=random_seed,
    noise_dists=noise_dists,
    folder=folder,
)

root_cause X1 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 'beta', 'X10': 'beta'}
root_cause init X1 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 'beta', 'X10': 'beta'}
root_cause X4 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 'beta', 'X10': 'beta'}
root_cause init X4 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 'beta', 'X10': 'beta'}
root_cause X5 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 'beta', 'X10': 'beta'}
root_cause init X5 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 

In [11]:
# Branch topology, 11 nodes: X1 -> X2 -> X3 and X1 -> X4 -> ... -> X11 (X11 is the target).
num_nodes = 11
nodes = ['X' + str(idx) for idx in range(1, num_nodes + 1)]
target_node = nodes[-1]
possible_root_causes = ('X1', 'X4', 'X5')

n_data = 2880 + (num_nodes - 1)
init_n_data = 960 + (num_nodes - 1)
outlier_fraction = 0.02

edges = [
    ('X1', 'X2'),
    ('X2', 'X3'),
    ('X1', 'X4'),
    ('X4', 'X5'),
    ('X5', 'X6'),
    ('X6', 'X7'),
    ('X7', 'X8'),
    ('X8', 'X9'),
    ('X9', 'X10'),
    ('X10', 'X11'),
]

# `causal_graph` and `nodes` are read as globals by generate_dataset.
causal_graph = nx.DiGraph()
causal_graph.add_edges_from(edges)
folder = f'{num_nodes}_nodes/branch'

generate_dataset(
    possible_root_causes=possible_root_causes,
    num_nodes=num_nodes,
    time_propagation=time_propagation,
    n_data=n_data,
    init_n_data=init_n_data,
    outlier_fraction=outlier_fraction,
    target_node=target_node,
    random_seed=random_seed,
    noise_dists=noise_dists,
    folder=folder,
)

root_cause X1 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 'beta', 'X10': 'beta', 'X11': 'norm'}
root_cause init X1 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 'beta', 'X10': 'beta', 'X11': 'norm'}
root_cause X4 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 'beta', 'X10': 'beta', 'X11': 'norm'}
root_cause init X4 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 'beta', 'X10': 'beta', 'X11': 'norm'}
root_cause X5 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 'beta', 'X10': 'beta', 'X11': 'norm'}
root_cause init X5 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': '

In [12]:
# Branch topology, 12 nodes: X1 -> X2 -> X3 and X1 -> X4 -> ... -> X12 (X12 is the target).
num_nodes = 12
nodes = ['X' + str(idx) for idx in range(1, num_nodes + 1)]
target_node = nodes[-1]
possible_root_causes = ('X1', 'X4', 'X5')

n_data = 2880 + (num_nodes - 1)
init_n_data = 960 + (num_nodes - 1)
outlier_fraction = 0.02

edges = [
    ('X1', 'X2'),
    ('X2', 'X3'),
    ('X1', 'X4'),
    ('X4', 'X5'),
    ('X5', 'X6'),
    ('X6', 'X7'),
    ('X7', 'X8'),
    ('X8', 'X9'),
    ('X9', 'X10'),
    ('X10', 'X11'),
    ('X11', 'X12'),
]

# `causal_graph` and `nodes` are read as globals by generate_dataset.
causal_graph = nx.DiGraph()
causal_graph.add_edges_from(edges)
folder = f'{num_nodes}_nodes/branch'

generate_dataset(
    possible_root_causes=possible_root_causes,
    num_nodes=num_nodes,
    time_propagation=time_propagation,
    n_data=n_data,
    init_n_data=init_n_data,
    outlier_fraction=outlier_fraction,
    target_node=target_node,
    random_seed=random_seed,
    noise_dists=noise_dists,
    folder=folder,
)

root_cause X1 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 'beta', 'X10': 'beta', 'X11': 'norm', 'X12': 'norm'}
root_cause init X1 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 'beta', 'X10': 'beta', 'X11': 'norm', 'X12': 'norm'}
root_cause X4 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 'beta', 'X10': 'beta', 'X11': 'norm', 'X12': 'norm'}
root_cause init X4 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 'beta', 'X10': 'beta', 'X11': 'norm', 'X12': 'norm'}
root_cause X5 : {'X1': 'norm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'beta', 'X5': 'beta', 'X6': 'uniform', 'X7': 'beta', 'X8': 'uniform', 'X9': 'beta', 'X10': 'beta', 'X11': 'norm', 'X12': 'norm'}
