In [1]:
import os
import pandas as pd
import numpy as np
import networkx as nx
import datetime
import random

import json
import networkx as nx
import matplotlib.pyplot as plt

import scipy.stats as stats

from causal_gen.random_dist import  RandomCausalDataGeneratorTS
from causal_gen.basic_ts_with_outlier import merge_node_data_with_outliers

In [2]:
def _generate_feature_frame(n_samples,
                            n_outliers,
                            root_cause,
                            target_node,
                            time_propagation,
                            noise_dists,
                            random_seed,
                            columns):
    """Generate one time series and return ``(frame, generator)``.

    Builds a ``RandomCausalDataGeneratorTS`` over the module-level
    ``causal_graph``, merges the per-node data with
    ``merge_node_data_with_outliers`` and keeps only ``columns``.
    The generator is returned as well so the caller can report its
    ``node_noise_dists``.
    """
    # The current wall-clock timestamp is handed to the generator as its base time.
    basic_time = datetime.datetime.now().timestamp()
    generator = RandomCausalDataGeneratorTS(causal_graph,
                                            noise_dists,
                                            basic_time,
                                            n_samples,
                                            time_propagation,
                                            n_outliers=n_outliers,
                                            outlier_root_cause_node=root_cause,
                                            outlier_multiplier=3,
                                            outlier_position=None,
                                            seed=random_seed)
    node_data = generator.generate_data_with_outliers()
    merged = merge_node_data_with_outliers(node_data=node_data,
                                           causal_graph=causal_graph,
                                           target_node=target_node,
                                           time_propagation=time_propagation)
    return merged.loc[:, columns], generator


def generate_dataset(possible_root_causes,
                     num_nodes,
                     time_propagation,
                     n_data,
                     init_n_data,
                     outlier_fraction,
                     target_node,
                     random_seed,
                     noise_dists,
                     folder,
                    ):
    """Generate and save one dataset pair per candidate root cause.

    For every entry of ``possible_root_causes`` three files are written to
    ``<cwd>/<folder>`` (created if missing), where ``bname`` is
    ``{num_nodes}_nodes_{root_cause}_{target_node}_s{random_seed}``:

    * ``<bname>.csv``      - series of ``n_data`` samples with
      ``int(outlier_fraction * n_data)`` outliers requested at ``root_cause``.
    * ``<bname>.json``     - metadata: target node, nodes, edges, root cause.
    * ``init_<bname>.csv`` - series of ``init_n_data`` samples generated with
      ``n_outliers=0``.

    NOTE: the graph and the column order are read from the module-level
    globals ``causal_graph`` and ``nodes``; both must be defined before this
    function is called.

    Parameters
    ----------
    possible_root_causes : iterable of node names to inject outliers at.
    num_nodes : int, used in the file names and in the ``init_n_data`` fallback.
    time_propagation : forwarded to the generator and to the merge helper.
    n_data : int, number of samples for the outlier series.
    init_n_data : int or None, number of samples for the outlier-free series.
        ``None`` falls back to ``120 + num_nodes - 1``, the length that used
        to be hard-coded here.
    outlier_fraction : float, fraction of ``n_data`` requested as outliers.
    target_node : node name forwarded to the merge helper.
    random_seed : seed forwarded to the generator; also part of the file names.
    noise_dists : noise distribution catalogue forwarded to the generator.
    folder : output directory, relative to the current working directory.
    """
    result_folder = os.path.join(os.getcwd(), folder)
    os.makedirs(result_folder, exist_ok=True)

    # Previously the argument was unconditionally overwritten with
    # 120 + num_nodes - 1, so the caller's value was never used.
    if init_n_data is None:
        init_n_data = 120 + num_nodes - 1

    # Loop-invariant values.
    n_outliers = int(outlier_fraction * n_data)
    # 'label' and 'root_cause_gt' are presumably added by
    # merge_node_data_with_outliers - TODO confirm against causal_gen.
    l_features = list(nodes) + ['label', 'root_cause_gt']

    for root_cause in possible_root_causes:
        bname = f'{num_nodes}_nodes_{root_cause}_{target_node}_s{random_seed}'

        # --- series with injected outliers ---------------------------------
        all_df, outgen = _generate_feature_frame(n_data,
                                                 n_outliers,
                                                 root_cause,
                                                 target_node,
                                                 time_propagation,
                                                 noise_dists,
                                                 random_seed,
                                                 l_features)
        all_df.to_csv(os.path.join(result_folder, f'{bname}.csv'), sep=',', index=False)

        metadata = {"target_node" : target_node,
                    "nodes" : nodes,
                    "edges" : list(causal_graph.edges),
                    "root_causes" : root_cause,
                   }
        with open(os.path.join(result_folder, f'{bname}.json'), 'w') as fp:
            json.dump(metadata, fp)

        print(f'root_cause {root_cause} : {outgen.node_noise_dists}')

        # --- outlier-free "init" series, same seed -------------------------
        df, outgen_init = _generate_feature_frame(init_n_data,
                                                  0,
                                                  root_cause,
                                                  target_node,
                                                  time_propagation,
                                                  noise_dists,
                                                  random_seed,
                                                  l_features)
        df.to_csv(os.path.join(result_folder, f'init_{bname}.csv'), index=False, sep=',')
        print(f'root_cause init {root_cause} : {outgen_init.node_noise_dists}')

In [3]:
## Configuration for the "branch" graph: X1 -> X2 -> X3 and X1 -> X4 -> X5 -> X6 -> X7
time_propagation = 1.0

# The beta shape parameters are drawn from a dedicated, seeded RNG so that the
# noise catalogue is identical after every kernel restart (the global `random`
# state is left untouched).
beta_shape_rng = random.Random(0)

# Candidate noise distributions, keyed by scipy.stats distribution; the value
# is presumably the tuple of shape arguments for that distribution - TODO
# confirm against RandomCausalDataGeneratorTS.
noise_dists = { stats.norm: (),
                stats.uniform: (),
                stats.expon: (),
                stats.halfnorm: (),
                stats.beta: (beta_shape_rng.uniform(0.5, 2.0), beta_shape_rng.uniform(0.5, 2.0)),
              }

num_nodes = 7
nodes = [f'X{i}' for i in range(1, num_nodes + 1)]
possible_root_causes = ['X1',]
target_node = nodes[-1]  # last node, i.e. 'X7'

# NOTE(review): the extra `num_nodes - 1` samples presumably compensate for
# rows lost to propagation delay when the node series are merged - confirm.
n_data = 2880 + num_nodes - 1
init_n_data = 960 + num_nodes - 1
outlier_fraction = 0.02

edges = [('X1', 'X2'),
         ('X2', 'X3'),
         ('X1', 'X4'),
         ('X4', 'X5'),
         ('X5', 'X6'),
         ('X6', 'X7'),
        ]

causal_graph = nx.DiGraph(edges)
folder = 'random/branch'

In [4]:
# Generate the dataset files once per seed (seeds 0..6); every argument except
# the seed comes from the configuration cell.
for random_seed in range(7):
    generate_dataset(
        possible_root_causes=possible_root_causes,
        num_nodes=num_nodes,
        time_propagation=time_propagation,
        n_data=n_data,
        init_n_data=init_n_data,
        outlier_fraction=outlier_fraction,
        target_node=target_node,
        random_seed=random_seed,
        noise_dists=noise_dists,
        folder=folder,
    )

root_cause X1 : {'X1': 'halfnorm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'expon', 'X5': 'beta', 'X6': 'halfnorm', 'X7': 'halfnorm'}
root_cause init X1 : {'X1': 'halfnorm', 'X2': 'halfnorm', 'X4': 'norm', 'X3': 'expon', 'X5': 'beta', 'X6': 'halfnorm', 'X7': 'halfnorm'}
root_cause X1 : {'X1': 'uniform', 'X2': 'beta', 'X4': 'norm', 'X3': 'expon', 'X5': 'norm', 'X6': 'halfnorm', 'X7': 'halfnorm'}
root_cause init X1 : {'X1': 'uniform', 'X2': 'beta', 'X4': 'norm', 'X3': 'expon', 'X5': 'norm', 'X6': 'halfnorm', 'X7': 'halfnorm'}
root_cause X1 : {'X1': 'norm', 'X2': 'norm', 'X4': 'norm', 'X3': 'expon', 'X5': 'uniform', 'X6': 'expon', 'X7': 'expon'}
root_cause init X1 : {'X1': 'norm', 'X2': 'norm', 'X4': 'norm', 'X3': 'expon', 'X5': 'uniform', 'X6': 'expon', 'X7': 'expon'}
root_cause X1 : {'X1': 'uniform', 'X2': 'beta', 'X4': 'beta', 'X3': 'uniform', 'X5': 'expon', 'X6': 'beta', 'X7': 'halfnorm'}
root_cause init X1 : {'X1': 'uniform', 'X2': 'beta', 'X4': 'beta', 'X3': 'uniform', 'X5': 'expon', 