In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import datetime
import random

import scipy.stats as stats
from causal_gen.random_dist import  RandomCausalDataGeneratorTS
from causal_gen.basic_ts_with_outlier import merge_node_data_with_outliers

## Generating Dataset for Online Simulation

In [2]:
# --- Experiment configuration -------------------------------------------
seed = 1300
random.seed(seed)  # seeds Python's `random`; the beta shape parameters below are drawn from it

root_cause = 'X1'
target_node = 'X5'
features = ('X1', 'X2', 'X3', 'X4', 'X5')
exp_name = f'rand_syn_5node_X1_X5_seed{seed}'

n_data = 120
outlier_fraction = 0.01
n_outliers = int(outlier_fraction * n_data)  # int() truncates: 0.01 * 120 -> 1

time_propagation = 1.0
# NOTE: taken from the wall clock, so this value differs on every run.
basic_time = datetime.datetime.now().timestamp()

# --- Causal structure: X1 -> X2 -> X4 -> X5, with X3 -> X4 ---------------
edges = [
    ('X1', 'X2'),
    ('X2', 'X4'),
    ('X3', 'X4'),
    ('X4', 'X5'),
]
causal_graph = nx.DiGraph(edges)

# --- Candidate noise distributions (scipy dist -> extra shape args) ------
# The two beta shape parameters are drawn in this order right after seeding.
beta_a = random.uniform(0.5, 2.0)
beta_b = random.uniform(0.5, 2.0)
noise_dists = {
    stats.norm: (),
    stats.uniform: (),
    stats.expon: (),
    stats.beta: (beta_a, beta_b),
}

# --- Generate per-node data with outliers injected at `root_cause` -------
outlier_settings = dict(
    outlier_root_cause_node=root_cause,
    outlier_multiplier=3,
    outlier_position=None,
    seed=seed,
)
outgen = RandomCausalDataGeneratorTS(
    causal_graph,
    noise_dists,
    basic_time,
    n_data,
    time_propagation,
    n_outliers,
    **outlier_settings,
)
node_data = outgen.generate_data_with_outliers()

# --- Merge the per-node data into one frame ------------------------------
df = merge_node_data_with_outliers(
    node_data=node_data,
    causal_graph=causal_graph,
    target_node=target_node,
    time_propagation=time_propagation,
)
print(df.columns, df.shape, sep='\n')

# --- Keep the feature columns plus label / ground truth / timestamp ------
l_features = [*features, 'label', 'root_cause_gt', 'ts']
df = df[l_features]

csv_path = f'{exp_name}.csv'
df.to_csv(csv_path, index=False, sep=',')

# Display which noise distribution the generator recorded for each node.
outgen.node_noise_dists

self.sorted_nodes are ['X1', 'X3', 'X2', 'X4', 'X5']
node X1 is a root cause
Index(['X5', 'ts', 'X5_root_cause', 'label', 'root_cause_gt', 'X1',
       'X1_root_cause', 'X2', 'X2_root_cause', 'X4', 'X4_root_cause', 'X3',
       'X3_root_cause'],
      dtype='object')
(117, 13)


{'X1': 'expon', 'X3': 'expon', 'X2': 'beta', 'X4': 'norm', 'X5': 'norm'}

In [3]:
## Generating the Initialization Dataset (No Outliers)

In [4]:
# Build the initialization dataset: same graph, noise-distribution candidates,
# base time and seed as the online-simulation cell, but with no outliers.
exp_name = f'init_rand_syn_5node_X1_X5_seed{seed}'
n_data = 120

# `sorted_nodes` was never defined in this notebook, so on a fresh kernel the
# original `sorted_nodes[0]` raised NameError. Derive the topological order from
# the causal graph itself; its first node is a source node of the DAG.
sorted_nodes = list(nx.topological_sort(causal_graph))
root_cause = sorted_nodes[0]

outgen =  RandomCausalDataGeneratorTS(causal_graph, 
                                      noise_dists,
                                      basic_time, 
                                      n_data, 
                                      time_propagation, 
                                      n_outliers=0,  # init data is generated with zero outliers
                                      outlier_root_cause_node=root_cause, 
                                      outlier_multiplier=3, 
                                      outlier_position=None,
                                      seed=seed)

node_data = outgen.generate_data_with_outliers()
df = merge_node_data_with_outliers(node_data = node_data, 
                                  causal_graph = causal_graph, 
                                  target_node = target_node,
                                  time_propagation = time_propagation)

# Keep the feature columns plus label and ground-truth root cause.
# NOTE(review): unlike the online-simulation cell, 'ts' is not kept here —
# confirm this is intentional.
l_features = list(features)
l_features.append('label')
l_features.append('root_cause_gt')
df = df.loc[:, l_features]
df.to_csv(f'{exp_name}.csv', index=False, sep=',')

# Display which noise distribution the generator recorded for each node.
outgen.node_noise_dists

self.sorted_nodes are ['X1', 'X3', 'X2', 'X4', 'X5']


{'X1': 'expon', 'X3': 'expon', 'X2': 'beta', 'X4': 'norm', 'X5': 'norm'}