In [1]:
import numpy as np
import pandas as pd
import networkx as nx
import datetime
from scipy.stats import truncexpon, halfnorm

In [2]:
# Node names of the toy causal model (not referenced again in this cell).
features = ('X1', 'X2', 'X3', 'X4', 'X5')
# Linear causal chain X1 -> X2 -> X3 -> X4 -> X5, given as (parent, child) edges.
causal_graph = nx.DiGraph([('X1', 'X2'), 
                          ('X2', 'X3'),
                          ('X3', 'X4'),
                          ('X4', 'X5')])
#gcm.util.plot(causal_graph, figure_size=[13, 13])
# NOTE: only the last expression of a cell is displayed, so the node view on
# the next line is evaluated and discarded; the cell output is the graph type.
causal_graph.nodes
type(causal_graph)

networkx.classes.digraph.DiGraph

In [3]:
def find_root_children_nodes(causal_graph):
    """Classify every node of a directed graph as a root or as a child.

    Parameters
    ----------
    causal_graph
        Object exposing ``nodes`` (iterable of node names) and
        ``predecessors(node)`` (iterable of that node's parents), e.g. a
        ``networkx.DiGraph``.

    Returns
    -------
    tuple(list, dict)
        ``(root, parents)``: ``root`` lists the nodes that have no
        predecessor, in graph iteration order; ``parents`` maps each
        remaining node to the list of its predecessors.

    Notes
    -----
    One progress line is printed per node while the graph is scanned.
    """
    parents = {}
    root = []
    for node in causal_graph.nodes:
        incoming = [*causal_graph.predecessors(node)]
        # Guard clause: a node without incoming edges is a root.
        if not incoming:
            root.append(node)
            print(f'root node : {node}')
            continue
        parents[node] = incoming
        print(f'child node {node} of parents {incoming}')
    return root, parents
# Split the graph into its root nodes and a child -> parents mapping; the
# helper itself also prints one line per node while it scans the graph.
root, node_parents = find_root_children_nodes(causal_graph)
print(f'root : {root}')
print(f'parents : {node_parents}')

root node : X1
child node X2 of parents ['X1']
child node X3 of parents ['X2']
child node X4 of parents ['X3']
child node X5 of parents ['X4']
root : ['X1']
parents : {'X2': ['X1'], 'X3': ['X2'], 'X4': ['X3'], 'X5': ['X4']}


In [4]:
# Simulation settings used by the data generators in the following cells.
# time_propagation is in seconds: it is both the spacing between consecutive
# samples of a node and the lag added between a parent's and a child's start.
time_propagation = 1 # in seconds
# Timestamp of the first root sample (wall-clock time, so it differs per run).
basic_time = datetime.datetime.now().timestamp()
# Number of samples generated for every node.
n_data = 100

In [5]:
def generate_root_data(node, node_data, start_ts, n_data, time_propagation,
                       random_state=None):
    """Generate a synthetic time series for a root node and store it in ``node_data``.

    Parameters
    ----------
    node : str
        Name of the node; used as the key in ``node_data`` and as the name of
        the value column.
    node_data : dict
        Store that is mutated in place. After the call ``node_data[node]`` is
        ``{'data': DataFrame with columns (node, 'ts'), 'start_ts': start_ts}``.
    start_ts : float
        Timestamp of the first sample.
    n_data : int
        Number of samples to generate.
    time_propagation : float
        Spacing between consecutive timestamps.
    random_state : None, int, numpy Generator or RandomState, optional
        Forwarded to ``truncexpon.rvs`` so that a run can be reproduced. The
        default ``None`` keeps the previous behaviour (global NumPy RNG).

    Returns
    -------
    None
        The result is written into ``node_data``.
    """
    # Scale an integer index instead of calling np.arange with a float step:
    # with a non-integer step np.arange may return n_data + 1 values because
    # of rounding, and np.hstack below would then fail on the shape mismatch.
    ts = (start_ts + np.arange(n_data) * time_propagation).reshape(-1, 1)
    # Node values: truncated exponential noise (shape b=3, scale=0.2).
    data = truncexpon.rvs(b=3, scale=0.2, size=n_data,
                          random_state=random_state).reshape(-1, 1)
    data_ts = np.hstack((data, ts))
    node_data[node] = {'data': pd.DataFrame(data_ts, columns=(node, 'ts')),
                       'start_ts': start_ts}
# Fresh store for this manual walkthrough; generate_root_data fills it in
# place with node name -> {'data': DataFrame, 'start_ts': start timestamp}.
node_data = {}
generate_root_data(node='X1', 
                   node_data=node_data, 
                   start_ts=basic_time, 
                   n_data=n_data, 
                   time_propagation=time_propagation)
# Preview the first rows of the generated root series (value column + 'ts').
node_data['X1']['data'].head()

Unnamed: 0,X1,ts
0,0.049228,1675704000.0
1,0.077307,1675704000.0
2,0.080297,1675704000.0
3,0.020479,1675704000.0
4,0.026184,1675704000.0


In [6]:
def generate_child_data(node, parents, node_data, n_data, time_propagation,
                        random_state=None):
    """Generate a synthetic time series for a child node from its parents' data.

    The child's values are half-normal noise (loc=0.5, scale=0.2) plus the sum
    of the value columns of every parent that already has data. The child's
    first timestamp is the latest parent ``start_ts`` plus one
    ``time_propagation``.

    Parameters
    ----------
    node : str
        Name of the child node; key in ``node_data`` and name of the value column.
    parents : iterable of str
        Names of the parent nodes.
    node_data : dict
        Store that is read for the parents and mutated in place for ``node``.
        After the call ``node_data[node]`` is
        ``{'data': DataFrame with columns (node, 'ts'), 'start_ts': start_ts}``.
    n_data : int
        Number of samples to generate.
    time_propagation : float
        Spacing between consecutive timestamps and the parent-to-child lag.
    random_state : None, int, numpy Generator or RandomState, optional
        Forwarded to ``halfnorm.rvs`` so that a run can be reproduced. The
        default ``None`` keeps the previous behaviour (global NumPy RNG).

    Returns
    -------
    None
        The result is written into ``node_data``.

    Raises
    ------
    ValueError
        If none of ``parents`` has an entry in ``node_data`` (previously this
        surfaced as the ``max()`` "empty sequence" ValueError).
    """
    # Single pass over the parents: remember the usable ones and report each
    # missing one exactly once.
    available = []
    for parent in parents:
        if parent in node_data:
            available.append(parent)
        else:
            print(f'parent {parent} of node {node} has no data')

    if not available:
        raise ValueError(
            f'node {node} has no parent with generated data; '
            'generate every parent before its children')

    start_ts = max(node_data[parent]['start_ts'] for parent in available) + time_propagation
    # Scale an integer index instead of calling np.arange with a float step:
    # with a non-integer step np.arange may return n_data + 1 values because
    # of rounding, and np.hstack below would then fail on the shape mismatch.
    ts = (start_ts + np.arange(n_data) * time_propagation).reshape(-1, 1)

    data = halfnorm.rvs(loc=0.5, scale=0.2, size=n_data,
                        random_state=random_state).reshape(-1, 1)
    # NOTE: parent values are added row by row (by position), not matched on
    # 'ts'; row i of the child therefore uses row i of every parent.
    for parent in available:
        data += node_data[parent]['data'][parent].values.reshape(-1, 1)

    data_ts = np.hstack((data, ts))
    node_data[node] = {'data': pd.DataFrame(data_ts, columns=(node, 'ts')),
                       'start_ts': start_ts}

# Manual walkthrough for one child: X2 is generated from its parent list.
parents = node_parents['X2']
generate_child_data('X2', parents, node_data, n_data, time_propagation)
# First timestamps of parent and child: the child starts one
# time_propagation after its parent.
print(node_data['X1']['data']['ts'][0])
print(node_data['X2']['data']['ts'][0])

1675703887.108593
1675703888.108593


In [7]:
def generate_data(causal_graph, basic_time, n_data, time_propagation):
    """Generate synthetic time-series data for every node of a causal graph.

    Root nodes are generated with ``generate_root_data`` starting at
    ``basic_time``; every other node is generated with
    ``generate_child_data`` from its parents.

    Parameters
    ----------
    causal_graph : networkx.DiGraph
        Directed acyclic graph whose edges point from parent to child.
    basic_time : float
        Timestamp of the first sample of every root node.
    n_data : int
        Number of samples per node.
    time_propagation : float
        Spacing between samples and lag between a parent and its child.

    Returns
    -------
    dict
        Node name -> ``{'data': DataFrame, 'start_ts': ...}``, filled in
        topological order.

    Raises
    ------
    networkx.NetworkXUnfeasible
        If ``causal_graph`` contains a cycle, so no parent-first order exists.
    """
    node_data = dict()
    root, node_parents = find_root_children_nodes(causal_graph)
    root_nodes = set(root)  # constant-time membership test inside the loop
    # Visit parents before their children. Plain `causal_graph.nodes` follows
    # insertion order, so a graph built as DiGraph([('X2', 'X3'), ('X1', 'X2')])
    # would try to generate X2 before its parent X1 has any data.
    for node in nx.topological_sort(causal_graph):
        if node in root_nodes:
            generate_root_data(node, node_data, basic_time, n_data, time_propagation)
        else:
            generate_child_data(node, node_parents[node], node_data, n_data, time_propagation)
    return node_data

In [8]:
# Self-contained end-to-end run: rebuild the same chain graph and settings
# as the earlier cells, then generate data for every node in one call.
features = ('X1', 'X2', 'X3', 'X4', 'X5')
causal_graph = nx.DiGraph([('X1', 'X2'), 
                          ('X2', 'X3'),
                          ('X3', 'X4'),
                          ('X4', 'X5')])
time_propagation = 1 # in seconds
# A new wall-clock start time is taken here, so the timestamps differ from
# those of the manual walkthrough in the earlier cells.
basic_time = datetime.datetime.now().timestamp()
n_data = 100
# Replaces the node_data store that the manual walkthrough had filled.
node_data = generate_data(causal_graph, basic_time, n_data, time_propagation)

root node : X1
child node X2 of parents ['X1']
child node X3 of parents ['X2']
child node X4 of parents ['X3']
child node X5 of parents ['X4']


In [9]:
# First timestamp of every node, one per line, in chain order: each node
# starts one time_propagation after its parent.
print(*(node_data[name]['data']['ts'][0]
        for name in ('X1', 'X2', 'X3', 'X4', 'X5')),
      sep='\n')

1675703887.170319
1675703888.170319
1675703889.170319
1675703890.170319
1675703891.170319


In [10]:
# pd.merge defaults to an inner join, so only timestamps present in both
# frames are kept. X2 starts one time_propagation after X1, which leaves
# 99 of the 100 rows in common.
df = pd.merge(node_data['X1']['data'], node_data['X2']['data'], on='ts')

In [11]:
# (rows, columns) of the X1/X2 join: the two value columns plus 'ts'.
df.shape

(99, 3)

In [12]:
def merge_node_data(node_data, causal_graph):
    """Join the per-node frames of a graph into one DataFrame keyed on ``'ts'``.

    Frames are merged left to right in ``causal_graph.nodes`` order with
    ``pd.merge(..., on='ts')``, i.e. an inner join: only timestamps present
    in every node's frame are kept.

    Parameters
    ----------
    node_data : dict
        Node name -> dict holding that node's DataFrame under ``'data'``.
        Each frame must contain a ``'ts'`` column.
    causal_graph
        Object exposing ``nodes`` (iterable of node names), e.g. a
        ``networkx.DiGraph``.

    Returns
    -------
    pandas.DataFrame
        The merged frame. For a single-node graph this is that node's own
        frame (no copy is made).

    Raises
    ------
    ValueError
        If ``causal_graph`` has no nodes (previously an UnboundLocalError).
    KeyError
        If a node of the graph has no entry in ``node_data``.
    """
    frames = [node_data[node]['data'] for node in causal_graph.nodes]
    if not frames:
        raise ValueError('cannot merge node data: causal_graph has no nodes')
    df = frames[0]
    for frame in frames[1:]:
        df = pd.merge(df, frame, on='ts')
    return df
# Join all five node frames on 'ts'; this rebinds `df`, replacing the
# pairwise X1/X2 join built in an earlier cell.
df = merge_node_data(node_data, causal_graph)

In [13]:
# (rows, columns) of the full join: five value columns plus 'ts'. Rows are
# the timestamps shared by all nodes.
df.shape

(96, 6)

In [14]:
# First timestamp that survived the join; it matches the first timestamp
# printed for X5, the node that starts last.
df.iloc[0]['ts']

1675703891.170319