In [1]:
import pickle
import pandas as pd
import networkx as nx

from d2c import D2C

In [2]:
def create_lagged_multiple_ts(observations, maxlags):
    """Build a lag-augmented copy of every time series in ``observations``.

    For each element, the original columns are followed by the columns of the
    same data shifted by 1, 2, ..., ``maxlags`` rows (``obs.shift(i)``). The
    resulting columns are renamed to consecutive integers starting at 0, and
    every row containing a NaN (which includes the leading rows left empty by
    the shifts) is dropped.

    Args:
        observations: iterable of pandas DataFrames, one per time series.
            The inputs themselves are not modified.
        maxlags: largest lag to append; values below 1 append no lagged
            columns.

    Returns:
        A list with one lagged DataFrame per input, in the same order.
    """
    lagged_observations = []
    for obs in observations:
        # Gather lag 0 (the data itself) plus every shifted copy and
        # concatenate once, instead of re-copying a growing frame per lag.
        frames = [obs] + [obs.shift(lag) for lag in range(1, maxlags + 1)]
        lagged = pd.concat(frames, axis=1)
        lagged.columns = list(range(len(lagged.columns)))
        lagged_observations.append(lagged.dropna())
    return lagged_observations

def rename_dags(dags, n_variables):
    """Relabel the nodes of every DAG with the integer indexing used by the descriptors.

    Each node label is a string following the ``x_(t-y)`` convention: the text
    before the first ``_`` is parsed as the variable index ``x`` and the text
    right after the first ``-`` as the lag ``y``. The node is renamed to
    ``x + y * n_variables``.

    Args:
        dags: iterable of networkx graphs whose node labels are strings in the
            format above.
        n_variables: number of variables per time step, used as the stride
            between consecutive lags.

    Returns:
        A list of the graphs returned by ``nx.relabel_nodes``, in input order.
    """
    def _flat_index(label):
        # Variable index comes before the underscore, lag after the dash.
        variable = int(label.split('_')[0])
        lag = int(label.split('-')[1])
        return variable + lag * n_variables

    return [
        nx.relabel_nodes(graph, {label: _flat_index(label) for label in graph.nodes()})
        for graph in dags
    ]

In [3]:
# Run configuration for the cells below.
n_jobs = 1  # forwarded to D2C as n_jobs
output_folder = "."  # NOTE(review): not referenced by the visible cells — confirm it is still needed
couples_to_consider_per_dag = 60  # third positional argument of D2C
MB_size = 5  # fourth positional argument of D2C; presumably a Markov-blanket size — confirm
n_variables = 3  # stride between lags when relabelling DAG nodes; also forwarded to D2C
maxlags = 3  # number of lagged copies appended per time series; also forwarded to D2C


In [4]:
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load data_1.pkl if it comes from a trusted source.
# The pickle unpacks into four items; only the first two (the observations
# and the DAGs) are kept, the remaining two are discarded.
with open("data_1.pkl", "rb") as f:
    observations, dags, _, _ = pickle.load(f)


In [5]:
# Bring both inputs to integer indexing: DAG nodes are relabelled to integers
# and the lagged observation columns are renumbered from 0.
updated_dags = rename_dags(dags=dags, n_variables=n_variables)
lagged_observations = create_lagged_multiple_ts(observations=observations, maxlags=maxlags)

In [6]:
# Assemble the D2C object from the relabelled DAGs and their lagged observations.
d2c = D2C(
    updated_dags,
    lagged_observations,
    couples_to_consider_per_dag,
    MB_size,
    n_jobs=n_jobs,
    n_variables=n_variables,
    maxlags=maxlags,
)

In [7]:
# NOTE(review): presumably this is the step that computes the descriptors
# exposed as d2c.x_y (read in the next cell) — confirm against the D2C implementation.
d2c.initialize()


In [8]:
# Wrap the contents of d2c.x_y in a DataFrame so they can be inspected and exported.
descriptors = pd.DataFrame(d2c.x_y)

In [9]:
# Show the descriptor table; the rendered output is a truncated view of the frame.
descriptors

Unnamed: 0,graph_id,edge_source,edge_dest,is_causal,com_cau,coeff_cause,coeff_eff,cau_eff,eff_cau,eff_cau_mbeff,...,n_features,n_features/n_samples,kurtosis_ca,kurtosis_ef,skewness_ca,skewness_ef,HOC_1_2,HOC_2_1,HOC_1_3,HOC_3_1
0,0,4,0,1,0.172278,0.050907,0.018015,0.172278,0.175198,0.009390,...,12,0.048387,-0.633793,-0.925742,-0.360350,-0.370486,0.103747,-0.119193,0.690380,0.811335
1,0,0,4,0,0.175198,0.018015,0.050907,0.175198,0.172278,0.002704,...,12,0.048387,-0.925742,-0.633793,-0.370486,-0.360350,-0.119193,0.103747,0.811335,0.690380
2,0,10,4,1,0.176855,-0.019412,-0.044834,0.176855,0.176671,0.001397,...,12,0.048387,-0.634494,-0.633793,-0.354062,-0.360350,0.496598,-0.373555,-1.043212,-0.659181
3,0,4,10,0,0.176671,-0.044834,-0.019412,0.176671,0.176855,0.003569,...,12,0.048387,-0.633793,-0.634494,-0.360350,-0.354062,-0.373555,0.496598,-0.659181,-1.043212
4,0,10,3,1,0.202578,-0.087630,-0.088469,0.202578,0.206857,0.030049,...,12,0.048387,-0.634494,-0.925713,-0.354062,-0.370491,0.521268,-0.427882,-1.071776,-0.709436
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2751,49,11,0,0,0.609064,-0.087871,-0.085667,0.609064,0.608718,0.041143,...,12,0.048387,-0.800242,-0.910174,-0.389572,-0.326444,0.128464,0.061249,-1.385600,-1.328335
2752,49,11,2,0,0.568490,-0.025798,-0.065976,0.568490,0.567158,0.000000,...,12,0.048387,-0.800242,-0.772510,-0.389572,-0.403605,0.184222,0.037226,-1.383457,-1.245392
2753,49,9,2,0,0.571026,-0.005501,-0.068316,0.571026,0.568832,0.000000,...,12,0.048387,-0.770937,-0.772510,-0.383032,-0.403605,0.170180,0.058966,-1.352113,-1.274958
2754,49,11,1,0,0.601037,-0.071871,-0.088844,0.601037,0.601188,0.022689,...,12,0.048387,-0.800242,-0.797324,-0.389572,-0.391177,0.174268,0.045717,-1.395730,-1.324532


In [10]:
# Round every numeric column of the descriptor table to 3 decimals (in place),
# then write the table to descriptors.csv without the index column.
is_numeric = pd.api.types.is_numeric_dtype
for name in descriptors.columns:
    column = descriptors[name]
    if not is_numeric(column):
        continue  # leave non-numeric columns untouched
    descriptors[name] = column.round(3)

descriptors.to_csv('descriptors.csv', index=False)