In [1]:
from sklearn.model_selection import cross_validate, LeaveOneGroupOut
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
import networkx as nx


In [2]:
# Load the edge-descriptor table. Later cells read the columns
# 'graph_id', 'edge_source', 'edge_dest' and 'is_causal' from it.
df = pd.read_csv('descriptors.csv')

In [3]:
# Build one directed graph per graph_id found in the descriptor table.
# Causal rows contribute an edge; non-causal rows only register both
# endpoints as nodes so that they still appear in the graph.
graphs = {}
for _, row in df.iterrows():
    graph_id = row['graph_id']

    # Fetch the graph for this id, creating it on first sight. An empty
    # DiGraph is falsy, hence the explicit `is None` test.
    G = graphs.get(graph_id)
    if G is None:
        G = graphs[graph_id] = nx.DiGraph()

    causal = row['is_causal']
    endpoints = (row['edge_source'], row['edge_dest'])
    if causal:
        G.add_edge(*endpoints)
    else:
        for endpoint in endpoints:
            G.add_node(endpoint)


In [4]:
from graphviz import Digraph

# Render every graph in `graphs` to pics/graph_<key>.png using Graphviz's
# hierarchical "dot" layout.
for key, graph in graphs.items():
    G_dot = Digraph(engine="dot", format='png')

    # Nodes are added explicitly so that nodes without any edge are drawn too.
    for node in graph.nodes():
        G_dot.node(str(node))
    for tail, head in graph.edges():
        G_dot.edge(str(tail), str(head))

    # render() appends ".<format>" to the filename it receives, so the name
    # passed here must NOT already end in ".png": doing so produced
    # "graph_<key>.png.png" plus a DOT source file misleadingly named
    # "graph_<key>.png". cleanup=True deletes the intermediate DOT source
    # once the image has been written.
    G_dot.render(f"pics/graph_{key}", view=False, cleanup=True)

In [5]:
# Read the pickled DAG collection from dags.pkl into `dags`.
# NOTE(review): unpickling can execute arbitrary code; only load files
# that come from a trusted source.
import pickle
with open('dags.pkl', 'rb') as dags_file:
    dags = pickle.load(dags_file)

    

In [6]:
from graphviz import Digraph

# Render every DAG loaded from dags.pkl to pics/<index>_graph.png using
# Graphviz's hierarchical "dot" layout; `key` is the position in `dags`.
for key, graph in enumerate(dags):
    G_dot = Digraph(engine="dot", format='png')

    # Nodes are added explicitly so that nodes without any edge are drawn too.
    for node in graph.nodes():
        G_dot.node(str(node))
    for tail, head in graph.edges():
        G_dot.edge(str(tail), str(head))

    # render() appends ".<format>" to the filename it receives, so the name
    # passed here must NOT already end in ".png": doing so produced
    # "<index>_graph.png.png" plus a DOT source file misleadingly named
    # "<index>_graph.png". cleanup=True deletes the intermediate DOT source
    # once the image has been written.
    G_dot.render(f"pics/{key}_graph", view=False, cleanup=True)

# Checking the Observations

# Reconstructing the formula

In [2]:
import pickle

In [3]:
import numpy as np
# Scratch check: draw one sample uniformly between -1 and 1 and round it to
# two decimals. No seed is set, so the displayed value changes on every run.
np.round(np.random.uniform(low=-1, high=1),2)

0.35

In [10]:
# Read the pickled observation tables from observations.pkl.
# NOTE(review): unpickling can execute arbitrary code; only load files
# that come from a trusted source.
with open('observations.pkl', 'rb') as observations_file:
    observations = pickle.load(observations_file)
    

In [11]:
# Extend the module search path with ../d2c (relative to the kernel's working
# directory) before importing `utils` — presumably that is where the module
# providing dag_to_formula lives; verify if the notebook is moved.
import sys
sys.path.append("../d2c")
from utils import dag_to_formula

In [12]:
# Read the pickled, updated DAG collection from updated_dags.pkl.
# NOTE(review): unpickling can execute arbitrary code; only load files
# that come from a trusted source.
with open('updated_dags.pkl', 'rb') as updated_dags_file:
    updated_dags = pickle.load(updated_dags_file)

In [16]:
# Take the last entry of updated_dags and print the textual formula that
# dag_to_formula builds for it (one line per variable in the output below,
# with terms such as "2_t-1" — presumably variable 2 at lag 1; confirm
# against dag_to_formula).
dag = updated_dags[-1]


formula = dag_to_formula(dag)
print(formula)


2 = 0.65238*2_t-1 + 0.08363*2_t-2 + 0.58096*2_t-3 + 0.08854
1 = 0.02613*2_t-1 + 0.02613*2_t-2 + 0.02613*2_t-3 + 0.67594*1_t-1 + -0.67203*1_t-2 + 0.30246*1_t-3 + 0.00184
0 = 0.91798*2_t-1 + 0.91798*2_t-2 + 0.91798*2_t-3 + 0.09556*1_t-1 + 0.09556*1_t-2 + 0.09556*1_t-3 + 0.07465*0_t-1 + -0.76375*0_t-2 + 0.22227*0_t-3 + -0.03458



In [14]:
# Inspect the observation table at index 3.
# NOTE(review): in the displayed output columns 0 and 1 grow to ~1e14 and
# ~1e11 by the last rows, so this series appears to diverge — check whether
# the generating process is meant to be stable.
observations[3]

Unnamed: 0,0,1,2
0,2.365900e-01,-4.346700e-01,0.95201
1,3.461400e-01,-1.189400e-01,-0.42063
2,1.940000e-02,-7.750800e-01,-0.54609
3,4.716400e-01,3.927700e-01,0.89932
4,2.001800e-01,7.058100e-01,-1.21582
...,...,...,...
297,3.968097e+14,-1.342809e+10,-0.02748
298,4.465771e+14,-1.240734e+11,-0.23560
299,5.026030e+14,-4.736599e+10,0.01246
300,5.656598e+14,1.236509e+11,-0.19584


In [15]:
# Print the observation tables at indices 0-9 for a quick visual check.
# NOTE(review): the printed tables do not all have the same length (302 rows
# vs 300 rows), and column 1 of the first table swings past +/-100 near the
# end — worth confirming both are expected.
for i in range(10):
    print(observations[i])

           0          1        2
0   -0.99844    0.98442  0.23496
1    0.22331   -0.98587 -0.95388
2    0.04955   -0.20028 -0.90667
3   -0.04738    0.22680  1.11966
4    0.01777   -1.37269  0.03034
..       ...        ...      ...
297  0.02254  -77.75814  0.03542
298  0.03577  124.85397  0.04834
299  0.00735 -136.93057  0.00017
300  0.04248  108.63246  0.02837
301  0.07918  -47.75059  0.06796

[302 rows x 3 columns]
           0        1        2
0    0.22426  0.18966 -0.49878
1    0.14986  0.04463  0.35355
2   -0.36073 -0.06368 -0.36801
3    0.58093 -0.13256  0.25970
4   -0.71243 -0.17278 -0.29328
..       ...      ...      ...
295 -0.02055 -0.21930 -0.05235
296  0.03794 -0.22651 -0.02127
297 -0.02542 -0.22325 -0.05980
298  0.07010 -0.20111 -0.02780
299 -0.03134 -0.19849 -0.05313

[300 rows x 3 columns]
           0        1        2
0   -0.43488  0.83368 -0.58376
1    0.17557  0.56312  0.31458
2   -0.07940 -0.11166 -0.13506
3    0.01852 -0.06045  0.08543
4   -0.01752 -0.16200 -0.0118

In [88]:
import numpy as np
# Scratch check: 7x7 matrix of uniform random values rescaled from rand()'s
# [0, 1) range to [-1, 1). No seed is set, so the output is not reproducible.
np.random.rand(7, 7)*2 -1 

array([[-0.4116396 ,  0.59918992, -0.05521327, -0.96344891, -0.28722392,
         0.41178005, -0.46637332],
       [-0.36274806, -0.94912366,  0.70995491, -0.34068037, -0.55014107,
         0.73657334, -0.39524943],
       [ 0.27294033, -0.52211236, -0.0163236 ,  0.14364735, -0.09822959,
         0.61521106,  0.60795329],
       [-0.73870347, -0.73798428, -0.84602041,  0.67001708,  0.04804186,
        -0.16917231,  0.89916076],
       [ 0.92477042,  0.64188493, -0.99546754, -0.45671148, -0.28165316,
         0.71274447, -0.58608868],
       [-0.00413877, -0.65492847,  0.5151952 ,  0.22311102, -0.85539062,
         0.86236875,  0.26217668],
       [ 0.31001345,  0.73421258,  0.43562432,  0.59459153, -0.36976323,
         0.09532587, -0.83274745]])

In [18]:
from statsmodels.tsa.api import VAR
import numpy as np
# Load the series from a plain-text file into a NumPy array.
# presumably 3 variables observed over 150 time steps, going by the file
# name — TODO confirm that rows are time steps, as VAR expects.
data = np.loadtxt('linear-VAR_N-3_T-150_0194.txt')
# Fit the VAR model, letting the AIC pick the lag order among at most 5 lags.
model = VAR(data)
results = model.fit(maxlags=5, ic='aic')
# Print the estimated coefficient matrix (4 rows x 3 columns in the output
# below — presumably an intercept row plus a single selected lag; confirm
# via results.k_ar).
print(results.params)


[[-0.20441909 -0.15274518 -0.1583341 ]
 [ 0.5377656  -0.08527162 -0.06662733]
 [-0.00156988  0.20105087  0.12849242]
 [ 0.05452649  0.05243838  0.06637332]]
