In [1]:
import numpy as np
import pandas as pd

from dowhy import gcm
from dowhy.gcm import MedianCDFQuantileScorer

from ocular.causal_model import dag

In [2]:
# Pairs handed to dag.CausalGraph as its first argument; presumably directed
# (parent, child) edges forming the chain X1 -> X2 -> ... -> X5 — TODO confirm
# against dag.CausalGraph.
nodes = [('X1', 'X2'), ('X2', 'X3'), ('X3', 'X4'), ('X4', 'X5')]
features = ['X1', 'X2', 'X3', 'X4', 'X5']
target_node = 'X5'
error_threshold_change = 0.05  # not referenced by any cell visible in this notebook excerpt
causal_graph = dag.CausalGraph(nodes, features)

# Build a fresh set rather than calling .update() on
# causal_graph.ancestors[target_node]: the in-place update would add the target
# to the collection stored on the graph object itself, and that change would
# persist for every later cell that reads causal_graph.ancestors.
all_ancestors_of_node = set(causal_graph.ancestors[target_node]) | {target_node}

# Keep the graph's node ordering, restricted to the target and its ancestors.
sorted_nodes = [node for node in causal_graph.sorted_nodes if node in all_ancestors_of_node]

In [3]:
def train_models(data_sample, causal_graph, sorted_nodes):
    """Fit one model per node and return them keyed by node.

    A node with no parents in ``causal_graph.parents`` gets a
    ``gcm.ScipyDistribution`` fitted on the node's own column. Every other
    node gets a ``gcm.AdditiveNoiseModel`` wrapping
    ``gcm.ml.create_linear_regressor()``, fitted with the parent columns as
    ``X`` and the node's column as ``Y``.

    Args:
        data_sample: Table indexed by node name (and by the node's parents
            collection); each selection must expose ``to_numpy()``. The
            notebook passes a pandas DataFrame.
        causal_graph: Object whose ``parents`` attribute maps each node to its
            parents.
        sorted_nodes: Iterable of nodes to fit, processed in the given order.

    Returns:
        dict mapping each node in ``sorted_nodes`` to its fitted model; an
        empty dict when ``sorted_nodes`` is empty.
    """
    models = {}
    for node in sorted_nodes:
        parents = causal_graph.parents[node]
        if not parents:
            # Root node: fit a distribution on the node's own values.
            models[node] = gcm.ScipyDistribution()
            X = data_sample[node].to_numpy()
            # Diagnostic only: reports whether the root column is all finite
            # before fitting; it does not stop the fit when the check fails.
            print(f"Test X for finiteness {np.isfinite(X).all()}")
            models[node].fit(X=X)
            print('done fitting gcm.ScipyDistribution()')
        else:
            # Non-root node: regress the node on its parents.
            fm_model = gcm.AdditiveNoiseModel(gcm.ml.create_linear_regressor())
            fm_model.fit(X=data_sample[parents].to_numpy(), Y=data_sample[node].to_numpy())
            models[node] = fm_model

    # Hand the fitted models back; without this the caller's
    # `models = train_models(...)` would bind None.
    return models

In [4]:
# Run 1: read the first sample file (path is relative to the notebook's working
# directory) and fit the per-node models on it. The value returned by
# train_models is bound to `models`.
data_sample = pd.read_csv('test_nslide3_1.csv')
models = train_models(data_sample, causal_graph, sorted_nodes)

Test X for finiteness True
self.find_suitable_continuous_distribution(X)
x_neighbourhood input (151, 1)
y_neighbourhood input(151, 1)
finally found the suitable distribution
done fitting gcm.ScipyDistribution()
Fitting emperical model
Fitting emperical model
Fitting emperical model
Fitting emperical model


In [5]:
# Run 2: same procedure on the second sample file; rebinds `data_sample` and
# `models` from the previous cell.
# NOTE(review): the output recorded under this cell ends with
# "ValueError: Input X contains infinity or a value too large for dtype('float64')."
# even though the root-node finiteness check printed True. The recorded output
# is truncated, so the failing fit step cannot be identified from it — TODO
# investigate before relying on this cell.
data_sample = pd.read_csv('test_nslide3_2.csv')
models = train_models(data_sample, causal_graph, sorted_nodes)

Test X for finiteness True
self.find_suitable_continuous_distribution(X)
x_neighbourhood input (301, 1)
y_neighbourhood input(301, 1)
x_neighbourhood input (301, 1)
y_neighbourhood input(301, 1)
x_neighbourhood input (301, 1)
y_neighbourhood input(301, 1)
x_neighbourhood input (301, 1)
y_neighbourhood input(301, 1)
x_neighbourhood input (301, 1)
y_neighbourhood input(301, 1)
x_neighbourhood input (301, 1)
y_neighbourhood input(301, 1)
x_neighbourhood input (301, 1)
y_neighbourhood input(301, 1)
x_neighbourhood input (301, 1)
y_neighbourhood input(301, 1)
x_neighbourhood input (301, 1)
y_neighbourhood input(301, 1)
x_neighbourhood input (301, 1)
y_neighbourhood input(301, 1)
x_neighbourhood input (301, 1)
y_neighbourhood input(301, 1)
x_neighbourhood input (301, 1)
y_neighbourhood input(301, 1)
x_neighbourhood input (301, 1)
y_neighbourhood input(301, 1)
x_neighbourhood input (301, 1)
y_neighbourhood input(301, 1)
x_neighbourhood input (301, 1)
y_neighbourhood input(301, 1)
x_neighbourh

ValueError: Input X contains infinity or a value too large for dtype('float64').

In [None]:
# Run 3: same procedure on the third sample file; rebinds `data_sample` and
# `models`. This cell has no execution count in the saved notebook, so no
# output is recorded for it.
data_sample = pd.read_csv('test_nslide3_3.csv')
models = train_models(data_sample, causal_graph, sorted_nodes)