In [1]:
##
## script_5_concept_embedding.ipynb
## Builds up a process for handling concept-singleton, singleton-concept, and concept-concept interactions,
## using all-numerical data.
## ==> The estimated graph can be cyclic: 1) check the DAG assertion, 2) look for the DAG check in utils.py.
## ==> I don't think the low performance in the concept case is related to the choice of w_threshold,
##     but rather to the data-generating process; compare against concepts_fake with dimensions [1, 1, 1, ..., 1]:
##     concepts (W_notears) vs. concepts_fake (W_notears2).
## ==> Made CustomNN simpler; also using an additive noise model to generate the data.
## ==> Normalized the adjacency matrix for concepts.
##

In [2]:
## import

import torch
import numpy as np
import notears.utils as ut
import pandas as pd
from notears import nonlinear
import igraph as ig
import torch.nn as nn
from scipy.special import expit as sigmoid

In [3]:
## variables

# Largest embedding size a concept may get: the sizes are sampled with
# torch.randint(1, concept_dim_limit+1, ...), whose upper bound is exclusive.
concept_dim_limit=10
# Factor applied to every generated concept block (parent signal + noise)
# in get_generated_data before it is stored.
param_scale = 4
# Width of the hidden layer of the network built in get_transformed_data.
hidden_unit = 100

In [4]:
## functions and classes

def get_transformed_data(dim_input, dim_output, data_input, hidden_dim=None):
    """Map `data_input` through a freshly initialised one-hidden-layer MLP.

    A new network ``Linear(dim_input, hidden_dim) -> Sigmoid ->
    Linear(hidden_dim, dim_output)`` is created on every call using PyTorch's
    default random initialisation and is discarded afterwards, so each call
    applies a different random non-linear map.

    Args:
        dim_input: Number of input features (size of the last axis of
            `data_input`).
        dim_output: Number of output features.
        data_input: Tensor whose last axis has size `dim_input`.
        hidden_dim: Width of the hidden layer. When omitted, the notebook-level
            `hidden_unit` setting is used, which is the previous behaviour.

    Returns:
        Tensor with the same leading axes as `data_input` and a last axis of
        size `dim_output`. It is still attached to the autograd graph of the
        temporary network; callers that only need values should `.detach()` it.
    """
    if hidden_dim is None:
        # Looked up at call time so edits to the config cell take effect
        # without re-running this definition.
        hidden_dim = hidden_unit

    # The two Linear layers are constructed in the same order as before, so the
    # random draws used for their initialisation are unchanged.
    random_mlp = nn.Sequential(
        nn.Linear(dim_input, hidden_dim),
        nn.Sigmoid(),
        nn.Linear(hidden_dim, dim_output),
    )
    return random_mlp(data_input)

In [5]:
## setting up environment params

torch.set_default_dtype(torch.double)
np.set_printoptions(precision=3)
ut.set_random_seed(123)
# The concept sizes (torch.randint), the random network weights (nn.Linear
# initialisation) and the noise (torch.randn) all come from torch's global RNG,
# so seed it explicitly to make a Restart & Run All reproducible.
# NOTE(review): ut.set_random_seed is not visible here; if it already seeds
# torch with the same value, this extra call changes nothing.
torch.manual_seed(123)

In [6]:
## (1) generate a causal graph at random as you have done already (eg. x1->x2) but this time it will represent relations between concepts, 

# Experiment settings, one per line.
n = 200              # number of rows sampled for each dataset
dcon = 5             # number of concepts (nodes of the causal graph)
s0 = 9               # second argument of ut.simulate_dag
graph_type = 'ER'    # third argument of ut.simulate_dag
sem_type = 'mlp'     # only referenced by the commented-out call below

# Ground-truth concept-level adjacency matrix, also written to disk.
B_true = ut.simulate_dag(dcon, s0, graph_type)
np.savetxt('datasets/W_true.csv', B_true, delimiter=',')
# X = ut.simulate_nonlinear_sem(B_true, n, sem_type)
# np.savetxt('datasets/X.csv', X, delimiter=',')
print(B_true)

[[0. 0. 0. 0. 0.]
 [1. 0. 1. 1. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0.]]


In [8]:
## (2) randomly decide the embedding size of your concepts (eg. dim(x1)=3, dim(x2)=5). 

# Embedding size of each concept, drawn from 1..concept_dim_limit
# (the upper bound of torch.randint is exclusive).
concepts = torch.randint(1, concept_dim_limit+1, (dcon,)).tolist()
print(concepts)

# Baseline with every concept reduced to one dimension: randint(1, 2) can only
# return 1. The call is kept so the RNG usage of this cell stays the same.
concepts_fake = torch.randint(1, 2, (dcon,)).tolist()
print(concepts_fake)


[5, 1, 10, 4, 3]
[1, 1, 1, 1, 1]


In [9]:
## (3) generate a list of neural networks for each effect concept (eg. nn_x2 (input=3, output=5, weights=random), 
## (4) generate data for x1 = randn(dim=3) for x2 = nn_x2(x1) + eps*rand(dim=5)

def get_generated_data(con, B_true, dcon, n):
    """Sample `n` rows of concept-level data from an additive-noise model.

    Concepts are visited in a topological order of `B_true`. A concept without
    parents is pure standard-normal noise. A concept with parents is the output
    of a freshly initialised random network (get_transformed_data) applied to
    the column-wise concatenation of its parents' blocks, plus standard-normal
    noise. Every block is multiplied by `param_scale` before it is stored, so
    children are computed from the scaled parent values.

    Args:
        con: Per-concept embedding sizes; concept ``i`` occupies ``con[i]``
            columns.
        B_true: ``(dcon, dcon)`` NumPy adjacency matrix in which a non-zero
            ``B_true[i, j]`` marks concept ``i`` as a parent of concept ``j``
            (a binary matrix in this notebook).
        dcon: Number of concepts.
        n: Number of rows to sample.

    Returns:
        NumPy array of shape ``(n, sum(con[:dcon]))`` with the concept blocks
        laid out side by side in concept-index order. The result is a NumPy
        array for every `dcon`, including ``dcon == 1``, where the previous
        implementation returned a torch tensor.
    """
    G = ig.Graph.Adjacency(B_true.tolist())
    ordered_vertices = G.topological_sorting()
    assert len(ordered_vertices) == dcon

    dict_new_x = {}
    for v_index in ordered_vertices:
        # Parents are read from the non-zero entries of the column, so the
        # "has parents" test and the parent list can no longer disagree (the
        # old code tested the column sum but collected only entries == 1).
        parent_ids = [int(p) for p in np.flatnonzero(B_true[:, v_index])]

        if parent_ids:
            data_input = torch.cat([dict_new_x[p] for p in parent_ids], dim=1)
            dim_input = sum(con[p] for p in parent_ids)
            # The network is created before the noise is drawn, matching the
            # original order of random draws.
            portion_parent = get_transformed_data(
                dim_input, con[v_index], data_input
            ).detach()
        else:
            portion_parent = None

        portion_noise = torch.randn(n, con[v_index])
        if portion_parent is None:
            new_x = portion_noise
        else:
            new_x = portion_parent + portion_noise
        dict_new_x[v_index] = param_scale * new_x

    # Single concatenation instead of repeated np.hstack calls; all blocks are
    # detached tensors, so the conversion to NumPy is direct.
    return torch.cat([dict_new_x[i] for i in range(dcon)], dim=1).numpy()

# Both datasets share the graph and the sample count; only the per-concept
# widths differ (sampled sizes vs. all ones).
Xf = get_generated_data(con=concepts, B_true=B_true, dcon=dcon, n=n)
Xf_fake = get_generated_data(con=concepts_fake, B_true=B_true, dcon=dcon, n=n)

In [10]:
# Shape and first two rows of the concept-embedded dataset.
print(Xf.shape, Xf[:2], sep='\n')

(200, 23)
[[ 1.352  4.414 -3.455  2.089  8.818  6.098 -5.84  -0.488  6.432 -3.375
   3.737 -3.957 12.039 -4.294 -2.784 -2.027 -4.482  1.182 -5.938  0.493
   4.919 -2.257  8.217]
 [-2.362 -6.079 -2.877  6.747  0.736 -1.661 -2.521  1.046  1.02  -0.583
   0.196 10.539  5.156  2.429 -0.486 -0.069  1.713 -5.668  2.336 -3.646
  -1.075  4.082 -0.94 ]]


In [11]:
# Shape and first two rows of the one-dimension-per-concept dataset.
print(Xf_fake.shape, Xf_fake[:2], sep='\n')

(200, 5)
[[-8.978  1.068 -2.203  7.569  1.623]
 [-0.37   1.554 -0.171  3.762 -3.282]]


In [12]:
## properties of Xf, Xf_fake

# Shape and total absolute magnitude of each dataset. ndarray.sum() replaces
# the nested builtin sum(sum(...)), which iterated over the array in Python;
# the printed totals may differ from older outputs in the last few digits
# because the floating-point summation order differs.
print(Xf.shape)
print(np.abs(Xf).sum())
print()
print(Xf_fake.shape)
print(np.abs(Xf_fake).sum())

(200, 23)
15396.716116979487

(200, 5)
3189.324408157695


In [13]:
## induced bias

# dcon x dcon matrix filled with NaN, passed to NotearsMLP as `mask`.
# NOTE(review): presumably NaN means "no prior constraint on this edge" -
# confirm against the NotearsMLP implementation.
mask = np.full((dcon, dcon), np.nan)
mask

array([[nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan]])

In [14]:
## forming metainfo related to concepts


print(Xf.shape)
# Xf is two-dimensional: rows are samples, columns are the flattened concepts.
n, dflat = Xf.shape

# The concept sizes must account for every concept and every column.
assert len(concepts) == dcon
assert sum(concepts) == dflat

metainfo = {
    'dflat': dflat,
    'dcon': dcon,
    'concepts': concepts,
}

print(n, metainfo)

(200, 23)
200 {'dflat': 23, 'dcon': 5, 'concepts': [5, 1, 10, 4, 3]}


In [1]:
## initializing model and running the optimization
# NOTE(review): the output saved with this cell is a NameError and its
# execution count is 1, i.e. it was last run on a fresh kernel before the
# import cell. Re-run the notebook top to bottom so that W_notears, used by
# the evaluation cell, comes from this cell.

model = nonlinear.NotearsMLP(
    dims=[dflat, 10, 1],
    bias=True,
    mask=mask,
    w_threshold=0.3,
    learned_model=None,
    metainfo=metainfo,
)

W_notears, res = nonlinear.notears_nonlinear(
    model,
    Xf,
    lambda1=0.01,
    lambda2=0.01,
)

NameError: name 'nonlinear' is not defined

In [89]:
## performance evaluation

# assert ut.is_dag(W_notears)
# Persist the estimate, then score its non-zero pattern against the true graph.
np.savetxt('outputs/W_notears.csv', W_notears, delimiter=',')
acc = ut.count_accuracy(B_true, W_notears != 0)
print(acc, B_true, W_notears, sep='\n')

{'fdr': 0.3, 'tpr': 0.7777777777777778, 'fpr': 3.0, 'shd': 3, 'nnz': 10}
[[0. 0. 0. 0. 0.]
 [1. 0. 1. 1. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0.]]
[[0.    0.    2.107 0.    1.742]
 [1.962 0.    1.679 3.145 1.565]
 [0.    0.    0.    0.    0.886]
 [1.722 0.    1.685 0.    1.699]
 [0.    0.    0.    0.    0.   ]]


In [90]:
## initializing model and running the optimization

# Baseline run: every concept is a single column, so the flattened width
# equals the number of concepts.
metainfo2 = {
    'dflat': dcon,
    'dcon': dcon,
    'concepts': [1] * dcon,
}

model2 = nonlinear.NotearsMLP(
    dims=[dcon, 10, 1],
    bias=True,
    mask=mask,
    w_threshold=0.3,
    learned_model=None,
    metainfo=metainfo2,
)

W_notears2, res2 = nonlinear.notears_nonlinear(
    model2,
    Xf_fake,
    lambda1=0.01,
    lambda2=0.01,
)

-----iteration no:  0
-----iteration no:  1
-----iteration no:  2
-----iteration no:  3
-----iteration no:  4
-----iteration no:  5
-----iteration no:  6
-----iteration no:  7
-----iteration no:  8
-----iteration no:  9
-----iteration no:  10
-----iteration no:  11


In [91]:
## performance evaluation

# assert ut.is_dag(W_notears2)
# Persist the baseline estimate, then score its non-zero pattern against the true graph.
np.savetxt('outputs/W_notears2.csv', W_notears2, delimiter=',')
acc2 = ut.count_accuracy(B_true, W_notears2 != 0)
print(acc2, B_true, W_notears2, sep='\n')

{'fdr': 0.5, 'tpr': 0.5555555555555556, 'fpr': 5.0, 'shd': 5, 'nnz': 10}
[[0. 0. 0. 0. 0.]
 [1. 0. 1. 1. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0.]]
[[0.    4.423 2.854 0.    4.132]
 [0.    0.    3.313 0.    3.531]
 [0.    0.    0.    0.    5.135]
 [2.831 3.35  2.524 0.    2.331]
 [0.    0.    0.    0.    0.   ]]
