In [1]:
###################################
## install and import
###################################

In [2]:
# !pip install python-igraph
from notears.locally_connected import LocallyConnected
from notears.lbfgsb_scipy import LBFGSBScipy
from notears.trace_expm import trace_expm
import torch
import torch.nn as nn
import numpy as np
import math
import notears.utils as ut
from sklearn.preprocessing import StandardScaler
from scipy.optimize import minimize
import pandas as pd
import os

  from pandas.core import (


In [3]:
###################################
## class
###################################

In [4]:
class NotearsMLP(nn.Module):
    def __init__(self, dims, bias=True):
        super(NotearsMLP, self).__init__()
        assert len(dims) >= 2
        assert dims[-1] == 1
        d = dims[0]
        self.dims = dims

        # fc1: variable splitting for l1
        self.fc1_pos = nn.Linear(d, d * dims[1], bias=bias)
        self.fc1_neg = nn.Linear(d, d * dims[1], bias=bias)
        self.fc1_pos.weight.bounds = self._bounds()
        self.fc1_neg.weight.bounds = self._bounds()

        # fc2: Locally connected layers with BatchNorm
        layers = []
        for l in range(len(dims) - 2):
            layers.append(LocallyConnected(d, dims[l + 1], dims[l + 2], bias=bias))
        self.fc2 = nn.ModuleList(layers)

    def _bounds(self):
        d = self.dims[0]
        bounds = []
        for j in range(d):
            for m in range(self.dims[1]):
                for i in range(d):
                    if i == j:
                        bound = (0, 0)
                    else:
                        bound = (0, None)
                    bounds.append(bound)
        return bounds

    def forward(self, x):  # [n, d] -> [n, d]
        # Apply fc1 and normalize
        x = self.fc1_pos(x) - self.fc1_neg(x)  # [n, d * m1]
        x = x.view(-1, self.dims[0], self.dims[1])  # Reshape to [n, d, m1]

        # Apply fc2 layers and normalization
        for fc in self.fc2:
            x = torch.sigmoid(x)  # Activation
            x = fc(x)  # Locally connected layer output [n, d, m2]

        x = x.squeeze(dim=2)  # [n, d]
        return x

    def h_func(self):
        """Constrain 2-norm-squared of fc1 weights along m1 dim to be a DAG"""
        d = self.dims[0]
        fc1_weight = self.fc1_pos.weight - self.fc1_neg.weight  # [j * m1, i]
        fc1_weight = fc1_weight.view(d, -1, d)  # [j, m1, i]
        A = torch.sum(fc1_weight * fc1_weight, dim=1).t()  # [i, j]
        h = trace_expm(A) - d  # (Zheng et al. 2018)
        # A different formulation, slightly faster at the cost of numerical stability
        # M = torch.eye(d) + A / d  # (Yu et al. 2019)
        # E = torch.matrix_power(M, d - 1)
        # h = (E.t() * M).sum() - d
        return h

    def l2_reg(self):
        """Take 2-norm-squared of all parameters"""
        reg = 0.
        fc1_weight = self.fc1_pos.weight - self.fc1_neg.weight  # [j * m1, i]
        reg += torch.sum(fc1_weight ** 2)
        for fc in self.fc2:
            reg += torch.sum(fc.weight ** 2)
        return reg

    def fc1_l1_reg(self):
        """Take l1 norm of fc1 weight"""
        reg = torch.sum(self.fc1_pos.weight + self.fc1_neg.weight)
        return reg

    @torch.no_grad()
    def fc1_to_adj(self) -> np.ndarray:  # [j * m1, i] -> [i, j]
        """Get W from fc1 weights, take 2-norm over m1 dim"""
        d = self.dims[0]
        fc1_weight = self.fc1_pos.weight - self.fc1_neg.weight  # [j * m1, i]
        fc1_weight = fc1_weight.view(d, -1, d)  # [j, m1, i]
        A = torch.sum(fc1_weight * fc1_weight, dim=1).t()  # [i, j]
        W = torch.sqrt(A)  # [i, j]
        W = W.cpu().detach().numpy()  # [i, j]
        return W

class NotearsMLP2(nn.Module):
    def __init__(self, dims, bias=True):
        super(NotearsMLP2, self).__init__()
        assert len(dims) >= 2
        assert dims[-1] == 1
        d = dims[0]
        self.dims = dims

        # fc1: variable splitting for l1
        self.fc1_pos = nn.Linear(d, d * dims[1], bias=bias)
        self.fc1_neg = nn.Linear(d, d * dims[1], bias=bias)
        self.fc1_pos.weight.bounds = self._bounds()
        self.fc1_neg.weight.bounds = self._bounds()

        # Layer normalization after fc1
        self.ln1 = nn.LayerNorm(d * dims[1])  # Normalize across feature dimensions

        # fc2: Locally connected layers with BatchNorm
        layers = []
        for l in range(len(dims) - 2):
            layers.append(LocallyConnected(d, dims[l + 1], dims[l + 2], bias=bias))
        self.fc2 = nn.ModuleList(layers)
        self._apply_bounds()                

    def _bounds(self):
        d = self.dims[0]
        bounds = []
        for j in range(d):
            for m in range(self.dims[1]):
                for i in range(d):
                    if i == j:
                        bound = (0, 0)
                    else:
                        bound = (0, None)
                    bounds.append(bound)
        return bounds
    
    def _apply_bounds(self):
        """Clip weights of fc1_pos and fc1_neg to stay within bounds."""
        with torch.no_grad():
            for layer, bounds in [(self.fc1_pos, self.fc1_pos.weight.bounds), (self.fc1_neg, self.fc1_neg.weight.bounds)]:
                # Apply bounds per weight
                for idx, (min_bound, max_bound) in enumerate(bounds):
                    if max_bound is not None:  # Clip only if max_bound is defined
                        layer.weight.data.view(-1)[idx].clamp_(min_bound, max_bound)    

    def forward(self, x):  # [n, d] -> [n, d]
        # Apply fc1 and normalize
        x = self.fc1_pos(x) - self.fc1_neg(x)  # [n, d * m1]
        # self._apply_bounds()  # Enforce bounds on fc1 weights
        x = self.ln1(x)  # Apply LayerNorm after fc1
        x = x.view(-1, self.dims[0], self.dims[1])  # Reshape to [n, d, m1]

        # Apply fc2 layers and normalization
        for fc in self.fc2:
            x = torch.sigmoid(x)  # Activation
            x = fc(x)  # Locally connected layer output [n, d, m2]

        x = x.squeeze(dim=2)  # [n, d]
        return x

    def h_func(self):
        """Constrain 2-norm-squared of fc1 weights along m1 dim to be a DAG"""
        d = self.dims[0]
        fc1_weight = self.fc1_pos.weight - self.fc1_neg.weight  # [j * m1, i]
        fc1_weight = fc1_weight.view(d, -1, d)  # [j, m1, i]
        A = torch.sum(fc1_weight * fc1_weight, dim=1).t()  # [i, j]
        h = trace_expm(A) - d  # (Zheng et al. 2018)
        # A different formulation, slightly faster at the cost of numerical stability
        # M = torch.eye(d) + A / d  # (Yu et al. 2019)
        # E = torch.matrix_power(M, d - 1)
        # h = (E.t() * M).sum() - d
        return h

    def l2_reg(self):
        """Take 2-norm-squared of all parameters"""
        reg = 0.
        fc1_weight = self.fc1_pos.weight - self.fc1_neg.weight  # [j * m1, i]
        reg += torch.sum(fc1_weight ** 2)
        for fc in self.fc2:
            reg += torch.sum(fc.weight ** 2)
        return reg

    def fc1_l1_reg(self):
        """Take l1 norm of fc1 weight"""
        reg = torch.sum(self.fc1_pos.weight + self.fc1_neg.weight)
        return reg

    @torch.no_grad()
    def fc1_to_adj(self) -> np.ndarray:  # [j * m1, i] -> [i, j]
        """Get W from fc1 weights, take 2-norm over m1 dim"""
        d = self.dims[0]
        fc1_weight = self.fc1_pos.weight - self.fc1_neg.weight  # [j * m1, i]
        fc1_weight = fc1_weight.view(d, -1, d)  # [j, m1, i]
        A = torch.sum(fc1_weight * fc1_weight, dim=1).t()  # [i, j]
        W = torch.sqrt(A)  # [i, j]
        W = W.cpu().detach().numpy()  # [i, j]
        return W

class NotearsSobolev(nn.Module):
    def __init__(self, d, k):
        """d: num variables k: num expansion of each variable"""
        super(NotearsSobolev, self).__init__()
        self.d, self.k = d, k
        self.fc1_pos = nn.Linear(d * k, d, bias=False)  # ik -> j
        self.fc1_neg = nn.Linear(d * k, d, bias=False)
        self.fc1_pos.weight.bounds = self._bounds()
        self.fc1_neg.weight.bounds = self._bounds()
        nn.init.zeros_(self.fc1_pos.weight)
        nn.init.zeros_(self.fc1_neg.weight)
        self.l2_reg_store = None

    def _bounds(self):
        # weight shape [j, ik]
        bounds = []
        for j in range(self.d):
            for i in range(self.d):
                for _ in range(self.k):
                    if i == j:
                        bound = (0, 0)
                    else:
                        bound = (0, None)
                    bounds.append(bound)
        return bounds

    def sobolev_basis(self, x):  # [n, d] -> [n, dk]
        seq = []
        for kk in range(self.k):
            mu = 2.0 / (2 * kk + 1) / math.pi  # sobolev basis
            psi = mu * torch.sin(x / mu)
            seq.append(psi)  # [n, d] * k
        bases = torch.stack(seq, dim=2)  # [n, d, k]
        bases = bases.view(-1, self.d * self.k)  # [n, dk]
        return bases

    def forward(self, x):  # [n, d] -> [n, d]
        bases = self.sobolev_basis(x)  # [n, dk]
        x = self.fc1_pos(bases) - self.fc1_neg(bases)  # [n, d]
        self.l2_reg_store = torch.sum(x ** 2) / x.shape[0]
        return x

    def h_func(self):
        fc1_weight = self.fc1_pos.weight - self.fc1_neg.weight  # [j, ik]
        fc1_weight = fc1_weight.view(self.d, self.d, self.k)  # [j, i, k]
        A = torch.sum(fc1_weight * fc1_weight, dim=2).t()  # [i, j]
        h = trace_expm(A) - d  # (Zheng et al. 2018)
        # A different formulation, slightly faster at the cost of numerical stability
        # M = torch.eye(self.d) + A / self.d  # (Yu et al. 2019)
        # E = torch.matrix_power(M, self.d - 1)
        # h = (E.t() * M).sum() - self.d
        return h

    def l2_reg(self):
        reg = self.l2_reg_store
        return reg

    def fc1_l1_reg(self):
        reg = torch.sum(self.fc1_pos.weight + self.fc1_neg.weight)
        return reg

    @torch.no_grad()
    def fc1_to_adj(self) -> np.ndarray:
        fc1_weight = self.fc1_pos.weight - self.fc1_neg.weight  # [j, ik]
        fc1_weight = fc1_weight.view(self.d, self.d, self.k)  # [j, i, k]
        A = torch.sum(fc1_weight * fc1_weight, dim=2).t()  # [i, j]
        W = torch.sqrt(A)  # [i, j]
        W = W.cpu().detach().numpy()  # [i, j]
        return W

In [5]:
###################################
## function
###################################

In [6]:
def squared_loss(output, target):
    n = target.shape[0]
    loss = 0.5 / n * torch.sum((output - target) ** 2)
    return loss

def notears_nonlinear(model: nn.Module,
                      X: np.ndarray,
                      lambda1: float = 0.,
                      lambda2: float = 0.,
                      max_iter: int = 100,
                      h_tol: float = 1e-8,
                      rho_max: float = 1e+16,
                      w_threshold: float = 0.3):
    model.w_threshold = w_threshold
    rho, alpha, h = 1.0, 0.0, np.inf
    for _ in range(max_iter):
        rho, alpha, h = dual_ascent_step(model, X, lambda1, lambda2,
                                         rho, alpha, h, rho_max)
        if h <= h_tol or rho >= rho_max:
            break
    W_est = model.fc1_to_adj()
    W_est[np.abs(W_est) < w_threshold] = 0
    return W_est

def notears_nonlinear_with_loss_std(model: nn.Module,
                                    X_list: list,  
                                    lambda1: float = 0.0,
                                    lambda2: float = 0.0,
                                    max_iter: int = 100,
                                    h_tol: float = 1e-8,
                                    rho_max: float = 1e+16,
                                    w_threshold: float = 0.3,
                                    std_lambda: float = 1.0,
                                    lr: float = 1e-3,
                                    s0: int = 1
                                   ):  
    model.w_threshold = w_threshold    
    rho, alpha, h = 1.0, 0.0, np.inf
    for iter_no in range(max_iter):
        rho, alpha, h = dual_ascent_step_with_loss_std(
            model, X_list, lambda1, lambda2, std_lambda, rho, alpha, h, rho_max, iter_no, lr=lr)
        if h <= h_tol or rho >= rho_max:
            break
    W_est = model.fc1_to_adj()
    # W_est[np.abs(W_est) < w_threshold] = 0
    flat_W = np.abs(W_est).flatten()
    w_threshold = np.partition(flat_W, -s0)[-s0]  # s0-th largest
    mask = np.abs(W_est) < w_threshold
    W_est[mask] = 0
    return W_est

def dual_ascent_step(model, X, lambda1, lambda2, rho, alpha, h, rho_max):
    """Perform one step of dual ascent in augmented Lagrangian."""
    h_new = None
    optimizer = LBFGSBScipy(model.parameters())
    X_torch = torch.from_numpy(X)
    while rho < rho_max:
        def closure():
            optimizer.zero_grad()
            X_hat = model(X_torch)
            loss = squared_loss(X_hat, X_torch)
            h_val = model.h_func()
            penalty = 0.5 * rho * h_val * h_val + alpha * h_val
            l2_reg = 0.5 * lambda2 * model.l2_reg()
            l1_reg = lambda1 * model.fc1_l1_reg()
            primal_obj = loss + penalty + l2_reg + l1_reg
            # primal_obj = loss + penalty
            primal_obj.backward()
            return primal_obj
        optimizer.step(closure)  # NOTE: updates model in-place
        with torch.no_grad():
            h_new = model.h_func().item()
        if h_new > 0.25 * h:
            rho *= 10
        else:
            break
    alpha += rho * h_new
    return rho, alpha, h_new

def dual_ascent_step_with_loss_std(model, X_list, lambda1, lambda2, std_lambda, rho, alpha, h, rho_max, iter_no, lr=0.001):
    """Perform one step of dual ascent in augmented Lagrangian, with consistent gradient-based learning (CGLearn) for each predictor."""

    crp = std_lambda  # Consistency ratio percentile
    h_new = None
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)  # Using Adam optimizer instead of LBFGSBScipy
    # X_tensors = [torch.from_numpy(X).float().to(torch.double).to(model.fc1_pos.weight.device) for X in X_list]  # Ensure tensors are on the correct device
    X_tensors = [torch.from_numpy(X).to(torch.double).to(model.fc1_pos.weight.device) for X in X_list]
    
    while rho < rho_max:
        print('------------------')                
        print('iter_no', iter_no)
        print('W')
        print(model.fc1_to_adj())
        feature_l2_norms_pos_per_predictor = [[] for _ in range(model.dims[0])]  # Separate L2 norms for fc1_pos
        feature_l2_norms_neg_per_predictor = [[] for _ in range(model.dims[0])]  # Separate L2 norms for fc1_neg
        list_all_grads = []  # To store gradients for the entire model (all layers)

        # Step 1: Compute loss and gradients for each environment
        print()
        print()
        print()
        for X in X_tensors:
            # Loss calculation
            X_hat = model(X)
            loss_mse = squared_loss(X_hat, X)
            h_val = model.h_func()
            penalty = 0.5 * rho * h_val * h_val + alpha * h_val
            l2_reg = 0.5 * lambda2 * model.l2_reg()
            l1_reg = lambda1 * model.fc1_l1_reg()
            final_loss = (loss_mse + penalty + l2_reg + l1_reg) / len(X_tensors)  # Averaged across environments

            # Grad calculation
            optimizer.zero_grad()
            final_loss.backward()

            # Collect all gradients for the entire model (for all layers)
            grads = []
            for param in model.parameters():
                grads.append(param.grad.clone().flatten())
            all_grads = torch.cat(grads)  # Flatten and concatenate all gradients for all parameters
            list_all_grads.append(all_grads)  # Store gradients for this dataset
            
            print('#######################################################################################################################')
            print(model.fc1_pos.weight.grad.t())
            # print(model.fc1_neg.weight.grad.t())                
            print('#######################################################################################################################')            

            # Compute and store gradients per predictor for the first hidden layer
            for i in range(model.dims[0]):  # Iterate over each predictor (input feature)
                # Get the gradients for the first hidden layer (fc1_pos and fc1_neg)                
                pos_g = model.fc1_pos.weight.grad.view(model.dims[0], -1, model.dims[0])[i, :, :]  # Shape [5, 10]
                neg_g = model.fc1_neg.weight.grad.view(model.dims[0], -1, model.dims[0])[i, :, :]  # Shape [5, 10]
                                
                # Compute L2 norms of the gradients across the hidden neurons for each feature                
                l2_norms_pos = torch.norm(pos_g, dim=0, p=2).detach()  # L2 norm across hidden neurons, result shape: [10]
                l2_norms_neg = torch.norm(neg_g, dim=0, p=2).detach()  # Same for fc1_neg
                
                # Store L2 norms for each predictor
                feature_l2_norms_pos_per_predictor[i].append(l2_norms_pos)
                feature_l2_norms_neg_per_predictor[i].append(l2_norms_neg)
                
        # Step 2: Compute mean of all gradients for the entire model across all datasets
        list_all_grads = torch.stack(list_all_grads)  # Stack all gradients for all datasets
        mean_all_grads = torch.mean(list_all_grads, dim=0)  # Mean gradient for the entire model

        # Step 3: Gradient consistency calculations (CGLearn) per predictor
        consistency_masks_pos = []
        consistency_masks_neg = []
            
        for i in range(model.dims[0]):  # Iterate over each predictor (input feature)
            
            # Consistency check for fc1_pos weights of predictor i
            feature_l2_norms_pos = torch.stack(feature_l2_norms_pos_per_predictor[i])  # Stack L2 norms across datasets
            mean_norms_pos = torch.mean(feature_l2_norms_pos, dim=0)  # Mean of L2 norms for each feature
            std_norms_pos = torch.std(feature_l2_norms_pos, dim=0) + 1e-8  # Standard deviation of L2 norms for each feature
            cr_pos = torch.abs(mean_norms_pos) / std_norms_pos  # Consistency ratio
            cr_pos[i] = 0                                        
            if crp == 101:
                consistency_mask_pos = torch.zeros_like(cr_pos, device=model.fc1_pos.weight.device)
            elif crp == -1:
                a = torch.tensor(cr_pos, dtype=torch.float32)
                b = (a - a.min()) / (a.max() - a.min() + 1e-8)
                consistency_mask_pos = b * (len(b) / b.sum())  # Scale so sum(crp) = len(crp)
            else:
                ct_pos = np.percentile(cr_pos.cpu().numpy(), crp)  # Threshold based on the percentile                
                consistency_mask_pos = torch.where(
                    cr_pos >= ct_pos, 
                    torch.tensor(1., device=model.fc1_pos.weight.device), 
                    torch.tensor(0., device=model.fc1_pos.weight.device)
                )         
            consistency_mask_pos[i] = 0
            consistency_masks_pos.append(consistency_mask_pos.repeat(model.dims[1], 1))  # Broadcast the mask
            
            # Consistency check for fc1_neg weights of predictor i
            feature_l2_norms_neg = torch.stack(feature_l2_norms_neg_per_predictor[i])  # Same for fc1_neg
            mean_norms_neg = torch.mean(feature_l2_norms_neg, dim=0)
            std_norms_neg = torch.std(feature_l2_norms_neg, dim=0) + 1e-8
            cr_neg = torch.abs(mean_norms_neg) / std_norms_neg  # Consistency ratio
            cr_neg[i] = 0
            if crp == 101:
                consistency_mask_neg = torch.zeros_like(cr_neg, device=model.fc1_neg.weight.device)
            elif crp == -1:
                a = torch.tensor(cr_neg, dtype=torch.float32)
                b = (a - a.min()) / (a.max() - a.min() + 1e-8)
                consistency_mask_neg = b * (len(b) / b.sum())  # Scale so sum(crp) = len(crp)
            else:
                ct_neg = np.percentile(cr_neg.cpu().numpy(), crp)
                consistency_mask_neg = torch.where(
                    cr_neg >= ct_neg, 
                    torch.tensor(1., device=model.fc1_neg.weight.device), 
                    torch.tensor(0., device=model.fc1_neg.weight.device)
                )
            consistency_mask_neg[i] = 0                
            consistency_masks_neg.append(consistency_mask_neg.repeat(model.dims[1], 1))

            
        # Step 4: Apply the masks to gradients before updating parameters
        cmp = torch.stack(consistency_masks_pos).view(-1, model.dims[0]) 
        ## after stack, dim=0 is for target, which is correct cause the list element is for each target     
        cmn = torch.stack(consistency_masks_neg).view(-1, model.dims[0])        
        ## after stack, dim=0 is for target, which is correct cause the list element is for each target     
        ## need to be shaped as [j, m1, i] == [target, interimNeurons, input] as the weights are in the shape [j, m1, i]
        print('cmp')
        print(cmp.t())
        print('cmn')
        print(cmn.t())
        start_index = 0
        for name, param in model.named_parameters():
            param_numel = param.numel()
            mean_grad = mean_all_grads[start_index: start_index + param_numel].view_as(param)
            if 'fc1_pos.weight' in name:
                param.grad = mean_grad * cmp       
            elif 'fc1_neg.weight' in name:
                param.grad = mean_grad * cmn    
            else:
                param.grad = mean_grad
            start_index += param_numel

        # Step 5: Now that the gradients have been modified, perform the optimization step
        optimizer.step()
        model._apply_bounds()        

        # Step 6: Check convergence and adjust rho
        with torch.no_grad():
            h_new = model.h_func().item()
        if h_new > 0.25 * h:
            rho *= 10
        else:
            break
            
        # ww = model.fc1_to_adj()
        # ww[np.abs(ww) < model.w_threshold] = 0
        # print(ww)

    alpha += rho * h_new
    return rho, alpha, h_new


In [7]:
##
## todo1: create small run, monitor, and check the whole process validity
##
exp_id = '08_cNewEnvSmallRun'
directories = ['inputs', 'outputs']
for directory in directories:
    path = os.path.join(directory, exp_id)
    if not os.path.exists(path):
        os.makedirs(path)
        print(f"Created folder: {path}")
    else:
        print(f"Folder already exists: {path}")

torch.set_default_dtype(torch.double)
np.set_printoptions(precision=3)
list_res = []
column_titles = ["ntrials", "sample", "node", "edge", "graph", "SEM", "hiddenU", "l1", "l2", "wthresh", "lr", "cthresh", "fdr", "shd", "tpr", "nnz"]
df = pd.DataFrame(list_res, columns=column_titles)
df.to_csv(f'outputs/{exp_id}/result.csv', index=False, encoding="utf-8-sig")  
##############################################################
########## set parameters for experiments
ntrials = 1
list_opt = [ ## + check number of trials
    (40, 3, 2/3, 'ER', 'mlp', 1, 0, 0, 0.3, 0.1),
]
##############################################################
for opt in list_opt:
    n, d, _s0, graph_type, sem_type, nh, l1, l2, wt, lr = opt
    s0 = int(d*_s0)
    list_fdr0, list_shd0, list_tpr0, list_nnz0 = [], [], [], [] ## notears 
    list_fdr1, list_shd1, list_tpr1, list_nnz1 = [], [], [], [] ## notears + invariance 50%
    list_fdr2, list_shd2, list_tpr2, list_nnz2 = [], [], [], [] ## notears + invariance continuous    
    for tn in range(ntrials):
        ##
        ## initialize
        ##
        ut.set_random_seed(123+tn)
        B_true = ut.simulate_dag(d, s0, graph_type)
        np.savetxt(f'inputs/{exp_id}/W_true.csv', B_true, delimiter=',')
        # noise_scale = np.ones(d)
        # X = ut.simulate_nonlinear_sem(B_true, n, sem_type, noise_scale)
        # np.savetxt('X.csv', X, delimiter=',')
        noise_scales = [0.2, 1, 2, 5, 10]
        for i, noise_scale_value in enumerate(noise_scales):
            noise_scale = np.full(d, noise_scale_value)  
            X = ut.simulate_nonlinear_sem(B_true, n, sem_type, noise_scale)
            np.savetxt(f'inputs/{exp_id}/X_{i}.csv', X, delimiter=',')  
        X_0 = np.loadtxt(f'inputs/{exp_id}/X_0.csv', delimiter=',')
        X_1 = np.loadtxt(f'inputs/{exp_id}/X_1.csv', delimiter=',')
        X_2 = np.loadtxt(f'inputs/{exp_id}/X_2.csv', delimiter=',')
        X_3 = np.loadtxt(f'inputs/{exp_id}/X_3.csv', delimiter=',')
        X_4 = np.loadtxt(f'inputs/{exp_id}/X_4.csv', delimiter=',')
        X_list = [X_0, X_1, X_2, X_3, X_4]  # List of datasets
        scaler = StandardScaler()
        X_list_standardized = [scaler.fit_transform(X) for X in X_list] ## separate fit_transform cause each dataset with different scale (diff noise scale)
        X_combined = np.vstack([X for X in X_list_standardized]) 
        ## inotears
        ## 0
        print('==================================================================================================================================')
        print('Take All')
        print('==================================================================================================================================')        
        model = NotearsMLP2(dims=[d, nh, 1], bias=True)
        W_est = notears_nonlinear_with_loss_std(model, X_list_standardized, lambda1=l1, lambda2=l2, std_lambda=0, w_threshold=wt, lr=lr, s0=s0)
        # assert ut.is_dag(W_est)
        np.savetxt(f'outputs/{exp_id}/W_est0.csv', W_est, delimiter=',')
        acc = ut.count_accuracy(B_true, W_est != 0)
        print('W_est')
        print(W_est)
        print('acc', acc)
        list_fdr0.append(acc['fdr'])
        list_shd0.append(acc['shd'])
        list_tpr0.append(acc['tpr'])
        list_nnz0.append(acc['nnz'])        
        ## 1
        print('==================================================================================================================================')
        print('Take Selective')               
        print('==================================================================================================================================')                
        model = NotearsMLP2(dims=[d, nh, 1], bias=True)
        W_est = notears_nonlinear_with_loss_std(model, X_list_standardized, lambda1=l1, lambda2=l2, std_lambda=-1, w_threshold=wt, lr=lr, s0=s0)
        # assert ut.is_dag(W_est)
        np.savetxt(f'outputs/{exp_id}/W_est1.csv', W_est, delimiter=',')
        acc = ut.count_accuracy(B_true, W_est != 0)
        print('W_est')
        print(W_est)
        print('acc', acc)        
        list_fdr1.append(acc['fdr'])
        list_shd1.append(acc['shd'])
        list_tpr1.append(acc['tpr'])
        list_nnz1.append(acc['nnz'])        
        ## 2
        print('==================================================================================================================================')
        print('Take None')        
        print('==================================================================================================================================')                
        # model = NotearsMLP2(dims=[d, nh, 1], bias=True)
        # W_est = notears_nonlinear_with_loss_std(model, X_list_standardized, lambda1=l1, lambda2=l2, std_lambda=101, w_threshold=wt, lr=lr, s0=s0)
        # # assert ut.is_dag(W_est)
        # np.savetxt(f'outputs/{exp_id}/W_est2.csv', W_est, delimiter=',')
        # acc = ut.count_accuracy(B_true, W_est != 0)
        # print('W_est')
        # print(W_est)
        # print('acc', acc)        
        # list_fdr2.append(acc['fdr'])
        # list_shd2.append(acc['shd'])
        # list_tpr2.append(acc['tpr'])
        # list_nnz2.append(acc['nnz'])        
        
    res = (
        str(ntrials), str(n), str(d), str(s0), str(graph_type), str(sem_type), str(nh), str(l1), str(l2), str(wt), str(lr), str(0), 
        f'{np.mean(list_fdr0):.4f} ± {np.std(list_fdr0):.4f}', f'{np.mean(list_shd0):.4f} ± {np.std(list_shd0):.4f}', 
        f'{np.mean(list_tpr0):.4f} ± {np.std(list_tpr0):.4f}', f'{np.mean(list_nnz0):.4f} ± {np.std(list_nnz0):.4f}'
    )
    list_res.append(res)    
    res = (
        str(ntrials), str(n), str(d), str(s0), str(graph_type), str(sem_type), str(nh), str(l1), str(l2), str(wt), str(lr), str(50), 
        f'{np.mean(list_fdr1):.4f} ± {np.std(list_fdr1):.4f}', f'{np.mean(list_shd1):.4f} ± {np.std(list_shd1):.4f}', 
        f'{np.mean(list_tpr1):.4f} ± {np.std(list_tpr1):.4f}', f'{np.mean(list_nnz1):.4f} ± {np.std(list_nnz1):.4f}'
    )
    list_res.append(res)    
    res = (
        str(ntrials), str(n), str(d), str(s0), str(graph_type), str(sem_type), str(nh), str(l1), str(l2), str(wt), str(lr), str(100), 
        f'{np.mean(list_fdr2):.4f} ± {np.std(list_fdr2):.4f}', f'{np.mean(list_shd2):.4f} ± {np.std(list_shd2):.4f}', 
        f'{np.mean(list_tpr2):.4f} ± {np.std(list_tpr2):.4f}', f'{np.mean(list_nnz2):.4f} ± {np.std(list_nnz2):.4f}'
    )
    list_res.append(res)    
df = pd.DataFrame(list_res, columns=column_titles)
df.to_csv(f'outputs/{exp_id}/result.csv', index=False, encoding="utf-8-sig")  
print('done')

Created folder: inputs/08_cNewEnvSmallRun
Created folder: outputs/08_cNewEnvSmallRun
Take All
------------------
iter_no 0
W
[[0.    0.097 0.329]
 [0.108 0.    0.817]
 [0.51  0.497 0.   ]]



#######################################################################################################################
tensor([[ 0.0208, -0.0172,  0.0026],
        [-0.0208,  0.0174, -0.0128],
        [-0.0251,  0.0433,  0.0042]])
#######################################################################################################################
#######################################################################################################################
tensor([[ 0.0223, -0.0174,  0.0013],
        [-0.0212,  0.0198, -0.0149],
        [-0.0281,  0.0463,  0.0042]])
#######################################################################################################################
########################################################################################################

  a = torch.tensor(cr_pos, dtype=torch.float32)
  a = torch.tensor(cr_neg, dtype=torch.float32)


cmp
tensor([[0.0000, 0.9889, 1.0608],
        [1.9631, 0.0000, 1.9392],
        [1.0369, 2.0111, 0.0000]], dtype=torch.float32)
cmn
tensor([[0.0000, 0.9889, 1.0608],
        [1.9631, 0.0000, 1.9392],
        [1.0369, 2.0111, 0.0000]], dtype=torch.float32)
------------------
iter_no 4
W
[[0.    0.065 0.173]
 [0.492 0.    0.751]
 [0.194 0.037 0.   ]]



#######################################################################################################################
tensor([[-2.6795e-02, -1.9653e+00, -7.8699e-01],
        [-2.3342e-01, -1.9412e-02, -1.3380e-01],
        [ 7.4790e-01,  2.5133e+00, -1.5868e-03]])
#######################################################################################################################
#######################################################################################################################
tensor([[-0.0311, -1.9716, -0.7764],
        [-0.2291, -0.0188, -0.1387],
        [ 0.7573,  2.5101, -0.0079]])
##########################

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)


In [10]:
# #######################################################################################################################
# tensor([[-0.0176,  0.0203, -0.0027],
#         [ 0.0202, -0.0243,  0.0035],
#         [ 0.0191, -0.0252,  0.0046]])
# #######################################################################################################################
# #######################################################################################################################
# tensor([[-0.0166,  0.0161,  0.0005],
#         [ 0.0208, -0.0217,  0.0003],
#         [ 0.0185, -0.0195, -0.0005]])
# #######################################################################################################################
# #######################################################################################################################
# tensor([[-0.0344,  0.0664, -0.0320],
#         [ 0.0350, -0.0763,  0.0407],
#         [-0.0362,  0.0720, -0.0373]])
# #######################################################################################################################
# #######################################################################################################################
# tensor([[-0.0191,  0.0041,  0.0151],
#         [-0.0012, -0.0271,  0.0278],
#         [ 0.0154, -0.0014, -0.0154]])
# #######################################################################################################################
# #######################################################################################################################
# tensor([[-0.0166,  0.0149,  0.0018],
#         [ 0.0196, -0.0533,  0.0331],
#         [ 0.0146, -0.0001, -0.0160]])
# #######################################################################################################################

In [11]:
matrices = [
    torch.tensor([[-0.0176,  0.0203, -0.0027],
        [ 0.0202, -0.0243,  0.0035],
        [ 0.0191, -0.0252,  0.0046]]),

    torch.tensor([[-0.0166,  0.0161,  0.0005],
        [ 0.0208, -0.0217,  0.0003],
        [ 0.0185, -0.0195, -0.0005]]),

    torch.tensor([[-0.0344,  0.0664, -0.0320],
        [ 0.0350, -0.0763,  0.0407],
        [-0.0362,  0.0720, -0.0373]]),

    torch.tensor([[-0.0191,  0.0041,  0.0151],
        [-0.0012, -0.0271,  0.0278],
        [ 0.0154, -0.0014, -0.0154]]),

    torch.tensor([[-0.0166,  0.0149,  0.0018],
        [ 0.0196, -0.0533,  0.0331],
        [ 0.0146, -0.0001, -0.0160]])
]

tensor_stack = torch.stack(matrices)
mean = torch.mean(tensor_stack, dim=0)
std = torch.std(tensor_stack, dim=0)
result = torch.abs(mean) / std
print("Abs(Mean) / Std:\n", result)

Abs(Mean) / Std:
 tensor([[2.7311, 1.0046, 0.1995],
        [1.4599, 1.7128, 1.1624],
        [0.2636, 0.1325, 0.7895]])
