In [2]:
!pip install scikit-learn
!pip install networkx
!pip install joblib
!pip install scipy
!pip install numpy
!pip install matplotlib
!pip install scipy
!pip install pandas
!pip install tqdm igraph



In [3]:
from time import perf_counter
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from joblib import Parallel, delayed
import os

import dag_utils as utils
from baselines import Nonneg_dagma, MetMulDagma
from TopoGreedy import TopoGreedy
from baselines import colide_ev
from baselines import DAGMA_linear
from baselines import notears_linear


# Relative path string; presumably the folder where results are stored —
# it is not referenced by the cells visible here, TODO confirm against the saving cell.
PATH = './results/samples/'
# NOTE(review): looks like a switch for persisting results under PATH — not used in the visible cells, confirm.
SAVE = True 
# Seed handed to NumPy's global random number generator below.
SEED = 20
# Passed as `n_jobs` to joblib.Parallel; run_samples_exp also uses it to
# decide which graphs print progress (those with g % N_CPUS == 0).
N_CPUS = 1
# Seed the global NumPy RNG once, at configuration time.
np.random.seed(SEED)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Experiment function
def get_lamb_value(n_nodes, n_samples, times=1):
    """Return sqrt(log(n_nodes) / n_samples) scaled by `times`.

    Parameters
    ----------
    n_nodes : number of nodes (argument of the logarithm).
    n_samples : number of samples (denominator under the square root).
    times : multiplicative factor applied to the square root (default 1).
    """
    base_rate = np.sqrt(np.log(n_nodes) / n_samples)
    return base_rate * times

def run_samples_exp(g, data_p, N_samples, exps, thr=.2, verb=False):
    """Fit and score every experiment in `exps` for each sample size.

    For each entry of `N_samples` a fresh dataset is generated with
    `utils.simulate_sem`, every experiment is fitted on it, and the estimate
    is scored against the simulated ground truth.

    Parameters
    ----------
    g : int
        Index of the current graph/realization. Only used for logging:
        progress is printed when `g % N_CPUS == 0`.
    data_p : dict
        Keyword arguments forwarded to `utils.simulate_sem`. A copy is made
        and its 'n_samples' entry is set per sample size; 'n_nodes' is read
        when a regularization weight is adapted.
    N_samples : sequence
        Sample sizes to evaluate.
    exps : sequence of dict
        Experiment descriptions. Keys read here: 'model' (class, or the
        `notears_linear` function), 'args' (kwargs for fitting), and the
        optional 'init' (constructor kwargs), 'standarize', 'adapt_lamb'
        and 'leg' (label, only needed when `verb` is true).
    thr : float
        Threshold passed to `utils.to_bin` for both true and estimated weights.
    verb : bool
        If true, print per-experiment metrics (subject to the `g` check above).

    Returns
    -------
    tuple of 8 numpy arrays, each of shape (len(N_samples), len(exps)):
        shd, tpr, fdr, fscore, err, acyc, runtime, dag_count.
    """
    shd, tpr, fdr, fscore, err, acyc, runtime, dag_count = [
        np.zeros((len(N_samples), len(exps))) for _ in range(8)
    ]

    for i, n_samples in enumerate(N_samples):
        if g % N_CPUS == 0:
            print(f'Graph: {g+1}, samples: {n_samples}')

        # Create data (copy so the caller's parameter dict is not mutated)
        data_p_aux = data_p.copy()
        data_p_aux['n_samples'] = n_samples

        W_true, _, X = utils.simulate_sem(**data_p_aux)
        X_std = utils.standarize(X)
        W_true_bin = utils.to_bin(W_true, thr)
        norm_W_true = np.linalg.norm(W_true)

        for j, exp in enumerate(exps):
            X_aux = X_std if exp.get('standarize') else X

            arg_aux = exp['args'].copy()
            if exp.get('adapt_lamb'):
                # Rescale the first regularization weight found; the value
                # given in 'args' acts as the multiplier of the adaptive rate.
                for key in ('lamb', 'lambda1'):
                    if key in arg_aux:
                        arg_aux[key] = get_lamb_value(data_p['n_nodes'], n_samples, arg_aux[key])
                        break

            if exp['model'] is notears_linear:
                # notears_linear is a plain function, so there is no model
                # object. Reset `model` explicitly so the acyclicity check
                # below neither raises NameError nor reuses the model left
                # over from a previous experiment.
                model = None
                t_init = perf_counter()
                W_est = notears_linear(X_aux, **arg_aux)
                t_end = perf_counter()
            else:
                model = exp['model'](**exp['init']) if 'init' in exp else exp['model']()
                t_init = perf_counter()
                model.fit(X_aux, **arg_aux)
                t_end = perf_counter()
                W_est = model.W_est

            # A diverged run (NaNs) is scored as the empty graph.
            if np.isnan(W_est).any():
                W_est = np.zeros_like(W_est)
                W_est_bin = np.zeros_like(W_est)
            else:
                W_est_bin = utils.to_bin(W_est, thr)

            # TopoGreedy estimates are transposed before scoring — presumably it
            # uses the opposite adjacency orientation; TODO confirm.
            if exp['model'] is TopoGreedy:
                W_est_bin = W_est_bin.T
                W_est = W_est.T

            shd[i, j], tpr[i, j], fdr[i, j] = utils.count_accuracy(W_true_bin, W_est_bin)
            fscore[i, j] = f1_score(W_true_bin.flatten(), W_est_bin.flatten())
            err[i, j] = utils.compute_norm_sq_err(W_true, W_est, norm_W_true)
            # Estimators without a `dagness` method (including notears_linear)
            # get the fallback value 1.
            acyc[i, j] = model.dagness(W_est) if hasattr(model, 'dagness') else 1
            runtime[i, j] = t_end - t_init
            dag_count[i, j] += 1 if utils.is_dag(W_est_bin) else 0

            if verb and (g % N_CPUS == 0):
                print(f'\t\t-{exp["leg"]}: shd {shd[i,j]}  -  err: {err[i,j]:.3f}  -  time: {runtime[i,j]:.3f}')

    return shd, tpr, fdr, fscore, err, acyc, runtime, dag_count

In [5]:
# Number of independent graph realizations and the sample sizes swept per graph.
n_dags = 50
N_samples = [1000, 5000, 10000, 20000, 50000, 100000]

# DEFINE EXPERIMENTS
# Each entry is read by run_samples_exp ('model', 'args', optional 'init',
# 'standarize', 'adapt_lamb', 'leg'); 'fmt' is not used in the cells shown here.
Exps = [
  {'model': TopoGreedy, 'args': {'max_iter': 2000, 'tol': 1e-4}, 'fmt': 's-', 'leg': 'TopoGreedy'},
  # {'model': MetMulDagma, 'args': {'stepsize': 3e-4, 'alpha_0': .01, 'rho_0': .05, 's': 1, 'lamb': 1e-1,
  #  'iters_in': 10000, 'iters_out': 10, 'beta': 2}, 'init': {'primal_opt': 'adam', 'acyclicity': 'logdet'},
  #  'adapt_lamb': True, 'standarize': False, 'fmt': 'o-', 'leg': 'MM-Logdet'},

  # # {'model': MetMulDagma, 'args': {'stepsize': 3e-4, 'alpha_0': .01, 'rho_0': .05, 's': 1, 'lamb': .05,
  # #  'iters_in': 10000, 'iters_out': 10, 'beta': 2}, 'init': {'primal_opt': 'adam', 'acyclicity': 'logdet'},
  # #  'adapt_lamb': False, 'standarize': False, 'fmt': 'o--', 'leg': 'MM-Logdet-fix'},

  # # {'model': MetMulDagma, 'args': {'stepsize': 1e-5, 'alpha_0': 1, 'rho_0': 10, 's': 1, 'lamb': .1, 'iters_in': 10000,
  # #  'iters_out': 10, 'beta': 5, 'tol': 1e-5}, 'init': {'acyclicity': 'matexp', 'primal_opt': 'fista'}, 'standarize': False,
  # #  'adapt_lamb': True, 'fmt': 'x-', 'leg': 'MM-Matexp'},

]
N = 100
thr = .2
verb = True
# Keyword arguments for utils.simulate_sem ('n_samples' is added per run).
data_p = {
    'n_nodes': N,
    'graph_type': 'er',
    'edges': 2*N,
    'edge_type': 'positive',
    'w_range': (.5, 1),
    'var': 1,
}

print('CPUs employed:', N_CPUS)

t_init = perf_counter()
results = Parallel(n_jobs=N_CPUS)(
    delayed(run_samples_exp)(g, data_p, N_samples, Exps, thr, verb)
    for g in range(n_dags)
)
t_end = perf_counter()
print(f'----- Solved in {(t_end-t_init)/60:.3f} minutes -----')

# `results` holds one 8-tuple of (len(N_samples), len(Exps)) arrays per graph.
# Regroup by metric and stack along a new leading axis so every metric is a
# single array of shape (n_dags, len(N_samples), len(Exps)).
shd, tpr, fdr, fscore, err, acyc, runtime, dag_count = (
    np.stack(metric) for metric in zip(*results)
)



CPUs employed: 1
Graph: 1, samples: 1000
		-TopoGreedy: shd 402.0  -  err: 2.000  -  time: 9.847
Graph: 1, samples: 5000
		-TopoGreedy: shd 1.0  -  err: 0.002  -  time: 7.501
Graph: 1, samples: 10000


KeyboardInterrupt: 