Code for generating the results for semi-synthetic discrete graphs from [*bnlearn*](https://www.bnlearn.com/) (Fig. 18).

In [None]:
import numpy as np
import pandas as pd
import networkx as nx
import numpy
import pandas
import networkx
from itertools import combinations, permutations
import logging
from causallearn.utils.cit import CIT
from sklearn import linear_model
import collections
import matplotlib.pyplot as plt
import pickle as pkl
from datetime import datetime
import copy

from causal_discovery.utils import *
from causal_discovery.pc_alg import PCAlgorithm
from causal_discovery.mb_by_mb import MBbyMBAlgorithm
from causal_discovery.sd_alg import SequentialDiscoveryAlgorithm
from causal_discovery.ldecc import LDECCAlgorithm

In [None]:
def get_bnlearn_graph(name):
  """Load the pickled bnlearn graph stored under ``data/``.

  Args:
    name: Graph name; it is substituted into the file name
      ``data/bnlearn_<name>_graph.pkl``.

  Returns:
    Whatever object was pickled into that file.

  Raises:
    OSError: If the file cannot be opened (e.g. it does not exist).
  """
  import pickle

  # NOTE: unpickling can execute arbitrary code; only load trusted files.
  # The context manager closes the file handle once loading finishes (the
  # previous version left it open).
  with open("data/bnlearn_%s_graph.pkl" % name, "rb") as graph_file:
    return pickle.load(graph_file)

In [None]:
# Name of the bnlearn graph to load (it is substituted into the pickle file
# name by get_bnlearn_graph). One of "alarm", "insurance", or "mildew".
graph_name = "mildew"

In [None]:
# Ground-truth graph; it is handed to every algorithm below as `graph_true`.
graph_true = get_bnlearn_graph(graph_name)

In [None]:
# Use the first node returned by graph_true.nodes() as the treatment node for
# the single-node runs in the following cells.
node = list(graph_true.nodes())[0]

In [None]:
# PC algorithm with the CI oracle enabled. run() receives an empty DataFrame
# whose columns are the graph's nodes; only the first of its three return
# values is kept. `pc_alg` is read again later by plot_test_statistics.
pc_alg = PCAlgorithm(
    treatment_node=node,
    graph_true=graph_true,
    use_ci_oracle=True,
)
cpdag_pc, _, _ = pc_alg.run(
    pd.DataFrame(columns=list(graph_true.nodes()))
)
print("Total CI tests done: %d" % pc_alg.ci_test_calls["total"])

In [None]:
# MB-by-MB with the CI oracle enabled, using `node` as the treatment. run()
# receives an empty DataFrame whose columns are the graph's nodes.
mb_by_mb_alg = MBbyMBAlgorithm(
    treatment_node=node,
    graph_true=graph_true,
    use_ci_oracle=True,
)
result_mb_by_mb = mb_by_mb_alg.run(
    pd.DataFrame(columns=list(graph_true.nodes()))
)
print("Total CI tests done: %d" % mb_by_mb_alg.ci_test_calls["total"])

In [None]:
# Sequential discovery (SD) with the CI oracle enabled, using `node` as the
# treatment. run() receives an empty DataFrame whose columns are the graph's
# nodes.
sd_alg = SequentialDiscoveryAlgorithm(
    treatment_node=node,
    graph_true=graph_true,
    use_ci_oracle=True,
)
result_sd = sd_alg.run(
    pd.DataFrame(columns=list(graph_true.nodes()))
)
print("Total CI tests done: %d" % sd_alg.ci_test_calls["total"])

In [None]:
# LDECC with the CI oracle enabled, using `node` as the treatment. run()
# receives an empty DataFrame whose columns are the graph's nodes.
# NOTE(review): outcome_node is set to the same node as treatment_node --
# confirm this is intended.
ldecc_alg = LDECCAlgorithm(
    treatment_node=node,
    outcome_node=node,
    graph_true=graph_true,
    use_ci_oracle=True,
)
result_ldecc = ldecc_alg.run(
    pd.DataFrame(columns=list(graph_true.nodes()))
)
print("Total CI tests done: %d" % ldecc_alg.ci_test_calls["total"])

Below we run the local causal discovery algorithms with each node in turn set as the treatment, and then plot the distribution of the number of conditional independence tests.

In [None]:
def run_local_discovery_for_each_node(graph_true):
  """Count CI tests used by each local discovery algorithm, per treatment node.

  For every node of `graph_true`, MB-by-MB, SD and LDECC are run in that order
  with the CI oracle enabled and the node as treatment. Each run() call gets an
  empty DataFrame whose columns are the graph's nodes.

  Args:
    graph_true: Ground-truth graph; must expose `.nodes()`.

  Returns:
    A dict mapping each node to a dict with keys "mb-by-mb", "ldecc" and
    "sd-alg", holding the respective `ci_test_calls["total"]` values.
  """
  tests_per_node = {}

  for treatment in graph_true.nodes():
    columns_only_df = pd.DataFrame(columns=list(graph_true.nodes()))
    # Constructor arguments shared by all three algorithms.
    shared_kwargs = dict(
        use_ci_oracle=True,
        graph_true=graph_true,
        treatment_node=treatment,
    )

    mb_runner = MBbyMBAlgorithm(**shared_kwargs)
    mb_runner.run(columns_only_df)

    sd_runner = SequentialDiscoveryAlgorithm(**shared_kwargs)
    sd_runner.run(columns_only_df)

    # NOTE(review): the outcome node is the treatment node itself -- confirm
    # this is intended.
    ldecc_runner = LDECCAlgorithm(outcome_node=treatment, **shared_kwargs)
    ldecc_runner.run(columns_only_df)

    counts = {
        "mb-by-mb": mb_runner.ci_test_calls["total"],
        "ldecc": ldecc_runner.ci_test_calls["total"],
        "sd-alg": sd_runner.ci_test_calls["total"],
    }
    tests_per_node[treatment] = counts

    print("Node done: %s, Tests: %s" % (treatment, counts))

  return tests_per_node
  
# Per-node CI-test counts for all three local algorithms; passed to
# plot_test_statistics below.
node_to_tests = run_local_discovery_for_each_node(graph_true)

In [None]:
# Colour palettes, taken from:
# https://gist.github.com/AndiH/c957b4d769e628f506bd

# Tableau 20 colours as 0-255 RGB triples.
tableau20 = [
    (31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),
    (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),
    (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),
    (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
    (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229),
]

# Tableau Color Blind 10 (despite the "20" in the variable name, this palette
# has ten entries).
tableau20blind = [
    (0, 107, 164), (255, 128, 14), (171, 171, 171), (89, 89, 89),
    (95, 158, 209), (200, 82, 0), (137, 137, 137), (163, 200, 236),
    (255, 188, 121), (207, 207, 207),
]

# Rescale every channel from 0-255 to the 0-1 range.
tableau20 = [
    tuple(channel / 255. for channel in rgb) for rgb in tableau20
]
tableau20blind = [
    tuple(channel / 255. for channel in rgb) for rgb in tableau20blind
]

In [None]:
# Base font sizes; each rc setting below adds its own offset to one of these.
SMALL_SIZE = 8
MEDIUM_SIZE = 8
BIGGER_SIZE = 8

# Default text size.
plt.rc('font', size=SMALL_SIZE)
# Axes title and x/y axis labels.
plt.rc('axes', titlesize=SMALL_SIZE+6, labelsize=MEDIUM_SIZE+6)
# Tick labels on both axes.
plt.rc('xtick', labelsize=SMALL_SIZE + 4)
plt.rc('ytick', labelsize=SMALL_SIZE+4)
# Legend entries.
plt.rc('legend', fontsize=SMALL_SIZE+2)
# Figure-level title.
plt.rc('figure', titlesize=BIGGER_SIZE+20)

In [None]:
# Per-algorithm plot styling, keyed by algorithm identifier. Each value is a
# three-item list whose item 1 is a colour from tableau20blind; that is the
# only item read by plot_test_statistics. Items 0 and 2 are presumably a
# matplotlib line style and marker -- confirm against other users of this dict.
plot_prop = {
    "pc-alg": ["dashed", tableau20blind[0], "o"],
    "ldecc": ["dashdot", tableau20blind[1], "^"],
    "ldecc-checks": ["solid", tableau20blind[5], "s"],
    "mb-by-mb": ["dashdot", tableau20blind[7], "D"],
    "sd-alg": ["dotted", tableau20blind[3], "v"],
}

In [None]:
def plot_test_statistics(node_to_tests):
  """Plot a histogram of per-node CI-test counts for the local algorithms.

  One histogram series is drawn for each of MB-by-MB, SD and LDECC, plus a
  vertical line at the total CI-test count of the module-level `pc_alg`.
  Colours come from the module-level `plot_prop` table.

  Args:
    node_to_tests: Dict mapping each node to a dict with the keys "mb-by-mb",
      "sd-alg" and "ldecc" holding CI-test counts.
  """
  plt.figure(figsize=(6, 4))

  # (key into node_to_tests entries and plot_prop, legend label), in the order
  # the series are drawn.
  algorithms = (("mb-by-mb", "MB-by-MB"), ("sd-alg", "SD"), ("ldecc", "LDECC"))
  per_node_counts = list(node_to_tests.values())

  samples = [
      [counts[key] for counts in per_node_counts] for key, _ in algorithms
  ]
  legend_labels = [label for _, label in algorithms]
  series_colors = [plot_prop[key][1] for key, _ in algorithms]
  plt.hist(samples, label=legend_labels, color=series_colors)

  # Reference line: total CI tests recorded by the PC run (global `pc_alg`).
  plt.axvline(
      pc_alg.ci_test_calls["total"],
      label="PC",
      color=plot_prop["pc-alg"][1],
  )

  plt.legend()
  plt.title("Distribution of CI tests")
  plt.xlabel("Number of CI tests")
  plt.ylabel("Number of nodes")
  plt.show()


# Draw the histogram from the per-node counts collected above.
plot_test_statistics(node_to_tests)