In [49]:
import cdt
import networkx as nx
import warnings
import numpy as np
import os
from pandas import DataFrame
from cdt.metrics import SHD
from cdt.metrics import SID
import json
from json.decoder import JSONDecodeError

warnings.filterwarnings("ignore")  # silence warnings

# Raw string literal: a Windows path is full of backslashes, and sequences such
# as "\P" or "\R" are invalid escapes in a normal string (deprecated, and
# reported as a warning by newer Python versions). The resulting value is
# identical to the previous one.
cdt.SETTINGS.rpath = r'C:\Program Files\R\R-4.2.1\bin\Rscript'  # path to your R executable
cdt.SETTINGS.rpath

'C:\\Program Files\\R\\R-4.2.1\\bin\\Rscript'

In [2]:
# Root folders, relative to the notebook's working directory:
# PREDS_DIR is where predictions and metrics are read/written,
# DATA_DIR is where the ground-truth graphs and datasets are read from.
PREDS_DIR, DATA_DIR = "outputs", "data"

# Separator used to split stored dataset paths into their components
# (a single backslash, i.e. Windows-style paths).
PATH_SPLITTER = "\\"

In [3]:
# Sanity check: displays True when the predictions directory exists.
os.path.isdir(PREDS_DIR)

True

In [4]:
# Names of the entries directly under PREDS_DIR; the metrics loop treats each
# entry as the output folder of one method.
method_dirs = os.listdir(PREDS_DIR)
method_dirs

['cam']

# Process data

In [10]:
def retrieve_adjacency_matrix(graph, order_nodes=None, weight=False):
    """Return the adjacency matrix of ``graph`` as a numpy array.

    Args:
        graph: Either a ``numpy.ndarray`` (returned as-is, the very same
            object, not a copy) or a ``networkx.DiGraph``.
        order_nodes: Node ordering used for the rows/columns when ``graph`` is
            a ``DiGraph``; defaults to ``graph.nodes()``.
        weight: When false, ``weight=None`` is passed to
            ``nx.adjacency_matrix``; when true, networkx's default weight
            handling is used.

    Returns:
        numpy.ndarray: The adjacency matrix.

    Raises:
        TypeError: If ``graph`` is neither an ndarray nor a ``DiGraph``.
    """
    # Arrays are passed straight through without copying.
    if isinstance(graph, np.ndarray):
        return graph

    if not isinstance(graph, nx.DiGraph):
        raise TypeError("Only networkx.DiGraph and np.ndarray (adjacency matrixes) are supported.")

    nodelist = graph.nodes() if order_nodes is None else order_nodes
    if weight:
        sparse_adjacency = nx.adjacency_matrix(graph, nodelist)
    else:
        sparse_adjacency = nx.adjacency_matrix(graph, nodelist, weight=None)
    return np.array(sparse_adjacency.todense())

In [50]:
def _count_edge_errors(true_labels, predictions):
    """Count flipped, false-positive and false-negative edges.

    Both arguments are square adjacency matrices of the same shape, with
    entry ``[i, j] == 1`` marking an edge i -> j. Neither input is modified:
    the counts are derived from boolean masks and a fresh difference array.

    Returns:
        tuple[int, int, int]: ``(flipped, false_positive, false_negative)``
        where
        * ``flipped`` counts true edges i -> j for which the prediction
          contains the reversed edge j -> i,
        * ``false_positive`` counts cells where ``true - pred == -1``,
        * ``false_negative`` counts cells where ``true - pred == 1``
          (this still includes flipped edges; the caller subtracts them).
    """
    flipped = np.count_nonzero((true_labels == 1) & (predictions.T == 1))
    diff = true_labels - predictions
    false_positive = np.count_nonzero(diff == -1)
    false_negative = np.count_nonzero(diff == 1)
    return int(flipped), int(false_positive), int(false_negative)


# Walk outputs/<method>/<mechanism>/<dataset>/ and write a metrics.json next to
# every pred.npy, comparing it with data/<mechanism>/<dataset>/DAG1.npy.
for meth_dir in method_dirs:
    method_path = os.path.join(PREDS_DIR, meth_dir)
    mechanism_dirs = os.listdir(path=method_path)

    for mech_dir in mechanism_dirs:
        mechanism_path = os.path.join(method_path, mech_dir)
        dag_mech_path = os.path.join(DATA_DIR, mech_dir)
        dataset_dirs = os.listdir(path=mechanism_path)

        for dataset_dir in dataset_dirs:
            dataset_path = os.path.join(mechanism_path, dataset_dir)
            pred_path = os.path.join(dataset_path, "pred.npy")
            metrics_path = os.path.join(dataset_path, "metrics.json")

            # Skip datasets whose metrics file already exists and parses as
            # JSON; an unreadable file is recomputed and overwritten.
            # Delete metrics.json to force a recomputation.
            if os.path.exists(metrics_path):
                with open(metrics_path, 'r') as fp:
                    try:
                        json.load(fp)
                        continue
                    except JSONDecodeError:
                        pass

            dag_dataset_path = os.path.join(dag_mech_path, dataset_dir)
            dag_path = os.path.join(dag_dataset_path, "DAG1.npy")

            # Load true dag and pred dag
            pred = np.load(pred_path)
            dag = np.load(dag_path)
            true_labels = retrieve_adjacency_matrix(dag)
            predictions = retrieve_adjacency_matrix(
                pred, dag.nodes() if isinstance(dag, nx.DiGraph) else None)

            # Calculate metrics.
            # `predictions` is the same object as `pred` for ndarray input, so
            # the counting must not write into it (or into a view of it):
            # `pred` is still needed, unmodified, for SHD and SID below.
            flipped_edges, false_positive_edges, false_negative_edges = \
                _count_edge_errors(true_labels, predictions)

            results_dict = {
                "SHD": int(SHD(dag, pred, double_for_anticausal=False)),
                "SID": int(SID(dag, pred)),
                "flipped_edges": int(flipped_edges),
                "false_positive_edges": int(false_positive_edges),
                # A flipped edge also shows up as a missing edge, so it is
                # removed from the false-negative count.
                # NOTE(review): this can go negative if a prediction contains
                # both i -> j and j -> i for a true edge — confirm predictions
                # are always acyclic.
                "false_negative_edges": int(false_negative_edges - flipped_edges)
            }
            with open(metrics_path, 'w') as fp:
                json.dump(results_dict, fp)
            



outputs\cam\gaussian_add_mechanism\medium_all\metrics.json
outputs\cam\gaussian_add_mechanism\medium_confound\metrics.json
outputs\cam\gaussian_add_mechanism\medium_selection_bias\metrics.json
outputs\cam\gaussian_add_mechanism\medium_selection_bias_confound\metrics.json
outputs\cam\gaussian_add_mechanism\medium_selection_bias_unfaithful\metrics.json


## Read in results

In [None]:
# Run CAM on every dataset that does not have any output yet and store the
# predicted graph as an adjacency matrix in <notear_path>/<mechanism>/<dataset>/pred.npy.
# NOTE(review): `data_dirs`, `notear_path` and `CAM` are not defined or imported
# in the visible part of this notebook — confirm where they come from, otherwise
# this cell fails on a fresh kernel.
for data_dir in data_dirs:
    # Drop the first path component; the next two are the mechanism and dataset names.
    data_path = data_dir.split(PATH_SPLITTER)[1:]
    mechanism, dataset = data_path[0], data_path[1]

    input_mechanism_path = os.path.join(DATA_DIR, mechanism)
    input_dataset_path = os.path.join(input_mechanism_path, dataset)
    output_mechanism_path = os.path.join(notear_path, mechanism)
    output_dataset_path = os.path.join(output_mechanism_path, dataset)

    # Create the mechanism-level results dir first, then the dataset-level one.
    for results_dir in (output_mechanism_path, output_dataset_path):
        if not os.path.isdir(results_dir):
            os.mkdir(results_dir)

    # Anything already present in the dataset's results dir means "done".
    if os.listdir(output_dataset_path):
        continue

    # Process
    data = np.load(os.path.join(input_dataset_path, 'data1.npy'))
    pred_graph = CAM().predict(DataFrame(data))

    pred_path = os.path.join(output_dataset_path, "pred.npy")
    np.save(pred_path, nx.to_numpy_array(pred_graph))
