In [1]:
import pandas as pd
import numpy as np

from causallearn.search.ScoreBased.GES import ges
from causallearn.score.LocalScoreFunction import local_score_cv_general

from sklearn.preprocessing import StandardScaler

from notears import linear, nonlinear, utils

from CausalDisco.analytics import (
    var_sortability,
    r2_sortability,
    snr_sortability
)

from CausalDisco.baselines import (
    r2_sort_regress,
    var_sort_regress
)

In [2]:
def convert_pc_to_adjacency(G_pc: np.array):
    """Translate a causal-learn style endpoint matrix into an adjacency matrix.

    Each ordered pair (i, j) is classified from the two mirrored entries
    ``G_pc[i][j]`` and ``G_pc[j][i]``:

    * ``G_pc[i][j] == -1`` and ``G_pc[j][i] == 1``: directed edge, stored as
      ``W[i][j] = 1`` (presumably causal-learn's encoding of i -> j; confirm
      against the causal-learn graph documentation).
    * both entries ``-1``: undirected edge, stored as a single ``-1``.
    * both entries ``1``: rejected.

    Args:
        G_pc: square endpoint matrix, indexable as ``G_pc[i][j]``. Only the
            leading ``len(G_pc)`` rows and columns are read.

    Raises:
        ValueError: if a pair of mirrored entries are both ``1``.

    Returns:
        Float array of shape ``(len(G_pc), len(G_pc))``. Directed edges are
        ``1``. Every undirected edge appears exactly once as ``-1``, at
        ``[i][j]`` with ``i > j``; its mirror entry is ``0``.
    """
    n_nodes = len(G_pc)
    adjacency = np.zeros((n_nodes, n_nodes))
    for row in range(n_nodes):
        for col in range(n_nodes):
            here, there = G_pc[row][col], G_pc[col][row]
            if here == 1 and there == 1:
                raise ValueError("Impossible edge")
            if here != -1:
                # Only a -1 at [row][col] can start an edge recorded from this side.
                continue
            if there == 1:
                adjacency[row][col] = 1
            elif there == -1:
                # The pair is visited twice; clearing the mirror each time means
                # the later visit (row > col) is the one that keeps the -1.
                adjacency[row][col] = -1
                adjacency[col][row] = 0
    return adjacency

class SortMetric:
    """Callable that pairs a fixed graph with a sortability function.

    Calling the instance with a data array evaluates
    ``sortmetric(data, W)``, which lets it be passed anywhere a
    one-argument statistic is expected (e.g. ``bootstrap``).
    """

    def __init__(self, W, sortmetric):
        """Store the graph ``W`` and the two-argument metric ``sortmetric``."""
        self.W, self.sortmetric = W, sortmetric

    def __call__(self, data):
        """Return ``sortmetric(data, W)`` for the stored graph."""
        metric, graph = self.sortmetric, self.W
        return metric(data, graph)


def bootstrap(data, statistic, n_resamples=50, sample_frac=0.8, random_state=None):
    """Evaluate a statistic on row-resampled copies of a DataFrame.

    Args:
        data: pandas DataFrame; rows are drawn with replacement via
            ``data.sample``.
        statistic: callable applied to each resample after ``to_numpy()``.
        n_resamples: number of resamples to draw.
        sample_frac: each resample holds ``int(len(data) * sample_frac)`` rows.
        random_state: optional seed (anything ``np.random.default_rng``
            accepts). When given, one generator is shared by all draws, so the
            resamples differ from each other but the whole run is reproducible.
            When ``None``, pandas' default randomness is used, as before.

    Returns:
        1-D NumPy array holding the flattened statistic values in draw order
        (empty when ``n_resamples`` is 0).
    """
    n_rows = int(len(data) * sample_frac)
    # A single generator, not a repeated integer seed: passing the same int to
    # every ``sample`` call would return the identical resample each time.
    rng = None if random_state is None else np.random.default_rng(random_state)

    # Seed the list with an empty float array so the result keeps the dtype
    # promotion of the original ``np.append`` accumulation, then join once
    # instead of re-copying the array on every iteration.
    chunks = [np.array([])]
    for _ in range(n_resamples):
        sample = data.sample(n=n_rows, replace=True, random_state=rng)
        chunks.append(np.ravel(statistic(sample.to_numpy())))
    return np.concatenate(chunks)

def learn_weight_matrices(df):
    """Learn one graph per discovery method from the same DataFrame.

    Runs, in order: GES, linear NOTEARS (``lambda1=0.1``, ``loss_type='l2'``),
    ``r2_sort_regress`` and ``var_sort_regress``.

    Args:
        df: pandas DataFrame handed to every method as a NumPy array.

    Returns:
        Tuple ``(W_ges, W_nt, W_srg_r2, W_srg_var)``. ``W_ges`` is the output
        of ``convert_pc_to_adjacency``; the other three have every non-zero
        entry replaced by 1.
    """
    def _to_binary(weights):
        # Overwrite every non-zero weight with 1, in place, and hand it back.
        weights[weights != 0] = 1
        return weights

    ges_record = ges(df.values)
    W_ges = convert_pc_to_adjacency(ges_record["G"].graph)

    W_nt = _to_binary(
        linear.notears_linear(df.to_numpy(), lambda1=0.1, loss_type='l2')
    )
    W_srg_r2 = _to_binary(r2_sort_regress(df.to_numpy()))
    W_srg_var = _to_binary(var_sort_regress(df.to_numpy()))

    return W_ges, W_nt, W_srg_r2, W_srg_var


def compute_sortability(W, df):
    """Bootstrap the three sortability metrics for one graph.

    Args:
        W: graph passed as the second argument to each sortability function.
        df: pandas DataFrame that ``bootstrap`` resamples.

    Returns:
        Tuple of three bootstrap arrays, in the order
        (var-sortability, R2-sortability, SNR-sortability).
    """
    wrapped = [
        SortMetric(W, metric)
        for metric in (var_sortability, r2_sortability, snr_sortability)
    ]
    return tuple(bootstrap(df, statistic) for statistic in wrapped)

In [3]:
# Dataset label -> URL of its continuous data file in the
# cmu-phil/example-causal-datasets repository ("real" collection).
data = {
    label: f"https://raw.githubusercontent.com/cmu-phil/example-causal-datasets/main/real/{rel_path}"
    for label, rel_path in (
        ("nasa", "airfoil-self-noise/data/airfoil-self-noise.continuous.txt"),
        ("boston", "boston-housing/data/boston-housing.continuous.txt"),
        ("iqbrain", "iq-brain-size/data/iq_brain_size.continuous.txt"),
        ("sachs", "sachs/data/sachs.2005.continuous.txt"),
        ("wine", "wine-quality/data/winequality-red.continuous.txt"),
        # ("supercon", "superconductivity/data/superconductivity.continuous.txt"),
    )
}

In [None]:


# For every dataset: learn one graph per method, then report bootstrap
# mean (std) of VAR-, R2- and SNR-sortability for that graph, in that order.
for k, v in data.items():
    df = pd.read_csv(v, sep="\t")

    W_ges, W_nt, W_srg_r2, W_srg_var = learn_weight_matrices(df)

    # Row label -> learned graph. The sort-regress rows are labelled by the
    # method that produced the graph: the second one comes from
    # var_sort_regress and was previously reported as "SNR", which clashed with
    # the SNR-sortability metric shown in the third column of every row.
    graphs_by_method = {
        "GES": W_ges,
        "NT": W_nt,
        "SRG(R2)": W_srg_r2,
        "SRG(VAR)": W_srg_var,
    }

    for method, W_method in graphs_by_method.items():
        var_bs, r2_bs, snr_bs = compute_sortability(W_method, df)
        print(f"{k} - {method}: {var_bs.mean()} ({var_bs.std()}) & {r2_bs.mean()} ({r2_bs.std()}) & {snr_bs.mean()} ({snr_bs.std()})")




    

nasa - GES: 0.7916666666666665 (1.1102230246251565e-16) & 0.5116666666666666 (0.062383224240709655) & 0.9166666666666665 (1.1102230246251565e-16)
nasa - NT: 1.0 (0.0) & 0.11555555555555554 (0.021773242158072692) & 1.0 (0.0)
nasa - R2: 0.23076923076923075 (2.7755575615628914e-17) & 0.99 (0.016870547845739457) & 1.0 (0.0)
nasa - SNR: 1.0 (0.0) & 0.1928 (0.024903011866037415) & 0.7727999999999998 (0.02932848444771736)
boston - GES: 0.5950318471337579 (0.007327886378736128) & 0.45082802547770695 (0.03322594843854662) & 0.8859872611464968 (0.02332462856509624)
boston - NT: 0.9949618320610688 (0.004732824427480924) & 0.510381679389313 (0.022901272258976656) & 0.7955725190839695 (0.010609313986620886)
boston - R2: 0.5104938271604939 (0.029565259295871726) & 0.9890123456790124 (0.00768908870075062) & 0.9680864197530863 (0.009292327780811368)
boston - SNR: 0.9939563862928348 (0.00410506327975804) & 0.5736448598130841 (0.01731156880514302) & 0.8495327102803738 (0.017437136410163902)
iqbrain - GE

In [None]:
# COMPUTE SORTABILITY METRICS
# Point estimates (no bootstrap) on the full NASA airfoil data.

# Load the NASA data and learn its graphs in this cell: `df_nasa` was never
# assigned, and the W_* matrices left behind by the loop over `data` belong to
# whichever dataset was iterated last, not to NASA.
df_nasa = pd.read_csv(data["nasa"], sep="\t")
W_ges, W_nt, W_srg_r2, W_srg_var = learn_weight_matrices(df_nasa)

X_nasa = df_nasa.to_numpy()
nasa_graphs = (
    ("NOTEARS", W_nt),
    ("GES", W_ges),
    ("SRG(R2)", W_srg_r2),
    ("SRG(VAR)", W_srg_var),
)
nasa_metrics = (
    ("VAR", var_sortability),
    ("R2", r2_sortability),
    ("SNR", snr_sortability),
)

for position, (metric_label, metric) in enumerate(nasa_metrics):
    if position:
        # Separator between metric groups (none after the last group).
        print("---")
    for method_label, W_method in nasa_graphs:
        print(f"{method_label} {metric_label}-sort NASA", metric(X_nasa, W_method))



In [None]:

     
    


# Bootstrap mean and std of VAR-sortability for the NOTEARS graph.
# Expects `W_nt` and `df_nasa` to already exist in the kernel namespace;
# neither is assigned in this cell.
SM = SortMetric(W=W_nt, sortmetric=var_sortability)
bs = bootstrap(data=df_nasa, statistic=SM)
print(np.mean(bs), np.std(bs))