In [3]:
import os
import re

import numpy as np

from citk.tests.simple_tests import FisherZ, Spearman



In [3]:
import numpy as np
from citk.tests import ChiSq

# Generate discrete data for a chain: X -> Z -> Y
# X, Z, and Y each take 3 levels (0, 1, 2): every step adds a 0/1 increment modulo 3.
np.random.seed(42)  # fixed seed so the p-values printed below are reproducible on re-run
n = 500
X = np.random.randint(0, 3, size=n)
Z = (X + np.random.randint(0, 2, size=n)) % 3
Y = (Z + np.random.randint(0, 2, size=n)) % 3
# Column order is [X, Y, Z], i.e. X -> 0, Y -> 1, Z -> 2 in the test calls below.
data = np.vstack([X, Y, Z]).T

# Initialize the test
# (the variable is named `g_sq_test`, but it holds a citk `ChiSq` instance)
g_sq_test = ChiSq(data)

# Test for unconditional independence
p_value_unconditional = g_sq_test(0, 1)
print(f"P-value (unconditional) for X _||_ Y: {p_value_unconditional:.4f}")
# Expected: p_value is small (dependent), because X influences Y through Z

# Test for conditional independence given Z
p_value_conditional = g_sq_test(0, 1, [2])
print(f"P-value (conditional) for X _||_ Y | Z: {p_value_conditional:.4f}")
# Expected: p_value is large (independent), because Z blocks the only path X -> Z -> Y

P-value (unconditional) for X _||_ Y: 0.0000
P-value (conditional) for X _||_ Y | Z: 0.0870


In [2]:
from causallearn.search.ConstraintBased.PC import pc

# Structure learning with PC on the discrete chain data built in the
# standalone example, using the 'gsq' independence test at the 5% level.
cg = pc(data, indep_test='gsq', alpha=0.05)

# Header line first, then the textual form of the learned graph.
print("Estimated Causal Graph:", cg.G, sep="\n")

  0%|          | 0/3 [00:00<?, ?it/s]

Estimated Causal Graph:
Graph Nodes:
X1;X2;X3

Graph Edges:
1. X1 --- X3
2. X2 --- X3



In [16]:
import numpy as np
from citk.tests import ChiSq

# Generate discrete data representing a collider: X -> Y <- Z
# X and Z are drawn independently; Y is their XOR with 10% of the values flipped,
# so X and Z should become dependent once we condition on Y.
np.random.seed(42)  # fixed seed so the p-values printed below are reproducible on re-run
n = 500
X = np.random.randint(0, 2, size=n)
Z = np.random.randint(0, 2, size=n)
Y_true = (X + Z) % 2
# 1 marks a sample whose Y value gets flipped (probability 0.1).
noise_mask = np.random.choice([0, 1], size=n, p=[0.9, 0.1])
Y = (Y_true + noise_mask) % 2
# Column order is [X, Y, Z], i.e. X -> 0, Y -> 1, Z -> 2 in the test calls below.
data = np.vstack([X, Y, Z]).T

# Initialize the test
chisq_test = ChiSq(data)

# Test for unconditional independence (X and Z are independent)
p_value_unconditional = chisq_test(0, 2)
print(f"P-value for X _||_ Z: {p_value_unconditional:.4f}")
# Expected: p_value is large (independent)

# Test for conditional dependence on the collider Y
p_value_conditional = chisq_test(0, 2, [1])
print(f"P-value for X _||_ Z | Y: {p_value_conditional:.4f}")
# Expected: p_value is small (dependent once the collider is conditioned on)

P-value for X _||_ Z: 0.6601
P-value for X _||_ Z | Y: 0.0000


In [2]:
from causallearn.search.ConstraintBased.PC import pc

# Run PC on the `data` array left in the kernel by the standalone example,
# with the independence test registered under the name 'fisherz_citk'.
# NOTE(review): if `data` is the binary collider data from the standalone
# example, a Fisher-Z test (normally used for continuous data) looks like an
# odd choice, and the recorded graph below matches the chain example's
# output, so it may be stale. Confirm the intended test and re-run.
cg = pc(data, indep_test='fisherz_citk', alpha=0.05)

for line in ("Estimated Causal Graph:", cg.G):
    print(line)

  0%|          | 0/3 [00:00<?, ?it/s]

Estimated Causal Graph:
Graph Nodes:
X1;X2;X3

Graph Edges:
1. X1 --- X3
2. X2 --- X3



In [4]:

def run_tests_on_dataset(data, dataset_name, tests_to_run=None, alpha=0.05, cache_dir="examples/cache"):
    """Run conditional-independence tests on a three-column chain dataset and print verdicts.

    The printed "Expected" labels assume a chain A -> B -> C stored in columns
    0, 1 and 2 of ``data``: A and C are expected to be dependent marginally and
    independent given B.

    Args:
        data: Array-like with a ``.shape`` attribute, passed unchanged to each
            test class.
        dataset_name: Human-readable label used in the printed report and
            (after sanitisation) in the cache file name.
        tests_to_run: Optional list of ``(TestClass, test_name, kwargs)``
            tuples. ``TestClass(data, cache_path=..., **kwargs)`` must return a
            callable ``(x, y[, cond_set]) -> p-value`` that also offers
            ``save_cache()``. Defaults to FisherZ and Spearman.
        alpha: Significance level below which a p-value is reported as
            "Dependent". Defaults to 0.05.
        cache_dir: Directory for the per-test cache files; created if missing.
            Defaults to "examples/cache".

    Returns:
        None. Results are printed and each test's cache is saved.
    """
    if tests_to_run is None:
        # Default suite: the two correlation-based tests imported at the top of the notebook.
        tests_to_run = [
            (FisherZ, "fisher's_z", {}),
            (Spearman, "spearman's_rho", {}),
        ]
        suite_label = "FisherZ and Spearman"
    else:
        suite_label = ", ".join(name for _, name, _ in tests_to_run)

    def slugify(text):
        # Collapse every run of non-alphanumeric characters (spaces, quotes,
        # parentheses, path separators, ...) into one underscore so the label
        # is a safe single file-name component.
        return re.sub(r"[^a-z0-9]+", "_", text.lower()).strip("_")

    # Do not rely on the caller having created the cache directory beforehand.
    os.makedirs(cache_dir, exist_ok=True)

    print("\n" + "="*60)
    print(f"Running {suite_label} CI Tests on: {dataset_name}")
    print(f"Data shape: {data.shape}")
    print("="*60)

    for TestClass, test_name, kwargs in tests_to_run:
        print(f"\n--- Testing: {test_name} ({dataset_name}) ---")

        # Both labels are sanitised; previously only test_name was, so the raw
        # dataset label (spaces, parentheses, '=') ended up in the path.
        cache_file = os.path.join(
            cache_dir, f"{slugify(dataset_name)}_{slugify(test_name)}_cache.json"
        )
        test_instance = TestClass(data, cache_path=cache_file, **kwargs)

        # Unconditional test: A and C should be dependent
        p_ac = test_instance(0, 2)
        print(f"  p-value for A _||_ C: {p_ac:.4f} -> {'Dependent' if p_ac < alpha else 'Independent'} (Expected: Dependent)")

        # Conditional test: A and C should be independent given B
        p_ac_b = test_instance(0, 2, [1])
        print(f"  p-value for A _||_ C | B: {p_ac_b:.4f} -> {'Dependent' if p_ac_b < alpha else 'Independent'} (Expected: Independent)")

        test_instance.save_cache()


# Make sure the cache directory is present before any test writes into it
os.makedirs("examples/cache", exist_ok=True)


def _simulate_chain(n_samples, coefficient, noise_scale, seed=42):
    """Draw one linear chain A -> B -> C with standard-normal noise.

    The global NumPy RNG is re-seeded first; A, the noise for B and the noise
    for C are then drawn in that order.
    """
    np.random.seed(seed)
    a = np.random.randn(n_samples)
    b = coefficient * a + noise_scale * np.random.randn(n_samples)
    c = coefficient * b + noise_scale * np.random.randn(n_samples)
    return a, b, c


# ---------------------------------------------------------------------------------
# Section 1: Weak Signal Data (n=200)
# Structure: A -> B -> C, edge coefficient 0.7, noise scale 0.5
# ---------------------------------------------------------------------------------
n_weak = 200
A_weak, B_weak, C_weak = _simulate_chain(n_weak, coefficient=0.7, noise_scale=0.5)
data_weak = np.vstack([A_weak, B_weak, C_weak]).T

run_tests_on_dataset(data_weak, "Weak Signal (n=200)")

# ---------------------------------------------------------------------------------
# Section 2: Strong Signal Data (n=500)
# Structure: A -> B -> C, edge coefficient 0.9, noise scale 0.2
# ---------------------------------------------------------------------------------
n_strong = 500
A_strong, B_strong, C_strong = _simulate_chain(n_strong, coefficient=0.9, noise_scale=0.2)
data_strong = np.vstack([A_strong, B_strong, C_strong]).T

run_tests_on_dataset(data_strong, "Strong Signal (n=500)")

# Closing banner
banner = "=" * 60
print("\n" + banner)
print("Synthetic data test demonstration complete.")
print(banner)


Running FisherZ and Spearman CI Tests on: Weak Signal (n=200)
Data shape: (200, 3)

--- Testing: fisher's_z (Weak Signal (n=200)) ---
  p-value for A _||_ C: 0.0000 -> Dependent (Expected: Dependent)
  p-value for A _||_ C | B: 0.3900 -> Independent (Expected: Independent)

--- Testing: spearman's_rho (Weak Signal (n=200)) ---
  p-value for A _||_ C: 0.0000 -> Dependent (Expected: Dependent)
  p-value for A _||_ C | B: 0.6032 -> Independent (Expected: Independent)

Running FisherZ and Spearman CI Tests on: Strong Signal (n=500)
Data shape: (500, 3)

--- Testing: fisher's_z (Strong Signal (n=500)) ---
  p-value for A _||_ C: 0.0000 -> Dependent (Expected: Dependent)
  p-value for A _||_ C | B: 0.0644 -> Independent (Expected: Independent)

--- Testing: spearman's_rho (Strong Signal (n=500)) ---
  p-value for A _||_ C: 0.0000 -> Dependent (Expected: Dependent)
  p-value for A _||_ C | B: 0.1630 -> Independent (Expected: Independent)

Synthetic data test demonstration complete.
