# Example Usage and Comparison with R Implementation

Import the necessary packages:

In [1]:
import numpy as np
from rpy2.robjects.packages import importr
from rpy2.robjects import numpy2ri

from pyGCM.gcm_test import GCMTest
from pyGCM.residual_methods import linear_regression_residuals

# Activate rpy2's NumPy conversion; the NumPy arrays created below are
# passed directly to the R function.
numpy2ri.activate()

# Import the R package containing gcm_test; it is called below as
# GCM.gcm_test.
GCM = importr('GeneralisedCovarianceMeasure')


The following function compares the Python-based GCM test with the R-based GCM test (kernel ridge).

In [2]:
def compare_gcm(E, X, Y,
                get_resid='GP',
                categorical_E=False,
                categorical_X=False,
                categorical_Y=False,
                resid_E=None,
                resid_Y=None):
    """
    Run the Python GCM test and the R gcm_test on the same data and print both.

    The Python test is fitted on (E, X, Y). The R function is called with
    the roles renamed: R's ``X`` receives ``Y``, R's ``Y`` receives ``E``
    and R's ``Z`` (the conditioning set) receives ``X``. The R regression
    method is always passed as "kernel.ridge".

    Args:
        E, X, Y (np.ndarray): Data arrays with shape (n_samples, ...).
        get_resid (str): 'GP', 'linear', 'categorical', or callable;
            forwarded to ``GCMTest``.
        categorical_E, categorical_X, categorical_Y (bool):
            Forwarded to ``GCMTest.fit``; mark which variables the Python
            test should treat as categorical. They are not passed to R.
        resid_E, resid_Y (np.ndarray or None):
            Optional precomputed residuals of E and Y given X
            (shape ~ (n,) or (n,1)). Both are always forwarded to the
            Python test; they are forwarded to R only when *both* are given.

    Returns:
        (float, float, float): (python_test_stat, python_p_val, r_p_val)
    """
    # --- Python side -----------------------------------------------------
    tester = GCMTest(get_resid=get_resid)
    fit_options = {
        "resid_E": resid_E,
        "resid_Y": resid_Y,
        "categorical_E": categorical_E,
        "categorical_X": categorical_X,
        "categorical_Y": categorical_Y,
    }
    python_test_stat, python_p_val = tester.fit(E, X, Y, **fit_options)

    # --- R side ----------------------------------------------------------
    # Base arguments shared by both call variants (note the renamed roles).
    r_arguments = {
        "X": Y,
        "Y": E,
        "Z": X,
        "regr_method": "kernel.ridge",
    }
    both_residuals_given = not (resid_Y is None or resid_E is None)
    if both_residuals_given:
        # R's X is our Y and R's Y is our E, so the residuals swap as well.
        # They are reshaped to column vectors before being handed to R.
        r_arguments["resid_XonZ"] = resid_Y.reshape(-1, 1)
        r_arguments["resid_YonZ"] = resid_E.reshape(-1, 1)

    r_outcome = GCM.gcm_test(**r_arguments)
    r_p_val = r_outcome.rx2('p.value')[0]

    # --- Report ----------------------------------------------------------
    report_lines = (
        "Python GCM Test (possibly with precomputed residuals):",
        f"   Test Statistic = {python_test_stat:.6f}",
        f"   p-value        = {python_p_val:.6g}",
        "R GCM Test (kernel.ridge):",
        f"   p-value        = {r_p_val:.6g}",
        "-" * 50,
    )
    for line in report_lines:
        print(line)

    return python_test_stat, python_p_val, r_p_val


## Test 1: All continuous variables

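For all tests it holds that E is independent of Y given X, i.e. the null hypothesis of the GCM test is true. Under the null the p-value is approximately uniformly distributed on [0, 1], so we should not expect systematically small p-values (the test should usually not reject).
For the first test, both pyGCM and the R implementation agree in not rejecting the null hypothesis.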

In [3]:
# Sample size used by this and all following test cells.
n = 100
# Seed the global NumPy RNG once; the later cells continue from this state.
np.random.seed(42)

# Draw order is kept fixed (X, then the noise of E, then the noise of Y)
# so that the generated data is reproducible.
X_continuous = np.random.randn(n, 2)
noise_E = np.random.randn(n, 1)
noise_Y = np.random.randn(n, 1)

# E depends only on the first column of X, Y only on the second column.
E_continuous = 0.5 * X_continuous[:, 0:1] + 0.1 * noise_E
Y_continuous = 0.8 * X_continuous[:, 1:2] + 0.1 * noise_Y

print("=== TEST 1: All Continuous ===")
compare_gcm(
    E_continuous,
    X_continuous,
    Y_continuous,
    get_resid='GP',
    categorical_E=False,
    categorical_X=False,
    categorical_Y=False,
)


=== TEST 1: All Continuous ===
Python GCM Test (possibly with precomputed residuals):
   Test Statistic = 0.097400
   p-value        = 0.922409
R GCM Test (kernel.ridge):
   p-value        = 0.437556
--------------------------------------------------


(0.09739953801417615, 0.9224091115969792, 0.4375555904040856)

## Test 2: E is Categorical

In [4]:
# Draw order is kept fixed: X, then the categorical E, then the noise of Y.
X_continuous = np.random.randn(n, 2)

# E takes the values 0/1/2 with the given probabilities and is drawn
# without reference to X or Y.
E_levels = [0, 1, 2]
E_probabilities = [0.3, 0.5, 0.2]
E_categorical = np.random.choice(E_levels, size=(n, 1), p=E_probabilities)

# Y depends only on the second column of X plus Gaussian noise.
noise_Y = np.random.randn(n, 1)
Y_continuous = 0.8 * X_continuous[:, 1:2] + 0.1 * noise_Y

print("=== TEST 2: E is Categorical ===")
compare_gcm(
    E_categorical,
    X_continuous,
    Y_continuous,
    get_resid='GP',
    categorical_E=True,
    categorical_X=False,
    categorical_Y=False,
)

=== TEST 2: E is Categorical ===
Python GCM Test (possibly with precomputed residuals):
   Test Statistic = 0.815857
   p-value        = 0.414582
R GCM Test (kernel.ridge):
   p-value        = 0.0426315
--------------------------------------------------


(0.8158570161554344, 0.41458191881370343, 0.042631474121915036)

## Test 3: X is Categorical

In [5]:
# Draw order is kept fixed: the noise of E, then the categorical X, then
# the noise of Y.
noise_E = np.random.randn(n, 1)
X_categorical = np.random.choice([0, 1, 2], size=(n, 1))
noise_Y = np.random.randn(n, 1)

# E is scaled Gaussian noise that does not use X; Y is a linear function
# of the category code of X plus noise.
E_continuous = 0.5 * noise_E
Y_continuous = 0.5 * X_categorical + 0.1 * noise_Y

print("=== TEST 3: X is Categorical ===")
compare_gcm(
    E_continuous,
    X_categorical,
    Y_continuous,
    get_resid='categorical',
    categorical_E=False,
    categorical_X=True,
    categorical_Y=False,
)


=== TEST 3: X is Categorical ===
Python GCM Test (possibly with precomputed residuals):
   Test Statistic = 0.658762
   p-value        = 0.510049
R GCM Test (kernel.ridge):
   p-value        = 0.515523
--------------------------------------------------


(0.6587621002531419, 0.510048548186609, 0.5155232669967633)

## Test 4: Precomputed Residuals

For the fourth test we precompute the residuals using Python's linear regression and pass them to the GCM test. Here we expect the p-value to be the same between the R and Python implementations.

In [7]:
# Synthetic data; draw order is kept fixed (X, noise of E, noise of Y).
X_cont = np.random.randn(n, 2)
noise_E = np.random.randn(n, 1)
noise_Y = np.random.randn(n, 1)
E_cont = 0.5 * X_cont[:, 0:1] + 0.1 * noise_E
Y_cont = 0.8 * X_cont[:, 1:2] + 0.1 * noise_Y

# Residuals of E and Y given X, computed once in Python (E first, then Y)
# and handed to compare_gcm as precomputed residuals.
resid_E_lin, resid_Y_lin = (
    linear_regression_residuals(target, X_cont) for target in (E_cont, Y_cont)
)

print("=== TEST 4: Precomputed Residuals ===")
compare_gcm(
    E_cont,
    X_cont,
    Y_cont,
    get_resid='linear',
    categorical_E=False,
    categorical_X=False,
    categorical_Y=False,
    resid_E=resid_E_lin,
    resid_Y=resid_Y_lin,
)

=== TEST 4: Precomputed Residuals ===
Python GCM Test (possibly with precomputed residuals):
   Test Statistic = 0.910581
   p-value        = 0.362516
R GCM Test (kernel.ridge):
   p-value        = 0.362516
--------------------------------------------------


(0.9105813815904004, 0.362515984146232, 0.36251598414623215)