In [3]:
import numpy as np
import pandas as pd
from scipy import stats

def partial_correlation(X, Y, Z):
    """
    Calculate the partial correlation between X and Y given Z.
    """
    X_resid = stats.linregress(Z, X).resid
    Y_resid = stats.linregress(Z, Y).resid
    return stats.pearsonr(X_resid, Y_resid)[0]

def conditional_independence_test(X, Y, Z, alpha):
    """
    Perform a conditional independence test using partial correlation.
    """
    if Z.size == 0:
        corr, p_value = stats.pearsonr(X, Y)
    else:
        corr = partial_correlation(X, Y, Z)
        n = len(X)
        k = Z.shape[1] if Z.ndim > 1 else 1
        t_stat = corr * np.sqrt((n - k - 2) / (1 - corr**2))
        p_value = 2 * (1 - stats.t.cdf(abs(t_stat), n - k - 2))
    
    return p_value > alpha, p_value

def get_lagged_variables(data, tau_max):
    """
    Create lagged variables up to tau_max.
    """
    n, m = data.shape
    lagged_data = np.zeros((n - tau_max, m * (tau_max + 1)))
    
    for t in range(tau_max + 1):
        lagged_data[:, t*m:(t+1)*m] = data[tau_max-t:n-t, :]
    
    return lagged_data

def pcmci_algorithm(data, alpha, tau_max):
    """
    Implement the PCMCI algorithm.
    
    Args:
    data (pd.DataFrame): Time series data with columns as variables and rows as time steps.
    alpha (float): Significance threshold for conditional independence tests.
    tau_max (int): Maximum time lag to consider.
    
    Returns:
    dict: A dictionary containing the causal graph and p-values.
    """
    n, m = data.shape
    var_names = data.columns
    
    # Step 1: Create lagged variables
    lagged_data = get_lagged_variables(data.values, tau_max)
    
    # Step 2: Estimate the parents of each target variable
    parents = {var: [] for var in var_names}
    p_values = np.ones((m, m, tau_max + 1))
    
    for j in range(m):  # For each target variable
        Y = lagged_data[:, j]
        potential_causes = list(range(m * (tau_max + 1)))
        potential_causes.remove(j)  # Remove instantaneous effect
        
        # Iterative conditional independence tests
        for p in range(tau_max + 1):
            if not potential_causes:
                break
            
            new_parents = []
            for i in potential_causes:
                X = lagged_data[:, i]
                Z = lagged_data[:, parents[var_names[j]]]
                
                is_independent, p_value = conditional_independence_test(X, Y, Z, alpha)
                
                var_idx, lag = divmod(i, m)
                p_values[j, var_idx, lag] = p_value
                
                if not is_independent:
                    new_parents.append(i)
            
            parents[var_names[j]].extend(new_parents)
            potential_causes = [i for i in potential_causes if i not in new_parents]
    
    # Step 3: Create causal graph
    causal_graph = {var: [] for var in var_names}
    for j, var in enumerate(var_names):
        for i in parents[var]:
            cause_idx, lag = divmod(i, m)
            causal_graph[var].append((var_names[cause_idx], lag))
    
    return {'causal_graph': causal_graph, 'p_values': p_values}

def interpret_results(result, var_names):
    """
    Interpret the results of the PCMCI algorithm.
    """
    causal_relationships = []
    
    for effect, causes in result['causal_graph'].items():
        for cause, lag in causes:
            strength = -np.log(result['p_values'][var_names.index(effect), var_names.index(cause), lag])
            causal_relationships.append({
                'Effect': effect,
                'Cause': f"{cause}(t-{lag})",
                'Strength': strength,
                'p-value': result['p_values'][var_names.index(effect), var_names.index(cause), lag]
            })
    
    return pd.DataFrame(causal_relationships)

# Example usage
if __name__ == "__main__":
    # Create a sample DataFrame (replace this with your actual data)
    np.random.seed(42)
    dates = pd.date_range(start='2023-01-01', end='2023-12-31', freq='D')
    data = pd.DataFrame({
        'A': np.random.randn(len(dates)),
        'B': np.random.randn(len(dates)),
        'C': np.random.randn(len(dates))
    }, index=dates)
    
    # Run PCMCI
    alpha = 0.05
    tau_max = 5
    result = pcmci_algorithm(data, alpha, tau_max)
    
    # Interpret results
    causal_df = interpret_results(result, data.columns)
    
    print(causal_df)

IndexError: index 3 is out of bounds for axis 1 with size 3