# Causality & Correlation Analysis

In [2]:
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import grangercausalitytests
import numpy as np
import pandas as pd
import os

TABLES_DIRECTORY = "../../Data/giant_tables"

In [3]:
patient_hup_ids = []
# Iterate through all files in TABLES_DIRECTORY
for filename in os.listdir(TABLES_DIRECTORY):
    # Only look at filename that are .csv files and does not begin with .
    if filename.endswith(".csv") and not filename.startswith("."):
        # Get the patient_hup_id from the filename which is after _ and before .
        patient_hup_id = filename.split("_")[1].split(".")[0]
        patient_hup_ids.append(patient_hup_id)

patient_hup_ids = sorted(patient_hup_ids)
len(patient_hup_ids)

62

## Do interictal spikes drive phase synchronization?

In [9]:
for patient_hup_id in patient_hup_ids:
    # Read in the giant table for this patient
    hourly_patient_features_df = pd.read_csv(
        os.path.join(TABLES_DIRECTORY, f"HUP_{patient_hup_id}.csv")
    )

    # Assuming you already have the dataframe hourly_patient_features_df
    df = hourly_patient_features_df[["spikes_avg_all", "kuramoto"]]

    # It's recommended to handle missing values and make the series stationary
    # (via differencing or some other method) before the Granger test
    df = df.dropna()
    df = df.diff().dropna()

    # Perform the Granger Causality test
    # maxlag is the maximum lag that you would like the test to check
    granger_test_result = grangercausalitytests(df, maxlag=10)

    # Print the test results
    for lag, test_result in granger_test_result.items():
        print(f"Lag: {lag}")
        for test_name, result in test_result[0].items():
            t_statistic, p_value = result[0], result[1]
            print(f"{test_name} Test statistic value: {t_statistic}")
            print(f"{test_name} p-value: {p_value}\n")
            if p_value < 0.05:
                print(f"Null hypothesis rejected for {test_name} test")
                break


Granger Causality
number of lags (no zero) 1
ssr based F test:         F=6.8674  , p=0.0098  , df_denom=127, df_num=1
ssr based chi2 test:   chi2=7.0296  , p=0.0080  , df=1
likelihood ratio test: chi2=6.8462  , p=0.0089  , df=1
parameter F test:         F=6.8674  , p=0.0098  , df_denom=127, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=1.3811  , p=0.2551  , df_denom=124, df_num=2
ssr based chi2 test:   chi2=2.8736  , p=0.2377  , df=2
likelihood ratio test: chi2=2.8421  , p=0.2415  , df=2
parameter F test:         F=1.3811  , p=0.2551  , df_denom=124, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=0.8200  , p=0.4852  , df_denom=121, df_num=3
ssr based chi2 test:   chi2=2.6025  , p=0.4571  , df=3
likelihood ratio test: chi2=2.5764  , p=0.4616  , df=3
parameter F test:         F=0.8200  , p=0.4852  , df_denom=121, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=1.1038  , p=0.3581  