In [14]:
import pandas as pd
import numpy as np
import pyspi
from pyspi.calculator import Calculator
import os
from copy import deepcopy
import seaborn as sns
import glob

In [10]:
data_path="/headnode1/abry4213/data/Cogitate_MEG_challenge"

TS_data_path=f"{data_path}/derivatives/MEG_time_series/sub-CA112/ses-1/meg"

In [11]:
ROI_lookup = {"proc-0": "Category_Selective",
              "proc-1": "GNWT",
              "proc-2": "IIT"}

In [17]:
sample_TS_data_list = []
os.chdir(TS_data_path)
for TS_file in glob.glob("*.csv"):
    subject_ID = TS_file.split("_")[0]
    stimulus_type = TS_file.split("desc-")[1].split("_")[0]
    relevance_type = TS_file.split(f"{stimulus_type}_")[1].split("_")[0]
    duration = TS_file.split(f"{relevance_type}_")[1].split("_")[0]
    meta_ROI = TS_file.split(f"{duration}_")[1].split("_meta")[0]
    frequency_band = TS_file.split("freq_")[1].split("_TS")[0]

    TS_data = (pd.read_csv(f"{TS_file}")
               .assign(subject_ID=subject_ID,
                       stimulus_type=stimulus_type,
                       relevance_type=relevance_type,
                       duration=duration,
                       meta_ROI=meta_ROI,
                       frequency_band=frequency_band))

    sample_TS_data_list.append(TS_data)

sample_TS_data = pd.concat(sample_TS_data_list)
sample_TS_data['duration'] = sample_TS_data['duration'].str.replace('ms', '').astype(int)/1000

In [13]:
TS_file

'sub-CA112_ses-1_task-dur_desc-labels_hypothesis_driven.txt'

In [9]:
# Filter times to the duration range
sample_TS_data_onset = sample_TS_data.query('times >= 0.0 and times < @sample_TS_data.duration')
sample_TS_data_offset = sample_TS_data.query('times >= @sample_TS_data.duration')

# Create list of dataframes for each stimulus_type, relevance_type, duration, and frequency_band
on_sample_TS_data_list = []
off_sample_TS_data_list = []

for stimulus_type in sample_TS_data_onset['stimulus_type'].unique():
    for relevance_type in sample_TS_data_onset['relevance_type'].unique():
        for duration in [1.0]:
        # for duration in sample_TS_data_onset['duration'].unique():
            for frequency_band in sample_TS_data_onset['frequency_band'].unique():
                TS_data_on = sample_TS_data_onset.query('stimulus_type == @stimulus_type and relevance_type == @relevance_type and duration == @duration and frequency_band == @frequency_band')
                if TS_data_on.empty:
                    print(f"Missing data for {stimulus_type}, {relevance_type}, {duration}, {frequency_band}")
                on_sample_TS_data_list.append(TS_data_on)
                TS_data_off = sample_TS_data_offset.query('stimulus_type == @stimulus_type and relevance_type == @relevance_type and duration == @duration and frequency_band == @frequency_band')
                off_sample_TS_data_list.append(TS_data_off)

Missing data for false, Irrelevant, 1.0, gamma
Missing data for false, Irrelevant, 1.0, beta
Missing data for false, Irrelevant, 1.0, alpha


In [6]:
def run_pyspi_for_df(df, calc):
        # Make deepcopy of calc 
        calc_copy = deepcopy(calc)

        # Pivot so that the columns are meta_ROI and the rows are data
        df_wide = df.pivot(index='meta_ROI', columns='times', values='data')

        # Convert to numpy array
        TS_array = df_wide.to_numpy()

        # Load data 
        calc_copy.load_dataset(TS_array)
        calc_copy.compute()

        SPI_res = deepcopy(calc_copy.table)

        # Iterate over each SPI
        SPI_res.columns = SPI_res.columns.to_flat_index()

        SPI_res = SPI_res.rename(columns='__'.join).assign(meta_ROI_from = lambda x: x.index)
        SPI_res_long = SPI_res.melt(id_vars='meta_ROI_from', var_name='SPI__meta_ROI_to', value_name='value')

        SPI_res_long["SPI"] = SPI_res_long["SPI__meta_ROI_to"].str.split("__").str[0]
        SPI_res_long["meta_ROI_to"] = SPI_res_long["SPI__meta_ROI_to"].str.split("__").str[1]

        SPI_res_long = (SPI_res_long
                        .drop(columns='SPI__meta_ROI_to')
                        .query('meta_ROI_from != meta_ROI_to')
                        .assign(meta_ROI_from = lambda x: x['meta_ROI_from'].map(ROI_lookup),
                                meta_ROI_to = lambda x: x['meta_ROI_to'].map(ROI_lookup))
                        .filter(items=['SPI', 'meta_ROI_from', 'meta_ROI_to', 'value'])
                        .assign(stimulus_type = df['stimulus_type'].unique()[0],
                                relevance_type = df['relevance_type'].unique()[0],
                                duration = df['duration'].unique()[0],
                                frequency_band = df['frequency_band'].unique()[0],
                                subject_ID = df['subject_ID'].unique()[0])
        )

        return SPI_res_long

In [8]:
# Find which df in on_sample_TS_data_list is empty
for i in range(len(on_sample_TS_data_list)):
    if on_sample_TS_data_list[i].empty:
        print(i)

18
19
20


In [7]:
# Initialise an empty list for the results
on_data_pyspi_list = []
off_data_pyspi_list = []

# Initialise a base calculator
calc = Calculator(subset='fast')

# Run for "on" data
for on_data in on_sample_TS_data_list:
    on_data_pyspi = run_pyspi_for_df(on_data, calc).assign(stimulus = "on")
    on_data_pyspi_list.append(on_data_pyspi)
on_data_pyspi_res = pd.concat(on_data_pyspi_list)

# Run for "off" data
for off_data in off_sample_TS_data_list:
    off_data_pyspi = run_pyspi_for_df(off_data, calc).assign(stimulus = "off")
    off_data_pyspi_list.append(off_data_pyspi)
off_data_pyspi_res = pd.concat(off_data_pyspi_list)

Loading configuration file: /headnode1/abry4213/.conda/envs/pyspi/lib/python3.9/site-packages/pyspi/fast_config.yaml
*** Importing module .statistics.basic
[0] Adding SPI .statistics.basic.Covariance(x,y,{'estimator': 'EmpiricalCovariance'})...
Succesfully initialised SPI with identifier "cov_EmpiricalCovariance" and labels ['basic', 'unordered', 'linear', 'undirected', 'signed']
[1] Adding SPI .statistics.basic.Covariance(x,y,{'estimator': 'GraphicalLasso'})...
Succesfully initialised SPI with identifier "cov_GraphicalLasso" and labels ['basic', 'unordered', 'linear', 'undirected', 'signed']
[2] Adding SPI .statistics.basic.Covariance(x,y,{'estimator': 'GraphicalLassoCV'})...
Succesfully initialised SPI with identifier "cov_GraphicalLassoCV" and labels ['basic', 'unordered', 'linear', 'undirected', 'signed']
[3] Adding SPI .statistics.basic.Covariance(x,y,{'estimator': 'LedoitWolf'})...
Succesfully initialised SPI with identifier "cov_LedoitWolf" and labels ['basic', 'unordered', 'lin

No GPU automatically detected. Setting SETTINGS.GPU to 0, and SETTINGS.NJOBS to cpu_count.


[45] Adding SPI .statistics.causal.AdditiveNoiseModel(x,y)...
Succesfully initialised SPI with identifier "anm" and labels ['unsigned', 'causal', 'unordered', 'linear', 'directed']
[46] Adding SPI .statistics.causal.ConditionalDistributionSimilarity(x,y)...
Succesfully initialised SPI with identifier "cds" and labels ['unsigned', 'causal', 'unordered', 'nonlinear', 'directed']
[47] Adding SPI .statistics.causal.RegressionErrorCausalInference(x,y)...
Succesfully initialised SPI with identifier "reci" and labels ['unsigned', 'causal', 'unordered', 'nonlinear', 'directed']
[48] Adding SPI .statistics.causal.InformationGeometricConditionalIndependence(x,y)...
Succesfully initialised SPI with identifier "igci" and labels ['causal', 'directed', 'nonlinear', 'unsigned', 'unordered']
*** Importing module .statistics.infotheory
[49] Adding SPI .statistics.infotheory.JointEntropy(x,y,{'estimator': 'gaussian'})...
Succesfully initialised SPI with identifier "je_gaussian" and labels ['unsigned', '

Frequency minimum set to 0; overriding to 1e-5.


[187] Adding SPI .statistics.wavelet.PhaseSlopeIndex(x,y,{'fs': 1})...
Succesfully initialised SPI with identifier "psi_wavelet_mean_fs-1_fmin-0_fmax-0-5_mean" and labels ['unsigned', 'wavelet', 'undirected']
[188] Adding SPI .statistics.wavelet.PhaseSlopeIndex(x,y,{'fmin': 0, 'fmax': 0.25})...
Succesfully initialised SPI with identifier "psi_wavelet_mean_fs-1_fmin-0_fmax-0-25_mean" and labels ['unsigned', 'wavelet', 'undirected']
[189] Adding SPI .statistics.wavelet.PhaseSlopeIndex(x,y,{'fmin': 0.25, 'fmax': 0.5})...
Succesfully initialised SPI with identifier "psi_wavelet_mean_fs-1_fmin-0-25_fmax-0-5_mean" and labels ['unsigned', 'wavelet', 'undirected']
[190] Adding SPI .statistics.wavelet.PhaseSlopeIndex(x,y,{'fmin': 0, 'fmax': 0.5, 'statistic': 'max'})...
Succesfully initialised SPI with identifier "psi_wavelet_max_fs-1_fmin-0_fmax-0-5_max" and labels ['unsigned', 'wavelet', 'undirected']
[191] Adding SPI .statistics.wavelet.PhaseSlopeIndex(x,y,{'fmin': 0, 'fmax': 0.25, 'statistic

pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
pandas.Float64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
Processing [None: cov-sq_LedoitWolf]:   1%|▏         | 3/215 [00:00<00:12, 17.56it/s]         

[193] Adding SPI .statistics.misc.LinearModel(x,y,{'model': 'Ridge'})...
Succesfully initialised SPI with identifier "lmfit_Ridge" and labels ['misc', 'unsigned', 'unordered', 'normal', 'linear', 'directed']
[194] Adding SPI .statistics.misc.LinearModel(x,y,{'model': 'Lasso'})...
Succesfully initialised SPI with identifier "lmfit_Lasso" and labels ['misc', 'unsigned', 'unordered', 'normal', 'linear', 'directed']
[195] Adding SPI .statistics.misc.LinearModel(x,y,{'model': 'SGDRegressor'})...
Succesfully initialised SPI with identifier "lmfit_SGDRegressor" and labels ['misc', 'unsigned', 'unordered', 'normal', 'linear', 'directed']
[196] Adding SPI .statistics.misc.LinearModel(x,y,{'model': 'ElasticNet'})...
Succesfully initialised SPI with identifier "lmfit_ElasticNet" and labels ['misc', 'unsigned', 'unordered', 'normal', 'linear', 'directed']
[197] Adding SPI .statistics.misc.LinearModel(x,y,{'model': 'BayesianRidge'})...
Succesfully initialised SPI with identifier "lmfit_BayesianRidg

Processing [None: xme_gaussian_k10]:  26%|██▌       | 56/215 [00:05<00:27,  5.79it/s]          Caught <java class 'infodynamics.utils.NonPositiveDefiniteMatrixException'> for SPI "xme_gaussian_k10": infodynamics.utils.NonPositiveDefiniteMatrixException: CholeskyDecomposition is only performed on positive-definite matrices. Some reasons for non-positive-definite matrix are listed at http://www2.gsu.edu/~mkteer/npdmatri.html - note: a correlation matrix is non-positive-definite if you have more variables than observations
Processing [None: phase_multitaper_mean_fs-1_fmin-0_fmax-0-5]:  33%|███▎      | 72/215 [00:07<00:18,  7.59it/s]Mean of empty slice
Processing [None: phase_multitaper_max_fs-1_fmin-0_fmax-0-5]:  33%|███▎      | 72/215 [00:07<00:18,  7.59it/s]    All-NaN slice encountered
Mean of empty slice
Processing [None: sgc_nonparametric_max_fs-1_fmin-0_fmax-0-5]:  76%|███████▌  | 163/215 [00:16<00:07,  7.00it/s]    All-NaN slice encountered
Processing [None: sgc_parametric_mean_fs-

ZeroDivisionError: integer division or modulo by zero