In [1]:
import pandas as pd
import numpy as np

import pyspi
from pyspi.calculator import Calculator

import os
from copy import deepcopy

In [2]:
data_path="/headnode1/abry4213/data/Cogitate_MEG_challenge"

TS_data_path=f"{data_path}/derivatives/MEG_time_series/sub-CB050/ses-1/meg"

In [9]:
ROI_lookup = {"proc-0": "Category_Selective",
              "proc-1": "GNWT",
              "proc-2": "IIT"}

In [3]:
sample_TS_data_list = []

for TS_file in os.listdir(TS_data_path):
    subject_ID = TS_file.split("_")[0]
    stimulus_type = TS_file.split("desc-")[1].split("_")[0]
    relevance_type = TS_file.split(f"{stimulus_type}_")[1].split("_")[0]
    duration = TS_file.split(f"{relevance_type}_")[1].split("_")[0]
    meta_ROI = TS_file.split(f"{duration}_")[1].split("_meta")[0]
    frequency_band = TS_file.split("freq_")[1].split("_TS")[0]

    TS_data = (pd.read_csv(f"{TS_data_path}/{TS_file}")
               .assign(subject_ID=subject_ID,
                       stimulus_type=stimulus_type,
                       relevance_type=relevance_type,
                       duration=duration,
                       meta_ROI=meta_ROI,
                       frequency_band=frequency_band))

    sample_TS_data_list.append(TS_data)

sample_TS_data = pd.concat(sample_TS_data_list)
sample_TS_data['duration'] = sample_TS_data['duration'].str.replace('ms', '').astype(int)/1000
sample_TS_data.head()

Unnamed: 0,times,data,subject_ID,stimulus_type,relevance_type,duration,meta_ROI,frequency_band
0,-0.5,0.549058,sub-CB050,False,Irrelevant,1.0,IIT,beta
1,-0.499,0.563103,sub-CB050,False,Irrelevant,1.0,IIT,beta
2,-0.498,0.577039,sub-CB050,False,Irrelevant,1.0,IIT,beta
3,-0.497,0.590849,sub-CB050,False,Irrelevant,1.0,IIT,beta
4,-0.496,0.604514,sub-CB050,False,Irrelevant,1.0,IIT,beta


In [4]:
# Filter times to the duration range
sample_TS_data = sample_TS_data.query('times >= 0.0 and times <= @sample_TS_data.duration')

# Create list of dataframes for each stimulus_type, relevance_type, duration, and frequency_band
sample_TS_data_list = []

for stimulus_type in sample_TS_data['stimulus_type'].unique():
    for relevance_type in sample_TS_data['relevance_type'].unique():
        for duration in sample_TS_data['duration'].unique():
            for frequency_band in sample_TS_data['frequency_band'].unique():
                TS_data = sample_TS_data.query('stimulus_type == @stimulus_type and relevance_type == @relevance_type and duration == @duration and frequency_band == @frequency_band')
                sample_TS_data_list.append(TS_data)

In [12]:
def run_pyspi_for_df(df):
    # Pivot so that the columns are meta_ROI and the rows are data
    df_wide = df.pivot(index='meta_ROI', columns='times', values='data')

    # Convert to numpy array
    TS_array = df_wide.to_numpy()

    calc = Calculator(dataset=TS_array, subset='sonnet')
    calc.compute()

    SPI_res = deepcopy(calc.table)

    # Iterate over each SPI
    SPI_res.columns = SPI_res.columns.to_flat_index()

    SPI_res = SPI_res.rename(columns='__'.join).assign(meta_ROI_from = lambda x: x.index)
    SPI_res_long = SPI_res.melt(id_vars='meta_ROI_from', var_name='SPI__meta_ROI_to', value_name='value')

    SPI_res_long["SPI"] = SPI_res_long["SPI__meta_ROI_to"].str.split("__").str[0]
    SPI_res_long["meta_ROI_to"] = SPI_res_long["SPI__meta_ROI_to"].str.split("__").str[1]

    SPI_res_long = (SPI_res_long
                    .drop(columns='SPI__meta_ROI_to')
                    .query('meta_ROI_from != meta_ROI_to')
                    .assign(meta_ROI_from = lambda x: x['meta_ROI_from'].map(ROI_lookup),
                            meta_ROI_to = lambda x: x['meta_ROI_to'].map(ROI_lookup))
                    .filter(items=['SPI', 'meta_ROI_from', 'meta_ROI_to', 'value'])
    )

    return SPI_res_long

In [17]:
example_df = sample_TS_data_list[0]

# Pivot so that the columns are meta_ROI and the rows are data
df_wide = example_df.pivot(index='times', columns='meta_ROI', values='data')

test_pyspi_res.to_csv("sub-CB050__face_stimulus__relevant__1000ms__alpha_pyspi_res.csv")

In [13]:
test_pyspi_res = run_pyspi_for_df(sample_TS_data_list[0])

Frequency minimum set to 0; overriding to 1e-5.
Processing [None: bary_dtw_mean]:  14%|█▍        | 2/14 [00:00<00:00, 15.48it/s]         

Loading configuration file: /headnode1/abry4213/.conda/envs/pyspi/lib/python3.9/site-packages/pyspi/sonnet_config.yaml
*** Importing module .statistics.basic
[0] Adding SPI .statistics.basic.Covariance(x,y,{'estimator': 'EmpiricalCovariance'})...
Succesfully initialised SPI with identifier "cov_EmpiricalCovariance" and labels ['basic', 'unordered', 'linear', 'undirected', 'signed']
*** Importing module .statistics.distance
[1] Adding SPI .statistics.distance.DynamicTimeWarping(x,y,{'global_constraint': 'itakura'})...
Succesfully initialised SPI with identifier "dtw_constraint-itakura" and labels ['unsigned', 'distance', 'temporal', 'undirected', 'nonlinear']
[2] Adding SPI .statistics.distance.Barycenter(x,y,{'mode': 'dtw', 'statistic': 'mean'})...
Succesfully initialised SPI with identifier "bary_dtw_mean" and labels ['distance', 'signed', 'undirected', 'temporal', 'nonlinear']
*** Importing module .statistics.causal
[3] Adding SPI .statistics.causal.AdditiveNoiseModel(x,y)...
Succesf

Processing [None: cohmag_multitaper_mean_fs-1_fmin-0_fmax-0-5]:  50%|█████     | 7/14 [00:09<00:08,  1.22s/it]Mean of empty slice
Processing [None: sgc_nonparametric_mean_fs-1_fmin-0_fmax-0-5]:  50%|█████     | 7/14 [00:09<00:08,  1.22s/it]Mean of empty slice
Processing [None: pec]: 100%|██████████| 14/14 [00:10<00:00,  1.32it/s]                                           


In [36]:
# Separate out: 
# stimulus_type is face
# relevance_type is Relevant non-target
# duration is 0.5 
# meta_ROI is IIT 
# frequency_band is alpha

example_TS_data = sample_TS_data.query('stimulus_type == "face" & relevance_type == "Relevant non-target" and duration == 0.5 and frequency_band == "alpha"')

# Pivot so that the columns are meta_ROI and the rows are data
example_TS_data = example_TS_data.pivot(index='meta_ROI', columns='times', values='data')

# Convert to numpy array
example_TS_array = example_TS_data.to_numpy()

In [13]:
np.save("example_array.npy", TS_array)

In [37]:
calc = Calculator(dataset=example_TS_array)
calc.compute()

Loading configuration file: /headnode1/abry4213/.conda/envs/pyspi_new/lib/python3.10/site-packages/pyspi/config.yaml


Frequency minimum set to 0; overriding to 1e-5.


*** Importing module .statistics.basic
[0] Adding SPI .statistics.basic.Covariance(x,y,{'estimator': 'EmpiricalCovariance'})
Succesfully initialised SPI with identifier "cov_EmpiricalCovariance" and labels ['basic', 'unordered', 'linear', 'undirected', 'signed']
[1] Adding SPI .statistics.basic.Covariance(x,y,{'estimator': 'EllipticEnvelope'})
Succesfully initialised SPI with identifier "cov_EllipticEnvelope" and labels ['basic', 'unordered', 'linear', 'undirected', 'signed']
[2] Adding SPI .statistics.basic.Covariance(x,y,{'estimator': 'GraphicalLasso'})
Succesfully initialised SPI with identifier "cov_GraphicalLasso" and labels ['basic', 'unordered', 'linear', 'undirected', 'signed']
[3] Adding SPI .statistics.basic.Covariance(x,y,{'estimator': 'GraphicalLassoCV'})
Succesfully initialised SPI with identifier "cov_GraphicalLassoCV" and labels ['basic', 'unordered', 'linear', 'undirected', 'signed']
[4] Adding SPI .statistics.basic.Covariance(x,y,{'estimator': 'LedoitWolf'})
Succesfull

Processing [None: bary_sgddtw_mean]:  23%|██▎       | 66/284 [00:21<01:20,  2.72it/s]              DBA loss is increasing while it should not be. Stopping optimization.
Processing [None: ccm_E-None_mean]:  30%|██▉       | 85/284 [00:45<00:40,  4.95it/s]       IOStream.flush timed out
Processing [None: cce_gaussian]:  33%|███▎      | 95/284 [03:31<28:37,  9.09s/it]   Caught <java class 'infodynamics.utils.NonPositiveDefiniteMatrixException'> for SPI "cce_gaussian": infodynamics.utils.NonPositiveDefiniteMatrixException: CholeskyDecomposition is only performed on positive-definite matrices. Some reasons for non-positive-definite matrix are listed at http://www2.gsu.edu/~mkteer/npdmatri.html - note: a correlation matrix is non-positive-definite if you have more variables than observations. Failed row is 3
Processing [None: xme_gaussian_k10]:  38%|███▊      | 108/284 [03:31<11:35,  3.95s/it]   Caught <java class 'infodynamics.utils.NonPositiveDefiniteMatrixException'> for SPI "xme_gaussian_


Calculation complete. Time taken: 248.8387s

SPI Computation Results Summary

Total number of SPIs attempted: 284
Number of SPIs successfully computed: 275 (96.83%)
------------------------------------------------------------
Category       | Count | Percentage
------------------------------------------------------------
Successful     |   275 |  96.83%
NaNs           |     9 |   3.17%
Partial NaNs   |     0 |   0.00%
------------------------------------------------------------

[9] SPI(s) produced NaN outputs:
------------------------------------------------------------
1. cce_gaussian
2. xme_gaussian_k10
3. di_gaussian
4. sgc_parametric_mean_fs-1_fmin-0_fmax-0-5_order-None
5. sgc_parametric_mean_fs-1_fmin-0_fmax-0-25_order-None
6. sgc_parametric_mean_fs-1_fmin-0-25_fmax-0-5_order-None
7. sgc_parametric_max_fs-1_fmin-1e-05_fmax-0-5_order-None
8. sgc_parametric_max_fs-1_fmin-0_fmax-0-25_order-None
9. sgc_parametric_max_fs-1_fmin-0-25_fmax-0-5_order-None
-------------------------------

In [67]:
example_res = deepcopy(calc.table)

# Iterate over each SPI
example_res.columns = example_res.columns.to_flat_index()

example_res = example_res.rename(columns='__'.join).assign(meta_ROI_from = lambda x: x.index)

example_res_long = example_res.melt(id_vars='meta_ROI_from', var_name='SPI__meta_ROI_to', value_name='value')

example_res_long["SPI"] = example_res_long["SPI__meta_ROI_to"].str.split("__").str[0]
example_res_long["meta_ROI_to"] = example_res_long["SPI__meta_ROI_to"].str.split("__").str[1]

In [None]:

# Iterate over each SPI
example_res.columns = example_res.columns.to_flat_index()

# Rename index as first brain region
# example_res = example_res.rename(columns={"index": "meta_ROI_from"})

example_res.columns = ["_".join(a) for a in example_res.columns.to_flat_index()]
example_res.columns


In [22]:


# Convert index to column
example_res.reset_index(level=0, inplace=True)

# Rename index as first brain region
example_res = example_res.rename(columns={"index": "meta_ROI_from"})

example_res.head()

ValueError: cannot insert level_0, already exists

In [19]:
# Pivot data from wide to long
example_res_long = pd.melt(example_res, id_vars="meta_ROI_from")

In [18]:

# Pivot data from wide to long
example_res_long = pd.melt(example_res, id_vars="meta_ROI_from")
example_res_long['SPI'], example_res_long['meta_ROI_to'] = example_res_long.variable.str

# Remove variable column
example_res_long = example_res_long.drop("variable", 1)

TypeError: 'StringMethods' object is not iterable