In [1]:
import pandas as pd
import numpy as np
import pyspi
from pyspi.calculator import Calculator
import os
from copy import deepcopy
import glob
import mne_connectivity
import mne

In [15]:
# Build the template pyspi calculator once, restricted to the 'fast' SPI subset.
# run_pyspi_for_df deep-copies this object for every dataset it processes.
calc = Calculator(subset="fast")

Loading configuration file: /headnode1/abry4213/.conda/envs/pyspi/lib/python3.9/site-packages/pyspi/fast_config.yaml
*** Importing module .statistics.basic
[0] Adding SPI .statistics.basic.Covariance(x,y,{'estimator': 'EmpiricalCovariance'})...
Succesfully initialised SPI with identifier "cov_EmpiricalCovariance" and labels ['basic', 'unordered', 'linear', 'undirected', 'signed']
[1] Adding SPI .statistics.basic.Covariance(x,y,{'estimator': 'GraphicalLasso'})...
Succesfully initialised SPI with identifier "cov_GraphicalLasso" and labels ['basic', 'unordered', 'linear', 'undirected', 'signed']
[2] Adding SPI .statistics.basic.Covariance(x,y,{'estimator': 'GraphicalLassoCV'})...
Succesfully initialised SPI with identifier "cov_GraphicalLassoCV" and labels ['basic', 'unordered', 'linear', 'undirected', 'signed']
[3] Adding SPI .statistics.basic.Covariance(x,y,{'estimator': 'LedoitWolf'})...
Succesfully initialised SPI with identifier "cov_LedoitWolf" and labels ['basic', 'unordered', 'lin

Frequency minimum set to 0; overriding to 1e-5.


In [16]:
data_path = "/headnode1/abry4213/data/Cogitate_MEG_challenge/derivatives/MEG_time_series/"
subject_id = "sub-CB042"

# Read the subject's time series and normalise two columns in one pass:
#   duration: strip the 'ms' suffix and cast to int
#   times:    multiply by 1000, round, and cast to int
#             (presumably seconds -> milliseconds; confirm against the CSV)
# NOTE(review): the variable name mentions CA103 but the subject loaded is
# whatever `subject_id` is set to above.
sub_CA103_test_data = (
    pd.read_csv(f"{data_path}/{subject_id}_ses-1_meg_1000ms_all_time_series.csv")
    .assign(
        duration=lambda frame: frame['duration'].str.replace('ms', '').astype(int),
        times=lambda frame: np.round(frame['times'] * 1000).astype(int),
    )
)

In [17]:
# Translate the process labels found in the pyspi results table back to
# meta-ROI names. run_pyspi_for_df pivots on 'meta_ROI', which orders the rows
# alphabetically (Category_Selective, GNWT, IIT).
# NOTE(review): assumes pyspi labels row i of the input array "proc-i" -- confirm.
ROI_lookup = dict(zip(
    ("proc-0", "proc-1", "proc-2"),
    ("Category_Selective", "GNWT", "IIT"),
))

In [18]:
# Split the samples into two windows relative to each row's own stimulus duration:
#   onset:  0 <= times < duration
#   offset: times >= duration
sample_TS_data_onset = sub_CA103_test_data.query('times >= 0 and times < @sub_CA103_test_data.duration')
sample_TS_data_offset = sub_CA103_test_data.query('times >= @sub_CA103_test_data.duration')

# One data frame per (stimulus_type, relevance_type, duration) combination.
# Combinations with no rows are reported and skipped rather than appended:
# run_pyspi_for_df cannot process an empty frame.
on_sample_TS_data_list = []
off_sample_TS_data_list = []

# Only the 1000 ms condition is analysed for now; swap in
# sample_TS_data_onset['duration'].unique() to cover every duration.
durations_to_run = [1000]

for stimulus_type in sample_TS_data_onset['stimulus_type'].unique():
    for relevance_type in sample_TS_data_onset['relevance_type'].unique():
        for duration in durations_to_run:
            # The same condition filter is applied to both windows
            condition = 'stimulus_type == @stimulus_type and relevance_type == @relevance_type and duration == @duration'

            TS_data_on = sample_TS_data_onset.query(condition)
            if TS_data_on.empty:
                print(f"Missing data for {stimulus_type}, {relevance_type}, {duration}")
            else:
                on_sample_TS_data_list.append(TS_data_on)

            TS_data_off = sample_TS_data_offset.query(condition)
            if TS_data_off.empty:
                print(f"Missing offset data for {stimulus_type}, {relevance_type}, {duration}")
            else:
                off_sample_TS_data_list.append(TS_data_off)

Missing data for face, Relevant target, 1000


In [13]:
def run_pyspi_for_df(subject_id, df, calc, roi_lookup=None):
    """Compute pyspi statistics between the meta-ROI time series of one condition.

    Parameters
    ----------
    subject_id : str
        Identifier written to the ``subject_ID`` column of the result.
    df : pandas.DataFrame
        Time series for a single condition. Must contain the columns
        ``times``, ``Category_Selective``, ``GNWT``, ``IIT``,
        ``stimulus_type``, ``relevance_type`` and ``duration``. Each value of
        ``times`` must occur only once, otherwise the pivot raises.
    calc : pyspi Calculator
        Template calculator. It is deep-copied, so the caller's instance is
        never loaded with data and can be reused for the next condition.
    roi_lookup : dict, optional
        Maps the process labels of the pyspi results table (e.g. ``"proc-0"``)
        to meta-ROI names. Defaults to the notebook-level ``ROI_lookup``.

    Returns
    -------
    pandas.DataFrame
        Long-format table with columns ``SPI``, ``meta_ROI_from``,
        ``meta_ROI_to``, ``value``, ``stimulus_type``, ``relevance_type``,
        ``duration`` and ``subject_ID``; self-pairs (from == to) are removed.

    Raises
    ------
    ValueError
        If ``df`` has no rows.
    """
    # Fail early with a clear message instead of an opaque error further down
    if df.empty:
        raise ValueError("run_pyspi_for_df received an empty data frame; nothing to compute")

    if roi_lookup is None:
        roi_lookup = ROI_lookup

    # Work on a private copy so the template calculator stays unloaded
    calc_copy = deepcopy(calc)

    # Reshape to (meta_ROI x time): one row per ROI, one column per time point.
    # pivot sorts the index, so rows come out alphabetically by ROI name.
    df_wide = (df.filter(items=['times', 'Category_Selective', 'GNWT', 'IIT'])
                 .melt(id_vars='times', var_name='meta_ROI', value_name='data')
                 .pivot(index='meta_ROI', columns='times', values='data'))

    # Load the array and compute every SPI
    calc_copy.load_dataset(df_wide.to_numpy())
    calc_copy.compute()

    SPI_res = deepcopy(calc_copy.table)

    # Flatten the two-level (SPI, process) columns to "SPI__process" labels and
    # expose the row labels as a column so the table can be melted to long form
    SPI_res.columns = SPI_res.columns.to_flat_index()
    SPI_res = SPI_res.rename(columns='__'.join).assign(meta_ROI_from=lambda x: x.index)
    SPI_res_long = SPI_res.melt(id_vars='meta_ROI_from', var_name='SPI__meta_ROI_to', value_name='value')

    # Split once, from the right: the process label is always the last
    # component, even if an SPI identifier were to contain "__" itself
    label_parts = SPI_res_long["SPI__meta_ROI_to"].str.rsplit("__", n=1)
    SPI_res_long["SPI"] = label_parts.str[0]
    SPI_res_long["meta_ROI_to"] = label_parts.str[1]

    SPI_res_long = (SPI_res_long
                    .drop(columns='SPI__meta_ROI_to')
                    .query('meta_ROI_from != meta_ROI_to')
                    .assign(meta_ROI_from=lambda x: x['meta_ROI_from'].map(roi_lookup),
                            meta_ROI_to=lambda x: x['meta_ROI_to'].map(roi_lookup))
                    .filter(items=['SPI', 'meta_ROI_from', 'meta_ROI_to', 'value'])
                    # Condition metadata is taken from the first row; df is
                    # expected to hold a single condition
                    .assign(stimulus_type=df['stimulus_type'].iloc[0],
                            relevance_type=df['relevance_type'].iloc[0],
                            duration=df['duration'].iloc[0],
                            subject_ID=subject_id)
    )

    return SPI_res_long

In [14]:
# Containers for the per-condition pyspi results ("on" and "off" windows)
on_data_pyspi_list, off_data_pyspi_list = [], []

# Stimulus-on window.
# NOTE(review): the slice limits this run to the first condition only --
# presumably a quick trial run; confirm before relying on on_data_pyspi_res.
for on_data in on_sample_TS_data_list[:1]:
    on_data_pyspi = run_pyspi_for_df(subject_id, on_data, calc).assign(stimulus="on")
    on_data_pyspi_list += [on_data_pyspi]

# Stack the per-condition tables into a single frame
on_data_pyspi_res = pd.concat(on_data_pyspi_list)


Processing [None: cov_GraphicalLassoCV]:   0%|          | 0/215 [00:00<?, ?it/s]   

Processing [None: phase_multitaper_mean_fs-1_fmin-0_fmax-0-5]:  33%|███▎      | 72/215 [00:07<00:20,  6.84it/s]Mean of empty slice
Processing [None: phase_multitaper_max_fs-1_fmin-0_fmax-0-5]:  33%|███▎      | 72/215 [00:07<00:20,  6.84it/s]    All-NaN slice encountered
Processing [None: sgc_nonparametric_mean_fs-1_fmin-0_fmax-0-5]:  72%|███████▏  | 155/215 [00:13<00:10,  5.57it/s]   Mean of empty slice
Processing [None: sgc_nonparametric_max_fs-1_fmin-0_fmax-0-5]:  76%|███████▋  | 164/215 [00:13<00:07,  6.80it/s]    All-NaN slice encountered
Processing [None: pec_orth_log_abs]: 100%|██████████| 215/215 [00:14<00:00, 15.10it/s]                                      


In [None]:

# Run for "off" data (samples at or after stimulus offset)
# Start from a fresh list so re-running this cell does not duplicate results.
off_data_pyspi_list = []
for off_data in off_sample_TS_data_list:
    # Conditions without any post-stimulus samples cannot be analysed
    if off_data.empty:
        continue
    # subject_id is the first positional argument of run_pyspi_for_df
    off_data_pyspi = run_pyspi_for_df(subject_id, off_data, calc).assign(stimulus = "off")
    off_data_pyspi_list.append(off_data_pyspi)
off_data_pyspi_res = pd.concat(off_data_pyspi_list)