In [2]:
import pandas as pd
import numpy as np
from os import path as op
import os
from pyspi.calculator import Calculator
from copy import deepcopy

In [9]:
# Root of the BIDS dataset; both feature directories live under <root>/derivatives
bids_root = "/headnode1/abry4213/data/Cogitate_Batch2/MEG_Data/"
averaged_epochs_dir, individual_epochs_dir = (
    op.join(bids_root, "derivatives", feature_subdir)
    for feature_subdir in ("time_series_features/averaged_epochs",
                           "time_series_features/individual_epochs")
)

In [4]:
# Directory listings for the two feature folders
averaged_epochs_files = os.listdir(averaged_epochs_dir)
individual_epochs_files = os.listdir(individual_epochs_dir)

# The subject label is whatever precedes the first underscore in each file name
averaged_epochs_subjects = [file_name.partition("_")[0] for file_name in averaged_epochs_files]
individual_epochs_subjects = [file_name.partition("_")[0] for file_name in individual_epochs_files]

# Subjects that have individual-epoch files but no averaged-epoch file yet
missing_subjects = list(set(individual_epochs_subjects) - set(averaged_epochs_subjects))

In [5]:
# Subject / session being processed and the analysis options for this run
subject_id = "CB042"
region_option = "hypothesis_driven"
visit_id = "1"
record = "run"
# String label (with the 'ms' suffix) used in both the input and output file names
duration = '1000ms'

# Directory holding the extracted MEG time series that are read as input
time_series_path = op.join(bids_root, "derivatives", "MEG_time_series")
# Directory the pyspi feature tables are written to
output_feature_path = op.join(bids_root, "derivatives", "time_series_features/averaged_epochs")

# Define ROI lookup table: pyspi process label -> meta-ROI name
if region_option == "hypothesis_driven":
    ROI_lookup = {"proc-0": "Category_Selective",
                  "proc-1": "GNWT",
                  "proc-2": "IIT"}
else:
    # Fail here rather than leaving ROI_lookup undefined (or silently reusing
    # a value left in the kernel by an earlier run with a different option).
    raise ValueError(
        f"Unsupported region_option {region_option!r}: no ROI lookup table is defined for it"
    )

In [6]:
# Iterate over all the time-series files for this subject
sample_TS_data_list = []

sample_TS_data=pd.read_csv(f"{time_series_path}/sub-{subject_id}_ses-{visit_id}_meg_{duration}_all_time_series.csv")
sample_TS_data['duration'] = sample_TS_data['duration'].str.replace('ms', '').astype(int)
sample_TS_data['times'] = np.round(sample_TS_data['times']*1000)
sample_TS_data['times'] = sample_TS_data['times'].astype(int)

In [7]:
# Filter times >= 0. The explicit copy yields an independent frame, so adding
# the 'stimulus' column below does not raise pandas' SettingWithCopyWarning.
sample_TS_data = sample_TS_data.query('times >= 0').copy()

# Assign stimulus as on if times < duration and off if times >= duration
sample_TS_data['stimulus'] = np.where(sample_TS_data['times'] < sample_TS_data['duration'], 'on', 'off')

# Build a single list holding one dataframe per combination of stimulus_type,
# relevance_type, duration and presentation state ('on' while the stimulus is
# shown, 'off' afterwards). Combinations with no rows are reported and skipped.
sample_TS_data_list = []
for stimulus_type in sample_TS_data['stimulus_type'].unique():
    for relevance_type in sample_TS_data['relevance_type'].unique():
        # The loop variable is deliberately not called `duration`: that name
        # holds the string label (e.g. '1000ms') used later to build the output
        # file name, and reusing it here would overwrite it with an integer.
        for duration_ms in sample_TS_data['duration'].unique():
            for stimulus_presentation in ['on', 'off']:
                this_condition_data = sample_TS_data.query('stimulus_type == @stimulus_type and relevance_type == @relevance_type and duration == @duration_ms and stimulus == @stimulus_presentation')
                if this_condition_data.empty:
                    print(f"Missing data for {stimulus_type}, {relevance_type}, {duration_ms}, {stimulus_presentation}")
                    continue
                sample_TS_data_list.append(this_condition_data)

Missing data for face, Relevant target, 1000, on
Missing data for face, Relevant target, 1000, off


In [None]:
def run_pyspi_for_df(subject_id, df, calc, roi_lookup=None):
    """Compute pairwise statistics between meta-ROI time series for one condition.

    Parameters
    ----------
    subject_id : str
        Identifier written to the ``subject_ID`` column of the result.
    df : pandas.DataFrame
        Data for a single condition. Must contain ``times``, the meta-ROI
        columns (``Category_Selective``, ``GNWT``, ``IIT``) and the metadata
        columns ``stimulus_type``, ``relevance_type``, ``duration`` and
        ``stimulus``. The first unique value of each metadata column is used
        to label the output. Each ``times`` value must occur once per ROI,
        otherwise the pivot raises.
    calc : object
        Base calculator (a pyspi ``Calculator`` in this notebook). Only
        ``load_dataset``, ``compute`` and ``table`` are used, and only on a
        deep copy, so the object passed in is left untouched.
    roi_lookup : dict, optional
        Mapping from process label (``"proc-0"``, ``"proc-1"``, ...) to
        meta-ROI name. When omitted it is derived from the row order of the
        array handed to the calculator, so labels always match the data rows,
        including when one of the ROI columns is absent from ``df``.
        NOTE(review): this assumes the calculator names processes ``proc-<i>``
        in row order - confirm against the installed pyspi version.

    Returns
    -------
    pandas.DataFrame
        Long-format table with columns ``SPI``, ``meta_ROI_from``,
        ``meta_ROI_to``, ``value``, ``stimulus_type``, ``relevance_type``,
        ``duration``, ``stimulus_presentation`` and ``subject_ID``; rows where
        source and target ROI coincide are dropped.
    """
    # Work on a private copy so the shared base calculator is never mutated
    calc_copy = deepcopy(calc)

    # Reshape to one row per meta-ROI and one column per time point.
    # pivot() sorts the row labels, which fixes the process order below.
    df_wide = (df.filter(items=['times', 'Category_Selective', 'GNWT', 'IIT'])
                 .melt(id_vars='times', var_name='meta_ROI', value_name='data')
                 .pivot(index='meta_ROI', columns='times', values='data'))

    if roi_lookup is None:
        # Row i of the array is process i, so label it with that row's ROI name
        roi_lookup = {f"proc-{position}": roi_name
                      for position, roi_name in enumerate(df_wide.index)}

    # Load the (ROI x time) array and compute all statistics
    calc_copy.load_dataset(df_wide.to_numpy())
    calc_copy.compute()

    # Flatten the two-level column labels to "<SPI>__<process>" strings
    SPI_res = calc_copy.table.copy()
    SPI_res.columns = ['__'.join(label) for label in SPI_res.columns.to_flat_index()]

    SPI_res = SPI_res.assign(meta_ROI_from = lambda x: x.index)
    SPI_res_long = SPI_res.melt(id_vars='meta_ROI_from', var_name='SPI__meta_ROI_to', value_name='value')

    # Split once, from the right: the process label is always the last
    # component, so an SPI name that itself contains "__" stays intact.
    label_parts = SPI_res_long["SPI__meta_ROI_to"].str.rsplit("__", n=1)
    SPI_res_long["SPI"] = label_parts.str[0]
    SPI_res_long["meta_ROI_to"] = label_parts.str[1]

    SPI_res_long = (SPI_res_long
                    .drop(columns='SPI__meta_ROI_to')
                    # Drop self-pairs before translating process labels to ROI names
                    .query('meta_ROI_from != meta_ROI_to')
                    .assign(meta_ROI_from = lambda x: x['meta_ROI_from'].map(roi_lookup),
                            meta_ROI_to = lambda x: x['meta_ROI_to'].map(roi_lookup))
                    .filter(items=['SPI', 'meta_ROI_from', 'meta_ROI_to', 'value'])
                    .assign(stimulus_type = df['stimulus_type'].unique()[0],
                            relevance_type = df['relevance_type'].unique()[0],
                            duration = df['duration'].unique()[0],
                            stimulus_presentation = df['stimulus'].unique()[0],
                            subject_ID = subject_id)
    )

    return SPI_res_long
# Initialise an empty list for the results
pyspi_res_list = []

# Initialise a base calculator; run_pyspi_for_df deep-copies it for every
# condition, so this single instance is reused without being modified
calc = Calculator(subset='fast')

# Run for every per-condition dataframe
for dataframe in sample_TS_data_list:
    # Each frame holds a single presentation state ('on' or 'off'); label the
    # results with that actual state instead of a hard-coded "on".
    presentation_state = dataframe['stimulus'].iloc[0]
    dataframe_pyspi = run_pyspi_for_df(subject_id, dataframe, calc).assign(stimulus = presentation_state)
    pyspi_res_list.append(dataframe_pyspi)

# Concatenate the results and save to a CSV file.
# drop=True discards the per-frame row labels rather than writing them out as
# a meaningless 'index' column.
all_pyspi_res = pd.concat(pyspi_res_list).reset_index(drop=True)
all_pyspi_res.to_csv(f"{output_feature_path}/sub-{subject_id}_ses-{visit_id}_all_pyspi_results_{duration}.csv", index=False)

In [12]:
# One-off clean-up: append the 'ms' unit to result files whose name ends in a
# bare duration (e.g. '..._1000.csv' -> '..._1000ms.csv').
# Only '.csv' files that do not already end in 'ms.csv' are touched, so
# re-running this cell is a no-op instead of producing '...msms.csv'.
for file_name in os.listdir(averaged_epochs_dir):
    if not file_name.endswith(".csv") or file_name.endswith("ms.csv"):
        continue
    # Replace only the trailing extension, never an earlier '.csv' in the name
    renamed_file = f"{file_name[:-len('.csv')]}ms.csv"
    os.rename(f"{averaged_epochs_dir}/{file_name}", f"{averaged_epochs_dir}/{renamed_file}")