In [9]:
import sys
sys.path.append('/app')


import timecorr as tc
from timecorr.helpers import isfc, wisfc, mean_combine, corrmean_combine
from scipy.io import loadmat
import numpy as np
import os
import pandas as pd


data_dir = os.path.join(os.getcwd(), '..', '..','data')  # Set 'data' as the base directory


#Load datasets in to working memory.
pieman_conds = ['intact', 'paragraph', 'word', 'rest']
pieman_100 = loadmat(os.path.join(data_dir, 'pieman_ica100.mat'))
pieman_700 = loadmat(os.path.join(data_dir, 'pieman_data.mat'))

len(pieman_700)

7

In [11]:
# Remove bad data from the datafiles, save as numpy array
pieman_100_data_conditioned = []
pieman_700_data_conditioned = []
conds_100 = []
conds_700 = []

def subjects_from_cell(A):
    # A: ndarray(dtype=object, shape=(N,)) where each elem is (T, F)
    return [np.asarray(x).squeeze() for x in A.ravel()]

for c in pieman_conds:
    print(c)

    # --- 700 set: cell array of subjects, each (T, 700) ---
    subs_700 = subjects_from_cell(pieman_700[c])
    if c == 'paragraph' and len(subs_700) > 3:
        # drop subject index 3 (0-based) for paragraph
        subs_700 = [s for i, s in enumerate(subs_700) if i != 3]

    # --- 100 set: may be cell or numeric; normalize to per-subject arrays ---
    A100 = pieman_100[c]
    if isinstance(A100, np.ndarray) and A100.dtype == object:
        subs_100 = subjects_from_cell(A100)      # list of (T, 100)
    else:
        A100 = np.asarray(A100)
        # Common layout: time x subjects (T x N)
        if A100.ndim == 2:
            subs_100 = [A100[:, i].squeeze() for i in range(A100.shape[1])]
        elif A100.ndim == 3:
            subs_100 = [A100[..., i].squeeze() for i in range(A100.shape[-1])]
        else:
            raise ValueError(f"Unexpected 100-set shape for {c}: {A100.shape}")

    if c == 'paragraph' and len(subs_100) > 0:
        # drop subject index 0 (0-based) for paragraph
        subs_100 = [s for i, s in enumerate(subs_100) if i != 0]

    # accumulate
    pieman_700_data_conditioned.extend(subs_700)
    pieman_100_data_conditioned.extend(subs_100)
    conds_700.extend([c] * len(subs_700))
    conds_100.extend([c] * len(subs_100))

# Convert AFTER the loop (preserve per-subject arrays -> object dtype)
pieman_700_data_conditioned = np.array(pieman_700_data_conditioned, dtype=object)  # each (T, 700)
pieman_100_data_conditioned = np.array(pieman_100_data_conditioned, dtype=object)  # each (T, 100)
conds_700 = np.array(conds_700)
conds_100 = np.array(conds_100)


intact
paragraph
word
rest


In [16]:
import numpy as np
from pathlib import Path
import os

out_dir = Path(os.getcwd(), '..', '..', 'data') / "initial_data"
out_dir.mkdir(parents=True, exist_ok=True)

for cond in pieman_conds: 
    # 100-node subjects for this condition
    idx_100 = np.where(conds_100 == cond)[0]
    subs_100 = pieman_100_data_conditioned[idx_100]
    np.save(out_dir / f"pieman_data_100_{cond}.npy", subs_100, allow_pickle=True)

    # 700-node subjects for this condition
    idx_700 = np.where(conds_700 == cond)[0]
    subs_700 = pieman_700_data_conditioned[idx_700]
    np.save(out_dir / f"pieman_data_700_{cond}.npy", subs_700, allow_pickle=True)

print("Saved 8 files to", out_dir)


Saved 8 files to /app/Scripts/Old/../../data/initial_data


In [14]:
print(os.getcwd())

/app/Scripts/Old


In [15]:
import numpy as np
from pathlib import Path

data_dir = Path("/app/data/initial_data")

# loop over all 8 expected files
for node in [100, 700]:
    for cond in ["intact", "paragraph", "word", "rest"]:
        fname = data_dir / f"pieman_{node}_{cond}.npy"
        arr = np.load(fname, allow_pickle=True)
        n_subs = len(arr)
        shapes = {s.shape for s in arr}
        print(f"{fname.name}: {n_subs} subjects, shapes {shapes}")

        # sanity: compare first two subjects if >1
        if n_subs > 1:
            diff = np.max(np.abs(arr[0] - arr[1]))
            print(f"   max|sub0-sub1| = {diff:.4f}")


pieman_100_intact.npy: 36 subjects, shapes {(300, 100)}
   max|sub0-sub1| = 0.2657
pieman_100_paragraph.npy: 17 subjects, shapes {(272, 100)}
   max|sub0-sub1| = 0.3116
pieman_100_word.npy: 36 subjects, shapes {(300, 100)}
   max|sub0-sub1| = 0.1860
pieman_100_rest.npy: 36 subjects, shapes {(400, 100)}
   max|sub0-sub1| = 0.3673
pieman_700_intact.npy: 36 subjects, shapes {(300, 700)}
   max|sub0-sub1| = 8.9610
pieman_700_paragraph.npy: 17 subjects, shapes {(272, 700)}
   max|sub0-sub1| = 10.0279
pieman_700_word.npy: 36 subjects, shapes {(300, 700)}
   max|sub0-sub1| = 11.0626
pieman_700_rest.npy: 36 subjects, shapes {(400, 700)}
   max|sub0-sub1| = 10.7606
