In [2]:
import pandas as pd
import numpy as np
import nibabel as nib
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import seaborn as sns
import os
import timecorr as tc
import scipy.stats as scp

from dynamicfc import temporal_smooth, compute_dFC, lm_parallel

# Root of the project tree. Defaults to the original local Box path; set the
# SOCIAL_PROC_PROJECT_FOLDER environment variable to run the notebook elsewhere
# without editing this cell.
project_folder = os.environ.get(
    'SOCIAL_PROC_PROJECT_FOLDER',
    '/Users/catcamacho/Library/CloudStorage/Box-Box/CCP/HBN_study/social_proc_networks')

# every directory used below sits under <project_folder>/dynamic_connectivity/DATA
data_folder = os.path.join(project_folder, 'dynamic_connectivity', 'DATA')
proc_folder = os.path.join(data_folder, 'processed_data')
null_folder = os.path.join(data_folder, 'null_data')
sample_file = os.path.join(data_folder, 'helper_files', 'sample_gord.32k_fs_LR.pscalar.nii')
vid_dir = os.path.join(data_folder, 'video_data')
motion_dir = os.path.join(data_folder, 'motion_data')
beta_dir = os.path.join(data_folder, 'beta_maps')

# Parcel names come from axis 1 of the sample pscalar header (first 333 entries).
parcel_labels = nib.load(sample_file).header.get_axis(1).name[:333]

# Network label = second '_'-separated token of the parcel name when there is one,
# otherwise the parcel name itself.
network_labels = np.array([
    tokens[1] if len(tokens) >= 2 else tokens[0]
    for tokens in (label.split('_') for label in parcel_labels)
])

# unique network names and how many parcels carry each one
network_names, network_sizes = np.unique(network_labels, return_counts=True)

# networks and video features analysed in this notebook
networks_of_interest = [
    'Auditory', 'CinguloOperc', 'Default', 'DorsalAttn', 'FrontoParietal',
    'SMhand', 'SMmouth', 'Salience', 'VentralAttn', 'Visual',
]
features_of_interest = [
    'Positive', 'Negative', 'Anger', 'Happy', 'Fear', 'Sad', 'Excited',
    'Brightness', 'SaliencyFract', 'Sharpness', 'Vibrance', 'Loudness', 'Motion',
]

# Offset between video features and brain/motion data, in TRs (equivalent to 4.8 seconds).
# The analysis cells drop the first `tshift` rows of connectivity and motion and the
# last `tshift` rows of the video features.
tshift = 6

## Preprocessing

In [None]:
## video features
# For each movie: rescale every video feature column to [0, 1], pass it through
# temporal_smooth, and write the result to '<movie>_summary_codes_smoothed.csv' in vid_dir.
dur = {'DM': 600, 'TP': 200}  # presumably movie length in seconds (sampled every 0.8 below) — TODO confirm
for movie in ['DM','TP']:
    # load data
    video_features_file = os.path.join(vid_dir, '{0}_summary_codes_intuitivenames.csv'.format(movie))
    features = pd.read_csv(video_features_file, index_col=0)

    # rescale to 0 to 1
    # Build the frame straight from the scaled array so its columns are float64;
    # filling an empty DataFrame instead leaves them as object dtype.
    features_smooth = pd.DataFrame(MinMaxScaler().fit_transform(features.to_numpy()),
                                   index=features.index, columns=features.columns)

    # smooth each feature column in time
    for c in features.columns:
        features_smooth.loc[:,c] = temporal_smooth(features_smooth[c].to_numpy(), np.arange(0,dur[movie],0.8), 1.2, window=10)

    features_smooth.to_csv(os.path.join(vid_dir, '{0}_summary_codes_smoothed.csv'.format(movie)))

In [None]:
## node-centric connectivity
# For every movie, hand each subject's time series and an output path under
# proc_folder to compute_dFC.
raw_folder = os.path.join(project_folder, 'dynamic_connectivity', 'DATA', 'raw_data')
for movie in ('DM', 'TP'):
    ts_path = os.path.join(raw_folder, 'compiled_ts_data_movie{0}.npy'.format(movie))
    info_path = os.path.join(raw_folder, 'sampleinfo_movie{0}.csv'.format(movie))

    # keep only the first 333 entries along axis 1 of the compiled array
    group_ts = np.load(ts_path)[:, :333, :]
    subinfo = pd.read_csv(info_path, index_col=0)

    # the position of a subject in subinfo selects its slice on the last axis
    for i, sub in enumerate(subinfo.index):
        print(sub)
        ts = group_ts[:, :, i]
        dFCflat_file = os.path.join(proc_folder, '{0}_{1}_dnFC_parcel_flat.npy'.format(sub, movie))
        compute_dFC(ts, dFCflat_file)

In [None]:
# create null distribution
# Shuffle every value of the group time-series array (across all three axes at once),
# then run compute_dFC on each subject's slice of the shuffled array, with output
# paths under null_folder.
null_seed = 0
null_rng = np.random.default_rng(null_seed)  # seeded so the null data can be regenerated exactly
for movie in ['DM','TP']:
    group_ts = np.load(os.path.join(project_folder, 'dynamic_connectivity',
                                    'DATA', 'raw_data', 'compiled_ts_data_movie{0}.npy'.format(movie)))[:,:333,:]
    subinfo = pd.read_csv(os.path.join(project_folder, 'dynamic_connectivity',
                                       'DATA', 'raw_data', 'sampleinfo_movie{0}.csv'.format(movie)), index_col=0)

    # flatten() returns a copy, so the shuffle never touches the loaded array
    orig_shape = group_ts.shape
    group_ts = group_ts.flatten()
    null_rng.shuffle(group_ts)
    group_ts = np.reshape(group_ts, orig_shape)

    for i, sub in enumerate(subinfo.index):
        ts = group_ts[:,:,i]
        dFCflat_file = os.path.join(null_folder, '{0}_{1}_dnFC_parcel_flat.npy'.format(sub, movie))
        compute_dFC(ts, dFCflat_file)

In [None]:
# Load one subject's time series (index 0 of the 'TP' group array) into `ts`.
# NOTE(review): looks like a scratch/debugging cell — `sub` is assigned but not used
# here, and the next cell reassigns all of these names. Confirm whether it is still needed.
movie = 'TP'
sub = 'sub-NDARAA504CRN'
i = 0
ts_path = os.path.join(project_folder, 'dynamic_connectivity',
                       'DATA', 'raw_data', 'compiled_ts_data_movie{0}.npy'.format(movie))
group_ts = np.load(ts_path)[:, :333, :]
ts = group_ts[:, :, i]


In [None]:
## edge-centric connectivity
# For every movie, turn each subject's time series into an edge time series with
# compute_ets, then pass it and an output path under proc_folder to compute_deFC.
# NOTE(review): compute_ets and compute_deFC are not among the names imported in the
# import cell, so this cell raises NameError on a fresh kernel unless they are
# defined or imported elsewhere — confirm where they live and import them up top.
raw_folder = os.path.join(project_folder, 'dynamic_connectivity', 'DATA', 'raw_data')
for movie in ('DM', 'TP'):
    ts_path = os.path.join(raw_folder, 'compiled_ts_data_movie{0}.npy'.format(movie))
    info_path = os.path.join(raw_folder, 'sampleinfo_movie{0}.csv'.format(movie))

    # keep only the first 333 entries along axis 1 of the compiled array
    group_ts = np.load(ts_path)[:, :333, :]
    subinfo = pd.read_csv(info_path, index_col=0)

    # the position of a subject in subinfo selects its slice on the last axis
    for i, sub in enumerate(subinfo.index):
        print(sub)
        ts = group_ts[:, :, i]
        ets = compute_ets(ts)
        dFCflat_file = os.path.join(proc_folder, '{0}_{1}_deFC_parcel_flat.npy'.format(sub, movie))
        compute_deFC(ets, dFCflat_file)

## Subject-level analysis

In [8]:
# Subject-level model: for every subject and movie, fit lm_parallel on the
# standardized connectivity with all features in feats_of_interest entered together
# (plus motion), and save one beta file per feature in beta_dir.
feats_of_interest = ['Anger','Happy','Fear','Sad','Excited','Brightness',
                     'SaliencyFract','Sharpness','Vibrance','Loudness','Motion']

for video in ['DM','TP']:
    subinfo = pd.read_csv(os.path.join(project_folder, 'dynamic_connectivity',
                                       'DATA', 'raw_data', 'sampleinfo_movie{0}.csv'.format(video)), index_col=0)
    features_file = os.path.join(vid_dir, '{0}_summary_codes_smoothed.csv'.format(video))
    for sub in subinfo.index:
        print(sub)

        # one output file per feature; skip the subject only when all of them exist,
        # so a run interrupted part-way through saving is redone rather than skipped
        beta_files = [os.path.join(beta_dir,'{0}_task-movie{1}_nFC_{2}_beta_withfeats.npy'.format(sub, video, f))
                      for f in feats_of_interest]
        if all(os.path.exists(beta_file) for beta_file in beta_files):
            continue

        dFCflat_file = os.path.join(proc_folder, '{0}_{1}_dnFC_parcel_flat.npy'.format(sub, video))
        motion_file = os.path.join(motion_dir,'{0}_task-movie{1}_motion.npy'.format(sub, video))

        # cut off beginning of connectivity
        dFCflat = np.load(dFCflat_file)[tshift:,:]
        dFCflat = StandardScaler().fit_transform(dFCflat)

        # cut off beginning of motion
        motion = np.load(motion_file)[tshift:,:]
        motion = StandardScaler().fit_transform(motion)

        # cut off end of video features
        features = pd.read_csv(features_file, index_col=0).iloc[:-tshift, :]
        features.iloc[:,:] = StandardScaler().fit_transform(features.to_numpy())

        # conduct LM per edge with all features of interest as joint predictors
        feat = features[feats_of_interest].to_numpy()
        beta, resid = lm_parallel(dFCflat, feat, motion)

        # Save only this feature's row of beta under its per-feature filename.
        # Previously the whole beta array was written to every file and b_feat was unused.
        # NOTE(review): files written by the old code hold the full array and are skipped
        # by the existence check above — delete them to regenerate. Assumes row i of beta
        # corresponds to feats_of_interest[i]; verify against lm_parallel.
        for i, beta_file in enumerate(beta_files):
            b_feat = beta[i,:]
            np.save(beta_file, b_feat)

sub-NDARAA504CRN
sub-NDARAA948VFH
sub-NDARAB458VK9
sub-NDARAB977GFB
sub-NDARAC331VEH
sub-NDARAC350BZ0
sub-NDARAG115LZP
sub-NDARAG191AE8
sub-NDARAG340ERT
sub-NDARAH948UF0
sub-NDARAK019ZR6
sub-NDARAL828WXM
sub-NDARAN524ZK6
sub-NDARAN934AZL
sub-NDARAP049KXJ
sub-NDARAP964VDB
sub-NDARAR238RZ8
sub-NDARAR305LXG
sub-NDARAT358XM9
sub-NDARAT680GJA
sub-NDARAT709MMX
sub-NDARAT982JTU
sub-NDARAU530GLJ
sub-NDARAV187GJ5
sub-NDARAV610EY3
sub-NDARAW179AYF
sub-NDARAW216PM7
sub-NDARAX075WL9
sub-NDARAX272ZJL
sub-NDARAY298THW
sub-NDARAY475AKD
sub-NDARAY969YUW
sub-NDARAZ681CYF
sub-NDARBD328NUQ
sub-NDARBD992CH7
sub-NDARBE287EFC
sub-NDARBF183RFB
sub-NDARBF293YRB
sub-NDARBG188RA5
sub-NDARBG819PT9
sub-NDARBH019KPD
sub-NDARBH512BHA
sub-NDARBH992ARB
sub-NDARBJ159HXB
sub-NDARBJ375VP4
sub-NDARBK106KRH
sub-NDARBK638HLZ
sub-NDARBL532FPT
sub-NDARBM370JCB
sub-NDARBN365EV3
sub-NDARBN620TT7
sub-NDARBP398JHL
sub-NDARBP527JWC
sub-NDARBP770MUM
sub-NDARBR128UFP
sub-NDARBR533KJZ
sub-NDARBT436PMT
sub-NDARBU183TDJ
sub-NDARBV167R

In [7]:
# create null distribution
# For every subject and movie, fit lm_parallel on the null connectivity from null_folder
# against each feature in features_of_interest separately, after randomly permuting
# that feature's values.
# NOTE(review): outputs go to beta_dir as '..._nFC_<feature>_beta.npy' with no 'null'
# marker in the name — confirm they cannot be confused with non-null betas.
null_seed = 0
null_rng = np.random.default_rng(null_seed)  # seeded so the permutations can be regenerated exactly
for video in ['DM','TP']:
    subinfo = pd.read_csv(os.path.join(project_folder, 'dynamic_connectivity',
                                       'DATA', 'raw_data', 'sampleinfo_movie{0}.csv'.format(video)), index_col=0)
    features_file = os.path.join(vid_dir, '{0}_summary_codes_smoothed.csv'.format(video))

    # cut off end of video features
    # Loaded once per movie: the features depend only on the movie, and the
    # permutation below works on a copy, so this frame is never modified.
    features = pd.read_csv(features_file, index_col=0).iloc[:-tshift, :]
    features.iloc[:,:] = StandardScaler().fit_transform(features.to_numpy())

    for sub in subinfo.index:
        dFCflat_file = os.path.join(null_folder, '{0}_{1}_dnFC_parcel_flat.npy'.format(sub, video))
        motion_file = os.path.join(motion_dir,'{0}_task-movie{1}_motion.npy'.format(sub, video))

        # cut off beginning of connectivity
        dFCflat = np.load(dFCflat_file)[tshift:,:]
        dFCflat = StandardScaler().fit_transform(dFCflat)

        # cut off beginning of motion
        motion = np.load(motion_file)[tshift:,:]
        motion = StandardScaler().fit_transform(motion)

        # conduct separate LM per edge per feature
        for f in features_of_interest:
            # permutation() returns a shuffled copy; an in-place shuffle could write
            # through to `features` when to_numpy() hands back a view
            feat = null_rng.permutation(features[f].to_numpy())
            feat = np.expand_dims(feat, axis=1)

            beta, resid = lm_parallel(dFCflat, feat, motion)
            np.save(os.path.join(beta_dir,'{0}_task-movie{1}_nFC_{2}_beta.npy'.format(sub, video, f)), beta)