In [3]:
import pandas as pd
import numpy as np
import nibabel as nib
import matplotlib.pyplot as plt
from sklearn.svm import SVR, SVC
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from SVR_functions import regress_covariates, cv_fit, predict_out, make_consistency_plot, boot_predict, permuted_p, permuted_importance, make_confusion_matrix
import seaborn as sns
import scipy.stats as scp
import timecorr as tc
import os

# Locations of the project's inputs/outputs. Every data path below hangs off
# <project_folder>/dynamic_connectivity/DATA.
project_folder = '/Users/catcamacho/Library/CloudStorage/Box-Box/CCP/HBN_study/social_proc_networks'
data_root = os.path.join(project_folder, 'dynamic_connectivity', 'DATA')
proc_folder = os.path.join(data_root, 'processed_data')
null_folder = os.path.join(data_root, 'null_data')
sample_file = os.path.join(data_root, 'helper_files', 'sample_gord.32k_fs_LR.pscalar.nii')
beta_dir = os.path.join(data_root, 'beta_maps')

# Parcel names come from axis 1 of the sample file's header; only the first
# 333 entries are kept (presumably the cortical parcels -- confirm).
parcel_labels = nib.load(sample_file).header.get_axis(1).name[:333]

# A parcel name is split on '_'; the second token is taken as the network
# label, falling back to the whole name when there is no underscore.
name_parts = [label.split('_') for label in parcel_labels]
network_labels = np.array([parts[0] if len(parts) < 2 else parts[1] for parts in name_parts])
network_names, network_sizes = np.unique(network_labels, return_counts=True)

# Networks and video features analysed in this notebook.
networks_of_interest = [
    'Auditory', 'CinguloOperc', 'Default', 'DorsalAttn', 'FrontoParietal',
    'SMhand', 'SMmouth', 'Salience', 'VentralAttn', 'Visual',
]
features_of_interest = [
    'Anger', 'Happy', 'Fear', 'Sad', 'Excited', 'Brightness',
    'SaliencyFract', 'Sharpness', 'Vibrance', 'Loudness', 'Motion',
]

## Combine data into feature sets

In [4]:
# For every movie and video feature, gather each subject's beta array and
# stack them along a new axis 1 (one entry per subject, in sample-info order),
# then save the group array next to the per-subject files.
raw_data_dir = os.path.join(project_folder, 'dynamic_connectivity', 'DATA', 'raw_data')
for movie in ['DM','TP']:
    subinfo = pd.read_csv(os.path.join(raw_data_dir, 'sampleinfo_movie{0}.csv'.format(movie)), index_col=0)
    subject_ids = subinfo.index.tolist()
    for feat in features_of_interest:
        subject_betas = [
            np.load(os.path.join(beta_dir, '{0}_task-movie{1}_nFC_{2}_beta_withfeats.npy'.format(sub_id, movie, feat)))
            for sub_id in subject_ids
        ]
        # np.stack(axis=1) inserts the new subject axis at position 1
        combined_data = np.stack(subject_betas, axis=1)
        out_file = os.path.join(beta_dir, 'movie{0}_{1}_withfeats_fullsample.npy'.format(movie, feat))
        np.save(out_file, combined_data)

In [None]:
# For each movie, stack every subject x feature beta array along axis 1 into a
# single array, and write a label table with one row per stacked entry
# (same order: subjects in sample-info order, features within each subject).
label_columns = ['movie','age','site','female','meanFD','righthanded','PPS_score']
for movie in ['DM','TP']:
    subinfo = pd.read_csv(os.path.join(project_folder, 'dynamic_connectivity', 
                                       'DATA', 'raw_data', 'sampleinfo_movie{0}.csv'.format(movie)), index_col=0)
    data = []
    label_records = []
    for sub in subinfo.index.tolist():
        # Subject-level covariates are the same for every feature: look them up once.
        sub_covariates = subinfo.loc[sub, label_columns].to_dict()
        for feat in features_of_interest:
            data.append(np.expand_dims(np.load(os.path.join(beta_dir, '{0}_task-movie{1}_nFC_{2}_beta_withfeats.npy'.format(sub, movie, feat))), axis=1))
            label_records.append({'sub': sub, 'feature': feat, **sub_covariates})

    combined_data = np.concatenate(data, axis=1)
    # Build the table in one shot instead of enlarging a DataFrame cell by cell
    # (which is quadratic and lets pandas guess/upcast dtypes as rows appear).
    # Rows get a 0..n-1 index and the same column order as before.
    data_labels = pd.DataFrame(label_records, columns=['sub','feature'] + label_columns)

    np.save(os.path.join(beta_dir, 'movie{0}_allfeatures_fullsample.npy'.format(movie)), combined_data)
    data_labels.to_csv(os.path.join(beta_dir, 'movie{0}_allfeatures_withfeats_fullsample_labels.csv'.format(movie)))

### Plot mean connectivity for each video feature

In [None]:
# Save one heatmap per movie and video feature showing the betas averaged
# over axis 1 of the group array (the stacked-subject axis).
for movie in ['DM','TP']:
    for feat in features_of_interest:
        # Load the per-feature group array under the name the combine step
        # writes ('movie{0}_{1}_withfeats_fullsample.npy').
        data = np.load(os.path.join(beta_dir, 'movie{0}_{1}_withfeats_fullsample.npy'.format(movie, feat)))
        mean_betas = np.mean(data, axis=1)
        # timecorr's vec2mat turns the vectorised connectivity back into a matrix
        data_2d = tc.vec2mat(mean_betas)

        fig, ax = plt.subplots(figsize=(12,10))
        sns.heatmap(data_2d, center=0, ax=ax)
        ax.set_title('{0} - {1}'.format(movie, feat))
        fig.tight_layout()
        fig.savefig(os.path.join(beta_dir, 'movie{0}_{1}_meanDFC.png'.format(movie, feat)))
        # close explicitly so figures do not accumulate across the loop
        plt.close(fig)

## Does age predict dynamic connectivity patterns?

In [None]:
# For each movie and video feature: fit a linear SVR (10-fold CV) predicting age
# from connectivity betas using subjects with site == 'rubic', then evaluate the
# fitted estimators on subjects with site == 'cbic'.
for movie in ['DM','TP']:
    # Use the subject-level sample table (one row per subject). The per-feature
    # arrays are stacked in this table's index order, so its rows line up with
    # the rows of X below. (The all-features label table has one row per
    # subject x feature and cannot be used to mask per-feature data.)
    subinfo = pd.read_csv(os.path.join(project_folder, 'dynamic_connectivity', 
                                       'DATA', 'raw_data', 'sampleinfo_movie{0}.csv'.format(movie)), index_col=0)

    # Combine "has a usable age" with site in ONE full-length mask each, so X and
    # Y are always indexed with masks of matching length.
    has_age = np.isfinite(subinfo['age']).to_numpy()
    train_mask = has_age & (subinfo['site']=='rubic').to_numpy()
    test_mask = has_age & (subinfo['site']=='cbic').to_numpy()
    Y_train = subinfo.loc[train_mask, 'age'].to_numpy()
    Y_test = subinfo.loc[test_mask, 'age'].to_numpy()
    
    for feat in features_of_interest:
        print(movie, feat)
        out_folder = os.path.join(project_folder,  'dynamic_connectivity', 'ANALYSIS', 'age_analysis','movie{0}_{1}'.format(movie, feat))
        os.makedirs(out_folder, exist_ok=True)
        model = SVR(kernel='linear')
        cv = 10
    
        # load data (file name matches what the per-feature combine step saves);
        # transpose so rows are subjects
        X = np.load(os.path.join(beta_dir, 'movie{0}_{1}_withfeats_fullsample.npy'.format(movie, feat))).T
        if X.shape[0] != len(subinfo):
            raise ValueError('movie{0} {1}: {2} rows of data but {3} subjects in sample info'.format(
                movie, feat, X.shape[0], len(subinfo)))
        X_train = X[train_mask,:]
        X_test = X[test_mask,:]
        
        # train model
        estimators, weights, mean_weights, Y_pred_train, train_scores = cv_fit(model, X_train, Y_train, cv)

        # test model
        Y_pred_test, accuracy = predict_out(X_test, Y_test, estimators, 'regress')
        accuracy.to_csv(os.path.join(out_folder,'test_accuracy.csv'))
        #if not os.path.isfile(os.path.join(out_folder, 'bootstrapped_test_accuracy_randN.csv')):
        #    boot_predict(estimators, X_test, Y_test, out_folder, samples=1000)
        #if not os.path.isfile(os.path.join(out_folder, 'permutation_score_distribution.npy')):
        #    results = permuted_p(model, X_train, Y_train, cv, out_folder, np.mean(train_scores), -accuracy.loc['MSE','stat'])
        #if (accuracy.loc['SpearmanR','pval'] < 0.05) & (not os.path.isfile(os.path.join(out_folder, 'mean_importance.csv'))):
        #    permuted_importance(estimators, X_train, Y_train, edge_labels, out_folder)

        # plot results
        plot_file_name = os.path.join(out_folder, 'testing_data_consistency.svg')
        make_consistency_plot(Y_test, Y_pred_test, cv, plot_file_name)

In [None]:
# For each movie and video feature: fit a linear SVR (10-fold CV) predicting
# PPS_score from connectivity betas using subjects with site == 'rubic', then
# evaluate the fitted estimators on subjects with site == 'cbic'.
for movie in ['DM','TP']:
    subinfo = pd.read_csv(os.path.join(project_folder, 'dynamic_connectivity', 
                                       'DATA', 'raw_data', 'sampleinfo_movie{0}.csv'.format(movie)), index_col=0)

    # Subject masks do not depend on the feature, so compute them once per movie.
    has_score = np.isfinite(subinfo['PPS_score'])
    train_mask = has_score & (subinfo['site']=='rubic')
    test_mask = has_score & (subinfo['site']=='cbic')
    Y_train = subinfo.loc[train_mask, 'PPS_score']
    Y_test = subinfo.loc[test_mask, 'PPS_score']
    
    for feat in features_of_interest:
        print(movie, feat)
        out_folder = os.path.join(project_folder,  'dynamic_connectivity', 'ANALYSIS', 'puberty_analysis','movie{0}_{1}'.format(movie, feat))
        os.makedirs(out_folder, exist_ok=True)
        model = SVR(kernel='linear')
        cv = 10
    
        # load data; transpose so rows are subjects.
        # NOTE(review): this now reads the file the per-feature combine step
        # writes ('..._withfeats_fullsample.npy'); the previous name
        # ('..._fullsample.npy') is not produced anywhere in this notebook --
        # confirm the older file was not intended.
        X = np.load(os.path.join(beta_dir, 'movie{0}_{1}_withfeats_fullsample.npy'.format(movie, feat))).T
        if X.shape[0] != len(subinfo):
            raise ValueError('movie{0} {1}: {2} rows of data but {3} subjects in sample info'.format(
                movie, feat, X.shape[0], len(subinfo)))
        X_train = X[train_mask.to_numpy(),:]
        X_test = X[test_mask.to_numpy(),:]
        
        # train model
        estimators, weights, mean_weights, Y_pred_train, train_scores = cv_fit(model, X_train, Y_train, cv)

        # test model
        Y_pred_test, accuracy = predict_out(X_test, Y_test, estimators, 'regress')
        accuracy.to_csv(os.path.join(out_folder,'test_accuracy.csv'))
        #if not os.path.isfile(os.path.join(out_folder, 'bootstrapped_test_accuracy_randN.csv')):
        #    boot_predict(estimators, X_test, Y_test, out_folder, samples=1000)
        #if not os.path.isfile(os.path.join(out_folder, 'permutation_score_distribution.npy')):
        #    results = permuted_p(model, X_train, Y_train, cv, out_folder, np.mean(train_scores), -accuracy.loc['MSE','stat'])
        #if (accuracy.loc['SpearmanR','pval'] < 0.05) & (not os.path.isfile(os.path.join(out_folder, 'mean_importance.csv'))):
        #    permuted_importance(estimators, X_train, Y_train, edge_labels, out_folder)

        # plot results
        plot_file_name = os.path.join(out_folder, 'testing_data_consistency.svg')
        make_consistency_plot(Y_test, Y_pred_test, cv, plot_file_name)

## Are connectivity patterns across video features dissociable?

In [None]:
# For each movie: train an SVC (10-fold CV, folds grouped by subject) to tell
# the video features apart from their connectivity betas using site == 'rubic'
# rows, then evaluate on site == 'cbic' rows and save a confusion matrix.
for movie in ['TP','DM']:
    # Label table written alongside the stacked all-features array
    # (one row per subject x feature, same order as the array's stacked axis).
    subinfo = pd.read_csv(os.path.join(beta_dir, 'movie{0}_allfeatures_withfeats_fullsample_labels.csv'.format(movie)), 
                          index_col=0)
    out_folder = os.path.join(project_folder,  'dynamic_connectivity', 'ANALYSIS', 'classification_analysis',
                              'movie{0}_allfeats'.format(movie))
    os.makedirs(out_folder, exist_ok=True)

    model = SVC()
    cv = 10
    # load data; transpose so each row pairs with one row of the label table
    X = np.load(os.path.join(beta_dir, 'movie{0}_allfeatures_fullsample.npy'.format(movie))).T
    if X.shape[0] != len(subinfo):
        raise ValueError('movie{0}: {1} rows of data but {2} label rows'.format(movie, X.shape[0], len(subinfo)))

    train_mask = subinfo['site']=='rubic'
    test_mask = subinfo['site']=='cbic'
    # subject IDs of the training rows, passed to cv_fit as the grouping variable
    groups = subinfo.loc[train_mask, 'sub'].to_numpy()
    X_train = X[train_mask.to_numpy(),:]
    Y_train = subinfo.loc[train_mask,'feature']
    X_test = X[test_mask.to_numpy(),:]
    Y_test = subinfo.loc[test_mask, 'feature']
    
    # train model
    estimators, Y_pred_train, train_scores = cv_fit(model, X_train, Y_train, cv, groups=groups)

    # test model
    Y_pred_test, accuracy = predict_out(X_test, Y_test, estimators, 'classifier')
    accuracy.to_csv(os.path.join(out_folder,'test_accuracy.csv'))
    
    # plot results
    plot_file_name = os.path.join(out_folder, 'testing_data_confusion.svg')
    make_confusion_matrix(Y_test, Y_pred_test, cv, plot_file_name)
    

In [None]:
# Same classification as the all-features analysis, but restricted to the five
# emotion features: train an SVC on site == 'rubic' rows (folds grouped by
# subject), evaluate on site == 'cbic' rows, and save a confusion matrix.
emotion_features = ['Anger','Sad','Happy','Excited','Fear']
for movie in ['DM','TP']:
    # Label table written alongside the stacked all-features array
    # (one row per subject x feature, same order as the array's stacked axis).
    subinfo = pd.read_csv(os.path.join(beta_dir, 'movie{0}_allfeatures_withfeats_fullsample_labels.csv'.format(movie)), 
                          index_col=0)
    out_folder = os.path.join(project_folder,  'dynamic_connectivity', 'ANALYSIS', 'classification_analysis',
                              'movie{0}_emotions'.format(movie))
    os.makedirs(out_folder, exist_ok=True)

    model = SVC()
    cv = 10
    # load data; transpose so each row pairs with one row of the label table
    X = np.load(os.path.join(beta_dir, 'movie{0}_allfeatures_fullsample.npy'.format(movie))).T
    if X.shape[0] != len(subinfo):
        raise ValueError('movie{0}: {1} rows of data but {2} label rows'.format(movie, X.shape[0], len(subinfo)))

    # The emotion mask must be applied to X, Y AND groups; previously it was
    # computed but never used, so this cell silently repeated the all-features run.
    feat_mask = subinfo['feature'].isin(emotion_features)
    train_mask = feat_mask & (subinfo['site']=='rubic')
    test_mask = feat_mask & (subinfo['site']=='cbic')
    groups = subinfo.loc[train_mask, 'sub'].to_numpy()
    X_train = X[train_mask.to_numpy(),:]
    Y_train = subinfo.loc[train_mask,'feature']
    X_test = X[test_mask.to_numpy(),:]
    Y_test = subinfo.loc[test_mask, 'feature']
    
    # train model
    estimators, Y_pred_train, train_scores = cv_fit(model, X_train, Y_train, cv, groups=groups)

    # test model
    Y_pred_test, accuracy = predict_out(X_test, Y_test, estimators, 'classifier')
    accuracy.to_csv(os.path.join(out_folder,'test_accuracy.csv'))
    
    # plot results
    plot_file_name = os.path.join(out_folder, 'testing_data_confusion.svg')
    make_confusion_matrix(Y_test, Y_pred_test, cv, plot_file_name)