# Supplementary Analysis: Motion and Video Features
This notebook examines associations between each participant's motion time series (translations, rotations, and framewise displacement) and the video feature ratings.

In [1]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as scp

sns.set(context='talk', style='white')

# Root of the processed-data tree. Set the HBN_PROJECT_HOME environment variable
# to run the notebook on another machine; the default is the original location.
project_home = os.environ.get(
    'HBN_PROJECT_HOME',
    '/Users/catcamacho/Library/CloudStorage/Box-Box/CCP/HBN_study/proc')
ratings_dir = os.path.join(project_home, 'first_level')
motion_dir = os.path.join(project_home, 'group','parcel_timeseries','motion')
fig_dir = os.path.join(project_home,'figs','motion')

# Subject table; the first CSV column becomes the index.
subinfo = pd.read_csv(os.path.join(project_home, 'group','datasets','firstleveldatalabels_thresh0.8_20220412.csv'),
                      index_col=0)

# Later cells write figures into fig_dir; create it (after the read above has
# confirmed project_home is valid) so plt.savefig does not fail on a fresh tree.
os.makedirs(fig_dir, exist_ok=True)

# Dropping 'set' and 'cond' leaves repeated rows, which are collapsed below.
subinfo = subinfo.drop(['set','cond'], axis=1)
# DataFrame.drop_duplicates() compares column values only and ignores the index,
# so two different subjects with identical covariates would be merged. Include
# the index in the comparison by resetting it before looking for duplicates.
is_repeat = subinfo.reset_index().duplicated().to_numpy()
subinfo = subinfo.loc[~is_repeat, :]
subinfo.head()

Unnamed: 0_level_0,movie,age,site,female,meanFD,righthanded
sub,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
sub-NDARAA504CRN,TP,9.165297,cbic,1,0.480957,1
sub-NDARAA504CRN,DM,9.165297,cbic,1,0.599214,1
sub-NDARAA948VFH,TP,7.98266,rubic,1,0.102532,1
sub-NDARAA948VFH,DM,7.98266,rubic,1,0.107775,1
sub-NDARAB348EWR,TP,5.805384,cbic,0,0.372852,1


## Compile motion metrics for each video

In [None]:
# For each movie, load every subject's six motion columns, append their summed
# absolute value as a seventh column (FD), and save the subjects stacked along a
# third axis as compiled_motion_<movie>.npy.
for movie in ['DM','TP']:
    submotion = []
    subs = subinfo.loc[subinfo['movie']==movie,:].index.to_list()
    for s in subs:
        rawmot = os.path.join(motion_dir,'{0}_task-movie{1}_bold1_AP_Movement_Regressors_dt.txt'.format(s, movie))
        procmot = os.path.join(motion_dir,'{0}_task-movie{1}_bold1_AP_nuissance_thresh0.9.txt'.format(s, movie))
        if os.path.exists(rawmot):
            # assumes columns 6+ hold the six motion columns of interest — TODO confirm file layout
            mot = np.loadtxt(rawmot)[:,6:]
        elif os.path.exists(procmot):
            # assumes these six column indices are the matching motion columns — TODO confirm file layout
            mot = np.loadtxt(procmot)[:,[1,7,13,19,25,31]]
        else:
            # Without this branch `mot` would be undefined for the first subject, or
            # silently reuse (and re-scale) the previous subject's array. Skipping the
            # subject instead would misalign the stacked array with `subinfo`, so stop.
            raise FileNotFoundError(
                'No motion file found for {0}, movie {1}: tried {2} and {3}'.format(s, movie, rawmot, procmot))
        # Rescale the last three columns by 50*(pi/180) and take their absolute value
        # (presumably rotations in degrees -> arc length on a 50 mm sphere; confirm units).
        mot[:,3:] = 50*(np.pi/180)*np.absolute(mot[:,3:])
        # FD: sum of absolute values across the six columns at each timepoint.
        fd = np.sum(np.absolute(mot), axis=1, keepdims=True)
        submotion.append(np.concatenate([mot, fd], axis=1))
    # Stack subjects on a new last axis -> (timepoints, 7, subjects).
    submotion = np.stack(submotion, axis=2)
    print(movie, submotion.shape)
    np.save(os.path.join(motion_dir, 'compiled_motion_{0}.npy'.format(movie)), submotion)

## Correlate each video rating with each motion metric and plot the distributions

In [None]:
# For every movie x rating feature x motion metric: correlate each subject's
# motion time series with the rating time series, compare each r against a null
# built from shuffled motion values, store both in `results`, and save a KDE plot.
labels = ['transl_x','transl_y','transl_z','rot_x','rot_y','rot_z', 'FD']
results = {}

# Fixed seed so the shuffled null (and every permutation p-value and figure
# derived from it) is identical on each Restart & Run All.
rng = np.random.default_rng(0)

for movie in ['DM','TP']:
    genratings = pd.read_csv(os.path.join(ratings_dir, 'movie{0}_general.csv'.format(movie)), 
                             index_col=None).loc[:,['positive','negative','brightness','loudness','speaking','whole_words']]
    specratings = pd.read_csv(os.path.join(ratings_dir, 'movie{0}_specific.csv'.format(movie)), 
                              index_col=None).loc[:,['anger','happy','fear','sad','excite']]
    
    ratings = genratings.merge(specratings, how='left', left_index=True, right_index=True)
    motion = np.load(os.path.join(motion_dir, 'compiled_motion_{0}.npy'.format(movie)))
    n_subs = motion.shape[2]

    # Null data: every motion value, duplicated, shuffled, and reshaped to
    # (timepoints, metrics, 2*subjects).
    # NOTE(review): the shuffle pools values across all metrics, timepoints and
    # subjects, and only the first n_subs shuffled series are used below.
    perm_motion = motion.flatten()
    perm_motion = np.concatenate([perm_motion, perm_motion], axis=0)
    rng.shuffle(perm_motion)
    perm_motion = perm_motion.reshape((motion.shape[0], motion.shape[1], 2*n_subs))
    
    results[movie] = {}
    for feat in ratings.columns:
        results[movie][feat] = {}
        for i, m in enumerate(labels):
            # compute permuted distribution
            permres = []
            for s in range(0, n_subs):
                r, _ = scp.pearsonr(perm_motion[:, i, s], ratings[feat])
                permres.append(r)
            permres = np.array(permres)
            perm_mean = np.mean(permres)
            
            # compute actual distribution; the p-value is one-sided in the direction
            # of the observed r, with the +1 correction on numerator and denominator
            res = []
            pval = []
            for s in range(0, n_subs):
                r, _ = scp.pearsonr(motion[:, i, s], ratings[feat])
                if r>0:
                    permp = (np.sum((permres>=r).astype(int)) + 1) / (len(permres) + 1)
                else:
                    permp = (np.sum((permres<=r).astype(int)) + 1) / (len(permres) + 1)
                res.append(r)
                pval.append(permp)
            actual_mean = np.mean(np.array(res))

            # Rows 0..n_subs-1 are the actual correlations and rows n_subs..2*n_subs-1
            # the permuted ones (no p-value). Building the frame in one step keeps the
            # columns numeric instead of object dtype.
            results[movie][feat][m] = pd.DataFrame({
                'set': ['actual']*n_subs + ['permuted']*n_subs,
                m: np.concatenate([np.array(res), permres]),
                'permpval': np.concatenate([np.array(pval), np.full(n_subs, np.nan)]),
            })
            
            # plot the distributions
            # sns.displot is figure-level and creates its own figure, so no plt.figure()
            # call here: an extra figure would stay open and leak on every iteration.
            sns.displot(x=m, hue='set', data=results[movie][feat][m], palette=['purple','k'], kind='kde', fill=True)
            plt.axvline(x=perm_mean, color='k', label='permuted mean')
            plt.axvline(x=actual_mean, linestyle='-.', color='purple', label='actual mean')
            plt.xlabel('{0} - {1} (pearson r)'.format(feat, m))
            plt.xlim(-0.2, 0.2)
            plt.ylim(0,10)
            plt.tight_layout()
            plt.savefig(os.path.join(fig_dir, '{0}_{1}_{2}_distributionplot.svg'.format(movie, feat, m)))
            plt.close()

## How many kids show a significant association between video features and framewise displacement?

In [38]:
import warnings
# NOTE: this silences every warning for the remainder of the kernel session.
warnings.filterwarnings("ignore")

# For each movie and rating feature, attach every subject's FD correlation and
# permutation p-value to their row of `subinfo`, flag p < 0.05, and (when more
# than 10 subjects are flagged) t-test age and meanFD between flagged and
# unflagged subjects, printing and plotting the comparisons with p < 0.05.
sample_data = {}
m='FD'

for movie in ['DM','TP']:
    # Copy so the new columns are added to an independent frame, not a slice of subinfo.
    movie_subs = subinfo.loc[subinfo['movie']==movie,:].copy()
    sample_data[movie] = {}
    # Iterate over the features actually stored for this movie rather than over a
    # `ratings` variable left behind by another cell's loop.
    for feat in results[movie]:
        feat_results = results[movie][feat][m]
        # Select the per-subject rows by label; a positional slice of n+1 rows would
        # also pull in the first permuted row.
        actual = feat_results.loc[feat_results['set']=='actual', :]
        if len(actual) != len(movie_subs):
            raise ValueError(
                '{0} {1}: {2} actual correlations but {3} subjects in subinfo'.format(
                    movie, feat, len(actual), len(movie_subs)))

        # assumes the 'actual' rows are in the same subject order as subinfo for this
        # movie (the order used when the motion array was compiled) — TODO confirm
        temp = movie_subs.copy()
        temp['corr'] = actual[m].astype(float).to_numpy()
        temp['permpval'] = actual['permpval'].astype(float).to_numpy()
        temp['permpval_sig'] = (temp['permpval']<0.05).astype(int)
        sample_data[movie][feat] = {m: temp}

        if temp['permpval_sig'].sum()>10:
            for a in ['age','meanFD']:
                t, p = scp.ttest_ind(temp.loc[temp['permpval_sig']==0,a], temp.loc[temp['permpval_sig']==1,a])
                if p<0.05:
                    print(movie, feat, m)
                    print('{0}: t={1}, p={2}'.format(a, round(t,2), round(p,3)))
                    sns.boxplot(y=a,data=temp, x='permpval_sig')
                    sns.swarmplot(y=a,data=temp, x='permpval_sig', color='k')
                    sns.despine()
                    plt.tight_layout()
                    plt.savefig(os.path.join(fig_dir, '{0}_{1}_{2}_{3}_diffplot.svg'.format(movie, feat, m, a)))
                    plt.close()

DM speaking FD
meanFD: t=-2.13, p=0.034
DM whole_words FD
meanFD: t=-2.37, p=0.018
