## The scope of this code is to run the HMM in the parcellation ROIs and generate two dictionary files:
1. the number of events 
2. the boundaries reflecting a start of a new event 

In [1]:

import warnings
warnings.filterwarnings('ignore')
import sys 
import os    
import glob
from functools import reduce
import numpy as np
from brainiak.eventseg.event import EventSegment
import nibabel as nib
from scipy.stats import zscore, norm, ttest_rel
import pandas as pd

In [None]:
#define directories
mask_dir = '/home/jovyan/paranoia_project/nh-paranoia/code/data/'
schaeffer_dir = '/home/jovyan/paranoia_project/schaeffer_dict_roi/'
save_dir = '/home/jovyan/paranoia_project/nh-paranoia/schaef_avg_events/'

#loading in the ROI information
label_node = pd.read_csv(mask_dir + '_masks/Schaefer2018_100Parcels_7Networks_order_FSLMNI152_1mm.Centroid_RAS.csv')
label_node['ROI Name']


# The loop below generates the files and rearranges the data shape
*** Make sure that your dimensions are the same before you run the model
- We used the split merge option of the model and used the [Naturalistic Data Website](https://naturalistic-data.org/content/Event_Segmentation.html) for help with the code 
- The number of events is relatively arbitrary, but we allowed for a minimum of 5 events and maximum of 45


In [5]:
results = {}  # keys are nodes
bounds = {}
bounds_sm = {}
for node in list(label_node['ROI Name']):
    results[node] = []
    bounds[node] = []
    bounds_sm[node] = []

min_here = 5
max_here = 45
k_array = np.arange(min_here, max_here, 10)
test_ll = np.zeros(len(k_array))

run = int(input('What run do you want?'))


for node in list(label_node['ROI Name']):
    print(node)
    test_ll = np.zeros(len(k_array))
    if run == 1:
        temp = np.load(schaeffer_dir + f'{node}_all_subs.npy',allow_pickle=True).item()
    else:
        temp = np.load(schaeffer_dir + f'{node}_all_subs_run{run}.npy',allow_pickle=True).item()
        
    node_loaded = np.array(list(temp.values()))
    
    #temp
    print('first',node_loaded.shape)
    

    non_zero_list = []
    for it in range(node_loaded.shape[0]):
        non_zero_list.append(node_loaded[it,:,:][~np.all(node_loaded[it,:,:] == 0, axis = 1)])
    node_loaded = np.array(non_zero_list)
    print(node_loaded.shape)
    node_loaded = np.moveaxis(node_loaded, 1,2)
    print(node_loaded.shape)
    
    n_subs, n_ts, n_vox = node_loaded.shape
    print(node_loaded.shape)
    
    
    
    ### training on half the data
    for i, k in enumerate(k_array):
        movie_train = np.mean(node_loaded[:int(n_subs/2)], axis=0)
        movie_train = zscore(movie_train,axis=0)
        
        movie_HMM = EventSegment(k)
        movie_HMM.fit(movie_train)
        movie_test = np.mean(node_loaded[int(n_subs/2):], axis=0)
        movie_test = zscore(movie_test,axis=0)
        
        _, test_ll[i] = movie_HMM.find_events(movie_test)
    max_ind = np.argmax(test_ll)

    print('Max is %d events' % k_array[max_ind])
    print('finished with part 1 for node')
    if max_ind < 6: #so that it doesn't go below 6!
        k_small = np.arange((k_array[max_ind]), (k_array[max_ind]) + 5, 1)
    else:
        k_small = np.arange((k_array[max_ind]) - 5, (k_array[max_ind]) + 5, 1)
    test_ll_small = np.zeros(len(k_small))
    for i, k in enumerate(k_small):
        movie_train = np.mean(node_loaded[:int(n_subs/2)], axis=0)
        movie_train = zscore(movie_train,axis=0)
        
        movie_HMM = EventSegment(k)
        movie_HMM.fit(movie_train)
        movie_test = np.mean(node_loaded[int(n_subs/2):], axis=0)
        movie_test = zscore(movie_test,axis=0)
        _, test_ll_small[i] = movie_HMM.find_events(movie_test)

    max_ind_fin = np.argmax(test_ll_small)
    print('Max is %d events' % k_small[max_ind_fin])
    test_ll_small[max_ind_fin]

    movie_group = np.mean(node_loaded, axis=0)
    nTRs = movie_group.shape[0]
    movie_dur = nTRs * 1  # Data acquired every 1  seconds; was 1.5 seconds for Baldassano
    

    results[node] = k_small[max_ind_fin]
    print(results[node])
    np.save(save_dir + f'events_per_roi_run{run}_schaeffer_HMM_avg_zscore.npy', results)

    
    HMMsm = EventSegment(n_events=k_small[max_ind_fin], split_merge=True)
    HMMsm.fit(movie_group)
    bounds_s = np.where(np.diff(np.argmax(HMMsm.segments_[0], axis=1)))[0]
    bounds_sm[node] = bounds_s
    print(bounds_s)
    np.save(save_dir + f'event_boundaries_run{run}_schaeffer_HMM_avg.npy', bounds_sm)


    
    
    

What run do you want? 1


7Networks_LH_Vis_1
first (17, 794, 526)
(17, 794, 526)
(17, 526, 794)
(17, 526, 794)
Max is 5 events
finished with part 1 for node
Max is 5 events
5
[134 252 342 477]
7Networks_LH_Vis_2
first (17, 1149, 526)
(17, 1149, 526)
(17, 526, 1149)
(17, 526, 1149)
Max is 5 events
finished with part 1 for node
Max is 5 events
5
[ 84 255 428 499]
7Networks_LH_Vis_3
first (17, 947, 526)
(17, 947, 526)
(17, 526, 947)
(17, 526, 947)
Max is 15 events
finished with part 1 for node
Max is 15 events
15
[ 19  47  81 132 153 189 224 259 299 333 371 420 450 495]
7Networks_LH_Vis_4
first (17, 1590, 526)
(17, 1575, 526)
(17, 526, 1575)
(17, 526, 1575)
Max is 25 events
finished with part 1 for node
Max is 25 events
25
[  5  20  35  56  90 116 136 151 166 186 208 232 257 300 317 330 351 366
 404 441 464 478 499 514]
7Networks_LH_Vis_5
first (17, 946, 526)
(17, 935, 526)
(17, 526, 935)
(17, 526, 935)
Max is 5 events
finished with part 1 for node
Max is 8 events
8
[ 17 107 171 208 253 392 476]
7Networks_LH_Vis_6