# Script to create list of file paths for one specific processing step + series of labels in same order

### Packages


In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import re #for sorting

### Block executes the following:

### Create subject lists for each of the three TR folders (Output folders RESTplus):

In [2]:
#Natural, ascending human sorting

def atoi(text):
    """Convert a chunk of text to ``int`` if it consists only of decimal digits.

    Parameters
    ----------
    text : str
        One chunk of a string (typically produced by splitting on digit runs).

    Returns
    -------
    int or str
        ``int(text)`` when ``text`` is a non-empty run of decimal digits,
        otherwise ``text`` unchanged.
    """
    # isdecimal() rather than isdigit(): isdigit() is also true for characters
    # such as superscript digits, which int() cannot parse and would raise on.
    return int(text) if text.isdecimal() else text

def natural_keys(text):
    '''
    Build a sort key that puts strings in human ("natural") order.

    The string is split into alternating non-digit and digit chunks; each chunk
    is passed through atoi, so digit runs compare as numbers, not as text.
    Usage: alist.sort(key=natural_keys)

    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)
    '''
    chunks = re.split(r'(\d+)', text)
    return list(map(atoi, chunks))

#--------------------------------------------
# Subject folder names of each TR group, in the (unordered) sequence os.listdir returns them.
directory = '/dbstore/Japanese_SRPBS_MDD_dataset/restplus_data/TR_2/REST'
subjects2 = list(os.listdir(directory))
#-----------

directory = '/dbstore/Japanese_SRPBS_MDD_dataset/restplus_data/TR_2_7/REST'
subjects2_7 = list(os.listdir(directory))
#-----------

directory = '/dbstore/Japanese_SRPBS_MDD_dataset/restplus_data/TR_2_5/REST'
subjects2_5 = list(os.listdir(directory))

#-----------

# Pool the three groups and order them naturally by increasing number.
subjects = sorted(subjects2 + subjects2_7 + subjects2_5, key=natural_keys)

## Processing Steps Overview:
 - RESTT : First timepoints removed <br>
 - RESTTR : Realigned <br>
 - RESTTRW : Normalized to MNI space <br>
<br>
 - RESTTRWS : Smoothed <br>
 - RESTTRWSC : Covariate removal <br>
 - RESTTRWSCF : Bandpass filtering <br>
<br>
 - RESTTRWC : Covariate removal without prior smoothing <br>
 - RESTTRWCF : Bandpass filtering without prior smoothing <br>


### Select processing step:

In [3]:
# Processing step whose files should be collected; change it to access other steps.
proc_step = "RESTTRW"

# File names have the form "<proc_prefix><subject><proc_suffix>.nii".
# Example path as built further below:
# "/dbstore/Japanese_SRPBS_MDD_dataset/restplus_data/TR_2/Results/RESTTRW/sub-0236/WRsub-0236_task-rest_bold.nii"
proc_prefix, proc_suffix = "WR", "_task-rest_bold"

In [4]:
base_folder = "/dbstore/Japanese_SRPBS_MDD_dataset/restplus_data"
# One folder per TR group:
folder2 = f"{base_folder}/TR_2"
folder2_5 = f"{base_folder}/TR_2_5"
folder2_7 = f"{base_folder}/TR_2_7"

# Build (subject, path) pairs for every TR group. The groups are visited in the
# same order in which `subjects` was assembled (TR_2, TR_2_7, TR_2_5), and the
# configured `proc_step` is used instead of a hard-coded folder name.
subject_paths = []
for folder, group in ((folder2, subjects2), (folder2_7, subjects2_7), (folder2_5, subjects2_5)):
    for subject in group:
        subject_paths.append(
            (subject, f"{folder}/Results/{proc_step}/{subject}/{proc_prefix}{subject}{proc_suffix}.nii")
        )

# Sort by subject ID, not by the full path: a natural sort of the full paths
# would group the files by TR folder first and so would not necessarily line
# up with the subject-sorted `subjects` list.
subject_paths.sort(key=lambda pair: natural_keys(pair[0]))
norm_mni_nifti_files = [path for _, path in subject_paths]

In [5]:
# Display the collected file paths.
norm_mni_nifti_files
# Intended to be in the same order as "subjects".
# NOTE(review): this only holds if the list was ordered by subject ID; a natural sort of the
# full paths groups the entries by TR folder first — verify against `subjects`.

['/dbstore/Japanese_SRPBS_MDD_dataset/restplus_data/TR_2/Results/RESTTRW/sub-0236/WRsub-0236_task-rest_bold.nii',
 '/dbstore/Japanese_SRPBS_MDD_dataset/restplus_data/TR_2/Results/RESTTRW/sub-0237/WRsub-0237_task-rest_bold.nii',
 '/dbstore/Japanese_SRPBS_MDD_dataset/restplus_data/TR_2/Results/RESTTRW/sub-0238/WRsub-0238_task-rest_bold.nii',
 '/dbstore/Japanese_SRPBS_MDD_dataset/restplus_data/TR_2/Results/RESTTRW/sub-0239/WRsub-0239_task-rest_bold.nii',
 '/dbstore/Japanese_SRPBS_MDD_dataset/restplus_data/TR_2/Results/RESTTRW/sub-0240/WRsub-0240_task-rest_bold.nii',
 '/dbstore/Japanese_SRPBS_MDD_dataset/restplus_data/TR_2/Results/RESTTRW/sub-0241/WRsub-0241_task-rest_bold.nii',
 '/dbstore/Japanese_SRPBS_MDD_dataset/restplus_data/TR_2/Results/RESTTRW/sub-0242/WRsub-0242_task-rest_bold.nii',
 '/dbstore/Japanese_SRPBS_MDD_dataset/restplus_data/TR_2/Results/RESTTRW/sub-0243/WRsub-0243_task-rest_bold.nii',
 '/dbstore/Japanese_SRPBS_MDD_dataset/restplus_data/TR_2/Results/RESTTRW/sub-0244/WRsub-

## Load additional information to create list with labels

In [6]:
# Switch into the dataset's info folder; the CSV paths below are relative to it.
os.chdir('/dbstore/Japanese_SRPBS_MDD_dataset/info_dataset/')

# participants: all patient info without the supplement.
# sup2_MDD:     MDD patients and controls + BDI-II information.
participants, sup2_MDD = (
    pd.read_csv('participants/participants.csv'),
    pd.read_csv('participants/sup2.csv'),
)


In [7]:
import pandas as pd 
import numpy as np
from nilearn import input_data
from nilearn import datasets
from nilearn.input_data import NiftiLabelsMasker
import glob
from tqdm import tqdm
from nilearn.connectome import ConnectivityMeasure
import os

  warn("Fetchers from the nilearn.datasets module will be "


In [7]:
# Diagnosis codes in sup2_MDD mapped to binary labels: 2 (MDD) -> 1, 0 (healthy control) -> 0.
diag_to_label = {2: 1, 0: 0}

y_diag = []

for sub in subjects:
    # Select the diagnosis of exactly this subject.
    sub_diag = sup2_MDD.loc[sup2_MDD['participants_id'] == sub, 'diag']
    if len(sub_diag) != 1:
        raise ValueError(
            f"expected exactly one row for {sub!r} in sup2_MDD, found {len(sub_diag)}"
        )

    # Take the scalar explicitly instead of calling int() on a Series.
    diag_code = int(sub_diag.iloc[0])
    if diag_code not in diag_to_label:
        # Silently skipping the subject would shift every later label and
        # break the one-to-one correspondence with `subjects`.
        raise ValueError(f"unexpected diagnosis code {diag_code} for {sub!r}")
    y_diag.append(diag_to_label[diag_code])

# Convert list to series (one label per entry of `subjects`, same order)
y = pd.Series(y_diag)

In [12]:
# Working copy of the supplement table with three empty columns that will hold
# the per-subject file paths (time courses, correlation matrix, MNI image).
df = sup2_MDD.assign(tc_file="", cc_file="", mni_file="")

In [13]:
# Inspect the table; the three file-path columns are still empty strings at this point.
df

Unnamed: 0,participants_id,site,diag,age,sex,hand,BDI-II,tc_file,cc_file,mni_file
0,sub-0236,HUH,0,39,1,1,0.0,,,
1,sub-0237,HUH,2,63,1,1,37.0,,,
2,sub-0238,HUH,0,25,2,1,3.0,,,
3,sub-0239,HUH,2,37,1,1,18.0,,,
4,sub-0240,HUH,0,48,1,1,4.0,,,
...,...,...,...,...,...,...,...,...,...,...
441,sub-1017,COI,2,35,1,2,20.0,,,
442,sub-1018,COI,2,39,2,1,34.0,,,
443,sub-1019,COI,2,45,2,1,11.0,,,
444,sub-1020,COI,2,27,1,1,50.0,,,


In [26]:
# create masker for HO cortical (Harvard-Oxford 'cort-maxprob-thr25-2mm', symmetric split)
dataset = datasets.fetch_atlas_harvard_oxford('cort-maxprob-thr25-2mm', symmetric_split= True)
atlas_filename = dataset.maps
# Skip the first label entry — presumably the background label; verify against the atlas.
labels = dataset.labels[1:]
masker_cor = NiftiLabelsMasker(labels_img=atlas_filename, standardize=True, verbose=0)

# create masker for HO subcortical
# NOTE: `dataset` and `atlas_filename` are rebound here; after this cell they refer to the subcortical atlas.
dataset = datasets.fetch_atlas_harvard_oxford('sub-maxprob-thr25-2mm', symmetric_split= True)
atlas_filename = dataset.maps
# Same first-entry skip as for the cortical labels above.
labels_s = dataset.labels[1:]

masker_sub = NiftiLabelsMasker(labels_img=atlas_filename, standardize=True, verbose=0)

In [28]:
output_path = '/data_local/deeplearning/Datasets/Japs_MDD_pr/'
atlas_name = 'HO'

# Created once: fit_transform is called on a single subject per iteration, so
# the same estimator object can be reused instead of rebuilt for every file.
correlation_measure = ConnectivityMeasure(kind='correlation')

for file in tqdm(files):
    # Strip the literal 'Results' from the path (no-op for paths without it).
    file = file.replace('Results', '')
    # The subject ID is the name of the folder that contains the image.
    sub = file.split('/')[-2]
    sub_path = output_path + sub + '/' + atlas_name + '/'
    save_path = output_path + sub + '/' + 'ATLAS'  # not used in this cell

    # exist_ok avoids the separate isdir check and makes re-runs safe.
    os.makedirs(sub_path, exist_ok=True)

    mni_path = file
    tc_path = sub_path + 'tc.npy'
    cc_path = sub_path + 'cc.npy'

    # Record the file locations on this subject's row(s); the mask is computed once.
    is_sub = df['participants_id'] == sub
    df.loc[is_sub, 'tc_file'] = tc_path
    df.loc[is_sub, 'cc_file'] = cc_path
    df.loc[is_sub, 'mni_file'] = mni_path

    # extract timeseries (cortical and subcortical) and concatenate them along axis 1
    time_series_c = masker_cor.fit_transform(file)
    time_series_s = masker_sub.fit_transform(file)
    tc = np.concatenate((time_series_c, time_series_s), 1)

    # Correlation matrix of the concatenated time courses for this subject.
    cc = correlation_measure.fit_transform([tc])[0]

    np.save(tc_path, tc)
    np.save(cc_path, cc)
    


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 446/446 [22:42<00:00,  3.05s/it]


In [40]:
# Shape of whatever array `tc` currently refers to (the notebook later reads axis 0 as the number of time points).
tc.shape

(97, 118)

In [17]:
# Show the path currently bound to `file` (the stray trailing `df` was a syntax error).
print(file)

/dbstore/Japanese_SRPBS_MDD_dataset/restplus_data/TR_2//RESTTRW/sub-0236/WRsub-0236_task-rest_bold.nii


In [20]:
# NOTE(review): this rebinds the name `glob` from the module (imported earlier with `import glob`) to the function.
from glob import glob

# Pattern for the input files: every entry inside each sub-folder of /dbstore/Jap_ah.
filess= '/dbstore/Jap_ah/*/*'
# Drop the first match.
# NOTE(review): glob does not guarantee a sorted result, so which entry is dropped
# depends on the directory listing order — confirm this is intended.
files = glob(filess)[1:]

In [29]:
# Inspect the table after the file-path columns have been filled in.
df

Unnamed: 0,participants_id,site,diag,age,sex,hand,BDI-II,tc_file,cc_file,mni_file
0,sub-0236,HUH,0,39,1,1,0.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-0236/SWRsub-0236_task-rest...
1,sub-0237,HUH,2,63,1,1,37.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-0237/SWRsub-0237_task-rest...
2,sub-0238,HUH,0,25,2,1,3.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-0238/SWRsub-0238_task-rest...
3,sub-0239,HUH,2,37,1,1,18.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-0239/SWRsub-0239_task-rest...
4,sub-0240,HUH,0,48,1,1,4.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-0240/SWRsub-0240_task-rest...
...,...,...,...,...,...,...,...,...,...,...
441,sub-1017,COI,2,35,1,2,20.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-1017/SWRsub-1017_task-rest...
442,sub-1018,COI,2,39,2,1,34.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-1018/SWRsub-1018_task-rest...
443,sub-1019,COI,2,45,2,1,11.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-1019/SWRsub-1019_task-rest...
444,sub-1020,COI,2,27,1,1,50.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-1020/SWRsub-1020_task-rest...


In [30]:
# Recode the diagnosis so that MDD (2) becomes 1; healthy controls stay 0.
# Assign the result back to the column instead of calling replace(inplace=True)
# on an attribute-accessed Series, which is chained assignment and is not
# guaranteed to modify `df` itself.
df['diag'] = df['diag'].replace({2: 1})
df

Unnamed: 0,participants_id,site,diag,age,sex,hand,BDI-II,tc_file,cc_file,mni_file
0,sub-0236,HUH,0,39,1,1,0.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-0236/SWRsub-0236_task-rest...
1,sub-0237,HUH,1,63,1,1,37.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-0237/SWRsub-0237_task-rest...
2,sub-0238,HUH,0,25,2,1,3.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-0238/SWRsub-0238_task-rest...
3,sub-0239,HUH,1,37,1,1,18.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-0239/SWRsub-0239_task-rest...
4,sub-0240,HUH,0,48,1,1,4.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-0240/SWRsub-0240_task-rest...
...,...,...,...,...,...,...,...,...,...,...
441,sub-1017,COI,1,35,1,2,20.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-1017/SWRsub-1017_task-rest...
442,sub-1018,COI,1,39,2,1,34.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-1018/SWRsub-1018_task-rest...
443,sub-1019,COI,1,45,2,1,11.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-1019/SWRsub-1019_task-rest...
444,sub-1020,COI,1,27,1,1,50.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-1020/SWRsub-1020_task-rest...


In [31]:
# Give the ID, diagnosis and sex columns the names used from here on.
column_names = {'participants_id': 'ID', 'diag': 'Diagnosis', 'sex': 'Sex'}
df = df.rename(columns=column_names)
df


Unnamed: 0,ID,site,Diagnosis,age,Sex,hand,BDI-II,tc_file,cc_file,mni_file
0,sub-0236,HUH,0,39,1,1,0.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-0236/SWRsub-0236_task-rest...
1,sub-0237,HUH,1,63,1,1,37.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-0237/SWRsub-0237_task-rest...
2,sub-0238,HUH,0,25,2,1,3.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-0238/SWRsub-0238_task-rest...
3,sub-0239,HUH,1,37,1,1,18.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-0239/SWRsub-0239_task-rest...
4,sub-0240,HUH,0,48,1,1,4.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-0240/SWRsub-0240_task-rest...
...,...,...,...,...,...,...,...,...,...,...
441,sub-1017,COI,1,35,1,2,20.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-1017/SWRsub-1017_task-rest...
442,sub-1018,COI,1,39,2,1,34.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-1018/SWRsub-1018_task-rest...
443,sub-1019,COI,1,45,2,1,11.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-1019/SWRsub-1019_task-rest...
444,sub-1020,COI,1,27,1,1,50.0,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/data_local/deeplearning/Datasets/Japs_MDD_pr/...,/dbstore/Jap_ah/sub-1020/SWRsub-1020_task-rest...


In [38]:
# Class balance of the test split.
# NOTE(review): `df_test` is only created by the train_test_split cell further down;
# on a fresh top-to-bottom run this cell raises NameError.
df_test.Diagnosis.value_counts()

0    54
1    36
Name: Diagnosis, dtype: int64

In [33]:
# Split the table by diagnosis label: 0 = healthy control, 1 = MDD.
is_mdd = df['Diagnosis'] == 1
is_healthy = df['Diagnosis'] == 0
df_healthy = df.loc[is_healthy]
df_mdd = df.loc[is_mdd]



In [34]:
# Number of rows with Diagnosis == 1.
len(df_mdd)

177

In [44]:
from sklearn.model_selection import train_test_split

# 80/20 split, stratified on the diagnosis so both parts keep the class ratio.
# A fixed random_state makes the split (and the CSV files written from it)
# reproducible across kernel restarts.
df_train, df_test = train_test_split(
    df, test_size=0.2, stratify=df['Diagnosis'], random_state=42
)


In [46]:
# Write both splits to disk (the DataFrame index is written as the first column).
for split_df, csv_path in (
    (df_train, '/data/agelgazzar/projects/state-spaces/csvfiles/Jpmdd_train.csv'),
    (df_test, '/data/agelgazzar/projects/state-spaces/csvfiles/Jpmdd_test.csv'),
):
    split_df.to_csv(csv_path)

In [41]:
# Number of time points per subject, read from axis 0 of each saved time-course array.
# The values are collected in a list and attached in one step; the previous
# per-row `df['nTime'].loc[i] = ...` was chained assignment and triggered
# pandas' SettingWithCopyWarning.
n_timepoints = []
for tc_file in df['tc_file']:
    # Paths containing the placeholder 'ATLAS' are redirected to the atlas folder.
    tc = np.load(tc_file.replace('ATLAS', atlas_name))
    n_timepoints.append(tc.shape[0])

df = df.assign(nTime=n_timepoints)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [43]:
# How many subjects have each number of time points.
df.nTime.value_counts()

230    194
133    189
97      62
274      1
Name: nTime, dtype: int64