In [1]:
import nibabel as nib
import numpy as np
import pandas as pd
import os

# Behavior Source Data

In [2]:
# Reading
# Every entry in the behavior directory is read as a tab-separated table and
# the tables are stacked row-wise into one frame.
behavior_path = '../../data/BSNIP_behavior/behavior/'

# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and anything derived from it) stable across machines and re-runs.
behavior_files = sorted(os.listdir(behavior_path))
if not behavior_files:
    raise FileNotFoundError(f"No behavior files found in {behavior_path!r}")

# Read all files first and concatenate once: calling pd.concat inside a loop
# re-copies the accumulated frame on every iteration.
# Row labels are kept exactly as read from each file (no ignore_index).
behavior_source = pd.concat(
    [
        pd.read_csv(os.path.join(behavior_path, behavior_file), sep='\t')
        for behavior_file in behavior_files
    ],
    axis=0,
)


In [3]:
# Show the combined behavior table as the cell's rich output.
behavior_source

Unnamed: 0,session_id,Group,BACS_Dig_Seq_z,BACS_Sym_Cod_z,BACS_Tok_Mot_z,BACS_Tower_z,BACS_Verb_Flu_z,BACS_Verb_Mem_z,PANSS_p1,PANSS_p2,...,PANSS_g7,PANSS_g8,PANSS_g9,PANSS_g10,PANSS_g11,PANSS_g12,PANSS_g13,PANSS_g14,PANSS_g15,PANSS_g16
0,S0009QPW3,SADP,-1.098039,-0.762157,-1.285204,-0.494802,0.370624,-1.415961,5.0,3.0,...,1.0,1.0,4.0,1.0,2.0,5.0,1.0,1.0,2.0,2.0
0,S0015SRH1,SCZP,-3.113095,-2.056288,-1.948905,0.444853,0.312787,1.073826,4.0,5.0,...,1.0,1.0,4.0,1.0,2.0,4.0,3.0,1.0,2.0,2.0
0,S0023IXS1,BPP,0.199438,-0.416925,-2.703231,-0.388889,-0.726917,-0.519926,3.0,2.0,...,2.0,2.0,3.0,3.0,2.0,2.0,3.0,2.0,2.0,2.0
0,S0027SLU2,SCZP,1.053571,-1.264732,0.240876,1.547794,-3.091076,0.514541,3.0,1.0,...,2.0,1.0,3.0,1.0,1.0,3.0,1.0,1.0,1.0,3.0
0,S0053KTM1,SADP,-1.029762,-2.232190,-2.496350,-1.761029,-1.159154,-1.610738,6.0,3.0,...,1.0,2.0,2.0,1.0,3.0,1.0,3.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,S9903MNH1,CON,1.351190,1.197889,0.240876,0.444853,-0.147194,1.968680,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
0,S9911MFE2,BPP,1.114078,-0.731731,-0.242067,1.364407,0.780303,1.849645,1.0,1.0,...,1.0,1.0,1.0,1.0,4.0,1.0,1.0,1.0,2.0,2.0
0,S9967ESA3,BPP,-1.070388,-2.943269,-2.780598,-0.330508,-2.818182,-2.547518,2.0,1.0,...,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
0,S9976LRC1,CON,1.351190,-0.649077,0.240876,0.077206,1.968721,0.626398,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


# Processing new GBC Cole-Anticevic parcellation data

In [4]:
import os

# Every entry of the pscalar directory is treated as one session's file.
# NOTE(review): os.listdir makes no ordering guarantee, and later cells pair
# these ids with data rows by position -- confirm the order is stable.
file_list = os.listdir('../../data/BSNIP_cole_anticevic/pscalar/')

# The session id is the part of the file name before its first '.'.
current_data_session_id = [name.partition('.')[0] for name in file_list]

In [5]:
# Loading the Cole Anticevic pscalar values for every file in `file_list`,
# backed by a pickle cache so the files only have to be parsed once.

import pickle

pscalar_dir = '../../data/BSNIP_cole_anticevic/pscalar/'
pscalar_cache_path = '../../data/pickles/pscalar_data_array.pickle'


def get_file_data(file):
    """Return the first row of the data array stored in one pscalar file.

    Parameters
    ----------
    file : str
        File name relative to the pscalar directory.

    Returns
    -------
    The element at index 0 of ``nib.load(...).get_fdata()``.
    (presumably a 1-D vector with one value per parcel/network -- TODO confirm)
    """
    return nib.load(pscalar_dir + file).get_fdata()[0]


pscalar_data_array = None
if os.path.exists(pscalar_cache_path):
    # SECURITY: pickle.load can execute arbitrary code; only load cache files
    # that were written by this notebook.
    with open(pscalar_cache_path, 'rb') as f:
        pscalar_data_array = pickle.load(f)
    # The cache stores no file names, so the only staleness signal available is
    # the row count. A mismatch means files were added or removed since the
    # cache was written; rebuild instead of failing later on a length mismatch.
    # NOTE: a pure re-ordering of `file_list` cannot be detected this way.
    if len(pscalar_data_array) != len(file_list):
        pscalar_data_array = None

if pscalar_data_array is None:
    pscalar_data_array = np.array([get_file_data(f) for f in file_list])
    # Create the cache directory if needed so the expensive load above is not
    # lost to a FileNotFoundError when writing.
    os.makedirs(os.path.dirname(pscalar_cache_path), exist_ok=True)
    with open(pscalar_cache_path, 'wb') as f:
        pickle.dump(pscalar_data_array, f)

In [6]:
# Naming the columns
# One "X<k>" column (1-based) per value in a row of the array. The count is
# taken from the array width rather than hard-coded, so an array with a
# different number of columns still builds a valid frame.
n_features = pscalar_data_array.shape[1]
column_names = [f"X{i}" for i in range(1, n_features + 1)]
pscalar_data = pd.DataFrame(pscalar_data_array, columns=column_names)

# Adding file identifiers
# Row k of the array is paired with the k-th session id purely by position, so
# the two must at least have the same length.
if len(current_data_session_id) != len(pscalar_data):
    raise ValueError(
        f"{len(current_data_session_id)} session ids for "
        f"{len(pscalar_data)} data rows; the cached array may be stale"
    )
pscalar_data['session_id'] = current_data_session_id
pscalar_data.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,session_id
0,0.99217,0.993106,0.991749,0.993544,0.994307,0.994294,0.994504,0.990248,0.993739,0.994609,0.979843,0.990038,S0009QPW3
1,0.73137,0.696538,0.735051,0.755902,0.737969,0.722893,0.751288,0.684636,0.683967,0.771312,0.334062,0.646695,S0015SRH1
2,0.709105,0.734446,0.639646,0.577776,0.691755,0.634361,0.718729,0.588338,0.646455,0.613515,0.623112,0.514197,S0023IXS1
3,0.991679,0.993263,0.987765,0.993559,0.993737,0.992619,0.994004,0.991507,0.993447,0.994353,0.983234,0.990196,S0027SLU2
4,0.697531,0.585341,0.625612,0.613556,0.608432,0.535144,0.719219,0.642556,0.624463,0.727577,0.436187,0.519657,S0053KTM1


In [7]:
# Attach each session's Group label to its Cole Anticevic values.
# Inner join on session_id: only sessions present in both tables are kept.
group_labels = behavior_source[['session_id', 'Group']]
data = group_labels.merge(pscalar_data, on='session_id', how='inner')

# Persist the merged table without the row index, then display it.
data.to_csv('../../data/processed/pscalar/cole_anticevic_pscalar.csv', index=False)
data

Unnamed: 0,session_id,Group,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12
0,S0009QPW3,SADP,0.992170,0.993106,0.991749,0.993544,0.994307,0.994294,0.994504,0.990248,0.993739,0.994609,0.979843,0.990038
1,S0015SRH1,SCZP,0.731370,0.696538,0.735051,0.755902,0.737969,0.722893,0.751288,0.684636,0.683967,0.771312,0.334062,0.646695
2,S0023IXS1,BPP,0.709105,0.734446,0.639646,0.577776,0.691755,0.634361,0.718729,0.588338,0.646455,0.613515,0.623112,0.514197
3,S0027SLU2,SCZP,0.991679,0.993263,0.987765,0.993559,0.993737,0.992619,0.994004,0.991507,0.993447,0.994353,0.983234,0.990196
4,S0053KTM1,SADP,0.697531,0.585341,0.625612,0.613556,0.608432,0.535144,0.719219,0.642556,0.624463,0.727577,0.436187,0.519657
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
632,S9903MNH1,CON,0.788583,0.714288,0.774410,0.766365,0.782074,0.771091,0.747428,0.767038,0.805095,0.774871,0.578664,0.630177
633,S9911MFE2,BPP,0.724470,0.654571,0.707543,0.738206,0.659883,0.707094,0.726479,0.550128,0.638377,0.688742,0.527777,0.409208
634,S9967ESA3,BPP,0.713498,0.675381,0.660575,0.646995,0.628018,0.709588,0.675211,0.451564,0.601938,0.705835,0.359076,0.524108
635,S9976LRC1,CON,0.706421,0.704267,0.707284,0.735963,0.773401,0.732413,0.701030,0.700782,0.742644,0.761457,0.415371,0.615903
