In [7]:
import os
import random

In [1]:
data_dir = "/data/fmri/data"

In [5]:
def get_counts(val_pct):
    hc_subs = [1, 2, 3, 4, 5, 7, 9, 10, 11, 12, 16, 19, 20, 21, 22, 23, 24, 25, 28, 29, 30, 31, 33]
    tbi_subs = [1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 27, 29, 30, 34, 35, 36]
    
    val_hc_count = int(len(hc_subs)*val_pct)
    val_tbi_count = int(len(tbi_subs)*val_pct)
    hc_val = random.sample(hc_subs, val_hc_count)
    tbi_val = random.sample(tbi_subs, val_tbi_count)
    hc_train = list(set(hc_subs) - set(hc_val))
    tbi_train = list(set(tbi_subs) - set(tbi_val))
    
    return hc_train, hc_val, tbi_train, tbi_val

def split_train_val(val_pct=0.2):
    hc_train, hc_val, tbi_train, tbi_val = get_counts(val_pct)
    train_subs, val_subs = [], []
    for tr in hc_train:
        train_subs.append(f'sub-hc{tr:03}')
    for tr in tbi_train:
        train_subs.append(f'sub-tbi{tr:03}')
    for tst in hc_val:
        val_subs.append(f'sub-hc{tst:03}')
    for tst in tbi_val:
        val_subs.append(f'sub-tbi{tst:03}')

    return train_subs, val_subs

In [3]:
def get_score(sub_path, img_name):
    sess_id = img_name.split(".scale")[0][-1]
    n_back = img_name[0]
    file_path = os.path.join(sub_path, f"{n_back}back_VAS-f.1D")
    with open(file_path, "r") as label_file:
        scores = label_file.readlines()
        try:
            return scores[int(sess_id)]
        except IndexError:
            return None

In [4]:
def prep_path_labels(subs):
    img_paths, labels = [], []
    for sub in subs:
        sub_path = os.path.join(data_dir, sub)
        for img_name in os.listdir(os.path.join(sub_path, f"{sub}.preproc")):
            if "nii.gz" not in img_name:
                continue
            img_path = os.path.join(sub_path, f"{sub}.preproc", img_name)
            label = get_score(sub_path, img_name)
            if label is None:
                continue

            img_paths.append(img_path)
            labels.append(int(label))
            
    return img_paths, labels

In [19]:
train_subs, val_subs = split_train_val(val_pct=0.2)

In [20]:
val_subs

['sub-hc022',
 'sub-hc002',
 'sub-hc003',
 'sub-hc033',
 'sub-tbi004',
 'sub-tbi011',
 'sub-tbi019',
 'sub-tbi014',
 'sub-tbi036']

In [21]:
val_img_paths, val_labels = prep_path_labels(val_subs)

In [22]:
val_img_paths[:10]

['/data/fmri/data/sub-hc022/sub-hc022.preproc/2back.r02.scale.nii.gz',
 '/data/fmri/data/sub-hc022/sub-hc022.preproc/2back.r03.scale.nii.gz',
 '/data/fmri/data/sub-hc022/sub-hc022.preproc/2back.r01.scale.nii.gz',
 '/data/fmri/data/sub-hc022/sub-hc022.preproc/2back.r04.scale.nii.gz',
 '/data/fmri/data/sub-hc022/sub-hc022.preproc/0back.r01.scale.nii.gz',
 '/data/fmri/data/sub-hc022/sub-hc022.preproc/0back.r03.scale.nii.gz',
 '/data/fmri/data/sub-hc022/sub-hc022.preproc/0back.r02.scale.nii.gz',
 '/data/fmri/data/sub-hc022/sub-hc022.preproc/0back.r04.scale.nii.gz',
 '/data/fmri/data/sub-hc002/sub-hc002.preproc/0back.r04.scale.nii.gz',
 '/data/fmri/data/sub-hc002/sub-hc002.preproc/0back.r01.scale.nii.gz']

In [23]:
val_labels[:10]

[5, 5, 5, 5, 5, 5, 5, 5, 30, 30]

In [24]:
import pandas as pd

In [25]:
train_img_paths, train_labels = prep_path_labels(train_subs)
val_img_paths, val_labels = prep_path_labels(val_subs)

In [28]:
train_data = pd.DataFrame(list(zip(train_img_paths, train_labels)))
val_data = pd.DataFrame(list(zip(val_img_paths, val_labels)))

In [29]:
train_data

Unnamed: 0,0,1
0,/data/fmri/data/sub-hc001/sub-hc001.preproc/0b...,30
1,/data/fmri/data/sub-hc001/sub-hc001.preproc/0b...,25
2,/data/fmri/data/sub-hc001/sub-hc001.preproc/0b...,30
3,/data/fmri/data/sub-hc001/sub-hc001.preproc/0b...,25
4,/data/fmri/data/sub-hc001/sub-hc001.preproc/2b...,20
5,/data/fmri/data/sub-hc001/sub-hc001.preproc/2b...,15
6,/data/fmri/data/sub-hc001/sub-hc001.preproc/2b...,10
7,/data/fmri/data/sub-hc001/sub-hc001.preproc/2b...,15
8,/data/fmri/data/sub-hc004/sub-hc004.preproc/2b...,0
9,/data/fmri/data/sub-hc004/sub-hc004.preproc/2b...,0


In [30]:
val_data

Unnamed: 0,0,1
0,/data/fmri/data/sub-hc022/sub-hc022.preproc/2b...,5
1,/data/fmri/data/sub-hc022/sub-hc022.preproc/2b...,5
2,/data/fmri/data/sub-hc022/sub-hc022.preproc/2b...,5
3,/data/fmri/data/sub-hc022/sub-hc022.preproc/2b...,5
4,/data/fmri/data/sub-hc022/sub-hc022.preproc/0b...,5
5,/data/fmri/data/sub-hc022/sub-hc022.preproc/0b...,5
6,/data/fmri/data/sub-hc022/sub-hc022.preproc/0b...,5
7,/data/fmri/data/sub-hc022/sub-hc022.preproc/0b...,5
8,/data/fmri/data/sub-hc002/sub-hc002.preproc/0b...,30
9,/data/fmri/data/sub-hc002/sub-hc002.preproc/0b...,30
