In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the scan table from disk. The cell re-reads the file every time it runs,
# so the column insertion below is safe to re-execute.
subs = pd.read_csv("../etc/subs_table.csv")

# Build the "sub-<subject>_ses-<session>" label for every row with vectorised
# string concatenation rather than a row-wise `apply(axis=1)`: same labels,
# no Python-level loop, and it still yields a Series on an empty table.
sub_ses = (
    "sub-" + subs["subject"].astype(str)
    + "_ses-" + subs["session"].astype(str)
)
# Insert the label as the fourth column (position 3); mutates `subs` in place.
subs.insert(3, "sub_ses", sub_ses)

subs.head()

Unnamed: 0,dataset,subject,session,sub_ses,fmap,task
0,CCNP,colornest001,1,sub-colornest001_ses-1,none,task-rest_run-01
1,CCNP,colornest001,1,sub-colornest001_ses-1,none,task-rest_run-02
2,CCNP,colornest012,1,sub-colornest012_ses-1,none,task-rest_run-01
3,CCNP,colornest012,1,sub-colornest012_ses-1,none,task-rest_run-02
4,CCNP,colornest023,1,sub-colornest023_ses-1,none,task-rest_acq-VARIANTObliquity_run-02


In [3]:
# Tally non-null entries of the remaining columns for every
# (dataset, session, fmap, task) combination present in `subs`.
group_keys = ["dataset", "session", "fmap", "task"]
counts = subs.groupby(group_keys).count()

In [6]:
# Restrict the tallies to groups whose fieldmap is 'epi', then show the
# (up to) 50 groups with the highest "subject" count first.
epi_counts = counts.query("fmap == 'epi'")
epi_counts.sort_values("subject", ascending=False).head(50)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,subject,sub_ses
dataset,session,fmap,task,Unnamed: 4_level_1,Unnamed: 5_level_1
HBN,HBNsiteRU,epi,task-peer_run-1,58,58
HBN,HBNsiteRU,epi,task-movieTP,47,47
HBN,HBNsiteRU,epi,task-movieDM,43,43
HBN,HBNsiteRU,epi,task-rest_run-1,41,41
HBN,HBNsiteRU,epi,task-peer_run-2,40,40
HBN,HBNsiteCBIC,epi,task-peer_run-1,38,38
HBN,HBNsiteRU,epi,task-peer_run-3,35,35
HBN,HBNsiteCBIC,epi,task-movieDM,32,32
HBN,HBNsiteRU,epi,task-rest_run-2,32,32
HBN,HBNsiteCBIC,epi,task-rest_run-1,28,28


In [7]:
# Draw a reproducible random sample of sessions for each
# (dataset, fieldmap, task) combination, write the picks to a text file and
# collect the matching rows of `subs` into `select_subs`.
SEED = 2022
N_PER_BATCH = 10  # number of distinct sub_ses labels drawn per combination
rng = np.random.default_rng(SEED)

# Parallel lists: position i of each list describes one combination.
datasets = ["NKI", "HBN", "PNC"]
fmaps = ["none", "epi", "phasediff"]
tasks = ["task-rest_acq-1400", "task-rest_run-1", "task-rest_acq-singleband"]

selected_batches = []

# The context manager closes the file even if a batch fails part-way, e.g.
# when `rng.choice(..., replace=False)` raises because a combination has
# fewer than N_PER_BATCH distinct sessions.
with open("../etc/30_subs.txt", "w") as f:
    for ds, fmap, task in zip(datasets, fmaps, tasks):
        batch = subs.query(
            f"dataset == '{ds}' and fmap == '{fmap}' and task == '{task}'"
        )
        # De-duplicate labels so each session can be drawn at most once.
        batch_subs = np.unique(batch["sub_ses"].values)
        batch_subs = rng.choice(batch_subs, N_PER_BATCH, replace=False)
        # Keep every row belonging to a drawn session, in table order.
        batch = batch.loc[np.isin(batch["sub_ses"], batch_subs), :]
        selected_batches.append(batch)

        # One commented header line per combination, then one label per line
        # followed by a blank separator line.
        print(f"# Fieldmap: {fmap}; Dataset: {ds}; Task: {task}", file=f)
        print("\n".join(batch["sub_ses"].values) + "\n", file=f)

# Stack the per-combination selections into a single frame.
select_subs = pd.concat(selected_batches)

In [8]:
# Display the concatenated frame of selected rows as the cell output.
select_subs

Unnamed: 0,dataset,subject,session,sub_ses,fmap,task
902,NKI,A00031893,BAS1,sub-A00031893_ses-BAS1,none,task-rest_acq-1400
933,NKI,A00035377,BAS1,sub-A00035377_ses-BAS1,none,task-rest_acq-1400
963,NKI,A00035827,BAS2,sub-A00035827_ses-BAS2,none,task-rest_acq-1400
1107,NKI,A00040815,BAS1,sub-A00040815_ses-BAS1,none,task-rest_acq-1400
1171,NKI,A00052182,FLU2,sub-A00052182_ses-FLU2,none,task-rest_acq-1400
1634,NKI,A00065992,TRT,sub-A00065992_ses-TRT,none,task-rest_acq-1400
1729,NKI,A00066860,BAS1,sub-A00066860_ses-BAS1,none,task-rest_acq-1400
1752,NKI,A00066865,BAS1,sub-A00066865_ses-BAS1,none,task-rest_acq-1400
1827,NKI,A00074709,BAS1,sub-A00074709_ses-BAS1,none,task-rest_acq-1400
2158,NKI,A00081552,FLU1,sub-A00081552_ses-FLU1,none,task-rest_acq-1400


In [9]:
# Sampling was done on sub_ses labels, so count the distinct subject IDs
# separately to see how many different subjects ended up in the selection.
subject_ids = np.unique(select_subs["subject"])
subject_ids.size

30

In [10]:
# Persist the selected rows as CSV; the DataFrame index is not written.
select_subs_path = "../etc/select_subs.csv"
select_subs.to_csv(select_subs_path, index=False)