In [1]:
from pathlib import Path
from eeg_research.system.bids_selector  import BidsArchitecture, BidsQuery, BidsPath
# Derivatives directory that the cells below glob and query against.
root = Path("/data2/Projects/eeg_fmri_natview/derivatives")
# Configure a BidsArchitecture with datatype/suffix 'eeg', a wildcard run and
# the '.pkl' extension; entities not passed here are left to the class.
architecture = BidsArchitecture(
    root = root,
    datatype='eeg',
    suffix='eeg',
    run = "*",
    extension = '.pkl'
)


In [2]:
# Display the value returned by get_layout() for the architecture.
architecture.get_layout()

BidsArchitecture(root=PosixPath('/data2/Projects/eeg_fmri_natview/derivatives'), subject='*', session='*', datatype='eeg', task='*', acquisition=None, run='**', description=None, suffix='eeg', extension='.pkl')

In [3]:
# Display the `database` attribute (rendered as a table in the recorded output).
architecture.database

Unnamed: 0,root,subject,session,datatype,task,run,acquisition,description,suffix,extension,filename
0,/data2/Projects/eeg_fmri_natview/derivatives,01,01,/data2/Projects/eeg_fmri_natview/derivatives/s...,dme,01,,gfpBk,eeg,.pkl,/data2/Projects/eeg_fmri_natview/derivatives/s...
1,/data2/Projects/eeg_fmri_natview/derivatives,01,01,/data2/Projects/eeg_fmri_natview/derivatives/s...,dme,01,,customGfpBk,eeg,.pkl,/data2/Projects/eeg_fmri_natview/derivatives/s...
2,/data2/Projects/eeg_fmri_natview/derivatives,01,01,/data2/Projects/eeg_fmri_natview/derivatives/s...,peer,01,,gfpBk,eeg,.pkl,/data2/Projects/eeg_fmri_natview/derivatives/s...
3,/data2/Projects/eeg_fmri_natview/derivatives,01,01,/data2/Projects/eeg_fmri_natview/derivatives/s...,peer,01,,customGfpBk,eeg,.pkl,/data2/Projects/eeg_fmri_natview/derivatives/s...
4,/data2/Projects/eeg_fmri_natview/derivatives,01,01,/data2/Projects/eeg_fmri_natview/derivatives/s...,monkey1,02,,gfpBk,eeg,.pkl,/data2/Projects/eeg_fmri_natview/derivatives/s...
...,...,...,...,...,...,...,...,...,...,...,...
2191,/data2/Projects/eeg_fmri_natview/derivatives,19,02,/data2/Projects/eeg_fmri_natview/derivatives/s...,checker,01,,customGfpBk,eeg,.pkl,/data2/Projects/eeg_fmri_natview/derivatives/s...
2192,/data2/Projects/eeg_fmri_natview/derivatives,19,02,/data2/Projects/eeg_fmri_natview/derivatives/s...,rest,01,,bandsGfp,eeg,.pkl,/data2/Projects/eeg_fmri_natview/derivatives/s...
2193,/data2/Projects/eeg_fmri_natview/derivatives,19,02,/data2/Projects/eeg_fmri_natview/derivatives/s...,rest,01,,customGfp,eeg,.pkl,/data2/Projects/eeg_fmri_natview/derivatives/s...
2194,/data2/Projects/eeg_fmri_natview/derivatives,19,02,/data2/Projects/eeg_fmri_natview/derivatives/s...,checker,01,,bandsGfp,eeg,.pkl,/data2/Projects/eeg_fmri_natview/derivatives/s...


In [12]:
# Hand-written glob: sub-01 / ses-01 / eeg files for task 'checker', run 01,
# whose description starts with 'raw'. Every match under root is printed.
q = "./sub-01/ses-01/eeg/sub-01_ses-01_task-checker*_run-01*_desc-raw*_eeg.pkl"
for file in root.rglob(q):
    print(file)

/data2/Projects/eeg_fmri_natview/derivatives/sub-01/ses-01/eeg/sub-01_ses-01_task-checker_run-01_desc-raw_eeg.pkl
/data2/Projects/eeg_fmri_natview/derivatives/sub-01/ses-01/eeg/sub-01_ses-01_task-checker_run-01_desc-rawBk_eeg.pkl


In [125]:
from pathlib import Path
from dataclasses import dataclass
from warnings import warn
import os

@dataclass
class BasePath:
    root: Path
    subject: str | None = None
    session: str | None = None
    datatype: str | None = None
    task: str | None = None
    suffix: str | None = None
    extension: str | None = None
    run: str | None = None
    acquisition: str | None = None
    description: str | None = None

    
    def __str__(self):
        string_list = []
        for attribute, value in self.__dict__.items():
            if not '_' in attribute:
                string_list.append(f"{attribute}: {value}")
        
        return '\n'.join(string_list)
                
    def _make_path(self, absolute=True):
        relative_path = Path(
            os.path.join(
                f"sub-{self.subject}",
                f"ses-{self.session}",
                self.datatype,
            )
        )

        if absolute and root:
            return self.root / relative_path
        else:
            return relative_path

    def _make_basename(self):
        fname_elem = [
            f"sub-{self.subject}",
            f"ses-{self.session}",
            f"task-{self.task}",
            self.suffix,
        ]
        if self.description:
            fname_elem.insert(3, f"desc-{self.description}")
        if self.run:
            fname_elem.insert(3, f"run-{self.run}")
        if self.acquisition:
            fname_elem.insert(3, f"acq-{self.acquisition}")
        
        return "_".join(fname_elem)
    
    def parse_filename(self, file: str | os.PathLike):
        if isinstance(file, str):
            file = Path(file)


        file_parts = {}
        desired_keys = ['task','run','desc','acq']
        splitted_filename = file.stem.split('_')

        if len(file.parts) > 2:
            file_parts['root'] = file.parents[3]
            file_parts['datatype'] = file.parents[2]
        elif len(file.parts) > 1:
            file_parts['datatype'] = file.parents[1]
        else:
            file_parts['datatype'] = splitted_filename[-1]

        file_parts['subject'] = file.name.split('_')[0].split('-')[1]
        file_parts['session'] = file.name.split('_')[1].split('-')[1]

        for desired_key in desired_keys:
            
            if desired_key in file.stem:
                value = [
                    part.split('-')[1] 
                    for part in splitted_filename
                    if desired_key in part
                ][0]
            else:
                value = None

            if desired_key == 'desc':
                desired_key = 'description'
            
            elif desired_key == 'acq':
                desired_key = 'acquisition'
            
            file_parts[desired_key] = value
                
        file_parts['suffix'] = splitted_filename[-1]
        file_parts['extension'] = file.suffix

        return file_parts

@dataclass
class BidsPath(BasePath):
    """Concrete BIDS path exposing directory, file name and full path.

    NOTE(review): ``subject`` through ``extension`` are annotated without a
    default here, but they appear to pick up the ``None`` class attributes
    left by ``BasePath`` and so remain optional in ``__init__`` — confirm
    before relying on them being mandatory.
    """
    subject: str
    session: str
    datatype: str
    task: str
    suffix: str
    extension: str
    root: Path | None = None
    run: str | None = None
    acquisition: str | None = None
    description: str | None = None

    @classmethod
    def from_filename(cls, file: str | os.PathLike):
        """Create an instance from a file name or path.

        Args:
            file: File name or path handed to ``BasePath.parse_filename``.

        Returns:
            A new instance built from the parsed entities.
        """
        # parse_filename is declared as an instance method but does not read
        # ``self``, so the class is passed in its place.
        file_parts = super().parse_filename(cls, file)
        return cls(**file_parts)

    @property
    def basename(self):
        """File name without extension, as built by ``_make_basename``."""
        return super()._make_basename()

    @property
    def filename(self):
        """``basename`` followed by ``extension`` (concatenated as given)."""
        return self.basename + self.extension

    @property
    def absolute_path(self):
        """Directory of the file, prefixed with ``root`` when one is set.

        Emits a warning and returns the relative directory when ``root`` is
        falsy.
        """
        if self.root:
            return super()._make_path(absolute=True)

        warn("There was no root path detected. Setting relative "\
            "path as the root path")
        return super()._make_path(absolute=False)

    @property
    def relative_path(self):
        """Directory of the file relative to the root."""
        return super()._make_path(absolute=False)

    @property
    def fullpath(self):
        """Directory joined with the file name.

        Built from ``absolute_path``; the previous implementation read a
        ``pathname`` attribute that is not defined on this class hierarchy and
        raised ``AttributeError``.
        """
        return self.absolute_path / self.filename

@dataclass
class BidsQuery(BidsPath):
    """BIDS path whose unset entities act as ``*`` wildcards for globbing."""
    root: Path | str | os.PathLike
    subject: str | None = None
    session: str | None = None
    datatype: str | None = None
    task: str | None = None
    run: str | None = None
    acquisition: str | None = None
    description: str | None = None
    suffix: str | None = None
    extension: str | None = None

    def __post_init__(self) -> None:
        """Replace every unset required entity with the ``'*'`` wildcard.

        ``run``, ``acquisition`` and ``description`` are left untouched, so
        they stay out of the generated pattern unless given explicitly.
        """
        required_attrs = ['subject',
                          'session',
                          'datatype',
                          'task',
                          'suffix',
                          'extension']

        for attr in required_attrs:
            if getattr(self, attr) is None:
                setattr(self, attr, '*')

    @property
    def filename(self):
        """Glob pattern for the file name.

        Takes the parent ``filename``, removes any literal ``_*.*`` and
        collapses ``**`` into a single ``*``.
        """
        return super().filename.replace("_*.*","").replace("**","*")

    def generate(self):
        """Return an iterator over the files under ``root`` matching the query.

        Raises:
            Exception: If ``root`` is falsy.
        """
        if self.root:
            # ``root`` may be a plain string or another path-like object, and
            # only ``Path`` provides ``rglob``.
            return Path(self.root).rglob(
                os.fspath(self.relative_path/self.filename)
            )
        else:
            raise Exception("Root was not defined. Please instantiate the object"\
                " by setting root to a desired path")
    

In [126]:
# Build a BidsPath from a bare file name (no directory components).
test = BidsPath.from_filename('sub-01_ses-01_task-checker_run-01_desc-caps_brainstates.pkl')

In [127]:
# Print the object through BasePath.__str__ (one "name: value" line per field).
print(test)

root: None
subject: 01
session: 01
datatype: brainstates
task: checker
suffix: brainstates
extension: .pkl
run: 01
acquisition: None
description: caps


In [108]:
# Mutate one entity in place; the path properties are recomputed on each access.
test.session = "02"

In [109]:
# File name (basename + extension) derived from the current entities.
test.filename

'sub-01_ses-02_task-checker_run-01_desc-caps_brainstates.pkl'

In [128]:
# When root is unset, absolute_path warns and falls back to the relative directory.
test.absolute_path

  warn("There was no root path detected. Setting relative "\


PosixPath('sub-01/ses-01/brainstates')

In [134]:
# Query for subject 01 with description 'caps'; the entities left unset that
# BidsQuery.__post_init__ lists as required are turned into '*' wildcards.
test = BidsQuery(root = Path(root),
                 subject = "01",
                 description = 'caps',
)

In [130]:
# Glob pattern produced by the filename property of `test`.
# NOTE(review): the recorded output contains run-01, which the BidsQuery cell
# above does not set — it probably comes from an earlier execution.
test.filename

'sub-01_ses-*_task-*_run-01_desc-caps_*'

In [131]:
# Sample absolute file path used to inspect Path.parts / Path.parents below.
t = Path('/data2/Projects/eeg_fmri_natview/derivatives/sub-01/ses-01/brainstates/sub-01_ses-01_task-checker_run-01_desc-caps_brainstates.pkl')

In [87]:
# Number of components in the path (the leading '/' counts as one).
len(t.parts)

9

In [61]:
# parents[0] is the containing folder, so parents[3] is three levels above it.
t.parents[3]

PosixPath('/data2/Projects/eeg_fmri_natview/derivatives')

In [135]:
# Run the query and print every path it yields.
for file in test.generate():
    print(file)

/data2/Projects/eeg_fmri_natview/derivatives/sub-01/ses-01/brainstates/sub-01_ses-01_task-dme_run-02_desc-caps_brainstates.tsv
/data2/Projects/eeg_fmri_natview/derivatives/sub-01/ses-01/brainstates/sub-01_ses-01_task-monkey1_run-02_desc-caps_brainstates.tsv
/data2/Projects/eeg_fmri_natview/derivatives/sub-01/ses-01/brainstates/sub-01_ses-01_task-monkey1_run-01_desc-caps_brainstates.tsv
/data2/Projects/eeg_fmri_natview/derivatives/sub-01/ses-01/brainstates/sub-01_ses-01_task-checker_run-01_desc-caps_brainstates.tsv
/data2/Projects/eeg_fmri_natview/derivatives/sub-01/ses-01/brainstates/sub-01_ses-01_task-rest_run-01_desc-caps_brainstates.tsv
/data2/Projects/eeg_fmri_natview/derivatives/sub-01/ses-01/brainstates/sub-01_ses-01_task-inscapes_run-01_desc-caps_brainstates.tsv
/data2/Projects/eeg_fmri_natview/derivatives/sub-01/ses-01/brainstates/sub-01_ses-01_task-dme_run-02_desc-caps_brainstates.json
/data2/Projects/eeg_fmri_natview/derivatives/sub-01/ses-01/brainstates/sub-01_ses-01_task-mo

In [13]:
# Build a fully specified BidsPath. Note that root is passed as a plain
# string and extension is given without a leading dot.
bids_path = BidsPath(root = "/data2/Projects/eeg_fmri_natview/derivatives",
                     subject="01",
                     session = "01",
                     datatype = "eeg",
                     task = "rest",
                     run = "01",
                     suffix = "eeg",
                     extension = "pkl",
)

In [15]:
# Show the dataclass repr of the object.
bids_path

BidsPath(root=PosixPath('/data2/Projects/eeg_fmri_natview/derivatives'), subject='01', session='01', datatype='eeg', task='rest', suffix='eeg', extension='pkl', run='01', acquisition=None, description=None)

In [4]:
# Change the subject in place, then display the resulting full path.
# NOTE(review): the recorded output shows ses-02 although the visible
# construction cell uses session "01" — likely a different execution order.
bids_path.subject = "02"
bids_path.fullpath

PosixPath('/data2/Projects/eeg_fmri_natview/derivatives/sub-02/ses-02/eeg/sub-02_ses-02_task-rest_run-01_eeg.pkl')

In [17]:
# Distinct values found in the 'description' column.
architecture.database['description'].unique()

array(['gfpBk', 'customGfpBk', 'rawBk', 'raw', 'gfp', 'bandsEnv',
       'customEnv', 'bandsEnvBk', 'customEnvBk', 'bandsGfpBk', 'bandsGfp',
       'customGfp', 'CustomEnvelopes', 'EEGbandsEnvelopes'], dtype=object)

In [2]:
# Select two subjects and list the distinct subject labels of the result.
# NOTE(review): the recorded output is an empty array — verify that select()
# accepts a list of labels.
architecture.select(subject=["01","02"])['subject'].unique()

array([], dtype=float64)

In [15]:
# Scratch check: all() is False here because None is falsy.
all(["a", None, "b"])

False

In [6]:
# Distinct values found in the 'task' column.
architecture.database['task'].unique()

array(['checker', 'rest'], dtype=object)

In [4]:
# Select by a list of task labels; the recorded output is an empty table.
architecture.select(
    task = ['dme','dmh','inscapes','monkey1','monkey2','monkey5','peer','tp']
)

Unnamed: 0,root,subject,session,datatype,task,run,acquisition,description,suffix,extension,filename


In [3]:
# Scratch check of list membership with the `in` operator.
a = ['a','b','caca','popo']
'caca' in a

True

In [4]:
# Check what type a single column of the database is.
type(architecture.database['subject'])

pandas.core.series.Series

In [None]:
import pandas as pd
def is_numerical(dataframe: pd.DataFrame, column_name: str):
    """Tell whether every value of a column is a string made only of digits.

    Args:
        dataframe: Table holding the column to inspect.
        column_name: Name of the column to inspect.

    Returns:
        ``True`` when all values are digit-only strings (also for an empty
        column), ``False`` otherwise.
    """
    # Non-string entries (None, NaN, numbers) have no usable ``isdigit`` and
    # would raise AttributeError, so they count as "not numerical".
    return bool(
        dataframe[column_name]
        .map(lambda value: isinstance(value, str) and value.isdigit())
        .all()
    )



# Check whether every session label consists only of digits.
is_numerical(architecture.database, 'session')


True

In [33]:
import numpy as np
# Boolean masks for 2 <= subject < 9 after casting the subject labels to int;
# the two masks are combined with & and the type of the result is displayed.
lower = (2 <= architecture.database['subject'].astype(int))
higher = (architecture.database['subject'].astype(int) < 9)
type(lower & higher)


pandas.core.series.Series

In [32]:
# Smallest subject label once cast to int.
min(architecture.database['subject'].astype(int))

1

In [2]:
import numpy as np
# Walk everything under root, print each entry, parse it and collect the
# 'task' value, then reduce the list to its unique values.
# NOTE(review): rglob('*') also yields directories — confirm parse_filename
# copes with them.
task = []
for file in root.rglob('*'):
    print(file)
    file_parts = architecture.parse_filename(file)
    task.append(file_parts['task'])

task = np.unique(task)
print(task)


/projects/EEG_FMRI/bids_eeg/BIDS/NEW/PREP_BVA_GR_CB_BK_NOV2024/sub-18_ses-01_task-tp_run-02_desc-GdCb_eeg.edf
/projects/EEG_FMRI/bids_eeg/BIDS/NEW/PREP_BVA_GR_CB_BK_NOV2024/sub-04_ses-01_task-peer_run-01_desc-GdCbBk_eeg.edf
/projects/EEG_FMRI/bids_eeg/BIDS/NEW/PREP_BVA_GR_CB_BK_NOV2024/sub-08_ses-02_task-dmh_run-02_eeg-preproc_GD_CB_BK.edf
/projects/EEG_FMRI/bids_eeg/BIDS/NEW/PREP_BVA_GR_CB_BK_NOV2024/sub-09_ses-01_task-tp_run-01_eeg-preproc_GD_CB.edf
/projects/EEG_FMRI/bids_eeg/BIDS/NEW/PREP_BVA_GR_CB_BK_NOV2024/sub-03_ses-01_task-monkey1_run-01_desc-GdCb_eeg.edf
/projects/EEG_FMRI/bids_eeg/BIDS/NEW/PREP_BVA_GR_CB_BK_NOV2024/sub-17_ses-03_task-dme_run-02_desc-GdCbBk_eeg.edf
/projects/EEG_FMRI/bids_eeg/BIDS/NEW/PREP_BVA_GR_CB_BK_NOV2024/sub-21_ses-01_task-rest_run-01_desc-GdCbBk_eeg.edf
/projects/EEG_FMRI/bids_eeg/BIDS/NEW/PREP_BVA_GR_CB_BK_NOV2024/sub-03_ses-01_task-tp_run-02_desc-GdCb_eeg.edf
/projects/EEG_FMRI/bids_eeg/BIDS/NEW/PREP_BVA_GR_CB_BK_NOV2024/sub-06_ses-02_task-monkey2_ru

In [8]:
file.name

'sub-18_ses-01_task-tp_run-02_desc-GdCb_eeg.edf'

In [7]:
# Parse one absolute path and display the resulting entity dict.
architecture.parse_filename('/data2/Projects/eeg_fmri_natview/derivatives/sub-01/ses-01/eeg/sub-01_ses-01_task-rest_run-01_desc-customGfp_eeg.pkl')

{'root': PosixPath('/data2/Projects/eeg_fmri_natview/derivatives'),
 'subject': '01',
 'session': '01',
 'datatype': 'eeg',
 'task': 'rest',
 'run': '01',
 'acquisition': None,
 'description': 'customGfp',
 'suffix': 'eeg',
 'extension': '.pkl',
 'filename': PosixPath('/data2/Projects/eeg_fmri_natview/derivatives/sub-01/ses-01/eeg/sub-01_ses-01_task-rest_run-01_desc-customGfp_eeg.pkl')}

In [None]:
# Parse one file, create the database dict with a list placeholder, then
# append each parsed value under its key.
# NOTE(review): the recorded outputs further down show every key holding the
# same list object (identical ids), which suggests the single placeholder list
# is shared between keys — confirm in _initiate_database_dict.
file_parts = architecture.parse_filename('/data2/Projects/eeg_fmri_natview/derivatives/sub-01/ses-01/eeg/sub-01_ses-01_task-rest_run-01_desc-customGfp_eeg.pkl')
test = architecture._initiate_database_dict(placeholder=[])
for key, value in file_parts.items():
    test[key].append(value)

root
 /data2/Projects/eeg_fmri_natview/derivatives
subject
 01
session
 01
datatype
 eeg
task
 rest
run
 01
acquisition
 None
description
 customGfp
suffix
 eeg
extension
 .pkl
filename
 /data2/Projects/eeg_fmri_natview/derivatives/sub-01/ses-01/eeg/sub-01_ses-01_task-rest_run-01_desc-customGfp_eeg.pkl


In [18]:
# Scratch check: None has no copy() method, so this line raises AttributeError.
None.copy()

AttributeError: 'NoneType' object has no attribute 'copy'

In [10]:
# Display the dict filled by the append loop.
test

{'root': [PosixPath('/data2/Projects/eeg_fmri_natview/derivatives'),
  '01',
  '01',
  'eeg',
  'rest',
  '01',
  None,
  'customGfp',
  'eeg',
  '.pkl',
  PosixPath('/data2/Projects/eeg_fmri_natview/derivatives/sub-01/ses-01/eeg/sub-01_ses-01_task-rest_run-01_desc-customGfp_eeg.pkl')],
 'subject': [PosixPath('/data2/Projects/eeg_fmri_natview/derivatives'),
  '01',
  '01',
  'eeg',
  'rest',
  '01',
  None,
  'customGfp',
  'eeg',
  '.pkl',
  PosixPath('/data2/Projects/eeg_fmri_natview/derivatives/sub-01/ses-01/eeg/sub-01_ses-01_task-rest_run-01_desc-customGfp_eeg.pkl')],
 'session': [PosixPath('/data2/Projects/eeg_fmri_natview/derivatives'),
  '01',
  '01',
  'eeg',
  'rest',
  '01',
  None,
  'customGfp',
  'eeg',
  '.pkl',
  PosixPath('/data2/Projects/eeg_fmri_natview/derivatives/sub-01/ses-01/eeg/sub-01_ses-01_task-rest_run-01_desc-customGfp_eeg.pkl')],
 'datatype': [PosixPath('/data2/Projects/eeg_fmri_natview/derivatives'),
  '01',
  '01',
  'eeg',
  'rest',
  '01',
  None,
  'cus

In [17]:
# Print the identity of each list in the dict; equal ids mean the keys share one list.
print([id(lst) for lst in test.values()])

[140040774596096, 140040774596096, 140040774596096, 140040774596096, 140040774596096, 140040774596096, 140040774596096, 140040774596096, 140040774596096, 140040774596096, 140040774596096]


In [None]:
import numpy as np

# Element-wise mask telling which subject labels are '01', '05' or '06'.
# NOTE(review): `selector` is not defined in any visible cell — it relies on
# leftover kernel state.
np.isin(selector.files_dataframe['subject'].values,['01','05','06'])

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False,  True,  True,  True,  True,  True,  True,
        True,  True,

In [None]:
# Display the selector's table of files.
# NOTE(review): `selector` is not defined in any visible cell.
selector.files_dataframe