## Get the MultiRat_se dataset into BIDS format

In [4]:
import nibabel as nib
import pandas as pd
import json
import os
import re

### Write README file

In [21]:
# Markdown text of the dataset-level README; it is written to disk verbatim.
readme_content = """

# Dataset Title

This is the MultiRat_se dataset collection, consisting of N=22 datasets representative of the rat sensory-evoked fMRI acquisitions from 12 research institutions. We are thankful to the many contributors. 

## BIDS Version : 1.10.0.

## Metadata
`dataset_description.json` provides details about the dataset, authors and fundings.
`participants.tsv` contains basic demographic information such as age, gender, and group.

"""

# Destination of the README inside the BIDS scratch directory
file_path = '/project/4180000.19/multirat_stim/BIDS_scratch/README.txt'

# Opening in 'w' mode replaces any README left by a previous run
with open(file_path, mode='w') as readme_file:
    readme_file.write(readme_content)
    


### Write participants file (.tsv and .json)


#### participants.json 


In [4]:
import json

# Column descriptions for participants.tsv, keyed by column name.
# NOTE(review): BIDS tabular sidecars usually describe each column with an
# object such as {"Description": "..."} rather than a bare string - confirm
# against the BIDS validator before release.
participants_json = {
    "Dataset_id": "Dataset identifier",
    "Subject_id": "Rat identifier",
    "Rat_strain": "Rat strain",
    "Rat_sex": "Rat sex",
    "Rat_age": "Rat age in months reported in 2 months bins",
    "Rat_weight": "Rat weight in grams reported in 50 grams bins",
    "Anesthesia_induction": "Anesthesia used for induction. Multiple agents are reported using / divider",
    "Anesthesia_induction_dose": "Anesthesia dose for induction. For isoflurane, reported in percentage, for injectables, reported in mg/kg",
    "Anesthesia_maintenance": "Anesthesia used for maintenance. Multiple agents are reported using / divider",
    "Anesthesia_maintenance_dose": "Anesthesia dose for maintenance. For isoflurane, reported in percentage, for injectables, reported in mg/kg/h",
    "Anesthesia_post_induction_time": "Reported time between anesthesia induction and functional imaging in minutes",
    "Anesthesia_breathing_assistance": "Rat breathing assistance. Either free-breathing or mechanically ventilated",
    "Anesthesia_breathing_rate": "Reported rat breathing rate in breath per minute, binned to the next tenth",
    "Anesthesia_temperature": "Reported rat temperature in degree celsius (°C)",

    "MRI_vendor": "Vendor of the magnet",
    "MRI_field_strength": "Magnet field strength in Tesla",

    "Anat_sequence": "Anatomical sequence",
    "Anat_contrast": "Contrast of the anatomical sequence",
    "Anat_TR": "Repetition time in seconds of the anatomical sequence",
    "Anat_TE": "Echo time in seconds of the anatomical sequence",

    "Func_sequence": "Functional sequence. Either gradient echo (GE-) or spin echo (SE-)",
    "Func_TR": "Repetition time in seconds of the functional sequence",
    "Func_TE": "Echo time in seconds of the functional sequence",
    "Func_FA": "Flip angle in degrees of the functional sequence",
    "Func_sensory_system": "Sensory system stimulated during the sensory evoked task. Either, forepaw, hindpaw, eyes or whiskers",
    "Func_sensory_stimulation": "Sensory stimulation type. Either, electrical, thermal, visual, optogenetic or mechanical deflection"
}

# Root of the BIDS dataset being assembled
bids_dir = '/project/4180000.19/multirat_stim/BIDS_scratch/'

# Write the participants.json sidecar (overwrites an existing file)
participants_output_path = os.path.join(bids_dir, 'participants.json')

with open(participants_output_path, 'w') as f:
    json.dump(participants_json, f, indent=4)

# Report the file that was actually written
print(f'participants.json created at {participants_output_path}')



dataset_description.json created at /project/4180000.19/multirat_stim/BIDS_scratch/participants.json


#### participants.tsv

In [5]:
# Standardized metadata table (input) and BIDS root (output)
metadata_path = '/home/traaffneu/margal/code/multirat_se/script/table/metadata_stand.tsv'
metadata = pd.read_csv(metadata_path, sep='\t')
datasets_path = '/project/4180000.19/multirat_stim/BIDS_scratch'
participants_output_path = '/project/4180000.19/multirat_stim/BIDS_scratch/'

# participants.tsv column name -> source column in metadata_stand.tsv.
# The order of this mapping is the column order of the written file.
column_sources = {
    "Dataset_id": 'rat.ds',
    "Subject_id": 'rat.sub',
    "Rat_strain": 'rat.strain',
    "Rat_sex": 'rat.sex',
    "Rat_age": 'rat.age',
    "Rat_weight": 'rat.weight.orig',
    "Anesthesia_induction": 'anesthesia.induction',
    "Anesthesia_induction_dose": 'anesthesia.induction.dose',
    "Anesthesia_maintenance": 'anesthesia.maintenance',
    "Anesthesia_maintenance_dose": 'anesthesia.maintenance.dose',
    "Anesthesia_post_induction_time": 'anesthesia.post.induction.time',
    "Anesthesia_breathing_assistance": 'anesthesia.breathing.assistance',
    "Anesthesia_breathing_rate": 'anesthesia.breathing.rate',
    "Anesthesia_temperature": 'anesthesia.temperature.org',

    "MRI_vendor": 'MRI.vendor',
    "MRI_field_strength": 'MRI.field.strength',

    "Anat_sequence": 'anat.sequence',
    "Anat_contrast": 'anat.contrast',
    "Anat_TR": 'anat.TR',
    "Anat_TE": 'anat.TE',

    "Func_sequence": 'func.sequence',
    "Func_TR": 'func.TR',
    "Func_TE": 'func.TE',
    "Func_FA": 'func.FA',
    "Func_sensory_system": 'func.sensory.system',
    "Func_sensory_stimulation": 'func.sensory.stimulation',
}

# Every column is exported as text.
# NOTE(review): astype(str) turns missing values into the literal 'nan';
# confirm whether the dataset should use 'n/a' for missing entries instead.
participants_tsv = {
    column: metadata[source].astype(str)
    for column, source in column_sources.items()
}

# Assemble the table and save it as participants.tsv in the BIDS root
participants_df = pd.DataFrame(participants_tsv)
participants_df.to_csv(
    os.path.join(participants_output_path, 'participants.tsv'),
    sep='\t',
    index=False,
)


## Write dataset_description.json

In [6]:
# Links published alongside the dataset
references_and_links = [
    "GitHub repository: https://github.com/grandjeanlab/multirat_se",
    "Pre-registration: https://doi.org/10.17605/OSF.IO/8VY9R",
]

# Author list, in publication order
authors = [
    "Marie E Galteau",
    "Margaret Broadwater",
    "Yi Chen",
    "Gabriel Desrosiers-Gregoire",
    "Yujian Diao",
    "Rita Gil",
    "Johannes Kaesser",
    "Eugene Kim",
    "Pervin Kiryagdi",
    "Henriette Lambers",
    "Yanyan Y Liu",
    "Xavier Lopez-Gil",
    "Eilidh MacNicol",
    "Parastoo Mohebkhodaei",
    "Ricardo X N. De Oliveira",
    "Hiroi Nonaka",
    "Carolina A. Pereira",
    "Henning M Reimann",
    "Alejandro Rivera-Olvera",
    "Rie Ryoke",
    "Erwan Selingue",
    "Nikoloz Sirmpilatze",
    "Sandra Strobelt",
    "Akira Sumiyoshi",
    "Channelle Tham",
    "Raul Tudela",
    "Roel M. Vrooman",
    "Yongzhi Zhang",
    "Wessel A van Engelenburg",
    "Isabel Wank",
    "Jurgen Baudewig",
    "Susann Boretius",
    "Diana Cash",
    "M Mallar Chakravarty",
    "Kai-Hsiang Chuang",
    "Luisa Ciobanu",
    "Gabriel A Devenyi",
    "Cornelius Faber",
    "Andreas Hess",
    "Judith R Homberg",
    "Ileana O Jelescu",
    "Carles Justicia",
    "Ryuta Kawashima",
    "Thoralf Niendorf",
    "Tom WJ Scheenen",
    "Noam Shemesh",
    "Guadalupe Soria",
    "Nick Todd",
    "Lydia Wachsmuth",
    "Xin Yu",
    "Baogui B Zhang",
    "Yen-Yu Ian Shih",
    "Sung-Ho Lee",
    "Joanes Grandjean",
]

# Funding statement; the adjacent literals are concatenated into one string
acknowledgements = (
    "This project was kindly supported by the Dutch Research Council (OCENW.KLEIN.334, OSF23.1.037), "
    "National Institute of Health (K01EB023983, T32 AA007573), "
    "Deutsche Forschungsgemeinschaft (406818964), "
    "UK Biotechnology and Biological Sciences Research Council (BBSRC, BB/N009088/1), "
    "UK Medical Research Council (MR/N013700/1), "
    "European Research Council (ERC; agreement No. PI18/00893, 896245, 679058), "
    "Ministerio de Economía y Competitividad (DPI2015-64358-C2-2-R), "
    "Fonds de recherche du Québec, "
    "Interdisciplinary Center for Clinical Research Münster (PIX), "
    "Fundação para a Ciência e Tecnologia (Portugal, project 275-FCT-PTDC/BBB-IMG/5132/2014)."
)

# Content of dataset_description.json; key order is the order written to disk
dataset_description = {
    "Name": "MultiRat_se",
    "BIDSVersion": "1.10.0",
    "DatasetType": "raw",
    "ReferencesAndLinks": references_and_links,
    "Authors": authors,
    "Acknowledgements": acknowledgements,
}

# Path to the BIDS dataset root directory
bids_dir = '/project/4180000.19/multirat_stim/BIDS_scratch/'

# Serialize first, then write the text in one go (replaces an existing file)
output_path = os.path.join(bids_dir, 'dataset_description.json')
serialized_description = json.dumps(dataset_description, indent=4)

with open(output_path, 'w') as description_file:
    description_file.write(serialized_description)

print(f'dataset_description.json created at {output_path}')


dataset_description.json created at /project/4180000.19/multirat_stim/BIDS_scratch/dataset_description.json


## Create JSON sidecars for anat and func scans

#### Func scans

In [7]:
# Convert FUNCTIONAL Nifti files to JSON

def nifti_to_json(nifti_path, json_path, metadata, metadata_index):
    """Write the JSON sidecar of one functional scan from its metadata row.

    All values come from the metadata table; nothing is read from the image.

    Args:
        nifti_path: Path of the functional NIfTI image the sidecar belongs
            to. The image is not opened; the parameter is kept so existing
            callers keep working.
        json_path: Destination of the sidecar. An existing file is
            overwritten.
        metadata: DataFrame providing the ``rat.*``, ``anesthesia.*``,
            ``MRI.*`` and ``func.*`` columns read below. The table passed by
            the caller is used as is (it is no longer re-read from disk).
        metadata_index: Position of the rat's row, used with ``.iloc``.

    Raises:
        IndexError: If ``metadata_index`` is outside the table.
        KeyError: If one of the required columns is missing.
        ValueError: If a field strength, TE or TR value cannot be converted
            to ``float``.
    """
    # Select the row once instead of once per field
    row = metadata.iloc[metadata_index]

    extracted_metadata = {
        "Subject": str(row["rat.sub"]),
        "RatStrain": str(row["rat.strain"]),
        "RatSex": str(row["rat.sex"]),
        "AnesthesiaMaintenance": str(row["anesthesia.maintenance"]),
        "TaskName": "sensory-stimulation",

        # Stimulation paradigm; onsets and durations are stored as text,
        # exactly as they appear in the metadata table
        "Paradigm": {
            "SensorySystem": str(row["func.sensory.system"]),
            "Type": str(row["func.sensory.stimulation"]),
            "Onsets": str(row["func.sensory.onset"]),
            "Duration": str(row["func.sensory.duration"])
        },
        "MRIVendor": str(row["MRI.vendor"]),
        "MagneticFieldStrength": float(row["MRI.field.strength"]),
        "EchoTime": float(row["func.TE"]),
        "RepetitionTime": float(row["func.TR"]),
        "Sequence": str(row["func.sequence"])
    }

    # Write metadata to JSON file
    with open(json_path, 'w') as json_file:
        json.dump(extracted_metadata, json_file, indent=4)

# Path to the metadata_stand.tsv file
metadata_path = '/home/traaffneu/margal/code/multirat_se/script/table/metadata_stand.tsv'
metadata = pd.read_csv(metadata_path, sep='\t')
datasets_path = '/project/4180000.19/multirat_stim/BIDS_scratch'

# Write one JSON sidecar next to every functional NIfTI image.
# Directories and files are visited in sorted order so the printed log is
# reproducible between runs.
for root, dirs, files in sorted(os.walk(datasets_path)):
    if 'func' not in root:
        continue

    for file in sorted(files):
        # Test the longer suffix first so '.nii.gz' is not taken for '.nii'
        if file.endswith('.nii.gz'):
            suffix = '.nii.gz'
        elif file.endswith('.nii'):
            suffix = '.nii'
        else:
            continue

        nifti_path = os.path.join(root, file)
        # e.g. 'sub-0200100_ses-1_...' -> '200100' (leading character dropped)
        subject_num = file.split('_')[0].split('-')[1][1:]

        # Row positions rather than index labels, because nifti_to_json
        # selects the row with .iloc
        positions = (metadata['rat.sub'] == int(subject_num)).to_numpy().nonzero()[0]
        if len(positions) == 0:
            # Previously this case raised IndexError before the (dead)
            # bounds check could report it
            print(f'No metadata row found for subject {subject_num}; skipping {file}')
            continue
        metadata_index = int(positions[0])
        print(subject_num)
        print(file)

        # Swap only the trailing extension; str.replace would also alter a
        # matching fragment elsewhere in the path
        json_path = nifti_path[:-len(suffix)] + '.json'

        # One call per image (the sidecar used to be written twice)
        nifti_to_json(nifti_path, json_path, metadata, metadata_index)



200100
sub-0200100_ses-1_task-sensorystim_run-1_bold.nii.gz
200101
sub-0200101_ses-1_task-sensorystim_run-1_bold.nii.gz
200102
sub-0200102_ses-1_task-sensorystim_run-1_bold.nii.gz
200103
sub-0200103_ses-1_task-sensorystim_run-1_bold.nii.gz
200104
sub-0200104_ses-1_task-sensorystim_run-1_bold.nii.gz
200105
sub-0200105_ses-1_task-sensorystim_run-1_bold.nii.gz
200106
sub-0200106_ses-1_task-sensorystim_run-1_bold.nii.gz
200107
sub-0200107_ses-1_task-sensorystim_run-1_bold.nii.gz
200108
sub-0200108_ses-1_task-sensorystim_run-1_bold.nii.gz
200109
sub-0200109_ses-1_task-sensorystim_run-1_bold.nii.gz
200200
sub-0200200_ses-1_task-sensorystim_run-1_bold.nii.gz
200201
sub-0200201_ses-1_task-sensorystim_run-1_bold.nii.gz
200202
sub-0200202_ses-1_task-sensorystim_run-1_bold.nii.gz
200203
sub-0200203_ses-1_task-sensorystim_run-1_bold.nii.gz
200204
sub-0200204_ses-1_task-sensorystim_run-1_bold.nii.gz
200205
sub-0200205_ses-1_task-sensorystim_run-1_bold.nii.gz
200206
sub-0200206_ses-1_task-sensorysti

#### Anat scans

In [27]:
# Convert ANATOMICAL Nifti files to JSON

def nifti_to_json(nifti_path, json_path, metadata, metadata_index):
    """Write the JSON sidecar of one anatomical scan from its metadata row.

    All values come from the metadata table; nothing is read from the image.

    Args:
        nifti_path: Path of the anatomical NIfTI image. Only its file name is
            used, to recover the subject number; the image is not opened.
        json_path: Destination of the sidecar. An existing file is
            overwritten.
        metadata: DataFrame providing the ``rat.*``, ``anesthesia.*``,
            ``MRI.*`` and ``anat.*`` columns read below.
        metadata_index: Position of the rat's row, used with ``.iloc``.

    Raises:
        IndexError: If ``metadata_index`` is outside the table.
        KeyError: If one of the required columns is missing.
        ValueError: If a field strength, TR or TE value cannot be converted
            to ``float``.
    """
    # Select the row once instead of once per field
    row = metadata.iloc[metadata_index]

    # Recover the subject number from the file name with the same expression
    # the calling loop uses, instead of reading the loop's global variable.
    # A name without a 'sub-<id>' prefix yields an empty string.
    name_parts = os.path.basename(nifti_path).split('_')[0].split('-')
    subject_num = name_parts[1][1:] if len(name_parts) > 1 else ''

    extracted_metadata = {
        'Subject': str(row['rat.sub']),
        'RatStrain': str(row['rat.strain']),
        'RatSex': str(row['rat.sex']),
        'AnesthesiaMaintenance': str(row['anesthesia.maintenance']),
        'MRIVendor': str(row['MRI.vendor']),
        'MagneticFieldStrength': float(row['MRI.field.strength']),
        'RepetitionTime': float(row['anat.TR']),
        'Sequence': str(row['anat.sequence'])
    }

    # EchoTime is only written when it is a non-negative number; a missing
    # (NaN) value fails the comparison and is left out.
    # NOTE(review): subject numbers extracted from names such as
    # 'sub-0200100' look like '200100', so the '020140' exclusion may never
    # match - confirm which subject(s) it is meant to exclude.
    if subject_num != '020140' and row['anat.TE'] >= 0:
        extracted_metadata['EchoTime'] = float(row['anat.TE'])

    # Write metadata to JSON file
    with open(json_path, 'w') as json_file:
        json.dump(extracted_metadata, json_file, indent=4)

# Path to the metadata_stand.tsv file
metadata_path = '/home/traaffneu/margal/code/multirat_se/script/table/metadata_stand.tsv'
metadata = pd.read_csv(metadata_path, sep='\t')
datasets_path = '/project/4180000.19/multirat_stim/BIDS_scratch'

# Write one JSON sidecar next to every anatomical NIfTI image.
# Directories and files are visited in sorted order so the printed log is
# reproducible between runs.
for root, dirs, files in sorted(os.walk(datasets_path)):
    if 'anat' not in root:
        continue

    for file in sorted(files):
        # Test the longer suffix first so '.nii.gz' is not taken for '.nii'
        if file.endswith('.nii.gz'):
            suffix = '.nii.gz'
        elif file.endswith('.nii'):
            suffix = '.nii'
        else:
            continue

        nifti_path = os.path.join(root, file)
        # e.g. 'sub-0200100_ses-1_T2w.nii.gz' -> '200100'
        subject_num = file.split('_')[0].split('-')[1][1:]

        # Row positions rather than index labels, because nifti_to_json
        # selects the row with .iloc
        positions = (metadata['rat.sub'] == int(subject_num)).to_numpy().nonzero()[0]
        if len(positions) == 0:
            # Previously this case raised IndexError before the (dead)
            # bounds check could report it
            print(f'No metadata row found for subject {subject_num}; skipping {file}')
            continue
        metadata_index = int(positions[0])
        print(subject_num)
        print(file)

        # Swap only the trailing extension; str.replace would also alter a
        # matching fragment elsewhere in the path
        json_path = nifti_path[:-len(suffix)] + '.json'

        # One call per image (the sidecar used to be written twice)
        nifti_to_json(nifti_path, json_path, metadata, metadata_index)


200100
sub-0200100_ses-1_T2w.nii.gz
200101
sub-0200101_ses-1_T2w.nii.gz
200102
sub-0200102_ses-1_T2w.nii.gz
200103
sub-0200103_ses-1_T2w.nii.gz
200104
sub-0200104_ses-1_T2w.nii.gz
200105
sub-0200105_ses-1_T2w.nii.gz
200106
sub-0200106_ses-1_T2w.nii.gz
200107
sub-0200107_ses-1_T2w.nii.gz
200108
sub-0200108_ses-1_T2w.nii.gz
200109
sub-0200109_ses-1_T2w.nii.gz
200200
sub-0200200_ses-1_T2w.nii.gz
200201
sub-0200201_ses-1_T2w.nii.gz
200202
sub-0200202_ses-1_T2w.nii.gz
200203
sub-0200203_ses-1_T2w.nii.gz
200204
sub-0200204_ses-1_T2w.nii.gz
200205
sub-0200205_ses-1_T2w.nii.gz
200206
sub-0200206_ses-1_T2w.nii.gz
200207
sub-0200207_ses-1_T2w.nii.gz
200208
sub-0200208_ses-1_T2w.nii.gz
200300
sub-0200300_ses-1_T2w.nii.gz
200301
sub-0200301_ses-1_T2w.nii.gz
200302
sub-0200302_ses-1_T2w.nii.gz
200303
sub-0200303_ses-1_T2w.nii.gz
200304
sub-0200304_ses-1_T2w.nii.gz
200305
sub-0200305_ses-1_T2w.nii.gz
200306
sub-0200306_ses-1_T2w.nii.gz
200307
sub-0200307_ses-1_T2w.nii.gz
200308
sub-0200308_ses-1_T2w

### Write event file

#### Rename func scans: add the `_task-sensorystim` entity to the file names

In [3]:
# --- Rename to add task param in nifti and json files names ---

datasets_path = '/project/4180000.19/multirat_stim/BIDS_scratch'

# Functional file names that still lack a task entity:
# sub-<digits>_ses-<digits>_run-<digits><anything>
_UNLABELLED_FUNC_NAME = re.compile(r'sub-(\d+)_ses-(\d+)_run-(\d+)(.*)')


def add_task_label(file_name):
    """Return ``file_name`` with ``_task-sensorystim`` inserted before ``_run-``.

    Args:
        file_name: Base name of a file, without directory.

    Returns:
        The new name, or ``None`` when ``file_name`` does not start with
        ``sub-<digits>_ses-<digits>_run-<digits>``. Names that already carry
        the task entity do not match, so they also give ``None``.
    """
    match = _UNLABELLED_FUNC_NAME.match(file_name)
    if match is None:
        return None
    subject, session, run, suffix = match.groups()
    return f'sub-{subject}_ses-{session}_task-sensorystim_run-{run}{suffix}'


# Visit every directory whose path contains 'func' and rename matching files
for root, dirs, files in os.walk(datasets_path):
    if 'func' not in root:
        continue

    for file in files:
        new_file_name = add_task_label(file)
        if new_file_name is None:
            continue

        old_file_path = os.path.join(root, file)
        new_file_path = os.path.join(root, new_file_name)

        # Check that the new filename is not already used: os.rename would
        # otherwise replace the existing file without warning
        if os.path.exists(new_file_path):
            print(f'Skipped: {new_file_path} already exists')
            continue

        # Rename the file
        os.rename(old_file_path, new_file_path)
        print(f'Renamed: {old_file_path} to {new_file_path}')


#### Create event file in sub directories


In [10]:
def event_file(metadata, metadata_index, output_path):
    """Write a one-row ``task-sensorystim_events.tsv`` for a single rat.

    Args:
        metadata: DataFrame providing the ``func.sensory.onset`` and
            ``func.sensory.duration`` columns.
        metadata_index: Position of the rat's row, used with ``.iloc``.
        output_path: String prepended as is to the file name, so a directory
            must end with a path separator.
    """
    row = metadata.iloc[metadata_index]

    # Both values are stored as text in a single-row table
    events = pd.DataFrame(
        {
            'onset': [str(row['func.sensory.onset'])],
            'duration': [str(row['func.sensory.duration'])],
        }
    )

    # Tab-separated, header included, no index column
    target = output_path + 'task-sensorystim_events.tsv'
    events.to_csv(target, sep='\t', index=False)

In [11]:
# Path to the metadata_stand.tsv file
metadata_path = '/home/traaffneu/margal/code/multirat_se/script/table/metadata_stand.tsv'
metadata = pd.read_csv(metadata_path, sep='\t')
datasets_path = '/project/4180000.19/multirat_stim/BIDS_scratch'

# Write one events file into the top-level folder of each subject.
# NOTE(review): subjects are discovered through their anatomical images, so a
# subject without an anat scan gets no events file - confirm this is intended
# (the events describe the functional task).
for root, dirs, files in sorted(os.walk(datasets_path)):
    if 'anat' not in root:
        continue

    for file in sorted(files):
        if not (file.endswith('.nii') or file.endswith('.nii.gz')):
            continue

        # e.g. 'sub-0200100_ses-1_T2w.nii.gz' -> '200100'
        subject_num = file.split('_')[0].split('-')[1][1:]

        # Row positions rather than index labels, because event_file
        # selects the row with .iloc
        positions = (metadata['rat.sub'] == int(subject_num)).to_numpy().nonzero()[0]
        if len(positions) == 0:
            # Previously this case raised IndexError before the (dead)
            # bounds check could report it
            print(f'No metadata row found for subject {subject_num}; skipping {file}')
            continue
        metadata_index = int(positions[0])
        print(subject_num)

        # Trailing slash required: event_file appends the file name directly
        output_path = f'{datasets_path}/sub-0{subject_num}/'

        # One call per image (the events file used to be written twice)
        event_file(metadata, metadata_index, output_path)
            

200100
200101
200102
200103
200104
200105
200106
200107
200108
200109
200200
200201
200202
200203
200204
200205
200206
200207
200208
200300
200301
200302
200303
200304
200305
200306
200307
200308
200309
200400
200401
200402
200403
200404
200405
200406
200407
200408
200409
200500
200501
200502
200503
200504
200505
200506
200507
200508
200509
200600
200601
200602
200603
200604
200605
200606
200607
200608
200609
200700
200701
200702
200703
200704
200705
200706
200707
200800
200801
200802
200803
200804
200805
200806
200807
200808
200900
200901
200902
200903
200904
200905
200906
200907
200908
200909
201000
201001
201002
201003
201004
201005
201006
201007
201008
201009
201100
201101
201102
201103
201104
201105
201106
201107
201108
201109
201200
201201
201202
201203
201204
201205
201206
201207
201208
201300
201301
201302
201303
201304
201305
201306
201307
201308
201309
201400
201401
201402
201403
201404
201405
201406
201407
201408
201409
201500
201501
201502
201503
201504
201505
201506
201507