In [None]:
import h5py
import numpy as np
import nibabel as nib
import os
import shutil
#f = h5py.File('/Users/ribeiroadmin/Downloads/100610.labels.hdf5', 'r')# %%
#left = f['lh']['visual_area']
#left = np.array(left)


# NOTE(review): the original flagged this spot with "!!!" -- presumably because
# the root path below is machine-specific and has to be adapted; confirm.
path_to_files = "/BULK/LABDATA/openneuro/nyu4christian/HCP"

# Sub-directory names under path_to_files, one per anatomist.
anatomists = ["A1", "A2", "A3", "A4"]

# Top-level keys looked up in every label file.
hemisphere = ["lh", "rh"]

# Accumulators filled further down: label file names and the subject IDs
# derived from them.
label_files, subjects = [], []


# Collect the names of all "<subject>.labels.hdf5" files of the first anatomist
# that also exist as regular files for every anatomist in `anatomists`.
#
# The suffix is matched including its leading dot: the subject ID is later
# obtained by stripping exactly these 12 characters and the path is rebuilt as
# "<subject>.labels.hdf5", so a name such as "x_labels.hdf5" must not be
# accepted here.
label_suffix = ".labels.hdf5"
path_to_sub = f"{path_to_files}/{anatomists[0]}"
label_files = [
    files
    for files in os.listdir(path_to_sub)
    if files.endswith(label_suffix)
    # The first anatomist is checked as well, which filters out directory
    # entries that merely carry the suffix.
    and all(
        os.path.isfile(f"{path_to_files}/{anatom}/{files}")
        for anatom in anatomists
    )
]


# Create the folder that holds one sub-folder per subject, each containing the
# maps from every anatomist.
new_dir = f"{path_to_files}/subjects"
# exist_ok=True replaces the separate existence check, so there is no window
# between "check" and "create" in which another process can create the folder.
os.makedirs(new_dir, exist_ok=True)

# Derive the subject IDs by cutting the trailing 12 characters
# (the ".labels.hdf5" suffix) off every collected file name.
subjects.extend(file_name[:-12] for file_name in label_files)

print(len(subjects))
print(subjects)

# Convert the 'visual_area' dataset of every subject / anatomist / hemisphere
# into a GIFTI file inside the subject's folder and keep the arrays in `maps`.
# A subject is kept only if every anatomist's file could be read and contains
# every hemisphere with a 'visual_area' entry; otherwise the problem is
# appended to log.txt and the subject's folder and `maps` entry are removed.


def _log_problem(message):
    """Append one line describing a rejected file to <path_to_files>/log.txt."""
    with open(f"{path_to_files}/log.txt", "a") as text:
        text.write(f"{message}\n")


# save all subjects where all data is correct
valid_subjects = []
maps = {}
for sub in subjects:
    # flag telling whether every file of this subject could be processed
    is_valid = True

    # create a folder for every subject
    SUBJECTS_DIR = f"{new_dir}/{sub}"
    os.makedirs(SUBJECTS_DIR, exist_ok=True)

    # create the maps from ".labels.hdf5" files
    for anatom in anatomists:
        path = f"{path_to_files}/{anatom}/{sub}.labels.hdf5"
        try:
            # The file is opened exactly once, inside the try and under a
            # context manager: an unreadable file is logged instead of
            # aborting the run, and the handle is always closed.
            with h5py.File(path, 'r') as f:
                for hemi in hemisphere:
                    # a missing hemisphere or 'visual_area' entry invalidates
                    # the whole subject
                    if hemi not in f:
                        _log_problem(f"{hemi} is missing in: {path}")
                        is_valid = False
                        break
                    if 'visual_area' not in f[hemi]:
                        _log_problem(f"'visual_area' is missing in: {path}")
                        is_valid = False
                        break

                    # load the map into memory before the file is closed
                    curr_map = np.array(f[hemi]['visual_area'])

                    # create the dictionary entry on first use, then store the map
                    if sub not in maps:
                        maps[sub] = {a: {h: [] for h in hemisphere} for a in anatomists}
                    maps[sub][anatom][hemi] = curr_map

                    # save the map next to the other maps of this subject
                    path_to_save = f"{SUBJECTS_DIR}/{anatom}.{hemi}.visual_area.gii"
                    gii_data = nib.gifti.GiftiDataArray(data=curr_map)
                    gii_img = nib.gifti.GiftiImage(darrays=[gii_data])
                    nib.save(gii_img, path_to_save)

        # NOTE: this also catches failures while reading or saving a map, not
        # only failures to open the file; the log message is kept unchanged.
        except Exception:
            _log_problem(f"Can't open file {path}")
            is_valid = False

        if not is_valid:
            break

    if is_valid:
        valid_subjects.append(sub)
    else:
        # drop everything already written or stored for this subject
        if os.path.exists(SUBJECTS_DIR):
            shutil.rmtree(SUBJECTS_DIR)
        maps.pop(sub, None)
subjects = valid_subjects

print(len(subjects))
print(len(maps))





179
['199655', '130518', '209228', '214019', '131722', '365343', '203418', '115017', '155938', '601127', '130114', '158035', '116726', '467351', '585256', '158136', '385046', '191336', '536647', '187345', '901139', '360030', '146129', '783462', '212419', '562345', '191033', '197348', '732243', '134829', '757764', '263436', '899885', '814649', '214524', '186949', '145834', '789373', '140117', '239136', '169040', '725751', '181636', '177645', '134627', '318637', '871762', '251833', '177746', '192641', '765864', '389357', '178142', '200614', '943862', '905147', '249947', '638049', '135124', '162935', '193845', '191841', '406836', '381038', '111514', '395756', '973770', '146432', '100610', '125525', '878776', '150423', '581450', '412528', '167036', '330324', '393247', '178243', '156334', '951457', '680957', '617748', '182739', '115825', '397760', '671855', '283543', '547046', '995174', '380036', '706040', '159239', '146735', '128935', '164636', '966975', '196144', '173334', '901442', '1083