In [1]:
import os
import os.path as op
import platform
import shutil
import sys
from glob import glob
from collections import OrderedDict as od

import numpy as np
import pandas as pd

# Read Excel inputs

In [2]:
csv_dir = "/home/mac/dschonhaut/projects/ideas/data/metadata" if (platform.system()=='Linux') else 
source_dir = "/mnt/coredata/Projects/IDEAS_FullAnalysis"
target_dir = "/mnt/coredata/Projects/IDEAS/data"

# Load the ideas_paths ssheet.
ideas_paths = pd.read_csv(
    "/Users/dschonhaut/Box/projects/ideas/data/metadata/IDEAS_paths_forDaniel_01-17-23_NM.csv"
)
assert ideas_paths["ID"].size == ideas_paths["ID"].nunique()
ideas_paths.set_index("ID", inplace=True)
print("ideas_paths: {}".format(ideas_paths.shape))

SyntaxError: invalid syntax (2416205788.py, line 1)

# Create ideas_reorg

In [None]:
# Create the ideas_reorg dataframe.
ideas_reorg_cols = od([
    # Directory organization
    # ----------------------
    ("subj_dir_source", "string"),        # subject dir where all recon dirs are saved
    ("dcm_dir_used_source", "string"),    # recon dir used for processing; we do *.dcm here (always "path_to_dcm_folder")
    ("rPOP_dir_source", "string"),        # dir where rPOP files are found ("FIX_path_ALL" if it exists, otherwise "path_to_rPOP_outputs")
    ("failed_rPOP_dir_source", "string"), # dir where failed rPOP files are found; "path_to_rPOP_outputs" if "FIX_path_ALL" exists
    ("subj_dir_target", "string"),        # proj_dir/subj; subject dir where all data will be copied
    ("dcm_dir_used_target", "string"),    # recon dir used for processing
    ("raw_dir_target", "string"),         # proj_dir/subj/raw
    ("proc_dir_target", "string"),        # proj_dir/subj/proc
    ("realign_dir_target", "string"),     # proj_dir/subj/proc/intermed/realign
    ("rPOP_dir_target", "string"),        # proj_dir/subj/proc/intermed/rPOP
    ("empty_voxel_dir_target", "string"), # proj_dir/subj/proc/intermed/empty_voxels
    ("failed_rPOP_dir_target", "string"), # proj_dir/subj/proc/intermed/.rPOP_failed
    
    # Raw PET
    # -------
    ("n_recon_dirs", int),                # number of recon dirs
    ("n_recon_dcms", int),                # number of recon'd .dcm's in raw_dir/
    ("n_recon_niis", int),                # number of converted .nii's in recon_used/ (not realigned frames or their mean)
    ("recon_dirs", object),               # {old_path: new_path} all recon dirs
    ("recon_niis", object),               # {old_path: new_path} 1+ .nii frames obtained directly from dcm2nii conversion
    ("dcmHeaders", object),               # {old_path: new_path} the DICOM header info file dcmHeaders.mat
    
    # Processed PET
    # -------------
    # Realign
    ("realigned_niis", object),           # {old_path: new_path} realigned .nii frames
    ("rmean_nii", object),                # {old_path: new_path} mean PET across realigned .nii frames
    ("rp_rframes", object),               # {old_path: new_path} linear transform for PET frame realignment
    
    # rPOP
    ("rrmean_nii", object),               # {old_path: new_path} recentered, mean PET
    ("wrrmean_nii", object),              # {old_path: new_path} warped, recentered, mean PET
    ("sn_rrmean", object),                # {old_path: new_path} non-linear transform from native to template space
    ("swrrmean_nii", object),             # {old_path: new_path} smoothed, warped, recentered, mean PET
    ("automask_res", object),             # {old_path: new_path} estimated native resolution of the wrrmean PET
    
    # Empty voxel estimation
    ("empty_voxel_mask", "string"),       # .nii file with 1's where the value < empty_voxel_thresh and 0's otherwise
    ("empty_voxel_thresh", "string"),     # .txt file with the empty voxel threshold, defined as mean + 2SD for values in the halo ROI
    
    # Final SUVR
    ("suvr-wcbl_swrrmean_nii", "string"), # .nii file to the SUVR image (processed normally, w/o removing empty voxels)
    
    # Failed rPOP
    ("first_rPOP_failed", bool),          # True if auto rPOP led to warping failure and PET was reprocessed with manual recentering
    ("failed_rrmean_nii", object),        # {old_path: new_path} recentered, mean PET
    ("failed_wrrmean_nii", object),       # {old_path: new_path} warped, recentered, mean PET
    ("failed_sn_rrmean", object),         # {old_path: new_path} non-linear transform from native to template space
    ("failed_swrrmean_nii", object),      # {old_path: new_path} smoothed, warped, recentered, mean PET
    ("failed_automask_res", object),      # {old_path: new_path} estimated native resolution of the wrrmean PET
])
ideas_reorg = pd.DataFrame({c: pd.Series(dtype=t) for c, t in ideas_reorg_cols.items()}, index=ideas_paths.index)
print(f"ideas_reorg: {ideas_reorg.shape}")

## Directory organization

In [163]:
def _get_subj_dir_source(dcm_dir_used_source, subj):
    sdcm_dir_used_source = dcm_dir_used_source.split(os.sep)
    subj_idx = sdcm_dir_used_source.index(str(subj))
    return os.sep.join(sdcm_dir_used_source[: subj_idx + 1])


def _get_rPOP_dir_source(fix_path_all, path_to_rpop_outputs):
    if pd.isna(fix_path_all):
        return op.dirname(op.normpath(path_to_rpop_outputs))
    return op.dirname(op.normpath(fix_path_all))


def _get_failed_rPOP_dir_source(fix_path_all, path_to_rpop_outputs):
    if pd.isna(fix_path_all):
        return np.nan
    return op.dirname(op.normpath(path_to_rpop_outputs))


def _get_dcm_dir_used_target(row):
    return row["dcm_dir_used_source"].replace(row["subj_dir_source"], row["raw_dir_target"])


def _get_failed_rPOP_dir_target(first_rPOP_failed, subj_dir_target):
    if first_rPOP_failed:
        return op.join(subj_dir_target, "proc", "intermed", ".rPOP_failed")
    return np.nan
    

def _get_recon_dirs(subj_dir_source, raw_dir_target):
    recon_dirs_source = sorted([path for path in glob(op.join(subj_dir_source, "*")) if op.isdir(path)])
    recon_dirs_target = [path.replace(subj_dir_source, raw_dir_target) for path in recon_dirs_source]
    return od(zip(recon_dirs_source, recon_dirs_target))


ideas_reorg["dcm_dir_used_source"] = ideas_paths["path_to_dcm_folder"].apply(op.normpath)
ideas_reorg["subj_dir_source"] = ideas_reorg.apply(lambda x: _get_subj_dir_source(x["dcm_dir_used_source"], x.name), axis=1)
ideas_reorg["rPOP_dir_source"] = ideas_paths.apply(lambda x: _get_rPOP_dir_source(x["FIX_path_ALL"], x["path_to_rPOP_outputs"]), axis=1)
ideas_reorg["failed_rPOP_dir_source"] = ideas_paths.apply(lambda x: _get_failed_rPOP_dir_source(x["FIX_path_ALL"], x["path_to_rPOP_outputs"]), axis=1)
ideas_reorg["subj_dir_target"] = [op.join(proj_dir, str(subj)) for subj in ideas_paths.index]
ideas_reorg["raw_dir_target"] = ideas_reorg["subj_dir_target"].apply(lambda x: op.join(x, "raw"))
ideas_reorg["dcm_dir_used_target"] = ideas_reorg.apply(lambda x: _get_dcm_dir_used_target(x), axis=1)
ideas_reorg["proc_dir_target"] = ideas_reorg["subj_dir_target"].apply(lambda x: op.join(x, "proc"))
ideas_reorg["realign_dir_target"] = ideas_reorg["subj_dir_target"].apply(lambda x: op.join(x, "proc", "intermed", "realign"))
ideas_reorg["rPOP_dir_target"] = ideas_reorg["subj_dir_target"].apply(lambda x: op.join(x, "proc", "intermed", "rPOP"))
ideas_reorg["empty_voxel_dir_target"] = ideas_reorg["subj_dir_target"].apply(lambda x: op.join(x, "proc", "intermed", "empty_voxels"))
ideas_reorg["first_rPOP_failed"] = ideas_paths["FIX_path_ALL"].apply(lambda x: not pd.isna(x))
ideas_reorg["failed_rPOP_dir_target"] = ideas_reorg.apply(lambda x: _get_failed_rPOP_dir_target(x["first_rPOP_failed"], x["subj_dir_target"]), axis=1)
ideas_reorg["recon_dirs"] = ideas_reorg.apply(lambda x: _get_recon_dirs(x["subj_dir_source"], x["raw_dir_target"]), axis=1)
ideas_reorg["n_recon_dirs"] = ideas_reorg["recon_dirs"].apply(lambda x: len(x) if not pd.isna(x) else np.nan)

print("'recon_dirs': {}/{} NA values".format(pd.isna(ideas_reorg["recon_dirs"]).sum(), len(ideas_reorg)))
print("'n_recon_dirs': {}/{} NA values".format(pd.isna(ideas_reorg["n_recon_dirs"]).sum(), len(ideas_reorg)))

'recon_dirs': 0/10700 NA values
'n_recon_dirs': 0/10700 NA values


In [None]:
# Create new database structure and copy raw recon folders in.
overwrite = False
# ------------------------------------

count = 0
for subj, row in ideas_reorg.iterrows():
    for oldpath, newpath in row["recon_dirs"].items():
        if np.all((op.isdir(oldpath), np.any((overwrite, not op.exists(newpath))))):
            shutil.copytree(src=oldpath, dst=newpath)
    recon_link = op.join(row["raw_dir_target"], "recon_used")
    if overwrite or not op.exists(recon_link):
        if op.islink(recon_link):
            os.unlink(recon_link)
    os.symlink(row["dcm_dir_used_target"], recon_link)
    count += 1

print(count, subj)

## Raw PET

In [164]:
def _get_n_recon_dcms(raw_dir_target):
    if pd.isna(raw_dir_target) or not op.isdir(raw_dir_target):
        return np.nan
    recon_used = op.join(raw_dir_target, "recon_used")
    if not op.islink(recon_used):
        return np.nan
    dcm_files = glob(op.join(recon_used, "*.dcm"))
    return len(dcm_files)


def _get_dcmHeaders(raw_dir_target, subj):
    if pd.isna(raw_dir_target) or not op.isdir(raw_dir_target):
        return np.nan
    recon_used = op.join(raw_dir_target, "recon_used")
    if not op.islink(recon_used):
        return np.nan
    dcm_hdr_source = op.join(recon_used, "dcmHeaders.mat")
    dcm_hdr_target = op.join(recon_used, "dcmHeaders_{}.mat".format(subj))
    if not op.isfile(dcm_hdr_source):
        return np.nan
    return od({dcm_hdr_source: dcm_hdr_target})

    
def _get_recon_niis(raw_dir_target, subj):
    if pd.isna(raw_dir_target) or not op.isdir(raw_dir_target):
        return np.nan
    recon_used = op.join(raw_dir_target, "recon_used")
    if not op.islink(recon_used):
        return np.nan
    recon_niis_source = sorted(glob(op.join(recon_used, "*.nii")))
    recon_niis_source = [
        f for f in recon_niis_source
        if not np.any((op.basename(f).startswith("r"),
                       op.basename(f).endswith("Averaged.nii")))
    ]
    if len(recon_niis_source) == 0:
        return np.nan
    recon_niis_target = []
    for iFrame in range(1, len(recon_niis_source)+1):
        recon_niis_target.append(op.join(recon_used, "frame{:>03}_{}.nii".format(iFrame, subj)))
    return od(zip(recon_niis_source, recon_niis_target))


ideas_reorg["n_recon_dcms"] = ideas_reorg["raw_dir_target"].apply(_get_n_recon_dcms)
ideas_reorg["dcmHeaders"] = ideas_reorg.apply(lambda x: _get_dcmHeaders(x["raw_dir_target"], x.name), axis=1)
ideas_reorg["recon_niis"] = ideas_reorg.apply(lambda x: _get_recon_niis(x["raw_dir_target"], x.name), axis=1)
ideas_reorg["n_recon_niis"] = ideas_reorg["recon_niis"].apply(lambda x: len(x) if not pd.isna(x) else np.nan)


print("'n_recon_dcms': {}/{} NA values".format(pd.isna(ideas_reorg["n_recon_dcms"]).sum(), len(ideas_reorg)))
print("'dcmHeaders': {}/{} NA values".format(pd.isna(ideas_reorg["dcmHeaders"]).sum(), len(ideas_reorg)))
print("'recon_niis': {}/{} NA values".format(pd.isna(ideas_reorg["recon_niis"]).sum(), len(ideas_reorg)))
print("'n_recon_niis': {}/{} NA values".format(pd.isna(ideas_reorg["n_recon_niis"]).sum(), len(ideas_reorg)))

'n_recon_dcms': 0/10700 NA values
'dcmHeaders': 7/10700 NA values
'recon_niis': 0/10700 NA values
'n_recon_niis': 0/10700 NA values


In [165]:
ideas_reorg.loc[pd.isna(ideas_reorg["dcmHeaders"])].index.tolist()

[50651, 51445, 54136, 55545, 56959, 60746, 64150]

In [161]:
# Rename raw .nii frames.
overwrite = False
# ------------------------------------

count = 0
for subj, row in ideas_reorg.iterrows():
    if not pd.isna(row["recon_niis"]):
        for oldpath, newpath in row["recon_niis"].items():
            if np.all((op.isfile(oldpath), np.any((overwrite, not op.exists(newpath))))):
                os.makedirs(op.dirname(newpath), exist_ok=True)
                os.rename(oldpath, newpath)
    count += 1

print(count, subj)

10700 64681


## Realign

In [168]:
def _get_realigned_niis(raw_dir_target, realign_dir_target, subj):
    if np.any((pd.isna(raw_dir_target), pd.isna(realign_dir_target))) or not op.isdir(raw_dir_target):
        return np.nan
    recon_used = op.join(raw_dir_target, "recon_used")
    if not op.islink(recon_used):
        return np.nan
    realigned_niis_source = sorted(glob(op.join(recon_used, "r*.nii")))
    if len(realigned_niis_source) == 0:
        return np.nan
    realigned_niis_target = []
    for iFrame in range(1, len(realigned_niis_source)+1):
        realigned_niis_target.append(op.join(realign_dir_target, "rframe{:>03}_{}.nii".format(iFrame, subj)))
    return od(zip(realigned_niis_source, realigned_niis_target))
    
    
def _get_rmean(raw_dir_target, realign_dir_target, recon_niis, subj):
    if np.any((pd.isna(raw_dir_target), pd.isna(realign_dir_target))) or not op.isdir(raw_dir_target):
        return np.nan
    recon_used = op.join(raw_dir_target, "recon_used")
    if not op.islink(recon_used):
        return np.nan
    rmean_source = glob(op.join(recon_used, "*Averaged.nii"))
    rmean_target = op.join(realign_dir_target, "rmean_{}.nii".format(subj))
    if (len(rmean_source) == 0) and (len(recon_niis) == 1):
        return od({list(recon_niis.values())[0]: rmean_target})
    elif len(rmean_source) == 1:
        return od({rmean_source[0]: rmean_target})
    return np.nan
    

def _get_rp_rframes(raw_dir_target, realign_dir_target, subj):
    if np.any((pd.isna(raw_dir_target), pd.isna(realign_dir_target))) or not op.isdir(raw_dir_target):
        return np.nan
    recon_used = op.join(raw_dir_target, "recon_used")
    if not op.islink(recon_used):
        return np.nan
    rp_rframes_source = glob(op.join(recon_used, "rp*.txt"))
    rp_rframes_target = op.join(realign_dir_target, "rp_rframes_{}.txt".format(subj))
    if len(rp_rframes_source) != 1:
        return np.nan
    return od({rp_rframes_source[0]: rp_rframes_target})
    
    
ideas_reorg["realigned_niis"] = ideas_reorg.apply(lambda x: _get_realigned_niis(x["raw_dir_target"], x["realign_dir_target"], x.name), axis=1)
ideas_reorg["rmean_nii"] = ideas_reorg.apply(lambda x: _get_rmean(x["raw_dir_target"], x["realign_dir_target"], x["recon_niis"], x.name), axis=1)
ideas_reorg["rp_rframes"] = ideas_reorg.apply(lambda x: _get_rp_rframes(x["raw_dir_target"], x["realign_dir_target"], x.name), axis=1)


print("'realigned_niis': {}/{} NA values".format(pd.isna(ideas_reorg.loc[ideas_reorg["n_recon_niis"]>1, "realigned_niis"]).sum(),
                                                 len(ideas_reorg.loc[ideas_reorg["n_recon_niis"]>1])))
print("'rmean_nii': {}/{} NA values".format(pd.isna(ideas_reorg["rmean_nii"]).sum(), len(ideas_reorg)))
print("'rp_rframes': {}/{} NA values".format(pd.isna(ideas_reorg.loc[ideas_reorg["n_recon_niis"]>1, "rp_rframes"]).sum(),
                                             len(ideas_reorg.loc[ideas_reorg["n_recon_niis"]>1])))

'realigned_niis': 9726/10700 NA values
'rmean_nii': 4/10700 NA values
'rp_rframes': 9726/10700 NA values


In [175]:
# Move or copy files into the realign directory.
overwrite = False
# ------------------------------------

count = 0
for subj, row in ideas_reorg.iterrows():
    if not pd.isna(row["realigned_niis"]):
        for oldpath, newpath in row["realigned_niis"].items():
            if np.all((op.isfile(oldpath), np.any((overwrite, not op.exists(newpath))))):
                os.makedirs(op.dirname(newpath), exist_ok=True)
                os.rename(oldpath, newpath)
    if not pd.isna(row["rmean_nii"]):
        for oldpath, newpath in row["rmean_nii"].items():
            if np.all((op.isfile(oldpath), np.any((overwrite, not op.exists(newpath))))):
                os.makedirs(op.dirname(newpath), exist_ok=True)
                if op.basename(oldpath).endswith("Averaged.nii"):
                    os.rename(oldpath, newpath)
                else:
                    shutil.copy(oldpath, newpath)
    if not pd.isna(row["rp_rframes"]):
        for oldpath, newpath in row["rp_rframes"].items():
            if np.all((op.isfile(oldpath), np.any((overwrite, not op.exists(newpath))))):
                os.makedirs(op.dirname(newpath), exist_ok=True)
                os.rename(oldpath, newpath)
    count += 1

print(subj, count)

64681 10700


## rPOP

In [194]:
def _get_rrmean(rPOP_dir_source, rPOP_dir_target, subj,
                ideas_paths, first_rPOP_failed, return_failed=False):
    if np.any((pd.isna(rPOP_dir_source), pd.isna(rPOP_dir_target))):
        return np.nan
    if first_rPOP_failed:
        if return_failed:
            rrmean_source = ideas_paths.loc[subj, "path_to_nativenii"]
        else:
            rrmean_source = glob(op.join(rPOP_dir_source, "{}*.nii".format(subj)))
            if len(rrmean_source) == 1:
                rrmean_source = rrmean_source[0]
            else:
                return np.nan
    elif return_failed:
        return np.nan
    else:
        rrmean_source = ideas_paths.loc[subj, "path_to_nativenii"]
    rrmean_target = op.join(rPOP_dir_target, "rrmean_{}.nii".format(subj))
    return od({rrmean_source: rrmean_target})
    
    
def _get_wrrmean(rPOP_dir_source, rPOP_dir_target, subj,
                ideas_paths, first_rPOP_failed, return_failed=False):
    if np.any((pd.isna(rPOP_dir_source), pd.isna(rPOP_dir_target))):
        return np.nan
    if first_rPOP_failed:
        if return_failed:
            wrrmean_source = ideas_paths.loc[subj, "path_to_warpednii"]
        else:
            wrrmean_source = glob(op.join(rPOP_dir_source, "w{}*.nii".format(subj)))
            if len(wrrmean_source) == 1:
                wrrmean_source = wrrmean_source[0]
            else:
                return np.nan
    elif return_failed:
        return np.nan
    else:
        wrrmean_source = ideas_paths.loc[subj, "path_to_warpednii"]
    wrrmean_target = op.join(rPOP_dir_target, "wrrmean_{}.nii".format(subj))
    return od({wrrmean_source: wrrmean_target})
    

def _get_sn_rrmean(rPOP_dir_source, rPOP_dir_target, subj):
    if np.any((pd.isna(rPOP_dir_source), pd.isna(rPOP_dir_target))):
        return np.nan
    sn_rrmean_source = glob(op.join(rPOP_dir_source, "*{}*sn.mat".format(subj)))
    if len(sn_rrmean_source) != 1:
        return np.nan
    sn_rrmean_source = sn_rrmean_source[0]
    sn_rrmean_target = op.join(rPOP_dir_target, "sn_rrmean_{}.mat".format(subj))
    return od({sn_rrmean_source: sn_rrmean_target})


def _get_swrrmean(rPOP_dir_source, rPOP_dir_target, subj,
                ideas_paths, first_rPOP_failed, return_failed=False):
    if np.any((pd.isna(rPOP_dir_source), pd.isna(rPOP_dir_target))):
        return np.nan
    if first_rPOP_failed:
        if return_failed:
            swrrmean_source = ideas_paths.loc[subj, "path_to_smoothed_nii"]
        else:
            swrrmean_source = glob(op.join(rPOP_dir_source, "sw{}*.nii".format(subj)))
            if len(swrrmean_source) == 1:
                swrrmean_source = swrrmean_source[0]
            else:
                return np.nan
    elif return_failed:
        return np.nan
    else:
        swrrmean_source = ideas_paths.loc[subj, "path_to_smoothed_nii"]
    swrrmean_target = op.join(rPOP_dir_target, "s10wrrmean_{}.nii".format(subj))
    return od({swrrmean_source: swrrmean_target})


def _get_automask_res(rPOP_dir_source, rPOP_dir_target, subj):
    if np.any((pd.isna(rPOP_dir_source), pd.isna(rPOP_dir_target))):
        return np.nan
    automask_source = glob(op.join(rPOP_dir_source, "*{}*automask.txt".format(subj)))
    if len(automask_source) != 1:
        return np.nan
    automask_source = automask_source[0]
    automask_target = op.join(rPOP_dir_target, "automask-res_wrrmean_{}.txt".format(subj))
    return od({automask_source: automask_target})
    
    
ideas_reorg["rrmean_nii"] = ideas_reorg.apply(lambda x: _get_rrmean(x["rPOP_dir_source"], x["rPOP_dir_target"], x.name, ideas_paths, x["first_rPOP_failed"], return_failed=False), axis=1)
ideas_reorg["failed_rrmean_nii"] = ideas_reorg.apply(lambda x: _get_rrmean(x["failed_rPOP_dir_source"], x["failed_rPOP_dir_target"], x.name, ideas_paths, x["first_rPOP_failed"], return_failed=True), axis=1)
ideas_reorg["wrrmean_nii"] = ideas_reorg.apply(lambda x: _get_wrrmean(x["rPOP_dir_source"], x["rPOP_dir_target"], x.name, ideas_paths, x["first_rPOP_failed"], return_failed=False), axis=1)
ideas_reorg["failed_wrrmean_nii"] = ideas_reorg.apply(lambda x: _get_wrrmean(x["failed_rPOP_dir_source"], x["failed_rPOP_dir_target"], x.name, ideas_paths, x["first_rPOP_failed"], return_failed=True), axis=1)
ideas_reorg["sn_rrmean"] = ideas_reorg.apply(lambda x: _get_sn_rrmean(x["rPOP_dir_source"], x["rPOP_dir_target"], x.name), axis=1)
ideas_reorg["failed_sn_rrmean"] = ideas_reorg.apply(lambda x: _get_sn_rrmean(x["failed_rPOP_dir_source"], x["failed_rPOP_dir_target"], x.name), axis=1)
ideas_reorg["swrrmean_nii"] = ideas_reorg.apply(lambda x: _get_swrrmean(x["rPOP_dir_source"], x["rPOP_dir_target"], x.name, ideas_paths, x["first_rPOP_failed"], return_failed=False), axis=1)
ideas_reorg["failed_swrrmean_nii"] = ideas_reorg.apply(lambda x: _get_swrrmean(x["failed_rPOP_dir_source"], x["failed_rPOP_dir_target"], x.name, ideas_paths, x["first_rPOP_failed"], return_failed=True), axis=1)
ideas_reorg["automask_res"] = ideas_reorg.apply(lambda x: _get_automask_res(x["rPOP_dir_source"], x["rPOP_dir_target"], x.name), axis=1)
ideas_reorg["failed_automask_res"] = ideas_reorg.apply(lambda x: _get_automask_res(x["failed_rPOP_dir_source"], x["failed_rPOP_dir_target"], x.name), axis=1)


for col in ["rrmean_nii", "wrrmean_nii", "sn_rrmean", "swrrmean_nii", "automask_res"]:
    print("'{}': {}/{} NA values".format(col, pd.isna(ideas_reorg[col]).sum(), len(ideas_reorg)))
for col in ["failed_rrmean_nii", "failed_wrrmean_nii", "failed_sn_rrmean", "failed_swrrmean_nii", "failed_automask_res"]:
    print("'{}': {}/{} NA values".format(col, pd.isna(ideas_reorg.loc[ideas_reorg["first_rPOP_failed"]==True, col]).sum(),
                                         len(ideas_reorg.loc[ideas_reorg["first_rPOP_failed"]==True])))

'rrmean_nii': 0/10700 NA values
'wrrmean_nii': 0/10700 NA values
'sn_rrmean': 43/10700 NA values
'swrrmean_nii': 0/10700 NA values
'automask_res': 43/10700 NA values
'failed_rrmean_nii': 0/239 NA values
'failed_wrrmean_nii': 0/239 NA values
'failed_sn_rrmean': 91/239 NA values
'failed_swrrmean_nii': 0/239 NA values
'failed_automask_res': 91/239 NA values


In [195]:
# Move or copy files into the rPOP and rPOP_failed directories.
overwrite = False
# ------------------------------------

copy_cols = [
    "rrmean_nii", "failed_rrmean_nii",
    "wrrmean_nii", "failed_wrrmean_nii",
    "sn_rrmean", "failed_sn_rrmean",
    "swrrmean_nii", "failed_swrrmean_nii",
    "automask_res", "failed_automask_res"
]
copied_subjs = od(zip(copy_cols, [[]] * len(copy_cols)))
not_copied_subjs = od(zip(copy_cols, [[]] * len(copy_cols)))
for col in copy_cols:
    count = 0
    for subj, row in ideas_reorg.iterrows():
        copied = False
        if not pd.isna(row[col]):
            for oldpath, newpath in row[col].items():
                if np.all((op.isfile(oldpath), np.any((overwrite, not op.exists(newpath))))):
                    os.makedirs(op.dirname(newpath), exist_ok=True)
                    shutil.copy(oldpath, newpath)
                    copied = True
        if copied:
            copied_subjs[col].append(subj)
        else:
            not_copied_subjs[col].append(subj)
    count += 1

print(col, subj, count)

failed_automask_res 64681 1


# Empty voxel identification

In [201]:
sys.path.append(op.join(op.expanduser("~"), "code"))
from general.nifti import nifti_ops as nops

ModuleNotFoundError: No module named 'nibabel'

'/home/mac/dschonhaut'

# Checks

In [99]:
print(keep_subjs)

[50001, 50005, 50026, 50082, 50651, 54720]


In [167]:
subj = 50001
cols = ["subj_dir_source", "raw_dir_source", "rPOP_dir_source"]
print(subj, '-'*len(str(subj)), sep='\n')
for col in ideas_reorg.columns:
    val = ideas_reorg.at[subj, col]
    if not pd.isna(val):
        if type(val) == od:
            print(f"{col}:")
            for k, v in val.items():
                print(f"\t<< {k}\n\t>> {v}\n")
        else:
            print(f"{col}: {val}\n")

50001
-----
subj_dir_source: /mnt/coredata/Projects/IDEAS_FullAnalysis/data_oldorganized/FirstQuery_12012021/IDEASHOLD/50001

dcm_dir_used_source: /mnt/coredata/Projects/IDEAS_FullAnalysis/data_oldorganized/FirstQuery_12012021/IDEASHOLD/50001/PET_Brain_AC__AC_/1940-03-17_14_00_54.0/I10025334

rPOP_dir_source: /mnt/coredata/Projects/IDEAS_FullAnalysis/data_oldorganized/FirstQuery_12012021/IDEASHOLD_allnifti

subj_dir_target: /mnt/coredata/Projects/IDEAS/data/50001

dcm_dir_used_target: /mnt/coredata/Projects/IDEAS/data/50001/raw/PET_Brain_AC__AC_/1940-03-17_14_00_54.0/I10025334

raw_dir_target: /mnt/coredata/Projects/IDEAS/data/50001/raw

proc_dir_target: /mnt/coredata/Projects/IDEAS/data/50001/proc

realign_dir_target: /mnt/coredata/Projects/IDEAS/data/50001/proc/intermed/realign

rPOP_dir_target: /mnt/coredata/Projects/IDEAS/data/50001/proc/intermed/rPOP

empty_voxel_dir_target: /mnt/coredata/Projects/IDEAS/data/50001/proc/intermed/empty_voxels

n_recon_dirs: 1

n_recon_dcms: 110

n_r

In [40]:
_subdirs = op.normpath(paths['path_to_dcm_folder']).split(os.sep)
try:
    ideas_reorg.at[subj, "recon_dir_used"] = _subdirs[_subdirs.index(str(subj))+1]
except ValueError:
    print('hi')

hi


In [21]:
ideas_reorg = pd.DataFrame(index=ideas_paths.index, columns=newpath_cols)
ideas_reorg.head()

Unnamed: 0_level_0,recon_dir_used,n_dcms_used,dcmHeader_used,n_recon_niisrecon_niis,realigned_niis,mean_nii,wmean_nii,swmean_nii,automask_res,sn_rmean,mult_recons,recon_dir_alt,dcmHeader_alt,recon_niis_alt,first_proc_failedrealigned_niis_failed,mean_nii_failed,wmean_nii_failed,swmean_nii_failed,automask_res_failed,sn_rmean_failed
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
50001,,,,,,,,,,,,,,,,,,,,
50002,,,,,,,,,,,,,,,,,,,,
50003,,,,,,,,,,,,,,,,,,,,
50004,,,,,,,,,,,,,,,,,,,,
50005,,,,,,,,,,,,,,,,,,,,


In [13]:
ideas_paths.set_index("ID")

Unnamed: 0_level_0,num_dcms,path_to_dcm_folder,path_to_rPOP_outputs,path_to_nativenii,path_to_warpednii,path_to_smoothed_nii,NOTES,FIX_path_ALL
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
50001,single,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/all_...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,,
50002,single,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/all_...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,,
50003,multiple,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/all_...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,,
50004,multiple,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/all_...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,,
50005,multiple,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/all_...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,,
...,...,...,...,...,...,...,...,...
64677,multiple,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/all_...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,,
64678,single,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/all_...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,,
64679,single,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/all_...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,,
64680,single,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/all_...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,,


In [10]:
dat_cols = ["a", "b", "c", "d"]
subj = "0001"
dat = [pd.Series(index=dat_cols, name="0001")]
dat["a"] = "apple"
dat["b"] = np.pi
dat["d"] = np.nan
dat["d"] = "dolphin"

dat

  dat = pd.Series(index=dat_cols)


a       apple
b    3.141593
c         NaN
d     dolphin
dtype: object