In [28]:
import os
import os.path as op
import sys
from glob import glob
import numpy as np
import pandas as pd

In [48]:
source_dir = "/mnt/coredata/Projects/IDEAS_FullAnalysis"
target_dir = "/mnt/coredata/Projects/IDEAS/data"

# Load the ideas_paths ssheet.
ideas_paths = pd.read_csv(
    "/Users/dschonhaut/Box/projects/ideas/data/metadata/IDEAS_paths_forDaniel_01-17-23_NM.csv"
)
assert ideas_paths["ID"].size == ideas_paths["ID"].nunique()
ideas_paths.set_index("ID", inplace=True)
print("ideas_paths: {}".format(ideas_paths.shape))

ideas_paths: (10700, 8)


In [18]:
# addtl data: missing voxel threshold file, missing voxel mask, SUVR image

In [27]:
pathmap = []
pathmap_cols = [
    "subj_dir_target", # the subject directory where all data will be copied
    
    "n_recon_dcms",  # the number of reconstructed .dcm files identified
    "n_recon_niis",  # the number of converted .nii files
    
    "recon_dir_source",  # we do *.dcm in this dir to get all the recon'd DICOMs (ALWAYS "path_to_dcm_folder")
    "recon_dir_target",
    
    "dcmHeader_source",  # dcmHeader.mat file containing DICOM header info
    "dcmHeader_target",
    
    "recon_niis_source",  # list of 1+ .nii frame files obtained directly from dcm2nii conversion
    "recon_niis_target",
    
    "realigned_niis_source",  # list of realigned nifti frames
    "realigned_niis_target",
    
    "mean_nii_source",  # mean image across realigned frames
    "mean_nii_target",
    
    "wmean_nii_source",  # warped, mean image
    "wmean_nii_target",
    
    "swmean_nii_source",  # smoothed, warped, mean image
    "swmean_nii_target",
    
    "automask_res_source",  # estimated smoothing of the recon'd image
    "automask_res_target",
    
    "rp_rmean_source",  # linear transform for PET frame realignment
    "rp_rmean_target",
    
    "sn_rmean_source",  # non-linear transform for PET frame realignment(?)
    "sn_rmean_target",
    
    "error", # log problems in data reorganization that need to be manually fixed
    
    "mult_recons",  # True if there are multiple recon dirs
    "recon_dir_alt",  # we do *.dcm in this dir to get DICOMs from the alt recon
    "n_dcms_used_alt",  # the number of .dcm files in the alt recon
    "dcmHeader_alt",  # dcmHeader.mat file for the alt recon
    "n_recon_niis_alt", # the number of recon'd nifti frames for the alt recon
    "recon_niis_alt",  # list of 1+ .nii frame files from dcm2nii conversion of the alt recon
    
    "first_proc_failed",  # True if initial processing failed and PET was reprocessed with manual centering
    "realigned_niis_failed",  # list of realigned nifti frames
    "mean_nii_failed",  # mean image across realigned frames
    "wmean_nii_failed",  # warped, mean image
    "swmean_nii_failed",  # smoothed, warped, mean image
    "automask_res_failed",  # estimated smoothing of the recon'd image
    "sn_rmean_failed",  # affine transform for PET frame realignment
]

for subj, paths in ideas_paths.iterrows():
    # Define the target subject directory
    subj_dir = op.join(target_dir, subj)
    ideas_reorg.at[subj, "subj_dir_target"] = subj_dir
    
    # Get path to the recon directory that was used for processing
    ideas_reorg.at[subj, "recon_dir_source"] = path["path_to_dcm_folder"]
    _subdirs = op.normpath(paths['path_to_dcm_folder']).split(os.sep)
    try:
        _recon_dir = _subdirs[_subdirs.index(str(subj))+1]
    except:
        ideas_reorg.at[subj, "error"] = "Failed to find recon dir name"
        continue
    ideas_reorg.at[subj, "recon_dir_target"] = op.join(subj_dir, "raw", _recon_dir)
        
    # Find reconstructed DICOMs, converted NIfTIs, and DICOM header info files.
    _dcm_files = sorted(glob(op.join(ideas_reorg.at[subj, "recon_dir_source"], '*.dcm')))
    _n_dcms = len(_dcm_files)
    if _n_dcms > 0:
        ideas_reorg.at[subj, "n_recon_dcms"] = _n_dcms
    else:
        ideas_reorg.at[subj, "error"] = "Failed to find DICOMs"
        continue
        
    _nii_files = sorted([f for f in glob(op.join(ideas_reorg.at[subj, "recon_dir_source"], '*.nii'))
                         if np.all([('mean' not in op.basename(f),
                                     op.basename(f)[0]!='r')
                                   ])])
    _n_niis = len(_nii_files)
    if _n_niis > 0:
        ideas_reorg.at[subj, "n_recon_niis"] = _n_niis
        ideas_reorg.at[subj, "recon_niis_source"] = _nii_files
        ideas_reorg.at[subj, "recon_niis_target"] = [op.join(ideas_reorg.at[subj, "recon_dir_target"], 'frame{:>04}_{}.nii'.format(idx, subj))
                                                     for idx in range(1, len(ideas_reorg.at[subj, "recon_niis_source"])+1)] 
    else:
        ideas_reorg.at[subj, "error"] = "Failed to find converted NIfTIs"
        continue
        
    _dcm_hdr = op.join(ideas_reorg.at[subj, "recon_dir_source"], 'dcmHeaders.mat')
    if op.exists(_dcm_hdr):
        ideas_reorg.at[subj, "dcmHeader_source"] = _dcm_hdr
        ideas_reorg.at[subj, "dcmHeader_target"] = op.join(ideas_reorg.at[subj, "recon_dir_target"], "dcmHeaders_{}.mat".format(subj))
        
    # Log whether the subject has one or multiple recons
    if paths['num_dcms'] == "multiple":
        ideas_reorg.at[subj, "mult_recons"] = True
    else:
        ideas_reorg.at[subj, "mult_recons"] = False
    
    # Find processed PET images.
    if np.isnan(paths['FIX_path_ALL']):
        ideas_reorg.at[subj, "first_proc_failed"] = False
        if ideas_reorg.at[subj, "n_recon_niis"] > 1:
            ideas_reorg.at[subj, "realigned_niis_source"] = [] # XX
            ideas_reorg.at[subj, "realigned_niis_target"] = [] # XX
        else:
            ideas_reorg.at[subj, "realigned_niis_source"] = []
            ideas_reorg.at[subj, "realigned_niis_target"] = []
        ideas_reorg.at[subj, ""] = 
    else:
        ideas_reorg.at[subj, "first_proc_failed"] = True


SyntaxError: invalid syntax (2214476369.py, line 31)

In [47]:
'r'=='R'

False

In [40]:
_subdirs = op.normpath(paths['path_to_dcm_folder']).split(os.sep)
try:
    ideas_reorg.at[subj, "recon_dir_used"] = _subdirs[_subdirs.index(str(subj))+1]
except ValueError:
    print('hi')

hi


In [21]:
ideas_reorg = pd.DataFrame(index=ideas_paths.index, columns=newpath_cols)
ideas_reorg.head()

Unnamed: 0_level_0,recon_dir_used,n_dcms_used,dcmHeader_used,n_recon_niisrecon_niis,realigned_niis,mean_nii,wmean_nii,swmean_nii,automask_res,sn_rmean,mult_recons,recon_dir_alt,dcmHeader_alt,recon_niis_alt,first_proc_failedrealigned_niis_failed,mean_nii_failed,wmean_nii_failed,swmean_nii_failed,automask_res_failed,sn_rmean_failed
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
50001,,,,,,,,,,,,,,,,,,,,
50002,,,,,,,,,,,,,,,,,,,,
50003,,,,,,,,,,,,,,,,,,,,
50004,,,,,,,,,,,,,,,,,,,,
50005,,,,,,,,,,,,,,,,,,,,


In [13]:
ideas_paths.set_index("ID")

Unnamed: 0_level_0,num_dcms,path_to_dcm_folder,path_to_rPOP_outputs,path_to_nativenii,path_to_warpednii,path_to_smoothed_nii,NOTES,FIX_path_ALL
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
50001,single,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/all_...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,,
50002,single,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/all_...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,,
50003,multiple,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/all_...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,,
50004,multiple,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/all_...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,,
50005,multiple,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/all_...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,,
...,...,...,...,...,...,...,...,...
64677,multiple,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/all_...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,,
64678,single,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/all_...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,,
64679,single,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/all_...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,,
64680,single,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,/mnt/coredata/Projects/IDEAS_FullAnalysis/all_...,/mnt/coredata/Projects/IDEAS_FullAnalysis/data...,,


In [10]:
dat_cols = ["a", "b", "c", "d"]
subj = "0001"
dat = [pd.Series(index=dat_cols, name="0001")]
dat["a"] = "apple"
dat["b"] = np.pi
dat["d"] = np.nan
dat["d"] = "dolphin"

dat

  dat = pd.Series(index=dat_cols)


a       apple
b    3.141593
c         NaN
d     dolphin
dtype: object