# Selection of PET FDG images to process

In [1]:
import os
import pandas as pd 


In [2]:
# Folder holding the PET listing; the generated scripts and tables are written here as well
dir = '/scratch/caroline/papers/ongoing/project00/ADNI_analysis/data'

# pet_info.csv has no header row and uses '/' as field separator,
# so its columns end up labelled 0, 1, 2, ...
file = pd.read_csv(os.path.join(dir, 'pet_info.csv'), header=None, sep='/')

# Scalar table: expose its 'InputIds' column under the shared key name 'GUID'
scalar = (
    pd.read_csv('/scratch/caroline/papers/ongoing/project00/BA_predictions_2025/data/adni/adni_20250528.csv')
    .rename(columns={'InputIds': 'GUID'})
)

# Imaging table: only the key columns and the scan date are needed;
# 'Timepoint' is renamed so that it matches the spelling used in `scalar`
img = (
    pd.read_csv(
        '/scratch/caroline/papers/ongoing/project00/BA_predictions_2025/data/adni/adni_nifti_20250522.csv',
        usecols=['GUID', 'Individual', 'Timepoint', 'Scan date'],
    )
    .rename(columns={'Timepoint': 'TimePoint'})
)


In [3]:
# Drop duplicated rows of the PET listing (`file`).
# The explicit .copy() makes file1 an independent frame: the column assignments
# below would otherwise write into a filtered result of `file`, which pandas
# reports with a SettingWithCopyWarning.
file1 = file.drop_duplicates().copy()

# Column 3 is split once, at its first underscore, into 'date' and 'time'
file1[['date', 'time']] = file1[3].str.split('_', n=1, expand=True)

# Column 0 is expected to contain "<digits> <YYYY-MM-DD> <HH:MM> <word>";
# rows that do not match the pattern get NaN in the four new columns
file1[['ID', 'date0', 'time0', 'project']] = file1[0].str.extract(
    r'(\d+)\s+(\d{4}-\d{2}-\d{2})\s+(\d{2}:\d{2})\s+(\w+)'
)

# The helper columns (0, 3, 'date0', 'time0', 'project', 'time') are kept on
# purpose: later cells read 'project', 2 and 3 to rebuild the file paths.

# Column 1 becomes 'Individual', the key used to merge with the scalar data
file2 = file1.rename(columns={1: 'Individual'})

In [4]:
#%% Propagate immutable information

# PTDOBMM / PTDOBYY are treated as constant per Individual, so gaps are filled
# from the other rows of the same Individual (backward fill, then forward fill).
# transform() returns a frame indexed exactly like `scalar`, so the assignment
# aligns row by row and there is no group level to strip off afterwards
# (apply() only adds that level in some pandas versions).
dob_columns = ['PTDOBMM', 'PTDOBYY']
scalar[dob_columns] = (
    scalar.groupby('Individual')[dob_columns]
    .transform(lambda column: column.bfill().ffill())
)

  .apply(lambda group: group.bfill().ffill())


In [5]:
# Merge the scalar table with the imaging table (inner join on the shared keys).
# Author's note from an earlier run: 2190 id, 18388 entry.
data = pd.merge(scalar, img, on=['GUID', 'Individual', 'TimePoint'])

#%% Calculate scan date
# One conversion is enough: 'Scan date' is parsed into a datetime column
data['scandate'] = pd.to_datetime(data['Scan date'])

# Columns carried forward into the matching with the PET listing
data0 = data[['Individual', 'AltId', 'TimePoint', 'GUID', 'freesurfer_6_0_0_aparc_thickness_GUID', 'HMSTROKE', 'DXPARK',
              'CDGLOBAL', 'MMSCORE',
              'PTDOBMM', 'PTDOBYY', 'scandate']]

In [6]:
# Parse the PET date so it can be subtracted from 'scandate'
file2['date'] = pd.to_datetime(file2['date'])

# Pair every row of data0 with every PET row of the same Individual
merged = data0.merge(file2, on='Individual')

# Absolute distance between the two dates, in 30-day months
gap_in_days = (merged['scandate'] - merged['date']).dt.days
merged['date_diff'] = (gap_in_days / 30).abs()

# Keep the pairs that are at most 3 months apart, closest pair first
# within each Individual
within_window = merged['date_diff'] <= 3
adni = merged[within_window].copy().sort_values(['Individual', 'date_diff'])

# Removes fully identical rows only; several rows per Individual can remain
# (a per-Individual selection would need subset=['Individual'], keep='first')
adni0 = adni.drop_duplicates()

# 'date_diff' is kept in the table on purpose


In [7]:
# One row per Individual: `adni` is sorted by Individual and date_diff, so the
# first occurrence (the default that is kept) has the smallest date difference
adni1 = adni.drop_duplicates(subset='Individual', keep='first')

In [8]:
# Columns that, joined with '/', give the path of each selected scan inside the archive
path_columns = ['project', 'Individual', 2, 3]
file_names = adni1[path_columns]

In [9]:
# Inspect the columns of the selection; the integer labels are the unnamed
# columns of pet_info.csv, which was read with header=None.
adni1.columns

Index([                           'Individual',
                                       'AltId',
                                   'TimePoint',
                                        'GUID',
       'freesurfer_6_0_0_aparc_thickness_GUID',
                                    'HMSTROKE',
                                      'DXPARK',
                                    'CDGLOBAL',
                                     'MMSCORE',
                                     'PTDOBMM',
                                     'PTDOBYY',
                                    'scandate',
                                             0,
                                             2,
                                             3,
                                             4,
                                             5,
                                        'date',
                                        'time',
                                          'ID',
                                       '

In [10]:
# Persist the selection (one row per Individual); the DataFrame index is written too
selection_csv = os.path.join(dir, 'adni_fdg_pet_selection.csv')
adni1.to_csv(selection_csv)

# Build the list of unzip commands

In [8]:
# One unzip call per selected scan: the row values joined with '/' form the
# path inside PET.zip, and the trailing '/*' selects everything below it
unzip_commands = [
    "unzip PET.zip -d /scratch/caroline/papers/ongoing/project00/ADNI_analysis/data/images/PET/dcm "
    + '/'.join(row.astype(str)) + '/*'
    for _, row in file_names.iterrows()
]

# Write the commands to a bash script for later execution
script_lines = ["#!/bin/bash\n\n"] + [f"{cmd}\n" for cmd in unzip_commands]
with open(os.path.join(dir, "unzip_commands.sh"), "w") as f:
    f.writelines(script_lines)


# Script for the DICOM to NIfTI conversion

In [13]:
# Same path columns as for the unzip step, one level deeper (column 4)
file_names_plus = adni1[['project', 'Individual', 2, 3] + [4]]

In [10]:
# Build one dcm2niix call per selected scan. The output goes to the
# Individual's own nifti folder, the input is the matching path below dcm/.
dcm2niix_commands = []

for idx, row in file_names_plus.iterrows():
    row_string = '/'.join(row.astype(str)) + '/*'

    # 'Individual' is one of the selected columns, so it can be read from the row
    individual_id = row['Individual']
    command_individual = f"dcm2niix -o /scratch/caroline/papers/ongoing/project00/ADNI_analysis/data/images/PET/nifti/{individual_id} /scratch/caroline/papers/ongoing/project00/ADNI_analysis/data/images/PET/dcm/{row_string}"
    dcm2niix_commands.append(command_individual)

# Write the script once, after all commands are collected. Inside the loop the
# file was rewritten on every iteration and never created for an empty selection.
with open(os.path.join(dir, "dcm2niix_commands.sh"), "w") as f:
    f.write("#!/bin/bash\n\n")
    for cmd in dcm2niix_commands:
        f.write(f"{cmd}\n")


In [10]:
## Create a bash script that builds the folder structure

In [11]:
# One `mkdir -p` per distinct Individual, creating its nifti output folder
mkdir_commands = [
    f"mkdir -p /scratch/caroline/papers/ongoing/project00/ADNI_analysis/data/images/PET/nifti/{subject}"
    for subject in adni1['Individual'].unique()
]

# Shebang first, then one command per line
with open(os.path.join(dir, "make_directories.sh"), "w") as f:
    f.write("#!/bin/bash\n\n" + "".join(f"{cmd}\n" for cmd in mkdir_commands))

# Create the config files for petsurfer

In [37]:
# Distinct subjects of the selection; one config file is written for each
individuals = adni1['Individual'].unique()

# Folder receiving the config files (created when missing)
output_dir = "/scratch/caroline/papers/ongoing/project00/ADNI_analysis/code/pet_process/"
os.makedirs(output_dir, exist_ok=True)

# Text of the config file; {individual} is the only placeholder
# and is filled in per subject through str.format
template = """#!/bin/bash

# configuration file for petprocess.sh
# set all pre-requisites, paths and parameters

# thread limit for OMP_NUM_THREADS and ITK_GLOBAL_DEFAULT_NUMBER_OF_THREADS
max_threads=2

# pre-requisite toolbox: path to freesurfer
freesurfer_path=/usr/local/freesurfer-7.4.1

# pre-requisite toolbox: path to fsl
fsl_path=/usr/local/fsl-6.0.7.6

# input: path to pet scan
pet_source="/scratch/caroline/papers/ongoing/project00/ADNI_analysis/data/images/PET/nifti/{individual}"

# input: path to mri freesurfer output
#mri_source=mri_path
mri_source=/diskstation/CG/adni/processing/freesurfer/6.0.0
# mri_source=/diskstation/CG/adni/processing/freesurferLongitudinal/6.0.0

# specify either reference region 1 or reference region 2
# reference region 1 (use for fdg pet)
ref=174
ref_name="pons"

# reference region 2 (use for tau pet)
#ref=(8 47)
#ref_name="cerebellum_cortex"

# path to R script (petsurfer_scalar_to_csv.R) to save scalar output as csv file
rscript_path="/scratch/caroline/papers/ongoing/project00/ADNI_analysis/code/petsurfer"

# list of output files to be saved (all output except those specified will be removed)
keep_list=/scratch/caroline/papers/ongoing/project00/ADNI_analysis/code/petsurfer/keeplist.txt

# set to 1 to clean up the output after processing
cleanup=1
"""

# Write config_<Individual>.sh for every subject
for subject in individuals:
    config_path = os.path.join(output_dir, f"config_{subject}.sh")
    config_text = template.format(individual=subject)
    with open(config_path, 'w') as f:
        f.write(config_text)


In [None]:
# Rename the files in each Individual's nifti folder to "<Individual>.<extension>".
# Each snippet runs in its own subshell and leaves it when `cd` fails: without
# that guard a missing folder would make the loop rename the files of whatever
# directory the script happened to be in (the previous Individual's folder).
rename_commands = []

# Loop through unique Individual IDs
for individual in individuals:
    # ${{f##*.}} is the f-string escape for bash's ${f##*.}, which keeps the
    # text after the last dot of the file name
    command = f"""
(
cd /scratch/caroline/papers/ongoing/project00/ADNI_analysis/data/images/PET/nifti/{individual} || exit 1
for f in *; do
    if [ -f "$f" ]; then
        ext="${{f##*.}}"
        mv "$f" "{individual}.$ext"
    fi
done
)
"""
    rename_commands.append(command)

# Save commands to a bash script
with open(os.path.join(dir, "rename_files.sh"), "w") as f:
    f.write("#!/bin/bash\n\n")
    for cmd in rename_commands:
        f.write(f"{cmd}\n")

In [38]:
# Compress every .nii file in each Individual's nifti folder.
# Each snippet runs in its own subshell and leaves it when `cd` fails, so a
# missing folder is skipped instead of gzip running in the directory the
# script was in before.
gzip_commands = []

# Loop through unique Individual IDs
for individual in individuals:
    command = f"""
(
cd /scratch/caroline/papers/ongoing/project00/ADNI_analysis/data/images/PET/nifti/{individual} || exit 1
for f in *.nii; do
    if [ -f "$f" ]; then
        gzip "$f"
    fi
done
)
"""
    gzip_commands.append(command)

# Save commands to a bash script
with open(os.path.join(dir, "gzip_files.sh"), "w") as f:
    f.write("#!/bin/bash\n\n")
    for cmd in gzip_commands:
        f.write(f"{cmd}\n")

# Create the qsub commands for all IDs

In [40]:
# Build one qsub submission per selected Individual and save them as a script.
# The Individual is passed both as PET id (-p) and as output id (-g); the
# freesurfer GUID of the same row goes to -f and the per-subject config to -s.
out_path = "/scratch/caroline/papers/ongoing/project00/ADNI_analysis/data/images/PET/processed/"

# Same for every submission, so it is set once outside the loop
output_path = out_path

template_sub = []

# Only two columns are needed, so they are walked in parallel
subject_ids = adni1["Individual"]
freesurfer_ids = adni1["freesurfer_6_0_0_aparc_thickness_GUID"]
for pet_guid, freesurfer_guid in zip(subject_ids, freesurfer_ids):
    output_guid = pet_guid
    config_file = f"/scratch/caroline/papers/ongoing/project00/ADNI_analysis/code/pet_process/config_{pet_guid}.sh"
    # A dedicated name keeps the config-file `template` string from being overwritten
    qsub_command = f"""qsub -q all.q@saga.cgland -S /bin/bash -cwd petprocess.sh -p {pet_guid} -f {freesurfer_guid} -o {output_path} -g {output_guid} -s {config_file}"""
    template_sub.append(qsub_command)

out_files = "/scratch/caroline/papers/ongoing/project00/ADNI_analysis/code/"
# Save commands to a bash script
queue_script = os.path.join(out_files, "queue_commands.sh")
with open(queue_script, "w") as f:
    f.write("#!/bin/bash\n\n")
    for cmd in template_sub:
        f.write(f"{cmd}\n")

## Alternative: build the qsub commands in bash, reading the input from the CSV file

In [None]:
# adni2 = adni1[['Individual', 'freesurfer_6_0_0_aparc_thickness_GUID']]
# adni2.to_csv(os.path.join(dir, 'queue.csv'), index=False)

In [None]:
# output_path="/scratch/caroline/papers/ongoing/project00/ADNI_analysis/data/images/PET/processed/"

# tail -n +2 "${1}" | while IFS=, read -r Individual freesurfer_6_0_0_aparc_thickness_GUID; do  # +2 skips the header row of queue.csv (+1 would pass it on as data)

#     echo "Running $Individual"
#     config_file="/scratch/caroline/papers/ongoing/project00/ADNI_analysis/code/pet_process/config_${Individual}.sh"
#     echo "qsub -q all.q@saga.cgland -S /bin/bash -cwd petprocess.sh -p ${Individual} -f ${freesurfer_6_0_0_aparc_thickness_GUID} -o ${output_path} -g ${Individual} -s ${config_file}"
    
# done

In [None]:
#adni2

Unnamed: 0,Individual,freesurfer_6_0_0_aparc_thickness_GUID
1152,002_S_0295,290c2ed0-c79b-468a-9c63-a8b27374a024
5952,002_S_0413,07a899f8-98d8-4b92-9419-bc5c6d61fe11
10848,002_S_0685,4ce6a3d7-f0cb-4022-9d40-79758b761264
15552,002_S_0729,2cf4c520-5081-4620-9fa6-eccd463e2990
20832,002_S_1155,027f63a7-9434-433d-99b7-cda263d4c4b1
...,...,...
4282752,941_S_6052,188b3446-2c17-4611-9e5f-a0cf12e73d11
4282944,941_S_6068,1a70c69c-8730-4ba5-9cd1-798577b0dff1
4283232,941_S_6345,088c7d78-2021-4820-93c8-e66b1cc14e64
4283520,941_S_6803,3e04e375-ea4e-4fea-98f8-d2f07d036404
