# Load packages

In [1]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
import nibabel as nib
from scipy.ndimage import gaussian_filter

# Set up paths

In [2]:
# Directory layout: the notebook is run from the code folder, and the
# "input" / "output" folders are located next to it, one level up.
code_dir = Path.cwd()
statistics_dir = code_dir.parent
input_dir = statistics_dir.joinpath("input")
output_dir = statistics_dir.joinpath("output")

# Define functions

In [8]:
def extract_voxel_data(ef_path, tmax_path, penumbra_path, infarct_path, subject_id, sigma=0.5):
    """Build a voxel-level table of smoothed EF/Tmax values inside the penumbra mask.

    The four NIfTI volumes are loaded, the EF and Tmax maps are Gaussian-smoothed
    over the whole volume, and every voxel whose penumbra-mask value is > 0
    becomes one row of the returned table.

    Parameters
    ----------
    ef_path, tmax_path : str or path-like
        NIfTI files holding the EF and Tmax maps.
    penumbra_path, infarct_path : str or path-like
        NIfTI mask files; any value > 0 counts as "inside the mask".
    subject_id : scalar
        Identifier written to the ``sub_id`` column of every row.
    sigma : float or sequence of float, optional
        Standard deviation handed to ``scipy.ndimage.gaussian_filter``. It is
        expressed in array elements (voxels), not millimetres. Defaults to 0.5,
        the value that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        Columns ``sub_id``, ``ef_value``, ``tmax_value`` and ``infarct``
        (1 if the infarct mask is > 0 at that voxel, else 0), one row per
        penumbra voxel.

    Raises
    ------
    ValueError
        If the four volumes do not all have the same array shape.
    """
    paths = {
        'EF': ef_path,
        'Tmax': tmax_path,
        'penumbra': penumbra_path,
        'infarct': infarct_path,
    }
    # Load each file and pull its voxel array as floating point data.
    volumes = {label: nib.load(path).get_fdata() for label, path in paths.items()}

    # Boolean-mask indexing below only makes sense when every volume lives on
    # the same grid; fail early with a message that names the offending shapes
    # instead of a cryptic indexing / DataFrame-construction error.
    shapes = {label: volume.shape for label, volume in volumes.items()}
    if len(set(shapes.values())) != 1:
        raise ValueError(
            f"Volume shapes differ for subject {subject_id!r}: {shapes}"
        )

    # Smooth the EF and Tmax maps before sampling.
    # NOTE(review): the filter runs on the whole volume, so NaN or background
    # values next to the penumbra bleed into it -- confirm inputs are NaN-free.
    ef_smoothed = gaussian_filter(volumes['EF'], sigma=sigma)
    tmax_smoothed = gaussian_filter(volumes['Tmax'], sigma=sigma)

    # Voxels belonging to the penumbra, and whether each of them is infarcted.
    penumbra_mask = volumes['penumbra'] > 0
    infarct_flags = (volumes['infarct'][penumbra_mask] > 0).astype(int)

    return pd.DataFrame({
        'sub_id': subject_id,
        'ef_value': ef_smoothed[penumbra_mask],
        'tmax_value': tmax_smoothed[penumbra_mask],
        'infarct': infarct_flags
    })

In [3]:
def process_subjects(data_dir, subjects):
    """Extract voxel data for every subject and stack it into one table.

    For each subject ID the four expected NIfTI file names (EF map, Tmax map,
    penumbra mask, follow-up lesion mask) are joined onto ``data_dir`` and
    passed to ``extract_voxel_data``.

    Parameters
    ----------
    data_dir : str or path-like
        Folder containing the per-subject NIfTI files.
    subjects : iterable of str
        Subject identifiers used as file-name prefixes.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all per-subject tables with a fresh index.

    Raises
    ------
    ValueError
        Raised by ``pandas.concat`` when ``subjects`` is empty.
    """
    def _in_data_dir(filename):
        # Full path of one file inside the data folder.
        return os.path.join(data_dir, filename)

    per_subject_frames = [
        extract_voxel_data(
            _in_data_dir(f'{subject_id}_ses-1_desc-moco_desc-brain_desc-wocsf_EFz.nii.gz'),
            _in_data_dir(f'{subject_id}_ses-1_desc-moco_desc-brain_desc-wocsf_desc-RAPID_Tmax.nii.gz'),
            _in_data_dir(f'{subject_id}_ses-1_desc-RAPID_desc-penumbra_mask.nii.gz'),
            _in_data_dir(f'{subject_id}_ses-2_space-dsc_desc-lesion_mask.nii.gz'),
            subject_id,
        )
        for subject_id in subjects
    ]

    # One combined frame, re-indexed from 0 across all subjects.
    return pd.concat(per_subject_frames, ignore_index=True)

In [5]:
def load_subject_ids(file_path):
    """Read subject IDs from a text file that lists one ID per line.

    Leading/trailing whitespace is stripped from every line and blank lines
    are dropped, so a trailing newline or stray spaces in the list cannot
    produce empty or padded IDs (which would turn into non-existent file
    names when the IDs are used as file-name prefixes).

    Parameters
    ----------
    file_path : str or path-like
        Text file containing the subject identifiers.

    Returns
    -------
    list of str
        Subject IDs in file order; empty if the file has no non-blank lines.
    """
    with open(file_path, 'r') as file:
        raw_lines = file.read().splitlines()
    stripped = (line.strip() for line in raw_lines)
    return [subject for subject in stripped if subject]

# Extract voxel-level data

In [9]:
# Folder holding the per-subject voxelwise NIfTI files.
data_dir = statistics_dir.joinpath('input/ef_voxelwise')

# Subject list file read from the output folder, one ID per line.
subject_ids_file = output_dir.joinpath("future_infarction_list_failedtmaxreg_ex.txt")
subjects = load_subject_ids(subject_ids_file)

# Voxel-level table (smoothed EF/Tmax + infarct flag) across all listed subjects.
df_smoothed = process_subjects(data_dir, subjects)

# Save to CSV

In [10]:
# Write the combined voxel table into the input folder, without the row index.
df_smoothed.to_csv(
    path_or_buf=input_dir / 'ef_tmax_voxel_data_smoothed.csv',
    index=False,
)