In [4]:
import os
import os.path as op

import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

In [None]:
WORKING_PATH = "..."  # Set your working path here.

# Input tables expected below WORKING_PATH/data:
# - beh/        behavioral data
# - fs-measures/ combined aparcstats table (cortical measures)
# - fs-measures/ combined aseg stats table (subcortical volumes)
beh_csv = op.join(WORKING_PATH, "data/beh/beh_residualized_extended_050225.csv")
aparc_csv = op.join(WORKING_PATH, "data/fs-measures/aparcstats2table_combined.csv")
aseg_csv = op.join(WORKING_PATH, "data/fs-measures/aseg_stats_combined.csv")

# Behavioral data, restricted to rows whose site is "regensburg".
beh = pd.read_csv(beh_csv)
beh = beh.loc[beh["site"] == "regensburg"]

aparc = pd.read_csv(aparc_csv)

# The aseg table labels its subject column "Measure:volume"; rename it to "id"
# so that it shares the merge key used for the other two tables.
aseg = pd.read_csv(aseg_csv)
aseg = aseg.rename(columns={"Measure:volume": "id"})

# Inner joins on "id": only subjects present in all three tables are kept.
db = beh.merge(aparc, on="id", how="inner")
db = db.merge(aseg, on="id", how="inner")
db

In [6]:
# Prepare Brain Imaging Data for PALM
#
# Each output file holds one group of columns from `db`, written without
# index and without a header row (one row per subject, one column per region).
palm_data_path = op.join(WORKING_PATH, "01_palm/data")
os.makedirs(palm_data_path, exist_ok=True)

# Cortical regions; column names in `db` follow "<hemi>_<region>_<measure>".
cortical_areas = [
    "rostralanteriorcingulate",
    "caudalanteriorcingulate",
    "posteriorcingulate",
    "parahippocampal",
    "lateralorbitofrontal",
    "medialorbitofrontal",
    "insula",
    "precuneus",
]

# Subcortical structures; column names in `db` follow "Left-<region>" / "Right-<region>".
subcortical_areas = [
    "Thalamus-Proper",
    "Caudate",
    "Accumbens-area",
    "Putamen",
    "Hippocampus",
    "Amygdala",
]

# Output file name -> columns of `db` to export, in write order.
column_sets = {
    "lh_thickness.csv": [f"lh_{area}_thickness" for area in cortical_areas],
    "rh_thickness.csv": [f"rh_{area}_thickness" for area in cortical_areas],
    "lh_area.csv": [f"lh_{area}_area" for area in cortical_areas],
    "rh_area.csv": [f"rh_{area}_area" for area in cortical_areas],
    "lh_volume.csv": [f"Left-{area}" for area in subcortical_areas],
    "rh_volume.csv": [f"Right-{area}" for area in subcortical_areas],
}

exported = {}
for filename, columns in column_sets.items():
    exported[filename] = db[columns]
    exported[filename].to_csv(
        op.join(palm_data_path, filename), index=False, header=False
    )

# Keep the subcortical selections available under their own names.
lh_subcortical_volume = exported["lh_volume.csv"]
rh_subcortical_volume = exported["rh_volume.csv"]

In [8]:
# Create design matrices for PALM
def create_sex_specific_var(db, variable):
    """Split one column of ``db`` into a female-only and a male-only regressor.

    Parameters
    ----------
    db : pandas.DataFrame
        Must contain a ``"sex"`` column and the column named by ``variable``.
    variable : str
        Name of the column to split.

    Returns
    -------
    tuple
        ``(var_female, var_male)``. Both are copies of ``db[variable].values``;
        in ``var_female`` every entry whose sex is ``"male"`` is set to 0, and
        in ``var_male`` every entry whose sex is ``"female"`` is set to 0.
        Rows with any other sex label keep their value in both outputs.
        ``db`` itself is not modified.
    """
    sex = db["sex"]
    masked = []
    # First pass zeroes the male rows (-> female regressor), second pass the
    # female rows (-> male regressor).
    for zeroed_sex in ("male", "female"):
        values = db[variable].values.copy()
        values[sex == zeroed_sex] = 0
        masked.append(values)
    var_female, var_male = masked
    return var_female, var_male


# Sex-specific copies of the "increase" column (zero for the other sex).
increase_female, increase_male = create_sex_specific_var(db, "increase")

# One-hot encode db["cycle"]. "male" is listed first and removed by
# drop="first", so it is the reference level and the three remaining output
# columns are, in order: luteal, pill, menopause.
encoder = OneHotEncoder(
    drop="first",
    categories=[["male", "luteal", "pill", "menopause"]],
    sparse_output=False,
)
cycle = encoder.fit_transform(db["cycle"].values.reshape(-1, 1))

# Define configurations: one design file per imaging measure. Only the volume
# design gets BrainSegVolNotVent appended as an extra (last) column.
design_configs = [
    {
        "name": "thickness",
        "male_var": increase_male,
        "female_var": increase_female,
        "add_tbv": False,
    },
    {
        "name": "area",
        "male_var": increase_male,
        "female_var": increase_female,
        "add_tbv": False,
    },
    {
        "name": "volume",
        "male_var": increase_male,
        "female_var": increase_female,
        "add_tbv": True,
    },
]

# Create base design matrix.
# Column order: increase_male, increase_female, luteal, pill, menopause, age.
# pd.concat(axis=1) aligns rows by index label, so the numpy-derived pieces
# are built on db.index explicitly. Otherwise they would get a fresh
# 0..n-1 index and silently misalign with db["age"] whenever db's index is
# not the default one (e.g. after filtering db).
# NOTE(review): the design contains no constant (intercept) column; confirm
# this is intended for how PALM is invoked (e.g. with demeaning).
base_design = pd.concat(
    [
        pd.Series(increase_male, index=db.index, name="increase_male"),
        pd.Series(increase_female, index=db.index, name="increase_female"),
        pd.DataFrame(
            cycle, index=db.index, columns=["luteal", "pill", "menopause"]
        ),
        db["age"],
    ],
    axis=1,
)

# Create and save design matrices for each configuration (no header, no index).
for config in design_configs:
    design = base_design.copy()
    design["increase_male"] = config["male_var"]
    design["increase_female"] = config["female_var"]

    if config["add_tbv"]:
        # Shares db.index with `design`, so rows line up one-to-one.
        design = pd.concat([design, db["BrainSegVolNotVent"]], axis=1)

    design.to_csv(
        op.join(palm_data_path, f"design_{config['name']}.csv"),
        index=False,
        header=False,
    )

In [9]:
# Create contrast matrix
# Rows = different contrasts
# Columns follow the volume design matrix:
# [increase_male, increase_female, luteal, pill, menopause, age, BrainSegVolNotVent]
sex_effect_weights = np.array(
    [
        [1.00, 1.00],  # Average effect across sexes
        [1.00, -1.00],  # Men > Women
        [-1.00, 1.00],  # Women > Men
        [1.00, 0.00],  # Effect in men
        [0.00, 1.00],  # Effect in women
    ]
)
# The remaining five regressors are nuisance covariates: weight 0 in every contrast.
nuisance_weights = np.zeros((sex_effect_weights.shape[0], 5))
contrasts_volume = np.hstack([sex_effect_weights, nuisance_weights])

# Thickness and area designs have no BrainSegVolNotVent column, so their
# contrasts drop the last column.
contrasts_without_tbv = contrasts_volume[:, :-1]

contrast_files = {
    "contrast_volume.csv": contrasts_volume,
    "contrast_thickness.csv": contrasts_without_tbv,
    "contrast_area.csv": contrasts_without_tbv,
}
for filename, weights in contrast_files.items():
    pd.DataFrame(weights).to_csv(
        op.join(palm_data_path, filename), index=False, header=False
    )