In [1]:
import os
import os.path as op

import pandas as pd
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Load behavioral data.
DATA_DIR = "..."  # Set your DATA_DIR here.
WORKING_DIR = "..."  # Set your WORKING_DIR here.
beh = pd.read_csv(
    op.join(DATA_DIR, "beh/beh_residualized_extended_050225.csv"),
)
# Keep only the rows whose site is "regensburg" and renumber the index from 0.
beh = beh[beh["site"] == "regensburg"].reset_index(drop=True)

# Restrict the frame to the six columns listed below. The explicit .copy()
# makes the result an independent frame, so the column assignments further
# down never write into (or warn about) a slice of the filtered frame.
beh = beh[["id", "age", "cycle", "sex", "increase", "AUCi"]].copy()

# Sex and Cycle
# NOTE(review): `encoder` is not used anywhere in the visible cells; it is kept
# only in case a later cell relies on it.
encoder = OneHotEncoder(drop="first")

# Replace the string labels with integer codes and store them as categoricals.
# Series.map yields NaN for any label missing from the mapping.
# NOTE(review): confirm the data contains only the labels listed here.
sex_code_map = {"male": 0, "female": 1}
cycle_code_map = {"male": 0, "luteal": 1, "pill": 2, "menopause": 3}
beh["sex"] = beh["sex"].map(sex_code_map).astype("category")
beh["cycle"] = beh["cycle"].map(cycle_code_map).astype("category")
beh

In [None]:
# Output folder for the assembled table; created on first run if it is missing.
stressml_data_path = op.join(WORKING_DIR, "data")
os.makedirs(stressml_data_path, exist_ok=True)

# Read the cortical (aparc) and subcortical (aseg) tables. The aseg column
# "Measure:volume" is renamed to "id" so both tables share the same merge key.
aparc = pd.read_csv(op.join(DATA_DIR, "fs-measures/aparcstats2table_combined.csv"))
aseg = pd.read_csv(op.join(DATA_DIR, "fs-measures/aseg_stats_combined.csv"))
aseg = aseg.rename(columns={"Measure:volume": "id"})

# Region names without the hemisphere prefix; "Left-"/"Right-" is added below.
subcortical = [
    "Accumbens-area",
    "Amygdala",
    "Caudate",
    "Cerebellum-Cortex",
    "Hippocampus",
    "Pallidum",
    "Putamen",
    "Thalamus-Proper",
    "VentralDC",
]

# Cortical features: every aparc column whose name contains "_thickness" or
# "_area", followed by the "id" key. (Despite the variable name, area columns
# are included too; drop the "_area" condition for a thickness-only table.)
thickness_cols = [
    name for name in aparc.columns if "_thickness" in name or "_area" in name
]
thickness_cols.append("id")
thickness_df = aparc[thickness_cols].copy()

# Subcortical features: "id" followed by each region for the left hemisphere,
# then each region for the right hemisphere.
volume_cols = [f"{hemi}-{reg}" for hemi in ["Left", "Right"] for reg in subcortical]
volume_df = aseg[["id"] + volume_cols].copy()

# Inner-join behavior, cortical and subcortical tables on "id"; only ids
# present in all three tables are kept. The result is written without the index.
beh_cortical = pd.merge(beh, thickness_df, on="id", how="inner")
db = beh_cortical.merge(volume_df, on="id", how="inner")
db.to_csv(op.join(stressml_data_path, "predict_db.csv"), index=False)
db