# In-clinic data to validate EMA with UPDRS

## 0. Import packages

- document versions for reproducibility

In [None]:
# import packages
import pandas as pd
import numpy as np
import os
import sys
import csv
import json
import importlib
from itertools import product, compress
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, spearmanr
from scipy.signal import welch

In [None]:
# Print interpreter and core-package versions so a run can be reproduced.
for pkg_name, pkg_version in (
    ('Python sys', sys.version),
    ('pandas', pd.__version__),
    ('numpy', np.__version__),
):
    print(pkg_name, pkg_version)
# print('mne_bids', mne_bids.__version__)
# print('mne', mne.__version__)
# print('sci-py', scipy.__version__)
# print('sci-kit learn', sk.__version__)
# print('matplotlib', plt_version)

# Versions recorded from an earlier run. The string must stay the last
# expression of the cell (a notebook echoes the last expression as output).
"""
Python sys 3.11.5 | packaged by Anaconda, Inc. | (main, Sep 11 2023, 13:26:23) [MSC v.1916 64 bit (AMD64)]
pandas 2.1.1
numpy 1.26.0
"""

In [None]:
from utils import load_utils, load_data, prep_data
# from PerceiveImport.classes import main_class

In [None]:
# Root directory for saved figures, resolved by the project helper for the
# key 'emaval_fig'; later cells join sub-folders and file names onto it.
figpath = load_utils.get_onedrive_path('emaval_fig')


## 1. Prepare Data

### 1a) Import EMA and UPDRS

In [None]:
# # SINGLE CONDITION
# CONDITION = 'm0s0'

# ema_df, updrs_df = load_data.get_EMA_UPDRS_data(condition=CONDITION)


In [None]:
# Reload the project helpers so on-disk edits are picked up without
# restarting the kernel.
importlib.reload(load_data)
importlib.reload(load_utils)


# list of IDs to exclude because data is still missing
excl_ids = []  # 'ema31', 'ema32', 'ema33', 'ema34'

# 4 CONDITIONS: one EMA and one UPDRS dataframe per condition code
EMA, UPDRS = {}, {}

for COND in ['m0s0', 'm0s1', 'm1s0', 'm1s1']:
    ema_temp, updrs_temp = load_data.get_EMA_UPDRS_data(
        condition=COND, CONVERT_SCORES=True,
    )
    EMA[COND] = ema_temp
    UPDRS[COND] = updrs_temp

    # print(f'EMA ids: {EMA[COND]["study_id"]}')
    # print(f'UPDRS ids: {UPDRS[COND]["study_id"]}')

    for ema_n_excl in excl_ids:
        for source_name, source_dict in (('EMA', EMA), ('UPDRS', UPDRS)):
            is_excluded = (
                source_dict[COND]['study_id'] == ema_n_excl
            ).to_numpy()
            if not is_excluded.any():
                continue
            # Filter with a boolean mask: np.where() yields row positions,
            # whereas DataFrame.drop() expects index labels, so combining
            # them is only correct on a default RangeIndex. The mask also
            # removes every matching row rather than just the first one.
            drop_idx = np.flatnonzero(is_excluded).tolist()
            source_dict[COND] = (
                source_dict[COND].loc[~is_excluded].reset_index(drop=True)
            )
            print(f'drop {ema_n_excl} in {source_name}, index: {drop_idx}')
        

### 1b) Preprocess data

#### Get (mean-corrected) EMA and UPDRS values per symptom subtype

In [None]:
# Reload the helper modules so on-disk edits are picked up.
importlib.reload(load_data)

importlib.reload(prep_data)


# Build the summary dataframe from the per-condition EMA / UPDRS dicts.
# MEAN_CORR=True presumably selects the mean-corrected scores named in the
# section heading -- confirm in prep_data.get_sum_df.
sumdf = prep_data.get_sum_df(EMA_dict=EMA, UPDRS_dict=UPDRS,
                             MEAN_CORR=True,)

# sumdf

#### Split in Training and Test Cohorts

In [None]:
# SPLIT DATA IN TRAIN AND TEST

# Assign every row label of sumdf to the training or the test cohort.
train_subs, test_subs = prep_data.get_train_test_split(sumdf)

# Collect the row labels first (keeps the row order of sumdf), then select.
train_rows = [sub for sub in sumdf.index if sub in train_subs]
test_rows = [sub for sub in sumdf.index if sub in test_subs]

traindf = sumdf.loc[train_rows]
testdf = sumdf.loc[test_rows]

print(traindf.shape, testdf.shape)

## 2) Describe and compare Population: training vs. test cohort

In [None]:
from scipy.stats import mannwhitneyu, ttest_rel, pearsonr

In [None]:
# Show which row labels (index of sumdf) ended up in each cohort.
print(f'train subs: {traindf.index}')
print(f'test subs: {testdf.index}')

#### age, PD duration, post-op duration in two data-splits

In [None]:
# Hard-coded ages per cohort (24 training, 8 test values).
age = {
    'train': [62, 59, 42, 52, 68, 68, 56, 75, 62, 69, 55, 64, 74, 63, 48, 62, 68, 62, 71, 68, 73, 70, 67, 65],
    'test': [56, 75, 62, 66, 56, 63, 59, 44]
}

# Descriptive statistics per cohort; std is numpy's default (ddof=0).
for cohort, cohort_values in age.items():
    cohort_arr = np.asarray(cohort_values)
    print()
    print(f'{cohort}: mean: {cohort_arr.mean()}, std: {cohort_arr.std()}')

# comparison test vs train
result = mannwhitneyu(age['train'], age['test'])
print(f'Mann-Whitney U test for age: statistic={result.statistic}, p-value={result.pvalue}')


In [None]:
# Hard-coded PD duration per cohort (24 training, 8 test values).
pd_duration = {
    'train': [7, 11, 16, 8, 8, 9, 11, 20, 11, 9, 7, 15, 16, 14, 14, 3, 21, 7, 7, 10, 5, 10, 8, 26],
    'test': [10, 12, 16, 13, 20, 8, 11, 18]
}

# Descriptive statistics per cohort; std is numpy's default (ddof=0).
for cohort, cohort_values in pd_duration.items():
    cohort_arr = np.asarray(cohort_values)
    print()
    print(f'{cohort}: mean: {cohort_arr.mean()}, std: {cohort_arr.std()}')

# comparison test vs train
result = mannwhitneyu(pd_duration['train'], pd_duration['test'])
print(f'Mann-Whitney U test for PD duration: statistic={result.statistic}, p-value={result.pvalue}')


In [None]:
# Hard-coded post-operative interval in months per cohort.
# NOTE(review): 38 in the test cohort is the only value off the
# 3/12/18/24/36 grid -- confirm it is not a typo.
postop_months = {
    'train': [3, 3, 3, 3, 18, 24, 18, 12, 3, 24, 12, 3, 36, 24, 3, 3, 12, 3, 12, 3, 12, 36, 3, 3],
    'test': [3, 3, 18, 3, 3, 12, 38, 24],
}

# Descriptive statistics per cohort; std is numpy's default (ddof=0).
for cohort, cohort_values in postop_months.items():
    cohort_arr = np.asarray(cohort_values)
    print()
    print(f'{cohort}: mean: {cohort_arr.mean()}, std: {cohort_arr.std()}')

# comparison test vs train
result = mannwhitneyu(postop_months['train'], postop_months['test'])
print(f'Mann-Whitney U test for post-op months: statistic={result.statistic}, p-value={result.pvalue}')

#### compare therapeutic effects (UPDRS and EMA) in two data-splits


absolute values

In [None]:
# TODO

mean-corrected comparison

In [None]:
# One boxplot panel per column of the summary table, training (left box)
# against test (right box), with a Mann-Whitney U test printed per column.
n_panels = len(traindf.keys())
fig, axes = plt.subplots(1, n_panels, sharey='row', figsize=(12, 3))

yticks = [-10, -5, 0, 5, 10]

for i_ft, ft in enumerate(testdf.keys()):
    panel = axes[i_ft]

    # NaN-free values of this column for each cohort
    cohort_values = []
    for cohort_df in (traindf, testdf):
        column = cohort_df[ft].values
        cohort_values.append(column[~np.isnan(column)])
    train, test = cohort_values

    panel.boxplot([train, test])
    panel.set_xlabel(f'#{i_ft+1}')

    # do stats
    stat, p = mannwhitneyu(train, test)
    print(f'Variable {i_ft+1}, {ft}: stat: {stat}, p={round(p, 4)}')
    print(f'\tsamples sizes: # {i_ft+1}: train={len(train)}, test={len(test)}')

# Strip ticks and frames from every panel; draw faint reference lines instead.
for ax in axes:
    ax.set_xticks([])
    ax.set_yticks([])
    ax.spines[['right', 'top', 'left', 'bottom']].set_visible(False)
    for y in yticks:
        ax.axhline(y, alpha=.3, color='gray',)

# Only the first panel carries the (shared) y ticks and label.
first_panel = axes[0]
first_panel.set_yticks(yticks)
first_panel.set_yticklabels(yticks)
first_panel.set_ylabel('variable change (points)')

plt.suptitle('Intra-individual changes in EMA / UPDRS per therapy condition: training vs test cohort')

plt.tight_layout()


# plt.savefig(os.path.join(figpath, 'train_data', 'training_vs_test_vars'), dpi=450,
#             facecolor='w',)

plt.close()

## 3) EMA x UPDRS correlations


- EMA vs UPDRS correlations
- variance explained by non-motor domains (LMM)
- proportionality analysis, UPDRS-III delta per point of EMA change

In [None]:
# Plot colour per motor sub-score; the keys match the sub-score names used
# in the EMA_SUM_* / UPDRS_SUM_* column names.
CAT_COLORS = {'brady': 'orange', 'tremor': 'purple', 'gait': 'darkgreen'}


In [None]:
def scatter_EMA_UPDRS(
    ax, dat_df,
    EMA_subscore = 'brady',
    UPDRS_subscore = 'brady',
    show_updrs_improve=True,
    CAT_COLORS=None,
    FONTSIZE=14,
):
    """Scatter EMA against UPDRS sub-scores pooled over the four conditions.

    Values are read from the columns ``EMA_SUM_<EMA_subscore>_<cond>`` and
    ``UPDRS_SUM_<UPDRS_subscore>_<cond>`` of ``dat_df`` for the conditions
    m0s0, m0s1, m1s0 and m1s1. Rows with a missing value in either column
    are dropped pairwise. Pearson's R and its p-value over the pooled pairs
    are printed.

    Parameters
    ----------
    ax : axes object to draw on (matplotlib Axes API).
    dat_df : table holding the columns described above.
    EMA_subscore, UPDRS_subscore : sub-score names used in the column names;
        ``EMA_subscore`` also selects the colour and the panel title.
    show_updrs_improve : if True, UPDRS values are sign-flipped before
        plotting and correlating, so higher x means improvement.
    CAT_COLORS : mapping sub-score -> colour; None selects the default
        orange / purple / darkgreen mapping.
    FONTSIZE : base font size for labels and ticks (title uses +4).

    Returns
    -------
    The ``ax`` that was passed in.
    """
    if CAT_COLORS is None:
        # built per call instead of sharing one mutable default dict
        CAT_COLORS = {'brady': 'orange', 'tremor': 'purple', 'gait': 'darkgreen'}

    ema_values, updrs_values = [], []

    for COND in ['m0s0', 'm0s1', 'm1s0', 'm1s1']:

        ema_v = dat_df[f'EMA_SUM_{EMA_subscore}_{COND}']
        updrs_v = dat_df[f'UPDRS_SUM_{UPDRS_subscore}_{COND}']

        # pairwise deletion: keep a row only if both scores are present
        nan_sel = np.logical_or(pd.isna(ema_v), pd.isna(updrs_v))
        ema_values.extend(ema_v[~nan_sel])
        updrs_values.extend(updrs_v[~nan_sel])

    ema_values = np.asarray(ema_values, dtype=float)
    updrs_values = np.asarray(updrs_values, dtype=float)

    # plot UPDRS clinical IMPROVEMENT
    if show_updrs_improve:
        updrs_values = updrs_values * -1
        ax.set_xlabel('UPDRS-improvement\n(points, high: less symptoms)', size=FONTSIZE, )
    else:
        ax.set_xlabel(f'UPDRS {UPDRS_subscore}\n(low: less symptoms)', size=FONTSIZE)

    ax.scatter(updrs_values, ema_values, color=CAT_COLORS[EMA_subscore], alpha=0.5, s=75,)
    ax.axhline(y=0, c='gray', alpha=0.3)
    ax.axvline(x=0, c='gray', alpha=0.3)

    # The pairs are already NaN-free and aligned. Filtering x and y
    # independently (as done before) could only ever misalign them.
    R, pval = pearsonr(updrs_values, ema_values)

    # ax.set_title(f'{EMA_subscore}  R: {R.round(2)}, p={pval.round(5)}')
    ax.set_title(
        f'{EMA_subscore}', size=FONTSIZE+4,
        pad=10,
        fontdict={'weight': 'bold',
                  'color': CAT_COLORS[EMA_subscore],},
        # bbox=dict(facecolor="white", edgecolor="k", boxstyle="round,pad=0.3")
    )

    ax.set_ylabel('EMA score\n(points, high: less symptoms)', size=FONTSIZE,)
    ax.tick_params(axis='both', which='both', size=FONTSIZE, labelsize=FONTSIZE)
    ax.spines[['right', 'top']].set_visible(False)

    print(f'{EMA_subscore}  R: {np.round(R, 2)}, p={np.round(pval, 6)}')

    return ax

In [None]:
# Three-panel figure: EMA vs UPDRS scatter per motor sub-score, training cohort.
figname = 'Mar26_motor_corr_meanCorrvalues'

fig, axes = plt.subplots(1, 3, figsize=(12, 4))

# Same sub-score on both axes of each panel.
for i_sub, subscore in enumerate(['brady', 'tremor', 'gait']):
    ax = scatter_EMA_UPDRS(
        ax=axes[i_sub],
        dat_df=traindf,
        EMA_subscore=subscore,
        UPDRS_subscore=subscore,
        show_updrs_improve=True,
    )

plt.tight_layout()

# plt.savefig(os.path.join(figpath, 'train_data', 'ema_updrs_corr', figname), dpi=300,
#             facecolor='w',)

plt.close()

LMMs, including non-motor items

In [None]:
import utils.stats as utilsstat
import statsmodels.formula.api as smf

In [None]:
# List the available columns of the training table before reshaping it.
print(traindf.columns)

In [None]:
# Reload the helper module so on-disk edits are picked up.
importlib.reload(prep_data)


# Reshape the training table for the mixed models; later cells read the
# columns 'subid', 'cond', EMA_SUM_* and UPDRS_SUM_* from the result.
lmm_df = prep_data.get_lmm_df(traindf)

# Sanity checks: shape, column names and dtypes of the reshaped table.
print(lmm_df.values.shape)

print(lmm_df.columns)

print(lmm_df.dtypes)

In [None]:
# Reload the stats helper so on-disk edits are picked up.
importlib.reload(utilsstat)

# set target motor symptom
motor_target = 'brady'  # 'brady', 'tremor', 'gait'

# Fixed-effect formulas: the target's own EMA score plus the non-motor
# score, or all three motor EMA scores plus the non-motor score.
lmm_fix = {
    'single_motor': f"EMA_SUM_{motor_target} + EMA_SUM_nonmotor",
    'all_motor': (
        "EMA_SUM_brady + EMA_SUM_tremor + "
        "EMA_SUM_gait + EMA_SUM_nonmotor"
    )
}

FIX_EFF = 'single_motor'

# Random intercepts only
# NOTE(review): the grouping factor is the "cond" column, not "subid"
# (which lmm_df also provides) -- confirm this is the intended random effect.
model = smf.mixedlm(
    f"UPDRS_SUM_{motor_target} ~ {lmm_fix[FIX_EFF]}",
    lmm_df,
    groups=lmm_df["cond"],
    # re_formula=f"~EMA_SUM_{motor_target}",  # for random slopes for EMA motor
)
result = model.fit()
print(result.summary())

## calculate explained variances
R2_marg, R2_cond = utilsstat.calc_expl_variances(fitted_model=result)

# The separating space was missing between the two concatenated f-strings.
print(f"for {motor_target}: R2_marginal {np.round(R2_marg, 3)}, "
      f"R2_conditional: {np.round(R2_cond, 3)}")



Show individual differences in EMA-point vs UPDRS-change

In [None]:
# Histogram, per motor sub-score, of the per-subject linear slope of UPDRS
# on EMA (first-degree np.polyfit over that subject's rows in lmm_df).
fig, axes = plt.subplots(1, 3, figsize=(12, 4),
                         sharey=True, )

FONTSIZE=14

for i_trg, target in enumerate(['brady', 'tremor', 'gait']):

    id_coefs = []

    for subid in np.unique(lmm_df['subid']):

        sub_sel = lmm_df['subid'] == subid
        x = lmm_df.loc[sub_sel, f'EMA_SUM_{target}']
        y = lmm_df.loc[sub_sel, f'UPDRS_SUM_{target}']

        try:
            coef = np.polyfit(x, y, 1)[0]
        except (TypeError, ValueError, np.linalg.LinAlgError) as fit_err:
            # Keep the original fallback (identical x and y -> slope 0).
            # Otherwise skip the subject explicitly: the former bare
            # `except` left `coef` undefined or silently reused the slope
            # of the previous subject.
            if all(x == y):
                coef = 0
            else:
                print(f'{target}: no slope for {subid} ({fit_err}), subject skipped')
                continue

        if not np.isfinite(coef):
            # a NaN/inf slope would poison the mean and break the histogram
            print(f'{target}: non-finite slope for {subid}, subject skipped')
            continue

        id_coefs.append(coef)

    print(f'{target} mean-coefs: {np.mean(id_coefs).round(2)}, std: {np.std(id_coefs).round(2)}')

    axes[i_trg].hist(id_coefs, color=CAT_COLORS[target], alpha=0.5,)
    axes[i_trg].set_xlabel('$\\Delta$ UPDRS-III per EMA-change\n(points per point)', size=FONTSIZE,)
    # y axis is shared, so only the first panel is labelled
    if i_trg == 0:
        axes[i_trg].set_ylabel('Observations (subjects)', size=FONTSIZE,)

    axes[i_trg].set_title(
        f'{target}', size=FONTSIZE+4,
        pad=10,
        fontdict={'weight': 'bold',
                  'color': CAT_COLORS[target],},
    )
    axes[i_trg].tick_params(axis='both', which='both', size=FONTSIZE, labelsize=FONTSIZE)
    axes[i_trg].spines[['right', 'top']].set_visible(False)


plt.tight_layout()

# plt.savefig(os.path.join(figpath, 'train_data', 'ema_updrs_corr', 'Mar2026_delta_UPDRS_EMA'), dpi=300,
#             facecolor='w',)

plt.close()

## 4) Hold-out Validation: UPDRS prediction based on EMA

In [None]:
# from sklearn.linear_model import LinearRegression, LogisticRegression
# from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
# from sklearn.metrics import r2_score, confusion_matrix
# from scipy.stats import f

from xgboost import XGBRegressor
# from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score, root_mean_squared_error

from sklearn.linear_model import ElasticNet
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [None]:
def plot_pred_true_scatter(
    ax, y_test, y_pred, TARGET_FT, CAT_COLORS,
    FONTSIZE=14, ax_title=None,
):
    """Draw predicted against observed values on ``ax`` and return ``ax``.

    Points are coloured with ``CAT_COLORS[TARGET_FT]``. The title is
    ``ax_title`` when given (truthy), otherwise ``TARGET_FT``. Grey
    reference lines are drawn through x=0 and y=0.
    """
    target_colour = CAT_COLORS[TARGET_FT]

    ax.scatter(y_test, y_pred, color=target_colour, alpha=0.5, s=75,)

    # zero reference lines
    ax.axhline(y=0, c='gray', alpha=0.3)
    ax.axvline(x=0, c='gray', alpha=0.3)

    title_text = ax_title if ax_title else TARGET_FT
    title_font = {'weight': 'bold', 'color': target_colour}
    ax.set_title(f'{title_text}', size=FONTSIZE + 4, pad=10, fontdict=title_font)

    # x: observed, y: predicted
    for set_label, prefix in ((ax.set_xlabel, 'Observed'), (ax.set_ylabel, 'Predicted')):
        set_label(f'{prefix} UPDRS-III\n(mean-corr. points)', size=FONTSIZE,)

    ax.tick_params(axis='both', which='both', size=FONTSIZE, labelsize=FONTSIZE)
    ax.spines[['right', 'top']].set_visible(False)

    return ax

In [None]:
def ax_residuals_plot(
    ax, y_true, y_pred,
    FONTSIZE=14, 
):
    """Scatter residuals (``y_true - y_pred``) against predictions on ``ax``.

    A dashed horizontal line marks zero residual. Returns ``ax``.
    """
    text_kw = {'size': FONTSIZE}

    ax.scatter(y_pred, y_true - y_pred, alpha=0.5, color='gray', s=75,)
    ax.axhline(0, linestyle="--")

    ax.set_xlabel("Predicted UPDRS-III\n(mean-corr. points)", **text_kw)
    ax.set_ylabel("Residuals (Observed - Predicted)", **text_kw)
    ax.set_title("Cross-validation (training data) residuals", **text_kw)

    ax.tick_params(axis='both', which='both', size=FONTSIZE, labelsize=FONTSIZE)
    ax.spines[['right', 'top']].set_visible(False)

    return ax

In [None]:
def plot_all_cv_results(
    fitted_models, cv_df, model_name='xgb',
    SAVE_FIG=False, SHOW_FIG=True, figname=None,
    CAT_COLORS=None,
):
    """Plot predicted-vs-observed and residual panels for three fitted models.

    For each of 'brady', 'tremor' and 'gait', the model in
    ``fitted_models[target]`` predicts ``UPDRS_SUM_<target>`` from the single
    feature ``EMA_SUM_<target>`` of ``cv_df``. R2 and RMSE of those
    predictions go into the panel title.

    NOTE(review): the metrics are computed on the rows of ``cv_df`` itself.
    When these are the rows the models were fitted on, the values are
    in-sample fit metrics, not cross-validated scores, despite the
    residual panel's title.

    Parameters
    ----------
    fitted_models : mapping target -> fitted estimator with ``predict``.
    cv_df : table with the EMA_SUM_* / UPDRS_SUM_* columns. ``None`` falls
        back to ``prep_data.get_lmm_df(traindf)``, which is what the
        function previously always used.
    model_name : label shown in the panel titles.
    SAVE_FIG : save under ``figpath/train_data/ema_updrs_corr/figname``.
    SHOW_FIG : show the figure; otherwise it is closed.
    figname : file name for saving; required when ``SAVE_FIG`` is True.
    CAT_COLORS : mapping target -> colour; None selects the default mapping.

    Raises
    ------
    ValueError
        If ``SAVE_FIG`` is True but no ``figname`` is given.
    """
    if CAT_COLORS is None:
        # built per call instead of sharing one mutable default dict
        CAT_COLORS = {'brady': 'orange', 'tremor': 'purple', 'gait': 'darkgreen'}

    # Fail before plotting; os.path.join(..., None) would otherwise raise a
    # TypeError only after all panels were drawn.
    if SAVE_FIG and figname is None:
        raise ValueError('figname must be given when SAVE_FIG is True')

    # Use the table handed in by the caller; it used to be overwritten on
    # every loop pass by a recomputation from the global training table.
    if cv_df is None:
        cv_df = prep_data.get_lmm_df(traindf)

    fig, axes = plt.subplots(3, 2, figsize=(12, 12))

    for i_ft, TARGET_FT in enumerate(['brady', 'tremor', 'gait']):

        X_train = cv_df[f'EMA_SUM_{TARGET_FT}'].values.reshape(-1, 1)
        y_train = cv_df[f'UPDRS_SUM_{TARGET_FT}'].values

        y_train_pred = fitted_models[TARGET_FT].predict(X_train)

        r2 = r2_score(y_train, y_train_pred)
        rmse = root_mean_squared_error(y_train, y_train_pred)

        # left column: predicted vs observed, right column: residuals
        axes[i_ft, 0] = plot_pred_true_scatter(
            ax=axes[i_ft, 0], y_test=y_train, y_pred=y_train_pred,
            TARGET_FT=TARGET_FT, CAT_COLORS=CAT_COLORS,
            ax_title=f'{TARGET_FT} ({model_name}): R2: {round(r2, 2)}, RMSE: {round(rmse, 2)}'
        )
        axes[i_ft, 1] = ax_residuals_plot(ax=axes[i_ft, 1], y_true=y_train, y_pred=y_train_pred)

    plt.tight_layout()

    if SAVE_FIG:
        plt.savefig(
            os.path.join(figpath, 'train_data', 'ema_updrs_corr', figname),
            dpi=450, facecolor='w',
        )

    if SHOW_FIG:
        plt.show()
    else:
        plt.close()

### 4a) xgboost: non linear model

- "Gradient boosting constructs an ensemble of decision trees in a stage-wise manner, allowing modeling of nonlinear relationships and interactions between EMA items."

cross-validation (training data)

In [None]:
### crossvalidation single features
# TARGET_FT = 'brady'  # 'brady', 'tremor', 'gait', 'updrs_sum'

# cv_df = prep_data.get_lmm_df(traindf)

# X_train = cv_df[f'EMA_SUM_{TARGET_FT}'].values.reshape(-1, 1)

# if TARGET_FT == 'updrs_sum':
#     # predict motor UPDRS change
#     y_train = np.sum(cv_df[['UPDRS_SUM_brady', 'UPDRS_SUM_tremor',
#                               'UPDRS_SUM_gait']], axis=1).values
# else:
#     y_train = cv_df[f'UPDRS_SUM_{TARGET_FT}'].values

In [None]:
def fit_cv_xgb(X_train, y_train):
    """Grid-search an XGBoost regressor and return the best estimator.

    The search covers number of trees, tree depth, learning rate and
    row/column subsampling. It is scored by R2 with ``cv=5``, and
    ``best_estimator_`` of the fitted search is returned.
    """
    search_space = dict(
        n_estimators=[100, 200, 300],
        max_depth=[2, 3, 4],
        learning_rate=[0.05, 0.1],
        subsample=[0.8, 1.0],
        colsample_bytree=[0.8, 1.0],
    )

    search = GridSearchCV(
        XGBRegressor(objective="reg:squarederror", random_state=42),
        search_space,
        cv=5,
        scoring="r2",
    )

    # fit() returns the search object itself
    return search.fit(X_train, y_train).best_estimator_

In [None]:
# crossvalidation result dicts
# Best model and its hyperparameters per motor sub-score.
xgb_models, xgb_hparams = {}, {}

# TARGET_FT = 'brady'  # 'brady', 'tremor', 'gait', 'updrs_sum'

# The reshaped training table does not depend on the target: build it once
# instead of once per loop pass.
cv_df = prep_data.get_lmm_df(traindf)

for TARGET_FT in ['brady', 'tremor', 'gait']:
    # single feature: the EMA score of the same sub-score
    X_train = cv_df[f'EMA_SUM_{TARGET_FT}'].values.reshape(-1, 1)
    y_train = cv_df[f'UPDRS_SUM_{TARGET_FT}'].values

    # fit xgb model in crossvalidation grid search
    xgb_models[TARGET_FT] = fit_cv_xgb(X_train, y_train)
    # extract hyperparameters
    xgb_hparams[TARGET_FT] = xgb_models[TARGET_FT].get_params()

    # print(f"Optimal hyperparameters for {TARGET_FT}:")
    # for k, v in xgb_hparams[TARGET_FT].items():
    #     print(f"{k}: {v}")

In [None]:
# Save (without showing) the per-target scatter and residual panels for the
# tuned xgboost models.
plot_all_cv_results(
    fitted_models=xgb_models, cv_df=cv_df, model_name='xgb',
    SHOW_FIG=False, SAVE_FIG=True, figname='Mar2026_crossval_xgb',
)

### 4b) Linear regression (ElasticNet) cross-validation


In [None]:
def fit_cv_regr(X_train, y_train):
    """Grid-search a scaled ElasticNet pipeline and return the best estimator.

    The pipeline standardises the features and fits an ElasticNet. The
    search covers ``alpha`` and ``l1_ratio``, is scored by R2 with ``cv=5``,
    and runs with ``n_jobs=-1``.
    """
    # scaling + ElasticNet
    steps = [
        ("scaler", StandardScaler()),
        ("model", ElasticNet(max_iter=10000, random_state=42)),
    ]

    # the "model__" prefix addresses the ElasticNet step of the pipeline
    search_space = {
        "model__alpha": [0.001, 0.01, 0.1, 1.0, 10.0],
        "model__l1_ratio": [0.1, 0.3, 0.5, 0.7, 0.9],
    }

    search = GridSearchCV(
        Pipeline(steps),
        search_space,
        cv=5,
        scoring="r2",
        n_jobs=-1,
    )

    # fit() returns the search object itself
    return search.fit(X_train, y_train).best_estimator_

In [None]:
# crossvalidation result dicts
# Best model and its hyperparameters per motor sub-score.
regr_models, regr_hparams = {}, {}

# The reshaped training table does not depend on the target: build it once
# instead of once per loop pass.
cv_df = prep_data.get_lmm_df(traindf)

for TARGET_FT in ['brady', 'tremor', 'gait']:
    # single feature: the EMA score of the same sub-score
    X_train = cv_df[f'EMA_SUM_{TARGET_FT}'].values.reshape(-1, 1)
    y_train = cv_df[f'UPDRS_SUM_{TARGET_FT}'].values

    # fit regression model in crossvalidation grid search
    regr_models[TARGET_FT] = fit_cv_regr(X_train, y_train)
    # extract hyperparameters
    regr_hparams[TARGET_FT] = regr_models[TARGET_FT].get_params()

# # extract optimal hyperparameters
# best_params = grid.best_params_
# best_alpha = best_params["model__alpha"]
# best_l1_ratio = best_params["model__l1_ratio"]

# print("Optimal alpha:", best_alpha)
# print("Optimal l1_ratio:", best_l1_ratio)

In [None]:
# Save (without showing) the per-target scatter and residual panels for the
# tuned regression pipelines.
plot_all_cv_results(
    fitted_models=regr_models, cv_df=cv_df, model_name='regr',
    SHOW_FIG=False, SAVE_FIG=True, figname='Mar2026_crossval_regr',
)

### 4c) Holdout validation with xgboost

In [None]:

# Hold-out results, keyed metric first and then target:
# true_holdout_results['r2']['brady'], ...
true_holdout_results = {
    'r2': {},
    'rmse': {},
    'y_test_true': {},
    'y_test_pred': {},
}

# Both reshaped tables are independent of the target: build them once.
cv_df = prep_data.get_lmm_df(traindf)  # training cohort, fits final models
ho_df = prep_data.get_lmm_df(testdf)   # testing cohort, evaluates them

for TARGET_FT in ['brady', 'tremor', 'gait']:
    X_train = cv_df[f'EMA_SUM_{TARGET_FT}'].values.reshape(-1, 1)
    y_train = cv_df[f'UPDRS_SUM_{TARGET_FT}'].values

    # Rebuild the model from the stored hyperparameters (rather than reusing
    # xgb_models[TARGET_FT]) so that the permutation test can construct its
    # models in exactly the same way.
    opt_hyperparams = xgb_hparams[TARGET_FT]
    opt_model = XGBRegressor(
        **opt_hyperparams
    )
    opt_model.fit(X_train, y_train)

    X_test = ho_df[f'EMA_SUM_{TARGET_FT}'].values.reshape(-1, 1)

    # observed test values
    y_test_true = ho_df[f'UPDRS_SUM_{TARGET_FT}'].values

    y_test_pred = opt_model.predict(X_test)

    r2 = r2_score(y_test_true, y_test_pred)
    rmse = root_mean_squared_error(y_test_true, y_test_pred)
    true_holdout_results['r2'][TARGET_FT] = r2
    true_holdout_results['rmse'][TARGET_FT] = rmse
    true_holdout_results['y_test_true'][TARGET_FT] = y_test_true
    true_holdout_results['y_test_pred'][TARGET_FT] = y_test_pred

    print(f"Test set performance for {TARGET_FT}: R2: {r2}, RMSE: {rmse}")
    

permutation test

In [None]:
# Permutation null distribution of the hold-out R2: refit the tuned model
# on training data whose EMA values are shuffled, then score on the
# untouched test cohort.
N_PERM = 500

perm_results_r2 = {ft: [] for ft in ['brady', 'tremor', 'gait']}

# Neither table depends on the target or the permutation: build them once
# instead of once per iteration (3 x N_PERM times).
cv_df = prep_data.get_lmm_df(traindf)  # training cohort
ho_df = prep_data.get_lmm_df(testdf)   # testing cohort

for TARGET_FT in ['brady', 'tremor', 'gait']:

    X_train_true = cv_df[f'EMA_SUM_{TARGET_FT}'].values.reshape(-1, 1)
    y_train = cv_df[f'UPDRS_SUM_{TARGET_FT}'].values

    X_test = ho_df[f'EMA_SUM_{TARGET_FT}'].values.reshape(-1, 1)
    # observed test values
    y_test_true = ho_df[f'UPDRS_SUM_{TARGET_FT}'].values

    # from hyperparameter dict, same construction as the true hold-out model
    opt_hyperparams = xgb_hparams[TARGET_FT]

    for i_perm in range(N_PERM):

        # one fixed seed per permutation index keeps the run reproducible
        np.random.seed(i_perm)
        # Shuffle a copy: np.random.shuffle works in place, and the source
        # array is now shared between iterations.
        X_train = X_train_true.copy()
        np.random.shuffle(X_train)  # shuffle EMA values to break true association with UPDRS

        opt_model = XGBRegressor(
            **opt_hyperparams
        )
        opt_model.fit(X_train, y_train)  # with shuffled randomized training X

        y_test_pred = opt_model.predict(X_test)

        r2 = r2_score(y_test_true, y_test_pred)
        perm_results_r2[TARGET_FT].append(r2)

        # rmse = root_mean_squared_error(y_test_true, y_test_pred)
        print(f"Permutation {i_perm+1}/{N_PERM} for {TARGET_FT}: R2: {r2},")

Permutation p-values and plots of the holdout results

In [None]:
# One-sided permutation p-value per target: the share of permuted R2 values
# that reach or exceed the observed hold-out R2.
# NOTE(review): this estimate can be exactly 0; consider the
# (count + 1) / (N_PERM + 1) correction if p-values are reported.
for ft in ['brady', 'tremor', 'gait']:

    print(f"{ft} permutation R2: mean: {np.mean(perm_results_r2[ft]).round(3)}, std: {np.std(perm_results_r2[ft]).round(3)}")
    # true_holdout_results is keyed metric first, then target; the reversed
    # lookup [ft]['r2'] raised a KeyError.
    p_value = np.mean(np.array(perm_results_r2[ft]) >= true_holdout_results['r2'][ft])

    print(f"Permutation p-value for {ft}: {p_value}")

In [None]:
def plot_holdout_results(
    true_holdout_results, perm_results_r2=None,
    CAT_COLORS = {'brady': 'orange', 'tremor': 'purple', 'gait': 'darkgreen'},
    SAVE_FIG=False, SHOW_FIG=True, figname=None,
    # FONTSIZE=14,
):
    """Plot predicted against observed hold-out values, one panel per symptom.

    ``true_holdout_results`` is read as ``[metric][symptom]`` for the metrics
    'y_test_true', 'y_test_pred', 'r2' and 'rmse'. Each panel gets an
    identity line and a text box with R2 and RMSE. When ``perm_results_r2``
    is given, the box also shows the share of permuted R2 values that are
    >= the observed R2.

    Side effects: updates the global ``plt.rcParams``. When ``SAVE_FIG`` is
    set and ``figname`` is given, the figure is written three times under
    ``figpath/ema_updrs_holdout`` (no suffix, '.pdf', '.svg'). The figure is
    shown if ``SHOW_FIG``, otherwise closed.
    """
    symptoms = list(CAT_COLORS)
    panel_titles = {'brady': "Bradykinesia", 'tremor': "Tremor", 'gait': "Gait"}

    # look up all observed / predicted arrays before any figure is created
    observed = [true_holdout_results['y_test_true'][s] for s in symptoms]
    predicted = [true_holdout_results['y_test_pred'][s] for s in symptoms]

    plt.rcParams.update({
        "font.size": 10,
        "axes.titlesize": 11,
        "axes.labelsize": 10,
        "xtick.labelsize": 9,
        "ytick.labelsize": 9,
        "figure.dpi": 300
    })

    fig, axes = plt.subplots(
        1, 3, figsize=(12, 4),
        # sharex=True, sharey=True,
    )

    for ax, y_true, y_pred, sympt in zip(axes, observed, predicted, symptoms):

        ax.scatter(y_true, y_pred, alpha=0.7, s=75, zorder=1,
                   color=CAT_COLORS[sympt], )

        # common, slightly padded range for both axes
        lowest = min(y_true.min(), y_pred.min())
        highest = max(y_true.max(), y_pred.max())
        pad = 0.05 * (highest - lowest)
        lims = [lowest - pad, highest + pad]

        # identity line (perfect prediction), drawn below the points
        ax.plot(lims, lims, linestyle="--", linewidth=2, color='gray', zorder=0, alpha=.7,)
        ax.set_xlim(lims)
        ax.set_ylim(lims)
        ax.set_aspect("equal", adjustable="box")

        # metrics text box
        r2 = true_holdout_results['r2'][sympt]
        rmse = true_holdout_results['rmse'][sympt]
        box_lines = [f"$R^2$ = {r2:.2f}", f"RMSE = {rmse:.2f}"]
        if perm_results_r2 is not None:
            p_value = np.mean(np.array(perm_results_r2[sympt]) >= r2)
            box_lines.append(f"p={p_value:.3f}")
        ax.text(0.05, 0.95,
                "\n".join(box_lines),
                transform=ax.transAxes,
                verticalalignment="top",
                bbox=dict(boxstyle="round", facecolor="white", alpha=0.8))

        ax.set_title(panel_titles[sympt], fontweight="bold")
        ax.set_xlabel("Observed UPDRS fluctuation (points)")
        ax.grid(False)

    axes[0].set_ylabel("Predicted UPDRS fluctuation (points)")

    plt.tight_layout()

    # Save as vector graphics for journal submission; the empty suffix
    # leaves the file format to matplotlib's default.
    save_requested = SAVE_FIG and figname is not None
    if save_requested:
        fig_path = os.path.join(figpath, 'ema_updrs_holdout', figname)
        for suffix in ('', '.pdf', '.svg'):
            plt.savefig(fig_path + suffix, bbox_inches="tight")

    if SHOW_FIG:
        plt.show()
    else:
        plt.close()

In [None]:
# Save (without showing) the hold-out scatter panels, including the
# permutation p-values in the metric boxes.
plot_holdout_results(
    true_holdout_results=true_holdout_results,
    perm_results_r2=perm_results_r2,
    SAVE_FIG=True, SHOW_FIG=False, figname='Mar2026_holdout_scatters',
)

In [None]:
# ### plot significancies from permutations

# def plot_holdout_signs():
#     fig, axes = plt.subplots(1, len(perm_stats.keys()), figsize=(12, 4))

#     for i_ax, metr in enumerate(list(perm_stats.keys())):

#         axes[i_ax].hist(perm_stats[metr], color='gray', alpha=.5,)
#         axes[i_ax].axvline(np.percentile(perm_stats[metr], 95),
#                         color='orange', alpha=.8, lw=3,
#                         label='permuted\nalpha 0.05',)
        
#         axes[i_ax].axvline(true_stats[metr],
#                         color='purple', alpha=.5, lw=1,
#                         label='prediction',)
        
#         p_calc = sum(np.array(perm_stats[metr]) > true_stats[metr]) / len(perm_stats[metr])
#         print(f'metric {metr}: p = {np.round(p_calc, 3)}')

#         axes[i_ax].set_xlabel(f'{metr} score', size=14,)

#         axes[i_ax].set_ylabel('count (n)', size=14)

#         axes[i_ax].tick_params(axis='both', size=14, labelsize=14,)
#         axes[i_ax].spines[['right', 'top']].set_visible(False)

#     axes[1].legend(frameon=False, fontsize=14,
#                 bbox_to_anchor=(.95, .5), loc='center left')

# plt.tight_layout()

# # plt.savefig(os.path.join(load_utils.get_onedrive_path('figures'),
# #              'ema_updrs_corr', f'holdOut_updrsSum_{N_PERM}permStats'),
# #              dpi=300, facecolor='w',)

# plt.close()