In [None]:
import copy
import logging
import warnings

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc as pm
import seaborn as sns

import pcntoolkit.util.output
from pcntoolkit import (
    HBR,
    BsplineBasisFunction,
    NormalLikelihood,
    NormativeModel,
    NormData,
    load_fcon1000,
    make_prior,
)

# Plot styling used by every figure in this notebook.
sns.set_style("darkgrid")

# Keep the cell output readable: only WARNING-and-above records from the
# "pymc" logger are handled, and they are not forwarded to ancestor loggers.
pymc_logger = logging.getLogger("pymc")
pymc_logger.propagate = False
pymc_logger.setLevel(logging.WARNING)

# Silence FutureWarning noise and pandas' chained-assignment warning.
pd.options.mode.chained_assignment = None  # default='warn'
warnings.simplefilter("ignore", category=FutureWarning)

# NOTE(review): presumably this keeps PCNtoolkit's own "Process: ..." status
# messages switched on — confirm against the Output class.
pcntoolkit.util.output.Output.set_show_messages(True)

In [2]:
# Download an example dataset
norm_data: NormData = load_fcon1000()

# Select only a few features
features_to_model = [
    "WM-hypointensities",
    "Right-Lateral-Ventricle",
    # "Right-Amygdala",
    # "CortexVol",
]
norm_data = norm_data.sel({"response_vars": features_to_model})

# Sorted array of the distinct site labels present in the data.
all_sites = np.unique(norm_data.batch_effects.sel(batch_effect_dims="site").values)

# Randomly partition the sites into three disjoint groups: the first two get
# 7 sites each and the third gets whatever is left over.
# A dedicated, seeded generator makes the partition identical on every
# "Restart Kernel -> Run All"; change SITE_SPLIT_SEED to draw a different split.
SITE_SPLIT_SEED = 42
site_rng = np.random.default_rng(SITE_SPLIT_SEED)
site_rng.shuffle(all_sites)  # in-place shuffle of the site labels
group1 = all_sites[:7]
group2 = all_sites[7:14]
group3 = all_sites[14:]
print(f"Group 1: {group1}")
print(f"Group 2: {group2}")
print(f"Group 3: {group3}")

# Two-stage split on the "site" batch effect: first separate the group-1 sites
# from the rest, then split that remainder into the group-2 and group-3 sites.
data_group1, data_group23 = norm_data.batch_effects_split({"site": group1}, names=("group1", "group23"))
data_group2, data_group3 = data_group23.batch_effects_split({"site": group2}, names=("group2", "group3"))



Process: 81594 - 2025-09-04 11:03:21 - Dataset "fcon1000" created.
    - 1078 observations
    - 1078 unique subjects
    - 1 covariates
    - 217 response variables
    - 2 batch effects:
    	sex (2)
	site (23)
    
Group 1: ['Leiden_2180' 'AnnArbor_b' 'AnnArbor_a' 'PaloAlto' 'SaintLouis'
 'Berlin_Margulies' 'Oxford']
Group 2: ['Milwaukee_b' 'Baltimore' 'Bangor' 'Newark' 'Munchen' 'Cambridge_Buckner'
 'Oulu']
Group 3: ['Atlanta' 'ICBM' 'Leiden_2200' 'Queensland' 'Pittsburgh' 'NewYork_a'
 'Beijing_Zang' 'Cleveland' 'NewYork_a_ADHD']
Process: 81594 - 2025-09-04 11:03:21 - Dataset "group1" created.
    - 164 observations
    - 164 unique subjects
    - 1 covariates
    - 2 response variables
    - 2 batch effects:
    	sex (2)
	site (7)
    
Process: 81594 - 2025-09-04 11:03:21 - Dataset "group23" created.
    - 914 observations
    - 914 unique subjects
    - 1 covariates
    - 2 response variables
    - 2 batch effects:
    	sex (2)
	site (16)
    
Process: 81594 - 2025-09-04 11:03:21

In [3]:
# --- Prior on the likelihood's mean (mu) ------------------------------------
# Linear in a degree-3 B-spline expansion (5 knots) of covariate column 0.
# The intercept is declared with random=True and has its own mu/sigma priors;
# its sigma prior is passed through a softplus mapping.
mu_slope = make_prior(dist_name="Normal", dist_params=(0.0, 10.0))
mu_intercept_mu = make_prior(dist_name="Normal", dist_params=(0.0, 1.0))
mu_intercept_sigma = make_prior(
    dist_name="Normal",
    dist_params=(0.0, 1.0),
    mapping="softplus",
    mapping_params=(0.0, 3.0),
)
mu_intercept = make_prior(random=True, mu=mu_intercept_mu, sigma=mu_intercept_sigma)
mu_basis = BsplineBasisFunction(basis_column=0, nknots=5, degree=3)
mu = make_prior(
    linear=True,
    slope=mu_slope,
    intercept=mu_intercept,
    basis_function=mu_basis,
)

# --- Prior on the likelihood's scale (sigma) --------------------------------
# Also linear in its own B-spline expansion of covariate column 0 (a separate
# basis-function object, not shared with mu), with fixed slope/intercept priors
# and a softplus mapping applied to the result.
sigma_slope = make_prior(dist_name="Normal", dist_params=(0.0, 2.0))
sigma_intercept = make_prior(dist_name="Normal", dist_params=(1.0, 1.0))
sigma_basis = BsplineBasisFunction(basis_column=0, nknots=5, degree=3)
sigma = make_prior(
    linear=True,
    slope=sigma_slope,
    intercept=sigma_intercept,
    basis_function=sigma_basis,
    mapping="softplus",
    mapping_params=(0.0, 3.0),
)

likelihood = NormalLikelihood(mu, sigma)

# --- Template regression model ----------------------------------------------
# Sampler configuration: 4 chains, 500 tuning + 1500 kept draws, nutpie sampler.
hbr_settings = dict(
    name="template",
    cores=16,
    progressbar=True,
    draws=1500,
    tune=500,
    chains=4,
    nuts_sampler="nutpie",
    likelihood=likelihood,
)
template_hbr = HBR(**hbr_settings)

# --- Normative model wrapper ------------------------------------------------
# Persist the model, results and plots, and standardize inputs and outputs.
# The save_dir given here is replaced on each copy made later in the notebook.
output_options = dict(
    savemodel=True,
    evaluate_model=True,
    saveresults=True,
    saveplots=True,
    save_dir="resources/hbr_normal/save_dir",
)
scaling_options = dict(inscaler="standardize", outscaler="standardize")
model = NormativeModel(
    template_regression_model=template_hbr,
    **output_options,
    **scaling_options,
)

In [4]:
# Make one independent deep copy of the template model per site group and
# point each copy at its own save directory.
site_models = []
for site_save_dir in (
    "resources/hbr_merge/model1",
    "resources/hbr_merge/model2",
    "resources/hbr_merge/model3",
):
    site_model = copy.deepcopy(model)
    site_model.save_dir = site_save_dir
    site_models.append(site_model)
model1, model2, model3 = site_models

# Fit each copy on its own group of sites, in order (group 1, 2, then 3).
for site_model, site_data in zip(site_models, (data_group1, data_group2, data_group3)):
    site_model.fit(site_data)

Process: 81594 - 2025-09-04 11:03:21 - Fitting models on 2 response variables.
Process: 81594 - 2025-09-04 11:03:21 - Fitting model for WM-hypointensities.


Progress,Draws,Divergences,Step Size,Gradients/Draw
,2000,4,0.13,223
,2000,8,0.12,255
,2000,2,0.12,127
,2000,50,0.12,127


Process: 81594 - 2025-09-04 11:03:30 - Fitting model for Right-Lateral-Ventricle.


Progress,Draws,Divergences,Step Size,Gradients/Draw
,2000,0,0.17,95
,2000,2,0.16,63
,2000,1,0.17,63
,2000,0,0.16,63


Process: 81594 - 2025-09-04 11:03:36 - Making predictions on 2 response variables.
Process: 81594 - 2025-09-04 11:03:36 - Computing z-scores for 2 response variables.
Process: 81594 - 2025-09-04 11:03:36 - Computing z-scores for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:03:36 - Computing z-scores for WM-hypointensities.
Process: 81594 - 2025-09-04 11:03:36 - Computing centiles for 2 response variables.
Process: 81594 - 2025-09-04 11:03:36 - Computing centiles for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:03:37 - Computing centiles for WM-hypointensities.
Process: 81594 - 2025-09-04 11:03:38 - Computing log-probabilities for 2 response variables.
Process: 81594 - 2025-09-04 11:03:38 - Computing log-probabilities for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:03:39 - Computing log-probabilities for WM-hypointensities.
Process: 81594 - 2025-09-04 11:03:39 - Computing yhat for 2 response variables.




Process: 81594 - 2025-09-04 11:03:40 - Dataset "centile" created.
    - 150 observations
    - 150 unique subjects
    - 1 covariates
    - 2 response variables
    - 2 batch effects:
    	sex (1)
	site (1)
    
Process: 81594 - 2025-09-04 11:03:40 - Computing centiles for 2 response variables.
Process: 81594 - 2025-09-04 11:03:40 - Computing centiles for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:03:41 - Computing centiles for WM-hypointensities.
Process: 81594 - 2025-09-04 11:03:42 - Harmonizing data on 2 response variables.
Process: 81594 - 2025-09-04 11:03:42 - Harmonizing data for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:03:42 - Harmonizing data for WM-hypointensities.
Process: 81594 - 2025-09-04 11:03:43 - Saving model to:
	resources/hbr_merge/model1.
Process: 81594 - 2025-09-04 11:03:43 - Fitting models on 2 response variables.
Process: 81594 - 2025-09-04 11:03:43 - Fitting model for WM-hypointensities.


Progress,Draws,Divergences,Step Size,Gradients/Draw
,2000,9,0.13,255
,2000,4,0.12,159
,2000,12,0.13,31
,2000,5,0.13,63


Process: 81594 - 2025-09-04 11:03:54 - Fitting model for Right-Lateral-Ventricle.


Progress,Draws,Divergences,Step Size,Gradients/Draw
,2000,5,0.15,63
,2000,14,0.15,159
,2000,0,0.13,31
,2000,2,0.15,127


Process: 81594 - 2025-09-04 11:04:03 - Making predictions on 2 response variables.
Process: 81594 - 2025-09-04 11:04:03 - Computing z-scores for 2 response variables.
Process: 81594 - 2025-09-04 11:04:03 - Computing z-scores for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:04:03 - Computing z-scores for WM-hypointensities.
Process: 81594 - 2025-09-04 11:04:03 - Computing centiles for 2 response variables.
Process: 81594 - 2025-09-04 11:04:03 - Computing centiles for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:04:05 - Computing centiles for WM-hypointensities.
Process: 81594 - 2025-09-04 11:04:06 - Computing log-probabilities for 2 response variables.
Process: 81594 - 2025-09-04 11:04:06 - Computing log-probabilities for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:04:06 - Computing log-probabilities for WM-hypointensities.
Process: 81594 - 2025-09-04 11:04:07 - Computing yhat for 2 response variables.




Process: 81594 - 2025-09-04 11:04:07 - Dataset "centile" created.
    - 150 observations
    - 150 unique subjects
    - 1 covariates
    - 2 response variables
    - 2 batch effects:
    	sex (1)
	site (1)
    
Process: 81594 - 2025-09-04 11:04:07 - Computing centiles for 2 response variables.
Process: 81594 - 2025-09-04 11:04:07 - Computing centiles for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:04:09 - Computing centiles for WM-hypointensities.
Process: 81594 - 2025-09-04 11:04:10 - Harmonizing data on 2 response variables.
Process: 81594 - 2025-09-04 11:04:10 - Harmonizing data for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:04:10 - Harmonizing data for WM-hypointensities.
Process: 81594 - 2025-09-04 11:04:11 - Saving model to:
	resources/hbr_merge/model2.
Process: 81594 - 2025-09-04 11:04:11 - Fitting models on 2 response variables.
Process: 81594 - 2025-09-04 11:04:11 - Fitting model for WM-hypointensities.


Progress,Draws,Divergences,Step Size,Gradients/Draw
,2000,3,0.11,127
,2000,36,0.11,191
,2000,4,0.11,63
,2000,3,0.11,63


Process: 81594 - 2025-09-04 11:04:23 - Fitting model for Right-Lateral-Ventricle.


Progress,Draws,Divergences,Step Size,Gradients/Draw
,2000,9,0.13,63
,2000,6,0.14,31
,2000,20,0.13,95
,2000,97,0.13,127


Process: 81594 - 2025-09-04 11:04:33 - Making predictions on 2 response variables.
Process: 81594 - 2025-09-04 11:04:33 - Computing z-scores for 2 response variables.
Process: 81594 - 2025-09-04 11:04:33 - Computing z-scores for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:04:34 - Computing z-scores for WM-hypointensities.
Process: 81594 - 2025-09-04 11:04:34 - Computing centiles for 2 response variables.
Process: 81594 - 2025-09-04 11:04:34 - Computing centiles for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:04:35 - Computing centiles for WM-hypointensities.
Process: 81594 - 2025-09-04 11:04:37 - Computing log-probabilities for 2 response variables.
Process: 81594 - 2025-09-04 11:04:37 - Computing log-probabilities for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:04:37 - Computing log-probabilities for WM-hypointensities.
Process: 81594 - 2025-09-04 11:04:37 - Computing yhat for 2 response variables.




Process: 81594 - 2025-09-04 11:04:38 - Dataset "centile" created.
    - 150 observations
    - 150 unique subjects
    - 1 covariates
    - 2 response variables
    - 2 batch effects:
    	sex (1)
	site (1)
    
Process: 81594 - 2025-09-04 11:04:38 - Computing centiles for 2 response variables.
Process: 81594 - 2025-09-04 11:04:38 - Computing centiles for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:04:39 - Computing centiles for WM-hypointensities.
Process: 81594 - 2025-09-04 11:04:40 - Harmonizing data on 2 response variables.
Process: 81594 - 2025-09-04 11:04:40 - Harmonizing data for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:04:41 - Harmonizing data for WM-hypointensities.
Process: 81594 - 2025-09-04 11:04:42 - Saving model to:
	resources/hbr_merge/model3.


In [5]:
# Optional: uncomment to reload the three models from the directories they
# were saved to in the previous cell, instead of fitting them again.
# model1 = NormativeModel.load(path="resources/hbr_merge/model1")
# model2 = NormativeModel.load(path="resources/hbr_merge/model2")
# model3 = NormativeModel.load(path="resources/hbr_merge/model3")

In [6]:
# The merge function takes a list whose entries may be either in-memory models
# or paths to saved models; here the first model is given by its save path and
# the other two as objects.
models_to_merge = ["resources/hbr_merge/model1", model2, model3]
merged_model = NormativeModel.merge(
    save_dir="resources/hbr_merge/merged_model",
    models=models_to_merge,
)
# To reuse a previously merged model instead of merging again:
# merged_model = NormativeModel.load(path="resources/hbr_merge/merged_model")

Process: 81594 - 2025-09-04 11:04:42 - Dataset "synthesized" created.
    - 164 observations
    - 164 unique subjects
    - 1 covariates
    - 2 response variables
    - 2 batch effects:
    	sex (2)
	site (7)
    
Process: 81594 - 2025-09-04 11:04:42 - Synthesizing data for 2 response variables.
Process: 81594 - 2025-09-04 11:04:42 - Synthesizing data for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:04:42 - Synthesizing data for WM-hypointensities.
Process: 81594 - 2025-09-04 11:04:42 - Dataset "synthesized" created.
    - 423 observations
    - 423 unique subjects
    - 1 covariates
    - 2 response variables
    - 2 batch effects:
    	sex (2)
	site (7)
    
Process: 81594 - 2025-09-04 11:04:42 - Synthesizing data for 2 response variables.
Process: 81594 - 2025-09-04 11:04:42 - Synthesizing data for WM-hypointensities.
Process: 81594 - 2025-09-04 11:04:43 - Synthesizing data for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:04:43 - Dataset "synthesized" created

Progress,Draws,Divergences,Step Size,Gradients/Draw
,2000,3,0.13,31
,2000,0,0.11,63
,2000,3,0.12,63
,2000,2,0.12,63


Process: 81594 - 2025-09-04 11:04:59 - Fitting model for WM-hypointensities.


Progress,Draws,Divergences,Step Size,Gradients/Draw
,2000,8,0.11,159
,2000,16,0.11,127
,2000,11,0.11,95
,2000,7,0.11,63


Process: 81594 - 2025-09-04 11:05:19 - Making predictions on 2 response variables.
Process: 81594 - 2025-09-04 11:05:19 - Computing z-scores for 2 response variables.
Process: 81594 - 2025-09-04 11:05:19 - Computing z-scores for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:05:20 - Computing z-scores for WM-hypointensities.
Process: 81594 - 2025-09-04 11:05:20 - Computing centiles for 2 response variables.
Process: 81594 - 2025-09-04 11:05:20 - Computing centiles for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:05:22 - Computing centiles for WM-hypointensities.
Process: 81594 - 2025-09-04 11:05:24 - Computing log-probabilities for 2 response variables.
Process: 81594 - 2025-09-04 11:05:24 - Computing log-probabilities for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:05:24 - Computing log-probabilities for WM-hypointensities.
Process: 81594 - 2025-09-04 11:05:25 - Computing yhat for 2 response variables.




Process: 81594 - 2025-09-04 11:05:26 - Dataset "centile" created.
    - 150 observations
    - 150 unique subjects
    - 1 covariates
    - 2 response variables
    - 2 batch effects:
    	sex (1)
	site (1)
    
Process: 81594 - 2025-09-04 11:05:26 - Computing centiles for 2 response variables.
Process: 81594 - 2025-09-04 11:05:26 - Computing centiles for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:05:27 - Computing centiles for WM-hypointensities.
Process: 81594 - 2025-09-04 11:05:28 - Harmonizing data on 2 response variables.
Process: 81594 - 2025-09-04 11:05:28 - Harmonizing data for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:05:29 - Harmonizing data for WM-hypointensities.
Process: 81594 - 2025-09-04 11:05:30 - Saving model to:
	resources/hbr_merge/merged_model.


In [7]:
# Run the merged model on each group of sites separately...
for site_data in (data_group1, data_group2, data_group3):
    merged_model.predict(site_data)

# ...and finally on the full dataset. Kept as the cell's last bare expression
# so that whatever the cell displays for this call is unchanged.
merged_model.predict(norm_data)


Process: 81594 - 2025-09-04 11:05:30 - Making predictions on 2 response variables.
Process: 81594 - 2025-09-04 11:05:30 - Computing z-scores for 2 response variables.
Process: 81594 - 2025-09-04 11:05:30 - Computing z-scores for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:05:30 - Computing z-scores for WM-hypointensities.
Process: 81594 - 2025-09-04 11:05:30 - Computing centiles for 2 response variables.
Process: 81594 - 2025-09-04 11:05:30 - Computing centiles for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:05:31 - Computing centiles for WM-hypointensities.
Process: 81594 - 2025-09-04 11:05:33 - Computing log-probabilities for 2 response variables.
Process: 81594 - 2025-09-04 11:05:33 - Computing log-probabilities for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:05:33 - Computing log-probabilities for WM-hypointensities.
Process: 81594 - 2025-09-04 11:05:33 - Computing yhat for 2 response variables.




Process: 81594 - 2025-09-04 11:05:34 - Dataset "centile" created.
    - 150 observations
    - 150 unique subjects
    - 1 covariates
    - 2 response variables
    - 2 batch effects:
    	sex (1)
	site (1)
    
Process: 81594 - 2025-09-04 11:05:34 - Computing centiles for 2 response variables.
Process: 81594 - 2025-09-04 11:05:34 - Computing centiles for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:05:35 - Computing centiles for WM-hypointensities.
Process: 81594 - 2025-09-04 11:05:36 - Harmonizing data on 2 response variables.
Process: 81594 - 2025-09-04 11:05:36 - Harmonizing data for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:05:36 - Harmonizing data for WM-hypointensities.
Process: 81594 - 2025-09-04 11:05:37 - Making predictions on 2 response variables.
Process: 81594 - 2025-09-04 11:05:37 - Computing z-scores for 2 response variables.
Process: 81594 - 2025-09-04 11:05:37 - Computing z-scores for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:05:3



Process: 81594 - 2025-09-04 11:05:42 - Dataset "centile" created.
    - 150 observations
    - 150 unique subjects
    - 1 covariates
    - 2 response variables
    - 2 batch effects:
    	sex (1)
	site (1)
    
Process: 81594 - 2025-09-04 11:05:42 - Computing centiles for 2 response variables.
Process: 81594 - 2025-09-04 11:05:42 - Computing centiles for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:05:43 - Computing centiles for WM-hypointensities.
Process: 81594 - 2025-09-04 11:05:44 - Harmonizing data on 2 response variables.
Process: 81594 - 2025-09-04 11:05:44 - Harmonizing data for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:05:45 - Harmonizing data for WM-hypointensities.
Process: 81594 - 2025-09-04 11:05:45 - Making predictions on 2 response variables.
Process: 81594 - 2025-09-04 11:05:45 - Computing z-scores for 2 response variables.
Process: 81594 - 2025-09-04 11:05:45 - Computing z-scores for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:05:4



Process: 81594 - 2025-09-04 11:05:50 - Dataset "centile" created.
    - 150 observations
    - 150 unique subjects
    - 1 covariates
    - 2 response variables
    - 2 batch effects:
    	sex (1)
	site (1)
    
Process: 81594 - 2025-09-04 11:05:50 - Computing centiles for 2 response variables.
Process: 81594 - 2025-09-04 11:05:50 - Computing centiles for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:05:51 - Computing centiles for WM-hypointensities.
Process: 81594 - 2025-09-04 11:05:52 - Harmonizing data on 2 response variables.
Process: 81594 - 2025-09-04 11:05:52 - Harmonizing data for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:05:53 - Harmonizing data for WM-hypointensities.
Process: 81594 - 2025-09-04 11:05:54 - Making predictions on 2 response variables.
Process: 81594 - 2025-09-04 11:05:54 - Computing z-scores for 2 response variables.
Process: 81594 - 2025-09-04 11:05:54 - Computing z-scores for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:05:5



Process: 81594 - 2025-09-04 11:06:00 - Dataset "centile" created.
    - 150 observations
    - 150 unique subjects
    - 1 covariates
    - 2 response variables
    - 2 batch effects:
    	sex (1)
	site (1)
    
Process: 81594 - 2025-09-04 11:06:00 - Computing centiles for 2 response variables.
Process: 81594 - 2025-09-04 11:06:00 - Computing centiles for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:06:01 - Computing centiles for WM-hypointensities.
Process: 81594 - 2025-09-04 11:06:03 - Harmonizing data on 2 response variables.
Process: 81594 - 2025-09-04 11:06:03 - Harmonizing data for Right-Lateral-Ventricle.
Process: 81594 - 2025-09-04 11:06:03 - Harmonizing data for WM-hypointensities.
