In [16]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from pcntoolkit import BLR, BsplineBasisFunction, LinearBasisFunction, NormativeModel, NormData, load_fcon1000

In [68]:
import os
from typing import Any, Dict, List, Literal

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.font_manager import FontProperties

from pcntoolkit import NormativeModel, NormData


def plot_centiles(
    model: "NormativeModel",
    centiles: List[float] | np.ndarray | None = None,
    covariate: str | None = None,
    covariate_range: tuple[float, float] = (None, None),  # type: ignore
    batch_effects: Dict[str, List[str]] | None | Literal["all"] = None,
    scatter_data: NormData | None = None,
    harmonize_data: bool = True,
    hue_data: str = "site",
    markers_data: str = "sex",
    show_other_data: bool = False,
    show_thrivelines: bool = False,
    z_thrive: float = 0.0,
    save_dir: str | None = None,
    show_centile_labels: bool = True,
    show_legend: bool = True,
    plt_kwargs: dict | None = None,
    **kwargs: Any,
) -> None:
    """Generate centile plots for response variables with optional data overlay.

    This function creates visualization of centile curves for all response variables
    in the dataset. It can optionally show the actual data points overlaid on the
    centile curves, with customizable styling based on categorical variables.

    Parameters
    ----------
    model: NormativeModel
        The model to plot the centiles for.
    centiles: List[float] | np.ndarray | None, optional
        The centiles to plot. If None, the default centiles will be used.
    covariate: str | None, optional
        The covariate to plot on the x-axis. If None, the first covariate in the model will be used.
    covariate_range: tuple[float, float], optional
        The range of the covariate to plot on the x-axis. If None, the range of the covariate that was in the train data will be used.
    batch_effects: Dict[str, List[str]] | None | Literal["all"], optional
        The batch effects to plot the centiles for. If None, the batch effect that appears first in alphabetical order will be used.
    scatter_data: NormData | None, optional
        Data to scatter on top of the centiles.
    harmonize_data: bool, optional
        Whether to harmonize the scatter data before plotting. Data will be harmonized to the batch effect for which the centiles were computed.
    hue_data: str, optional
        The column to use for color coding the data. If None, the data will not be color coded.
    markers_data: str, optional
        The column to use for marker styling the data. If None, the data will not be marker styled.
    show_other_data: bool, optional
        Whether to scatter data belonging to groups not in batch_effects.
    save_dir: str | None, optional
        The directory to save the plot to. If None, the plot will not be saved.
    show_centile_labels: bool, optional
        Whether to show the centile labels on the plot.
    show_legend: bool, optional
        Whether to show the legend on the plot.
    plt_kwargs: dict, optional
        Additional keyword arguments for the plot.
    **kwargs: Any, optional
        Additional keyword arguments for the model.compute_centiles method.

    Returns
    -------
    None
        Displays the plot using matplotlib.
    """
    if covariate is None:
        covariate = model.covariates[0]
        assert isinstance(covariate, str)

    cov_min = covariate_range[0] or model.covariate_ranges[covariate]["min"]
    cov_max = covariate_range[1] or model.covariate_ranges[covariate]["max"]
    covariate_range = (cov_min, cov_max)

    if batch_effects == "all":
        batch_effects = model.unique_batch_effects
    elif batch_effects is None:
        batch_effects = {k: [v[0]] for k, v in model.unique_batch_effects.items()}

    if plt_kwargs is None:
        plt_kwargs = {}
    palette = plt_kwargs.pop("cmap", "viridis")

    # Create some synthetic data with a single batch effect
    # The plotted covariate is just a linspace
    centile_covariates = np.linspace(covariate_range[0], covariate_range[1], 150)
    centile_df = pd.DataFrame({covariate: centile_covariates})

    # TODO: use the mean here
    # Any other covariates are taken to be the midpoint between the observed min and max
    for cov in model.covariates:
        if cov != covariate:
            minc = model.covariate_ranges[cov]["min"]
            maxc = model.covariate_ranges[cov]["max"]
            centile_df[cov] = (minc + maxc) / 2

    # Batch effects are the first ones in the highlighted batch effects
    for be, v in batch_effects.items():
        centile_df[be] = v[0]
    # Response vars are all 0, we don't need them
    for rv in model.response_vars:
        centile_df[rv] = 0
    centile_data = NormData.from_dataframe(
        "centile",
        dataframe=centile_df,
        covariates=model.covariates,
        response_vars=model.response_vars,
        batch_effects=list(batch_effects.keys()),
    )  # type:ignore

    if not hasattr(centile_data, "centiles"):
        model.compute_centiles(centile_data, centiles=centiles, **kwargs)
    if scatter_data and show_thrivelines:
        model.compute_thrivelines(scatter_data, z_thrive=z_thrive)

    if not model.has_batch_effect:
        batch_effects = {}

    if harmonize_data and scatter_data:
        if model.has_batch_effect:
            reference_batch_effect = {k: v[0] for k, v in batch_effects.items()}
            model.harmonize(scatter_data, reference_batch_effect=reference_batch_effect)
        else:
            model.harmonize(scatter_data)

    for response_var in model.response_vars:
        _plot_centiles(
            centile_data=centile_data,
            response_var=response_var,
            covariate=covariate,
            covariate_range=covariate_range,
            batch_effects=batch_effects,
            scatter_data=scatter_data,
            harmonize_data=harmonize_data,
            hue_data=hue_data,
            markers_data=markers_data,
            show_other_data=show_other_data,
            show_thrivelines=show_thrivelines,
            palette=palette,
            save_dir=save_dir,
            show_centile_labels=show_centile_labels,
            show_legend=show_legend,
            plt_kwargs=plt_kwargs,
        )


def _plot_centiles(
    centile_data: NormData,
    response_var: str,
    covariate: str = None,  # type: ignore
    covariate_range: tuple[float, float] = (None, None),  # type: ignore
    batch_effects: Dict[str, List[str]] = None,  # type: ignore
    scatter_data: NormData | None = None,
    harmonize_data: bool = True,
    hue_data: str = "site",
    markers_data: str = "sex",
    show_other_data: bool = False,
    show_thrivelines: bool = False,
    palette: str = "viridis",
    save_dir: str | None = None,
    show_centile_labels: bool = True,
    show_legend: bool = True,
    plt_kwargs: dict = None,  # type: ignore
) -> None:
    sns.set_style("whitegrid")
    plt.figure(**plt_kwargs)
    cmap = plt.get_cmap(palette)

    filter_dict = {
        "covariates": covariate,
        "response_vars": response_var,
    }

    filtered = centile_data.sel(filter_dict)

    for centile in centile_data.coords["centile"][::-1]:
        d_mean = abs(centile - 0.5)
        if d_mean == 0:
            thickness = 2
        else:
            thickness = 1
        if d_mean <= 0.25:
            style = "-"

        elif d_mean <= 0.475:
            style = "--"
        else:
            style = ":"

        sns.lineplot(
            x=filtered.X,
            y=filtered.centiles.sel(centile=centile),
            # color=cmap(centile),
            color="black",
            linestyle=style,
            linewidth=thickness,
            zorder=2,
            legend="brief",
        )
        color = cmap(centile)
        font = FontProperties()
        font.set_weight("bold")
        if show_centile_labels:
            plt.text(
                s=centile.item(),
                x=filtered.X[0] - 1,
                y=filtered.centiles.sel(centile=centile)[0],
                color="black",
                horizontalalignment="right",
                verticalalignment="center",
                fontproperties=font,
            )
            plt.text(
                s=centile.item(),
                x=filtered.X[-1] + 1,
                y=filtered.centiles.sel(centile=centile)[-1],
                color="black",
                horizontalalignment="left",
                verticalalignment="center",
                fontproperties=font,
            )

    minx, maxx = plt.xlim()
    plt.xlim(minx - 0.1 * (maxx - minx), maxx + 0.1 * (maxx - minx))

    if scatter_data:
        scatter_filter = scatter_data.sel(filter_dict)
        df = scatter_filter.to_dataframe()
        scatter_data_name = "Y_harmonized" if harmonize_data else "Y"
        thriveline_data_name = "thrive_Y_harmonized" if harmonize_data else "thrive_Y"
        columns = [("X", covariate), (scatter_data_name, response_var)]
        columns.extend([("batch_effects", be.item()) for be in scatter_data.batch_effect_dims])
        df = df[columns]
        df.columns = [c[1] for c in df.columns]
        if batch_effects == {}:
            sns.scatterplot(
                df,
                x=covariate,
                y=response_var,
                label=scatter_data.name,
                color="black",
                s=20,
                alpha=0.6,
                zorder=1,
                linewidth=0,
            )
            if show_thrivelines:
                plt.plot(scatter_filter.thrive_X.to_numpy().T, scatter_filter[thriveline_data_name].to_numpy().T)
        else:
            idx = np.full(len(df), True)
            for j in batch_effects:
                idx = np.logical_and(
                    idx,
                    df[j].isin(batch_effects[j]),
                )
            be_df = df[idx]
            scatter = sns.scatterplot(
                data=be_df,
                x=covariate,
                y=response_var,
                hue=hue_data if hue_data in df else None,
                style=markers_data if markers_data in df else None,
                s=30,
                alpha=0.8,
                zorder=1,
                linewidth=0,
            )
            if show_thrivelines:
                plt.plot(scatter_filter.thrive_X.to_numpy().T, scatter_filter[thriveline_data_name].to_numpy().T)

            if show_other_data:
                non_be_df = df[~idx]
                non_be_df["marker"] = ["Other data"] * len(non_be_df)
                sns.scatterplot(
                    data=non_be_df,
                    x=covariate,
                    y=response_var,
                    color="black",
                    style="marker",
                    linewidth=0,
                    s=20,
                    alpha=0.4,
                    zorder=0,
                )

            if show_legend:
                legend = scatter.get_legend()
                if legend:
                    handles = legend.legend_handles
                    labels = [t.get_text() for t in legend.get_texts()]
                    plt.legend(
                        handles,
                        labels,
                        title_fontsize=10,
                    )
            else:
                plt.legend().remove()

    title = f"Centiles of {response_var}"
    if scatter_data:
        if harmonize_data:
            plotname = f"centiles_{response_var}_{scatter_data.name}_harmonized"
            title = f"{title}\n With harmonized {scatter_data.name} data"
        else:
            plotname = f"centiles_{response_var}_{scatter_data.name}"
            title = f"{title}\n With raw {scatter_data.name} data"
    else:
        plotname = f"centiles_{response_var}"

    plt.title(title)
    plt.xlabel(covariate)
    plt.ylabel(response_var)
    plt.ylim(500, 2500)
    if save_dir:
        plt.savefig(os.path.join(save_dir, f"{plotname}.png"), dpi=300)
    else:
        plt.show(block=False)
    plt.close()

In [69]:
# Download an example dataset
norm_data: NormData = load_fcon1000()

# Select only a few features
features_to_model = [
    "WM-hypointensities",
    "Right-Lateral-Ventricle",
    "Right-Amygdala",
    "CortexVol",
]
norm_data = norm_data.sel({"response_vars": features_to_model})

# Split into train and test sets
train, test = norm_data.train_test_split()

Process: 10589 - 2025-06-18 15:35:38 - Dataset "fcon1000" created.
    - 1078 observations
    - 1078 unique subjects
    - 1 covariates
    - 217 response variables
    - 2 batch effects:
    	sex (2)
	site (23)
    


In [70]:
model = NormativeModel.load("/Users/stijndeboer/Projects/PCN/PCNtoolkit/examples/resources/hbr/save_dir")

In [82]:
plot_centiles(
    model,
    covariate_range=(10, 80),
    covariate="age",  # Which covariate to plot on the x-axis
    show_centile_labels=True,
    # harmonize_data=True,  # Set this to False to see the difference
    show_legend=False,  # Don't show the legend because it crowds the plot
    save_dir="/Users/stijndeboer/Projects/PCN/PCNtoolkit/examples/resources/plots_for_presentation/1",
)

Sampling: []


Process: 10589 - 2025-06-18 15:38:04 - Dataset "centile" created.
    - 150 observations
    - 150 unique subjects
    - 1 covariates
    - 4 response variables
    - 2 batch effects:
    	sex (1)
	site (1)
    
Process: 10589 - 2025-06-18 15:38:04 - Computing centiles for 4 response variables.
Process: 10589 - 2025-06-18 15:38:04 - Computing centiles for CortexVol.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:05 - Computing centiles for Right-Lateral-Ventricle.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:06 - Computing centiles for WM-hypointensities.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:07 - Computing centiles for Right-Amygdala.


Sampling: []
Sampling: []
Sampling: []
Sampling: []


In [83]:
# synthetic = model.synthesize(n_samples=5000, covariate_range_per_batch_effect=True)  # <- will fill in the missing Y data
# synthetic.name = "fcon1000"
plot_centiles(
    model,
    covariate_range=(10, 80),
    covariate="age",  # Which covariate to plot on the x-axis
    scatter_data=norm_data,  # Scatter the train data points
    # batch_effects="all",  # You can set this to "all" to show all batch effects
    show_other_data=True,  # Show data points that do not match any batch effects
    show_centile_labels=True,
    harmonize_data=False,
    # harmonize_data=True,  # Set this to False to see the difference
    show_legend=False,  # Don't show the legend because it crowds the plot
    save_dir="/Users/stijndeboer/Projects/PCN/PCNtoolkit/examples/resources/plots_for_presentation/2",
)

Sampling: []


Process: 10589 - 2025-06-18 15:38:09 - Dataset "centile" created.
    - 150 observations
    - 150 unique subjects
    - 1 covariates
    - 4 response variables
    - 2 batch effects:
    	sex (1)
	site (1)
    
Process: 10589 - 2025-06-18 15:38:09 - Computing centiles for 4 response variables.
Process: 10589 - 2025-06-18 15:38:09 - Computing centiles for CortexVol.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:10 - Computing centiles for Right-Lateral-Ventricle.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:11 - Computing centiles for WM-hypointensities.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:13 - Computing centiles for Right-Amygdala.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  non_be_df["marker"] = ["Other data"] * len(non_be_df)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  non_be_df["marker"] = ["Other data"] * len(non_be_df)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  non_be_df["marker"] = ["Other data"] * len(no

In [84]:
plot_centiles(
    model,
    covariate_range=(10, 80),
    covariate="age",  # Which covariate to plot on the x-axis
    scatter_data=norm_data,  # Scatter the train data points
    batch_effects="all",  # You can set this to "all" to show all batch effects
    show_other_data=True,  # Show data points that do not match any batch effects
    show_centile_labels=True,
    harmonize_data=False,
    # harmonize_data=True,  # Set this to False to see the difference
    show_legend=False,  # Don't show the legend because it crowds the plot
    save_dir="/Users/stijndeboer/Projects/PCN/PCNtoolkit/examples/resources/plots_for_presentation/3",
)

Sampling: []


Process: 10589 - 2025-06-18 15:38:14 - Dataset "centile" created.
    - 150 observations
    - 150 unique subjects
    - 1 covariates
    - 4 response variables
    - 2 batch effects:
    	sex (1)
	site (1)
    
Process: 10589 - 2025-06-18 15:38:14 - Computing centiles for 4 response variables.
Process: 10589 - 2025-06-18 15:38:14 - Computing centiles for CortexVol.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:16 - Computing centiles for Right-Lateral-Ventricle.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:17 - Computing centiles for WM-hypointensities.


Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:18 - Computing centiles for Right-Amygdala.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


In [85]:
plot_centiles(
    model,
    covariate_range=(10, 80),
    covariate="age",  # Which covariate to plot on the x-axis
    scatter_data=norm_data,  # Scatter the train data points
    batch_effects="all",  # You can set this to "all" to show all batch effects
    show_other_data=True,  # Show data points that do not match any batch effects
    show_centile_labels=True,
    harmonize_data=True,
    # harmonize_data=True,  # Set this to False to see the difference
    show_legend=False,  # Don't show the legend because it crowds the plot
    save_dir="/Users/stijndeboer/Projects/PCN/PCNtoolkit/examples/resources/plots_for_presentation/4",
)

Sampling: []


Process: 10589 - 2025-06-18 15:38:20 - Dataset "centile" created.
    - 150 observations
    - 150 unique subjects
    - 1 covariates
    - 4 response variables
    - 2 batch effects:
    	sex (1)
	site (1)
    
Process: 10589 - 2025-06-18 15:38:20 - Computing centiles for 4 response variables.
Process: 10589 - 2025-06-18 15:38:20 - Computing centiles for CortexVol.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:21 - Computing centiles for Right-Lateral-Ventricle.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:23 - Computing centiles for WM-hypointensities.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:24 - Computing centiles for Right-Amygdala.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:25 - Harmonizing data on 4 response variables.
Process: 10589 - 2025-06-18 15:38:25 - Harmonizing data for Right-Amygdala.


Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:26 - Harmonizing data for Right-Lateral-Ventricle.


Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:27 - Harmonizing data for WM-hypointensities.


Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:28 - Harmonizing data for CortexVol.


Sampling: []


In [86]:
norm_data

In [87]:
plot_centiles(
    model,
    covariate_range=(10, 80),
    covariate="age",  # Which covariate to plot on the x-axis
    scatter_data=norm_data,  # Scatter the train data points
    batch_effects={"sex": ["M", "F"], "site": ["AnnArbor_a"]},  # You can set this to "all" to show all batch effects
    show_other_data=False,  # Show data points that do not match any batch effects
    show_centile_labels=True,
    harmonize_data=True,
    # harmonize_data=True,  # Set this to False to see the difference
    show_legend=False,  # Don't show the legend because it crowds the plot
    save_dir="/Users/stijndeboer/Projects/PCN/PCNtoolkit/examples/resources/plots_for_presentation/5",
)

Sampling: []


Process: 10589 - 2025-06-18 15:38:29 - Dataset "centile" created.
    - 150 observations
    - 150 unique subjects
    - 1 covariates
    - 4 response variables
    - 2 batch effects:
    	sex (1)
	site (1)
    
Process: 10589 - 2025-06-18 15:38:29 - Computing centiles for 4 response variables.
Process: 10589 - 2025-06-18 15:38:29 - Computing centiles for CortexVol.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:30 - Computing centiles for Right-Lateral-Ventricle.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:32 - Computing centiles for WM-hypointensities.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:33 - Computing centiles for Right-Amygdala.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:34 - Harmonizing data on 4 response variables.
Process: 10589 - 2025-06-18 15:38:34 - Harmonizing data for Right-Amygdala.


Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:35 - Harmonizing data for Right-Lateral-Ventricle.


Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:36 - Harmonizing data for WM-hypointensities.


Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:37 - Harmonizing data for CortexVol.


Sampling: []


In [88]:
plot_centiles(
    model,
    covariate_range=(10, 80),
    covariate="age",  # Which covariate to plot on the x-axis
    scatter_data=norm_data,  # Scatter the train data points
    batch_effects={
        "sex": ["M", "F"],
        "site": [
            "AnnArbor_a",
            "Beijing_Zang",
        ],
    },  # You can set this to "all" to show all batch effects
    show_other_data=False,  # Show data points that do not match any batch effects
    show_centile_labels=True,
    harmonize_data=True,
    # harmonize_data=True,  # Set this to False to see the difference
    show_legend=False,  # Don't show the legend because it crowds the plot
    save_dir="/Users/stijndeboer/Projects/PCN/PCNtoolkit/examples/resources/plots_for_presentation/6",
)

Sampling: []


Process: 10589 - 2025-06-18 15:38:39 - Dataset "centile" created.
    - 150 observations
    - 150 unique subjects
    - 1 covariates
    - 4 response variables
    - 2 batch effects:
    	sex (1)
	site (1)
    
Process: 10589 - 2025-06-18 15:38:39 - Computing centiles for 4 response variables.
Process: 10589 - 2025-06-18 15:38:39 - Computing centiles for CortexVol.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:40 - Computing centiles for Right-Lateral-Ventricle.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:41 - Computing centiles for WM-hypointensities.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:42 - Computing centiles for Right-Amygdala.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:44 - Harmonizing data on 4 response variables.
Process: 10589 - 2025-06-18 15:38:44 - Harmonizing data for Right-Amygdala.


Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:45 - Harmonizing data for Right-Lateral-Ventricle.


Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:46 - Harmonizing data for WM-hypointensities.


Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:47 - Harmonizing data for CortexVol.


Sampling: []


In [89]:
plot_centiles(
    model,
    covariate_range=(10, 80),
    covariate="age",  # Which covariate to plot on the x-axis
    scatter_data=norm_data,  # Scatter the train data points
    batch_effects={
        "sex": ["M", "F"],
        "site": [
            "AnnArbor_a",
            "Beijing_Zang",
            "Cambridge_Buckner",
        ],
    },  # You can set this to "all" to show all batch effects
    show_other_data=False,  # Show data points that do not match any batch effects
    show_centile_labels=True,
    harmonize_data=True,
    # harmonize_data=True,  # Set this to False to see the difference
    show_legend=False,  # Don't show the legend because it crowds the plot
    save_dir="/Users/stijndeboer/Projects/PCN/PCNtoolkit/examples/resources/plots_for_presentation/7",
)

Sampling: []


Process: 10589 - 2025-06-18 15:38:48 - Dataset "centile" created.
    - 150 observations
    - 150 unique subjects
    - 1 covariates
    - 4 response variables
    - 2 batch effects:
    	sex (1)
	site (1)
    
Process: 10589 - 2025-06-18 15:38:48 - Computing centiles for 4 response variables.
Process: 10589 - 2025-06-18 15:38:48 - Computing centiles for CortexVol.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:49 - Computing centiles for Right-Lateral-Ventricle.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:50 - Computing centiles for WM-hypointensities.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:52 - Computing centiles for Right-Amygdala.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:53 - Harmonizing data on 4 response variables.
Process: 10589 - 2025-06-18 15:38:53 - Harmonizing data for Right-Amygdala.


Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:54 - Harmonizing data for Right-Lateral-Ventricle.


Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:55 - Harmonizing data for WM-hypointensities.


Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:56 - Harmonizing data for CortexVol.


Sampling: []


In [90]:
plot_centiles(
    model,
    covariate_range=(10, 80),
    covariate="age",  # Which covariate to plot on the x-axis
    scatter_data=norm_data,  # Scatter the train data points
    batch_effects={
        "sex": ["M", "F"],
        "site": [
            "AnnArbor_a",
            "Beijing_Zang",
            "Cambridge_Buckner",
            "Milwaukee_b",
        ],
    },  # You can set this to "all" to show all batch effects
    show_other_data=False,  # Show data points that do not match any batch effects
    show_centile_labels=True,
    harmonize_data=True,
    # harmonize_data=True,  # Set this to False to see the difference
    show_legend=False,  # Don't show the legend because it crowds the plot
    save_dir="/Users/stijndeboer/Projects/PCN/PCNtoolkit/examples/resources/plots_for_presentation/8",
)

Sampling: []


Process: 10589 - 2025-06-18 15:38:57 - Dataset "centile" created.
    - 150 observations
    - 150 unique subjects
    - 1 covariates
    - 4 response variables
    - 2 batch effects:
    	sex (1)
	site (1)
    
Process: 10589 - 2025-06-18 15:38:57 - Computing centiles for 4 response variables.
Process: 10589 - 2025-06-18 15:38:57 - Computing centiles for CortexVol.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:38:58 - Computing centiles for Right-Lateral-Ventricle.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:39:00 - Computing centiles for WM-hypointensities.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:39:01 - Computing centiles for Right-Amygdala.


Sampling: []
Sampling: []
Sampling: []
Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:39:02 - Harmonizing data on 4 response variables.
Process: 10589 - 2025-06-18 15:39:02 - Harmonizing data for Right-Amygdala.


Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:39:03 - Harmonizing data for Right-Lateral-Ventricle.


Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:39:04 - Harmonizing data for WM-hypointensities.


Sampling: []
Sampling: []


Process: 10589 - 2025-06-18 15:39:05 - Harmonizing data for CortexVol.


Sampling: []
