In [23]:
import os

import numpy as np
import pandas as pd
import sklearn.model_selection

# Seed NumPy's legacy global RNG so every np.random.* draw in this notebook
# is repeatable from a fresh kernel.
RANDOM_SEED = 1
np.random.seed(RANDOM_SEED)

In [24]:
# --- Synthetic data ---------------------------------------------------------
# Covariates (X) and responses (Y) are independent standard-normal draws.
# Each sample also gets two batch-effect labels: a site in {1, 2, 3} and a
# sex in {1, 2}. The order of the np.random calls below fixes the generated
# values for a given seed, so it must not be changed.
N_samples = 10000
N_features = 2
N_covariates = 2
X = np.random.randn(N_samples, N_covariates)
Y = np.random.randn(N_samples, N_features)
sites = 1 + np.random.choice(a=3, size=N_samples, replace=True)
sex = 1 + np.random.choice(a=2, size=N_samples, replace=True)
batch_effects = np.column_stack((sites, sex))  # column 0: site, column 1: sex

# --- Original vs. transfer cohorts ------------------------------------------
# Site 3 is held out as the "transfer" cohort; sites 1 and 2 form the
# "original" cohort.
site_column = batch_effects[:, 0]
original_data_mask = site_column != 3
transfer_data_mask = site_column == 3
X_or, Y_or, be_or = (arr[original_data_mask] for arr in (X, Y, batch_effects))
X_tr, Y_tr, be_tr = (arr[transfer_data_mask] for arr in (X, Y, batch_effects))

# --- Train / test splits ----------------------------------------------------
# 80/20 split of each cohort with a fixed random_state, stratified on the sex
# column so both labels keep the same proportions in train and test.
split_options = dict(test_size=0.2, random_state=1)

X_tr_or, X_ts_or, Y_tr_or, Y_ts_or, be_tr_or, be_ts_or = (
    sklearn.model_selection.train_test_split(
        X_or, Y_or, be_or, stratify=be_or[:, 1], **split_options
    )
)

X_tr_tr, X_ts_tr, Y_tr_tr, Y_ts_tr, be_tr_tr, be_ts_tr = (
    sklearn.model_selection.train_test_split(
        X_tr, Y_tr, be_tr, stratify=be_tr[:, 1], **split_options
    )
)


In [25]:
import pickle

# Directory the pickled splits are written to. It is derived from the current
# user's home directory rather than a hard-coded absolute path, so the
# notebook runs unchanged on other machines and accounts.
tempdir = os.path.join(os.path.expanduser("~"), "temp")
os.makedirs(tempdir, exist_ok=True)

# File stem -> array to store. Naming: <X|Y|be>_<tr|ts>_<or|tr>, i.e.
# covariates / responses / batch effects, train / test split,
# original / transfer cohort.
splits_to_save = {
    # Original data
    "X_tr_or": X_tr_or,
    "Y_tr_or": Y_tr_or,
    "be_tr_or": be_tr_or,
    "X_ts_or": X_ts_or,
    "Y_ts_or": Y_ts_or,
    "be_ts_or": be_ts_or,
    # Transfer data
    "X_tr_tr": X_tr_tr,
    "Y_tr_tr": Y_tr_tr,
    "be_tr_tr": be_tr_tr,
    "X_ts_tr": X_ts_tr,
    "Y_ts_tr": Y_ts_tr,
    "be_ts_tr": be_ts_tr,
}

# Each array is wrapped in a DataFrame and pickled to "<tempdir>/<stem>.pkl".
for stem, array in splits_to_save.items():
    with open(os.path.join(tempdir, f"{stem}.pkl"), "wb") as f:
        pickle.dump(pd.DataFrame(array), f)


In [26]:
import pcntoolkit as ptk

# Fit the normative model on the original-cohort training split and evaluate
# it on the original-cohort test split. All inputs are the pickles written to
# `tempdir`; covariates and responses are both standardized, and the fitted
# model is saved (savemodel=True) for the later transfer step.
estimate_args = {
    "covfile": os.path.join(tempdir, "X_tr_or.pkl"),
    "respfile": os.path.join(tempdir, "Y_tr_or.pkl"),
    "trbefile": os.path.join(tempdir, "be_tr_or.pkl"),
    "testcov": os.path.join(tempdir, "X_ts_or.pkl"),
    "testresp": os.path.join(tempdir, "Y_ts_or.pkl"),
    "tsbefile": os.path.join(tempdir, "be_ts_or.pkl"),
    "inscaler": "standardize",
    "outscaler": "standardize",
    "savemodel": True,
    "alg": "hbr",
}
ptk.normative.estimate(**estimate_args)


inscaler: standardize
outscaler: standardize
Processing data in /Users/stijndeboer/temp/Y_tr_or.pkl
Estimating model  1 of 2


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag_grad...
Sequential sampling (1 chains in 1 job)
NUTS: [mu_slope_mu, sigma_slope_mu, offset_slope_mu, mu_intercept_mu, sigma_intercept_mu, offset_intercept_mu, mu_sigma, sigma_sigma, sigma]


Output()

In [5]:
# Directory holding the estimated model and receiving the transfer outputs.
# The metadata-loading cell reads "Models/meta_data.md" and
# "Models/meta_data_transfer.md" relative to the working directory, so the
# same directory is resolved here from the working directory instead of
# being hard-coded as an absolute, user-specific path.
# NOTE(review): assumes the notebook is run from the directory that contains
# "Models" (the same assumption the relative reads already make).
model_dir = os.path.abspath("Models")

# Transfer the saved model to the held-out cohort: adapt on the transfer
# training split, evaluate on the transfer test split. Scaling options match
# the ones used when the model was estimated; outputs get a "_transfer" suffix.
ptk.normative.transfer(
    covfile=os.path.join(tempdir, "X_tr_tr.pkl"),
    respfile=os.path.join(tempdir, "Y_tr_tr.pkl"),
    trbefile=os.path.join(tempdir, "be_tr_tr.pkl"),
    testcov=os.path.join(tempdir, "X_ts_tr.pkl"),
    testresp=os.path.join(tempdir, "Y_ts_tr.pkl"),
    tsbefile=os.path.join(tempdir, "be_ts_tr.pkl"),
    alg="hbr",
    inscaler="standardize",
    outscaler="standardize",
    model_path=model_dir,
    output_path=model_dir,
    outputsuffix="_transfer",
)


Loading data ...
Using HBR transform...
Transferring model  1 of 2


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag_grad...
Sequential sampling (1 chains in 1 job)
NUTS: [mu_slope_mu, sigma_slope_mu, offset_slope_mu, mu_intercept_mu, sigma_intercept_mu, offset_intercept_mu, mu_sigma, sigma_sigma, sigma]


Output()

Sampling 1 chain for 500 tune and 1_000 draw iterations (500 + 1_000 draws total) took 12 seconds.
Only one chain was sampled, this makes it impossible to run some convergence checks
Sampling: [y_like]


Output()

Using HBR transform...
Transferring model  2 of 2


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag_grad...
Sequential sampling (1 chains in 1 job)
NUTS: [mu_slope_mu, sigma_slope_mu, offset_slope_mu, mu_intercept_mu, sigma_intercept_mu, offset_intercept_mu, mu_sigma, sigma_sigma, sigma]


Output()

Sampling 1 chain for 500 tune and 1_000 draw iterations (500 + 1_000 draws total) took 12 seconds.
Only one chain was sampled, this makes it impossible to run some convergence checks
Sampling: [y_like]


Output()

Evaluating the model ...
Writing outputs ...


(array([[-0.0110398 , -0.06328177],
        [-0.03766446,  0.02061658],
        [-0.03117988, -0.04809136],
        ...,
        [ 0.03585486, -0.07153488],
        [-0.04672195, -0.01762015],
        [-0.03315978, -0.01259515]]),
 array([[0.95314577, 1.00393766],
        [0.91900985, 0.94084869],
        [1.03862792, 0.90314728],
        ...,
        [1.03915003, 0.95055646],
        [0.98424361, 1.0096423 ],
        [1.0244577 , 1.10469841]]),
 array([[ 1.33417502, -0.20998889],
        [-1.29727415, -0.58594985],
        [-0.02144152, -0.09143316],
        ...,
        [-1.53414995,  0.04511795],
        [-0.80707082,  0.04739652],
        [-2.10294282, -0.14015961]]))

In [6]:
def _read_pickle(path):
    """Unpickle and return the object stored at ``path``.

    Only use this on files produced by this notebook's own runs: unpickling
    untrusted data can execute arbitrary code.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Metadata written when the original model was estimated
meta_data = _read_pickle(os.path.join("Models", "meta_data.md"))
print(meta_data)

# Metadata written by the transfer step (note the "_transfer" suffix)
meta_data_transfer = _read_pickle(os.path.join("Models", "meta_data_transfer.md"))
print(meta_data_transfer)


{'valid_voxels': array([0, 1]), 'fold_num': 1, 'mean_resp': [array([0.01032056, 0.00951302])], 'std_resp': [array([1.01259045, 1.00453476])], 'scaler_cov': [<pcntoolkit.util.utils.scaler object at 0x17f41b9e0>], 'scaler_resp': [<pcntoolkit.util.utils.scaler object at 0x17fb26390>], 'regressor': 'hbr', 'inscaler': 'standardize', 'outscaler': 'standardize', 'versions': {'Python': '3.12.0', 'pytensor': '2.26.3', 'PyMC': '5.18.2', 'PCNtoolkit': ''}}
{'valid_voxels': array([0, 1]), 'fold_num': 1, 'mean_resp': [array([0.01032056, 0.00951302])], 'std_resp': [array([1.01259045, 1.00453476])], 'scaler_cov': <pcntoolkit.util.utils.scaler object at 0x17fd7a0c0>, 'scaler_resp': <pcntoolkit.util.utils.scaler object at 0x17f52aff0>, 'regressor': 'hbr', 'inscaler': 'standardize', 'outscaler': 'standardize', 'versions': {'Python': '3.12.0', 'pytensor': '2.26.3', 'PyMC': '5.18.2', 'PCNtoolkit': ''}}


In [21]:
# Original model: the metadata stores each scaler inside a one-element list,
# hence the [0]. Apply them to the original-cohort training data.
cov_scaler_or = meta_data["scaler_cov"][0]
resp_scaler_or = meta_data["scaler_resp"][0]
X_tr_or_scaled = cov_scaler_or.transform(X_tr_or)
Y_tr_or_scaled = resp_scaler_or.transform(Y_tr_or)

# Transfer model: the metadata stores the scaler objects directly (no list).
# Apply them to the original and transfer training data stacked row-wise.
X_train_all = np.vstack((X_tr_or, X_tr_tr))
Y_train_all = np.vstack((Y_tr_or, Y_tr_tr))
X_all_scaled = meta_data_transfer["scaler_cov"].transform(X_train_all)
Y_all_scaled = meta_data_transfer["scaler_resp"].transform(Y_train_all)


In [22]:
atol = 1e-3

# Correctly standardized data has, per column, a mean of ~0 and a standard
# deviation of ~1 (within `atol`). For each scaled array print the mean check
# followed by the std check: original-scaler X and Y first, then
# transfer-scaler X and Y, giving eight booleans in total.
for scaled in (X_tr_or_scaled, Y_tr_or_scaled, X_all_scaled, Y_all_scaled):
    n_columns = scaled.shape[1]
    print(np.allclose(scaled.mean(axis=0), np.zeros(n_columns), atol=atol))
    print(np.allclose(scaled.std(axis=0), np.ones(n_columns), atol=atol))


True
True
True
True
True
True
True
True
