In [None]:
import torch
from autoemulate.emulators import GaussianProcess
from autoemulate.emulators.random_forest import RandomForest
from autoemulate.emulators.transformed.base import TransformedEmulator
from autoemulate.transforms import PCATransform, VAETransform, StandardizeTransform
from sklearn.datasets import make_regression
import torchmetrics
from autoemulate.core.model_selection import evaluate
from autoemulate.core.types import TensorLike

# Uncomment to enable logging for GPs
import logging
# logging.basicConfig(level=logging.INFO)

def make_data(
    random_state: int = 42,
    n_samples: int = 200,
    n_informative: int = 2,
    n_features: int = 5,
    noise: float = 0.2,
    n_targets: int = 500,
):
    """Build a synthetic regression dataset as float32 torch tensors.

    All arguments are forwarded unchanged to ``sklearn.datasets.make_regression``.

    Parameters
    ----------
    random_state : int
        Seed passed to ``make_regression``.
    n_samples : int
        Number of samples to generate.
    n_informative : int
        Number of informative features.
    n_features : int
        Total number of input features.
    noise : float
        Noise level passed to ``make_regression``.
    n_targets : int
        Number of regression targets.

    Returns
    -------
    tuple
        ``(x, y)`` where both entries are ``torch.float32`` tensors holding the
        generated inputs and targets.
    """
    # `coef=True` makes make_regression return a third value (the coefficients),
    # which is not needed here and is discarded.
    features, targets, _coef = make_regression(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=n_informative,
        n_targets=n_targets,
        noise=noise,
        coef=True,
        random_state=random_state,
    )
    return (
        torch.tensor(features, dtype=torch.float32),
        torch.tensor(targets, dtype=torch.float32),
    )

# Training set (seed 42) with 10 targets
x, y = make_data(random_state=42, n_targets=10)

# Test set generated with a different seed (43), same number of targets
x2, y2 = make_data(random_state=43, n_targets=10)

# Fit a 2-component PCA transform on the training inputs
pca = PCATransform(n_components=2)
pca.fit(x)

# Apply the transform, then map the result back through its inverse
x_reduced = pca(x)
x_restored = pca.inv(x_reduced)

# Shapes of the transformed and the inverted tensors
print(x_reduced.shape)
print(x_restored.shape)

In [None]:
# Same round trip on the outputs: fit a 1-component PCA on y
pca_y = PCATransform(n_components=1)
pca_y.fit(y)

# Forward transform and its inverse
y_reduced = pca_y(y)
y_restored = pca_y.inv(y_reduced)

# Shapes of the first 10 rows before and after inversion
print(y_reduced[:10].shape)
print(y_restored[:10].shape)


In [None]:
# Wrap a GaussianProcess in a TransformedEmulator, passing two input transforms
# (PCA with 4 components, VAE with latent_dim=2) and two output transforms
# (standardization, PCA with 1 component).
# NOTE(review): presumably the transforms are applied in list order and inverted
# when predicting -- confirm against TransformedEmulator.
emulator = TransformedEmulator(
    x=x,
    y=y,
    x_transforms=[PCATransform(n_components=4), VAETransform(latent_dim=2)],
    y_transforms=[StandardizeTransform(), PCATransform(n_components=1)],
    model=GaussianProcess,
    epochs=100,
)

In [None]:
# Fit the transformed emulator on the training data
emulator.fit(x, y)

In [None]:
# Display the emulator's `full_covariance` attribute
emulator.full_covariance

In [None]:
# Fit
# NOTE(review): `emulator.fit(x, y)` is also called in an earlier cell on the
# same emulator -- confirm whether this second fit is intentional.
emulator.fit(x, y)

In [None]:
# Predict on the first 30 rows of the training inputs
pred = emulator.predict(x[:30])


In [None]:
# Example using transformed emulator with GP and Random Forest
# `r2_metric` is imported from the same module that provides `evaluate`
# (`autoemulate.core.model_selection`), consistent with the imports at the top
# of the notebook.
from autoemulate.core.model_selection import r2_metric


for model in [GaussianProcess, RandomForest]:
    # Create a transformed emulator around the current model class.
    # New transform instances are built on every iteration, so nothing fitted
    # for one model is reused for the next.
    emulator = TransformedEmulator(
        x=x,
        y=y,
        x_transforms=[PCATransform(n_components=4), VAETransform(latent_dim=2)],
        y_transforms=[StandardizeTransform(), PCATransform(n_components=1)],
        model=model,
        epochs=100,
    )

    # Fit
    emulator.fit(x, y)

    # Predict on the first 30 training rows; a prediction that is not a plain
    # tensor is reduced to its `.mean` before scoring.
    pred = emulator.predict(x[:30])
    pred = pred if isinstance(pred, TensorLike) else pred.mean
    print(f"Train R2: {evaluate(pred, y[:30], r2_metric()):.3f}")

    # Predict on the held-out test data and score it the same way
    pred = emulator.predict(x2)
    pred = pred if isinstance(pred, TensorLike) else pred.mean
    print(f"Test  R2: {evaluate(pred, y2, r2_metric()):.3f}")

In [None]:
from gpytorch.distributions import MultitaskMultivariateNormal, MultivariateNormal

# Mean zero as just test covariance
# Reference distribution: zero mean of shape (1, 10) with a 10x10 identity covariance.
mean = torch.zeros(1, 10)
cov = torch.eye(10)
mvn = MultitaskMultivariateNormal(mean=mean, covariance_matrix=cov)

# Generate some samples
# Seed first so that the random scales and the draws below are reproducible.
torch.manual_seed(42)
# One integer-valued scale per column, drawn from [0, 100) and stored as float32.
# NOTE(review): the lower bound is 0, so a scale of exactly 0 is possible in
# principle, which would zero out one diagonal entry of the covariance below --
# confirm this cannot occur with the seed used here.
scales = torch.randint(100, size=(1, 10), dtype=torch.float32)
# `cov * scales` broadcasts the (1, 10) scales over the identity matrix, giving a
# diagonal covariance; 100 draws are taken and size-1 dimensions are squeezed out.
samples= MultivariateNormal(mean=mean, covariance_matrix=cov * scales).sample(torch.Size([100])).squeeze()

# Fit standardize transform
t = StandardizeTransform()
t.fit(samples)

In [None]:
# Compare the covariance obtained by inverting through sampling with the one
# obtained from the analytical inverse.
inv_from_samples = t._inverse_sample(mvn, n_samples=10000)
inv_analytical = t._inverse_gaussian(mvn)
sampled = inv_from_samples.covariance_matrix
analytical = inv_analytical.covariance_matrix

# Element-wise absolute difference, normalised by the sampled covariance
diff = sampled - analytical
diff_normed = diff.abs() / sampled

# Only the diagonal is checked: it must be within 0.05 of zero.
rel_err_diag = diff_normed.diagonal()
torch.allclose(rel_err_diag, torch.zeros_like(rel_err_diag), atol=0.05)

In [None]:
# Compare analytical and sampling solutions for inverse transforms
import matplotlib.pyplot as plt
import numpy as np
from autoemulate.core.types import GaussianLike

for t in [VAETransform(latent_dim=2), PCATransform(n_components=2)]:
    # 5-target dataset, generated with make_data's default random_state
    x, y = make_data(n_targets=5, n_samples=200)

    # Fit the transform on the outputs and train a GP on the transformed targets
    t.fit(y)
    z = t(y)
    gp = GaussianProcess(x, z, standardize_x=True, standardize_y=True)
    gp.fit(x, z)

    # Predict in the transformed space for the first half of the inputs
    n_half = x.shape[0] // 2
    z_pred = gp.predict(x[:n_half])
    assert isinstance(z_pred, GaussianLike)

    print(str(t))
    # Shared histogram bins for every plot of this transform
    hist_bins = np.arange(-1, 1, 0.01)
    for n_samples in [10, 100, 1000, 10000]:
        # Invert the prediction by sampling and analytically, then compare covariances
        inv_sample = t._inverse_sample(z_pred, n_samples=n_samples)
        inv_gaussian = t._inverse_gaussian(z_pred)
        diff = inv_sample.covariance_matrix - inv_gaussian.covariance_matrix

        plt.title(f"n_samples={n_samples}")
        # Overlay the three histograms in the same order: sampled, analytical, difference
        for cov_values, series_label in (
            (inv_sample.covariance_matrix, 'Sampled'),
            (inv_gaussian.covariance_matrix, "Analytical"),
            (diff, 'Difference'),
        ):
            plt.hist(
                cov_values.flatten().detach().numpy(),
                bins=hist_bins,
                alpha=0.5,
                label=series_label,
            )
        plt.legend()
        plt.show()

In [None]:
# Visualize the covariance matrices and difference (flatten non-batch dimension)
# Uses `inv_gaussian` and `inv_sample` as left by the preceding comparison cell.
cov_inv_gaussian = inv_gaussian.covariance_matrix.flatten(1)
cov_inv_sample = inv_sample.covariance_matrix.flatten(1)
diff = (cov_inv_gaussian - cov_inv_sample)
fig, axs = plt.subplots(1, 3, figsize=(15, 5))

# (panel title, tensor to draw, use a symmetric diverging colour scale?)
panels = [
    ("Analytical", cov_inv_gaussian, False),
    ("Sampled", cov_inv_sample, False),
    ("Difference (analytical - sampled)", diff, True),
]
for ax, (panel_title, cov_tensor, diverging) in zip(axs, panels):
    # A separate local name is used so the notebook's input tensor `x` is not overwritten.
    values = cov_tensor.detach().numpy()
    if diverging:
        # Centre the colour scale on zero so the sign of the difference is visible
        limit = np.abs(values).max()
        hmap = ax.pcolormesh(values, vmin=-limit, vmax=limit, cmap="RdBu")
    else:
        hmap = ax.pcolormesh(values)
    ax.set_title(panel_title)
    # Attach each colorbar explicitly to the axes it describes
    fig.colorbar(hmap, ax=ax)

In [None]:
# Compare analytical and sampling solutions for transformed emulators without standardization
import matplotlib.pyplot as plt
import numpy as np
from autoemulate.core.types import GaussianLike

for t in [VAETransform(latent_dim=2), PCATransform(n_components=2)]:
    # 3-target dataset, generated with make_data's default arguments otherwise
    x, y = make_data(n_targets=3)
    # Shared histogram bins for every plot of this transform
    hist_bins = list(np.arange(-1, 1, 0.01))
    for n_samples in [10, 100, 1000, 10000]:
        # Emulator with a single output transform and no input transforms
        gp = TransformedEmulator(
            x=x,
            y=y,
            x_transforms=[],
            y_transforms=[t],
            model=GaussianProcess,
            epochs=50,
            n_samples=n_samples,
            full_covariance=True,
            output_from_samples=False,
        )
        gp.fit(x, y)

        # Query the wrapped model directly on the first half of the inputs
        n_half = x.shape[0] // 2
        z_pred = gp.model.predict(x[:n_half])
        assert isinstance(z_pred, GaussianLike)

        # Invert the prediction analytically and by sampling
        inv_gaussian = gp._inv_transform_y_gaussian(z_pred)
        inv_sample = gp._inv_transform_y_gaussian_sample(z_pred)
        print(str(t))
        diff = inv_sample.covariance_matrix - inv_gaussian.covariance_matrix

        plt.title(f"n_samples={n_samples}")
        # Overlay the three histograms in the same order: sampled, analytical, difference
        for cov_values, series_label in (
            (inv_sample.covariance_matrix, 'Sampled'),
            (inv_gaussian.covariance_matrix, 'Analytical'),
            (diff, 'Difference'),
        ):
            plt.hist(
                cov_values.flatten().detach().numpy(),
                bins=hist_bins,
                alpha=0.5,
                label=series_label,
            )
        plt.legend()
        plt.show()

In [None]:
# Compare analytical and sampling solutions for transformed emulators with standardization
import matplotlib.pyplot as plt
import numpy as np
from autoemulate.core.types import GaussianLike

for t in [VAETransform(latent_dim=2), PCATransform(n_components=2)]:
    # 3-target dataset, generated with make_data's default arguments otherwise
    x, y = make_data(n_targets=3)
    # Shared histogram bins for every plot of this transform
    hist_bins = list(np.arange(-1, 1, 0.01))
    for n_samples in [10, 100, 1000, 10000]:
        # Emulator that standardizes the inputs, and standardizes the outputs
        # before passing them to the transform under test
        gp = TransformedEmulator(
            x=x,
            y=y,
            x_transforms=[StandardizeTransform()],
            y_transforms=[StandardizeTransform(), t],
            model=GaussianProcess,
            epochs=50,
            n_samples=n_samples,
            full_covariance=True,
            output_from_samples=False,
        )
        gp.fit(x, y)

        # Query the wrapped model directly on the first half of the inputs.
        # NOTE(review): the raw `x` is passed here although `x_transforms`
        # contains a StandardizeTransform -- confirm whether the inputs should
        # be transformed before calling `gp.model.predict`.
        n_half = x.shape[0] // 2
        z_pred = gp.model.predict(x[:n_half])
        assert isinstance(z_pred, GaussianLike)

        # Invert the prediction analytically and by sampling
        inv_gaussian = gp._inv_transform_y_gaussian(z_pred)
        inv_sample = gp._inv_transform_y_gaussian_sample(z_pred)
        print(str(t))
        diff = inv_sample.covariance_matrix - inv_gaussian.covariance_matrix

        plt.title(f"n_samples={n_samples}")
        # Overlay the three histograms in the same order: sampled, analytical, difference
        for cov_values, series_label in (
            (inv_sample.covariance_matrix, 'Sampled'),
            (inv_gaussian.covariance_matrix, 'Analytical'),
            (diff, 'Difference'),
        ):
            plt.hist(
                cov_values.flatten().detach().numpy(),
                bins=hist_bins,
                alpha=0.5,
                label=series_label,
            )
        plt.legend()
        plt.show()