In [None]:
import torch
from autoemulate.emulators import GaussianProcess
from autoemulate.emulators.random_forest import RandomForest
from autoemulate.emulators.transformed.base import TransformedEmulator
from autoemulate.transforms import PCATransform, VAETransform, StandardizeTransform
from sklearn.datasets import make_regression
import torchmetrics
from autoemulate.core.model_selection import evaluate
from autoemulate.core.types import TensorLike
from autoemulate.core.compare import AutoEmulate

# Uncomment to enable logging for GPs
import logging
# logging.basicConfig(level=logging.INFO)

def make_data(
    random_state: int = 42,
    n_samples: int = 200,
    n_informative: int = 2,
    n_features: int = 5,
    noise: float = 0.2,
    n_targets: int = 500,
):
    """Generate a synthetic regression dataset as float32 torch tensors.

    Thin wrapper around ``sklearn.datasets.make_regression``; every argument
    is forwarded unchanged under the same keyword.

    Parameters
    ----------
    random_state : int
        Seed forwarded to ``make_regression``.
    n_samples : int
        Number of samples to generate.
    n_informative : int
        Number of informative features.
    n_features : int
        Total number of input features.
    noise : float
        Noise level forwarded to ``make_regression``.
    n_targets : int
        Number of regression targets.

    Returns
    -------
    tuple
        ``(x, y)``: the generated inputs and targets, both converted to
        ``torch.float32`` tensors.
    """
    # coef=True makes make_regression return a third value (the coefficients),
    # which is not needed here and is discarded.
    features, targets, _coef = make_regression(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=n_informative,
        n_targets=n_targets,
        noise=noise,
        random_state=random_state,
        coef=True,
    )
    return tuple(
        torch.tensor(array, dtype=torch.float32) for array in (features, targets)
    )

# Draw ONE regression problem and split it into train and held-out parts.
# Calling make_data twice with different random_state values would also
# re-draw the ground-truth coefficients inside make_regression, so the second
# dataset would follow a different target function and could not serve as a
# meaningful test set for an emulator trained on the first.
n_train = 200
x_all, y_all = make_data(
    random_state=42,
    n_samples=2 * n_train,
    n_features=10,
    n_targets=2,
    noise=0.01,
)

# Train data
x, y = x_all[:n_train], y_all[:n_train]

# Test data (held out, drawn from the same underlying function as the train data)
x2, y2 = x_all[n_train:], y_all[n_train:]


In [None]:
# Candidate input pipelines: an empty list (no transforms), or standardize then PCA.
x_transform_options = [
    [],
    [StandardizeTransform(), PCATransform(n_components=5)],
]

# Candidate output pipelines: an empty list (no transforms), or standardize then PCA.
y_transform_options = [
    [],
    [StandardizeTransform(), PCATransform(n_components=1)],
]

# Set up the comparison over both emulator classes and the transform candidates.
ae = AutoEmulate(
    x,
    y,
    models=[GaussianProcess, RandomForest],
    x_transforms_list=x_transform_options,
    y_transforms_list=y_transform_options,
)


In [None]:
# Run the comparison and keep its results for the cells below.
# NOTE(review): the positional argument 4 is undocumented here; presumably a
# fold/iteration count. Confirm against AutoEmulate.compare.
outputs = ae.compare(4)


In [None]:
# Display the raw results returned by ae.compare.
outputs

In [None]:
import pandas as pd

# Rank the compared configurations: highest R^2 first, ties broken by the
# LOWEST RMSE (RMSE is an error measure, so smaller is better). A single
# ascending=False would sort both columns descending and prefer the worst RMSE.
df = pd.DataFrame.from_records(outputs).sort_values(
    by=["r2_score", "rmse_score"],
    ascending=[False, True],
)

# sort_values keeps the original row labels, so the first label is the position
# of the top-ranked record in `outputs`.
best_model_idx = df.index[0]
df


In [None]:
# Display the result record selected by best_model_idx.
outputs[best_model_idx]

In [None]:
# Rebuild the selected configuration as a standalone emulator and fit it on
# the training data.
best = outputs[best_model_idx]

em = TransformedEmulator(
    x,
    y,
    model=best["model_cls"],
    x_transforms=best["x_transforms"],
    y_transforms=best["y_transforms"],
    **best["config"],
)
em.fit(x, y)

In [None]:
# Import r2_metric from the same module the notebook already imports
# `evaluate` from; the previous `autoemulate.model_selection` path was
# inconsistent with that import.
from autoemulate.core.model_selection import r2_metric

# In-sample check on the first 100 training points.
y_pred = em.predict(x[:100])

# On a plain tensor `.mean` is a bound method, not data, so only take `.mean`
# when predict returned a distribution-like object.
# NOTE(review): assumes non-probabilistic emulators (e.g. RandomForest) return a
# tensor from predict. Confirm.
y_pred_mean = y_pred if torch.is_tensor(y_pred) else y_pred.mean
evaluate(y_pred_mean, y[:100], r2_metric())

In [None]:
# Held-out check: score the fitted emulator on the test data (x2, y2).
y_pred = em.predict(x2)

# On a plain tensor `.mean` is a bound method, not data, so only take `.mean`
# when predict returned a distribution-like object.
# NOTE(review): assumes non-probabilistic emulators (e.g. RandomForest) return a
# tensor from predict. Confirm.
y_pred_mean = y_pred if torch.is_tensor(y_pred) else y_pred.mean
evaluate(y_pred_mean, y2, r2_metric())