# Train an MLP Predictor on a Synthetic Dataset

Here we test and plot an MLP predictor on the Synthetic Circle Cluster Dataset.


In [None]:
from itertools import product

from prescyent.dataset import SCCDataset, SCCDatasetConfig

# Cluster centres sit on a regular grid: every (x, y) pair with x and y
# taken from 0, 4, 8, 12.
xs = [*range(0, 16, 4)]
ys = [*range(0, 16, 4)]
clusters = [*product(xs, ys)]
num_clusters = len(clusters)

# Every cluster shares the same radius and the same number of trajectories.
cluster_radius = 1
cluster_num_traj = 12

# Expand the shared settings into one list entry per cluster.
num_trajs = [cluster_num_traj] * num_clusters
radius = [cluster_radius] * num_clusters

# Split the (x, y) pairs into two parallel coordinate lists.
starting_xs, starting_ys = (list(axis) for axis in zip(*clusters))

# Build the dataset configuration from the per-cluster lists prepared above
# and the scalar generation settings.
dataset_config = SCCDatasetConfig(
    # seed=5,  # uncomment to fix the seed; if none is given, the seed is random
    # -- per-cluster settings (one list entry per cluster)
    starting_xs=starting_xs,
    starting_ys=starting_ys,
    radius=radius,
    num_trajs=num_trajs,
    # -- trajectory generation settings
    num_points=100,
    num_perturbation_points=10,
    perturbation_range=0.2,
    radius_eps=0.2,
    # -- sampling frequency and history/future window sizes
    frequency=20,
    history_size=20,
    future_size=10,
)
dataset = SCCDataset(dataset_config)

# Visual sanity check: first test trajectory, then all test trajectories.
dataset.plot_traj(dataset.trajectories.test[0])
dataset.plot_trajectories_dim_wise(
    dataset.trajectories.test,
    title="All test trajectories",
)

In [None]:

from pathlib import Path

from prescyent.predictor import MlpPredictor, MlpConfig, ConstantPredictor, PredictorConfig, TrainingConfig
from prescyent.scaler import ScalerConfig
from prescyent.utils.enums import Scalers, TrajectoryDimensions, LossFunctions

# -- Scaler settings: feature-wise standardization along the temporal axis
scaler_config = ScalerConfig(
    scaler=Scalers.STANDARDIZATION,
    scaling_axis=TrajectoryDimensions.TEMPORAL,
    do_feature_wise_scaling=True,
)

# -- Predictor: a 4-layer MLP with a hidden size of 128
# The "OK" printed below completes this line once the predictor exists.
print("Initializing predictor...", end=" ")
config = MlpConfig(
    # data-related settings
    dataset_config=dataset_config,
    context_size=dataset.context_size_sum,
    scaler_config=scaler_config,
    # architecture
    num_layers=4,
    hidden_size=128,
    # output and loss settings
    deriv_on_last_frame=True,
    loss_fn=LossFunctions.MTDLOSS,
)
predictor = MlpPredictor(config=config)
print("OK")

# -- Training settings
training_config = TrainingConfig(
    # Learning rate
    lr=0.0001,
    # Upper bound on the number of training epochs
    max_epochs=200,
    # Stop before max_epochs if the validation loss has not improved
    # for 10 epochs
    early_stopping_patience=10,
    # Choose the best available accelerator and devices
    # (see the lightning documentation for more)
    accelerator="auto",
    devices="auto",
)

# Note: the scaler is also trained by this call.
predictor.train(dataset, training_config)

# -- Save the predictor
# The experiment directory is named after the dataset and the
# history / future / frequency settings used for this run.
xp_dir = Path(
    "data",
    "models",
    f"{dataset.DATASET_NAME}",
    f"h{dataset_config.history_size}_f{dataset_config.future_size}_{dataset.frequency}hz",
)
# Each predictor gets its own name/version sub-directory.
model_dir = xp_dir.joinpath(f"{predictor.name}", f"version_{predictor.version}")
print("Model directory:", model_dir)
predictor.save(model_dir, rm_log_path=False)

# Also store the dataset config with the model so it can be loaded later if needed.
dataset.save_config(model_dir.joinpath("dataset_config.json"))

# -- Evaluation
# Run the trained predictor on the test set to see how well it performs.
predictor.test(dataset)

# Run a ConstantPredictor on the same dataset as a baseline for comparison.
delayed_config = PredictorConfig(dataset_config=dataset_config, save_path=f"{xp_dir}")
delayed = ConstantPredictor(config=delayed_config)
delayed.test(dataset)

# Tell the user how to browse the logs written during this run.
print(
    "You can visualize all logs from this script at xp_dir using tensorboard like this:",
    f"tensorboard --logdir {xp_dir}",
    sep="\n",
)
