# IPIN 2025 Flowcean Hands-on Session Solution



In [3]:
# some imports we will need
import logging

logger = logging.getLogger(__name__)

## Section 1 : Load and Prepare the Training Data

In [4]:
# Import flowcean and cli
import flowcean
import flowcean.cli

# Import some helper functions for loading ROS data
from os import PathLike

from _collections_abc import Iterable
from flowcean.core.transform import Lambda

# import transforms
from flowcean.polars import DataFrame, ExplodeTimeSeries, ZeroOrderHold
from flowcean.ros import load_rosbag

from _helper_functions import shift_in_time

# The function below looks for a config.yaml in the current directory
# In the config.yaml, we specify settings for our training run 
config = flowcean.cli.initialize()


### Task 1.1 Load Rosbags and Choose Inputs
                    

In [5]:

# Configure the load_rosbag() function below
def load_and_process_rosbag(
    path: str | PathLike,
    message_paths: Iterable[str | PathLike] | None = None,
) -> DataFrame:
    logger.info("Loading rosbag from: %s", path)

    rosbag = load_rosbag(
        # TODO: TASK 1.1
                path=path,
        topics={
            "/turtle1/cmd_vel": [
                "linear.x",
                "angular.z",
            ],
            "/turtle1/pose": [
                "x",
                "y",
                "theta",
            ],
        },
        message_paths=message_paths,
    )
    return (
        DataFrame(rosbag)
        | ZeroOrderHold(
            features=[
                "/turtle1/cmd_vel",
                "/turtle1/pose",
            ],
            name="measurements",
        )
        | ExplodeTimeSeries("measurements")
        | Lambda(shift_in_time)
    )

# using our loaded config we want to create training and evaluation samples
samples_train = load_and_process_rosbag(
    config.rosbag.training_path,
    config.rosbag.message_paths,
)
samples_eval = load_and_process_rosbag(
    config.rosbag.evaluation_path,
    config.rosbag.message_paths,
)

2025-09-09 08:22:10,366 [__main__][INFO] Loading rosbag from: recordings/turtle_training
2025-09-09 08:22:10,367 [flowcean.ros.rosbag][INFO] Loading data from cache...
2025-09-09 08:22:10,367 [__main__][INFO] Loading rosbag from: recordings/turtle_evaluation
2025-09-09 08:22:10,368 [flowcean.ros.rosbag][INFO] Loading data from cache...



### Task 1.2 Create Training Data Frame


In [6]:
# Modify the return statement below to include the necessary transforms
def load_and_process_rosbag(
        path: str | PathLike,
        message_paths: Iterable[str | PathLike] | None = None,
    ) -> DataFrame:
    logger.info("Loading rosbag from: %s", path)

    rosbag = load_rosbag(
        path=path,
        message_paths=message_paths,
        topics={
            "/turtle1/cmd_vel": [
                "linear.x",
                "angular.z",
            ],
            "/turtle1/pose": [
                "x",
                "y",
                "theta",
            ],
        },
    )
    return (
        DataFrame(rosbag)
        | ZeroOrderHold(
            features=[
                "/turtle1/cmd_vel",
                "/turtle1/pose",
            ],
            name="measurements",
        )
        | ExplodeTimeSeries("measurements")
        | Lambda(shift_in_time)
    )


# using our loaded config we want to create training and evaluation samples
samples_train = load_and_process_rosbag(
    config.rosbag.training_path,
    config.rosbag.message_paths,
)
samples_eval = load_and_process_rosbag(
    config.rosbag.evaluation_path,
    config.rosbag.message_paths,
)

2025-09-09 08:22:10,374 [__main__][INFO] Loading rosbag from: recordings/turtle_training
2025-09-09 08:22:10,374 [flowcean.ros.rosbag][INFO] Loading data from cache...
2025-09-09 08:22:10,375 [__main__][INFO] Loading rosbag from: recordings/turtle_evaluation
2025-09-09 08:22:10,375 [flowcean.ros.rosbag][INFO] Loading data from cache...


## Section 2 : Select Learners across Libraries 


In [7]:
# we load all the learners for our training loop
from flowcean.sklearn import RandomForestRegressorLearner, RegressionTree
from flowcean.torch import LightningLearner, MultilayerPerceptron
from flowcean.xgboost import XGBoostRegressorLearner

inputs = [
        "/turtle1/pose/x",
        "/turtle1/pose/y",
        "/turtle1/pose/theta",
        "/turtle1/cmd_vel/linear.x",
        "/turtle1/cmd_vel/angular.z",
]
outputs = [
        "/turtle1/pose/x_next",
        "/turtle1/pose/y_next",
        "/turtle1/pose/theta_next",
]


### Task 2.1 Learner configuration


In [8]:
# create and configure the learners below
regression_tree = RegressionTree(**config.training.tree)
random_forest = RandomForestRegressorLearner(
    **config.training.forest,
)
mlp = LightningLearner(
    module=MultilayerPerceptron(
        learning_rate=config.training.mlp.learning_rate,
        input_size=len(inputs),
        output_size=len(outputs),
    ),
    batch_size=config.training.mlp.batch_size,
    max_epochs=config.training.mlp.max_epochs,
)
xgb = XGBoostRegressorLearner()

### Task 2.2 Prepare Sequential Learning

In [9]:
learners = [
    regression_tree,
    random_forest,
    mlp,
    xgb,
]

<details>
  <summary>💡 Click to see the solution</summary>

```python
learners = [
    regression_tree,
    random_forest,
    mlp,
    xgb,
]
```

</details>

## Section 3: Training of the Models


In [10]:
# we load our learning strategy
from flowcean.core import learn_offline



### Task 3.1 Create a Sequential Learning Loop


In [11]:
models = []
for learner in learners:
    logger.info("Training model: %s", learner.name)
    model = None  # TODO: Task 3.1
    model = learn_offline(
        samples_train,
        learner,
        inputs=inputs,
        outputs=outputs,
    )
    models.append(model) # TODO: Task 3.1

2025-09-09 08:22:12,033 [__main__][INFO] Training model: RegressionTree
2025-09-09 08:22:12,033 [flowcean.core.strategies.offline][INFO] Learning with offline strategy
2025-09-09 08:22:12,037 [flowcean.core.strategies.offline][INFO] Selecting input and output features
2025-09-09 08:22:12,037 [flowcean.core.strategies.offline][INFO] Fitting transforms and applying them to features
2025-09-09 08:22:12,038 [flowcean.core.strategies.offline][INFO] Fitting output transform and applying it to output features
2025-09-09 08:22:12,038 [flowcean.core.strategies.offline][INFO] Learning model
2025-09-09 08:22:12,089 [__main__][INFO] Training model: RandomForestRegressorLearner
2025-09-09 08:22:12,090 [flowcean.core.strategies.offline][INFO] Learning with offline strategy
2025-09-09 08:22:12,091 [flowcean.core.strategies.offline][INFO] Selecting input and output features
2025-09-09 08:22:12,092 [flowcean.core.strategies.offline][INFO] Fitting transforms and applying them to features
2025-09-09 08:2

Epoch 1: 100%|██████████| 379/379 [00:01<00:00, 280.64it/s, v_num=0, train_loss=3.320] 

INFO: `Trainer.fit` stopped: `max_epochs=2` reached.
2025-09-09 08:22:17,310 [lightning.pytorch.utilities.rank_zero][INFO] `Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 379/379 [00:01<00:00, 279.77it/s, v_num=0, train_loss=3.320]


2025-09-09 08:22:17,346 [__main__][INFO] Training model: XGBoostRegressorLearner
2025-09-09 08:22:17,347 [flowcean.core.strategies.offline][INFO] Learning with offline strategy
2025-09-09 08:22:17,348 [flowcean.core.strategies.offline][INFO] Selecting input and output features
2025-09-09 08:22:17,349 [flowcean.core.strategies.offline][INFO] Fitting transforms and applying them to features
2025-09-09 08:22:17,349 [flowcean.core.strategies.offline][INFO] Fitting output transform and applying it to output features
2025-09-09 08:22:17,349 [flowcean.core.strategies.offline][INFO] Learning model


## Section 4 : Evaluation and Model Comparison


In [12]:
# we load our metrics for comparison
from flowcean.sklearn import MeanAbsoluteError, MeanSquaredError, R2Score
from custom_metrics.euclidean_distance import MeanEuclideanDistance

# import function for model comparison
from flowcean.core import evaluate_offline

### Task 4.1 Chose Metrics for Evaluation



In [13]:
metrics = [
    MeanAbsoluteError(),
    MeanSquaredError(),
    R2Score(),
    MeanEuclideanDistance(
        columns=[
            "/turtle1/pose/x_next",
            "/turtle1/pose/y_next",
        ],
    ),
]

### Task 4.2 Create an Evaluation Loop


In [14]:
report = evaluate_offline(
    models,
    environment=samples_eval,
    metrics=metrics,
    inputs=inputs,
    outputs=outputs,
)
report.great_table()

2025-09-09 08:22:17,507 [custom_metrics.euclidean_distance][INFO] Computed EuclideanDistance over columns ['/turtle1/pose/x_next', '/turtle1/pose/y_next']: 0.724503
2025-09-09 08:22:17,560 [custom_metrics.euclidean_distance][INFO] Computed EuclideanDistance over columns ['/turtle1/pose/x_next', '/turtle1/pose/y_next']: 0.520264
2025-09-09 08:22:17,751 [custom_metrics.euclidean_distance][INFO] Computed EuclideanDistance over columns ['/turtle1/pose/x_next', '/turtle1/pose/y_next']: 2.220751
2025-09-09 08:22:17,784 [custom_metrics.euclidean_distance][INFO] Computed EuclideanDistance over columns ['/turtle1/pose/x_next', '/turtle1/pose/y_next']: 0.028081


Model Evaluation Report,Model Evaluation Report,Model Evaluation Report,Model Evaluation Report,Model Evaluation Report,Model Evaluation Report,Model Evaluation Report,Model Evaluation Report,Model Evaluation Report,Model Evaluation Report,Model Evaluation Report
Metrics for each trained model,Metrics for each trained model,Metrics for each trained model,Metrics for each trained model,Metrics for each trained model,Metrics for each trained model,Metrics for each trained model,Metrics for each trained model,Metrics for each trained model,Metrics for each trained model,Metrics for each trained model
Model,MeanAbsoluteError,MeanAbsoluteError,MeanAbsoluteError,MeanSquaredError,MeanSquaredError,MeanSquaredError,R2Score,R2Score,R2Score,MeanEuclideanDistance
Model,/turtle1/pose/x_next,/turtle1/pose/y_next,/turtle1/pose/theta_next,/turtle1/pose/x_next,/turtle1/pose/y_next,/turtle1/pose/theta_next,/turtle1/pose/x_next,/turtle1/pose/y_next,/turtle1/pose/theta_next,MeanEuclideanDistance
DecisionTreeRegressor,0.4,0.48,0.4,0.32,0.52,0.42,0.96,0.94,0.84,0.72
RandomForestRegressor,0.31,0.33,0.37,0.2,0.23,0.31,0.98,0.97,0.88,0.52
PyTorchModel,1.52,1.44,1.02,3.52,2.89,1.45,0.57,0.66,0.43,2.22
XGBoostRegressorModel,0.02,0.02,0.02,0.0,0.0,0.03,1.0,1.0,0.99,0.03



###  Task 4.3 Select a Model and Visualization  



In [15]:
from _helper_functions import plot_predictions_vs_ground_truth

best_model = models[0]
logger.info("Best model: %s", best_model.name)

# Plots are saved under plots/
plot_predictions_vs_ground_truth(
    samples_eval=samples_eval.observe().collect(),
    input_names=inputs,
    output_names=outputs,
    models=models,
)

2025-09-09 08:22:17,876 [__main__][INFO] Best model: DecisionTreeRegressor
