# IPIN 2025 Flowcean Hands-on Session

## Section 1: Load and Prepare the Training Data

In [14]:
import flowcean.cli

# Call flowcean's CLI initializer and keep the object it returns. Later cells read
# the rosbag paths (`config.rosbag.*`) and the learner hyperparameters
# (`config.training.*`) from it.
config = flowcean.cli.initialize()


### Task 1.1 Load Rosbags and Choose Inputs

In [15]:
from flowcean.polars import DataFrame

# Rosbag topics to load, each mapped to the list of message fields read from it.
topics = {
    "/turtle1/cmd_vel": ["linear.x", "angular.z"],
    "/turtle1/pose": ["x", "y", "theta"],
}
# Show the current data structure without transforms: the printed frame has a
# single row with one column per topic, each of dtype list[struct[2]].
rosbag_train = DataFrame.from_rosbag(config.rosbag.training_path, topics=topics)
print(rosbag_train.observe().collect())

2025-09-09 18:43:56,751 [flowcean.ros.rosbag][INFO] Loading data from cache...


shape: (1, 2)
┌─────────────────────────────────┬─────────────────────────────────┐
│ /turtle1/cmd_vel                ┆ /turtle1/pose                   │
│ ---                             ┆ ---                             │
│ list[struct[2]]                 ┆ list[struct[2]]                 │
╞═════════════════════════════════╪═════════════════════════════════╡
│ [{1755698408916415574,{2.0,0.0… ┆ [{1755698397155709229,{5.54444… │
└─────────────────────────────────┴─────────────────────────────────┘


### Task 1.2 Create Training Data Frame

In [16]:
from _helper_functions import ShiftInTime
from flowcean.polars import DataFrame, ExplodeTimeSeries, ZeroOrderHold

# Preprocessing pipeline, chained with `|` and applied identically to the
# training and the evaluation rosbag below.
transforms = (
    # Combine the two topic columns into a single feature named "measurements".
    # NOTE(review): presumably aligns both topics on a common time base by holding
    # the last known value - confirm against the ZeroOrderHold documentation.
    ZeroOrderHold(
        features=[
            "/turtle1/cmd_vel",
            "/turtle1/pose",
        ],
        name="measurements",
    )
    # Expand the "measurements" time series; in the printed result this yields a
    # `time` column plus one flat column per field (e.g. `/turtle1/pose/x`).
    | ExplodeTimeSeries("measurements")
    # Add a `<feature>_next` column for each pose field; these columns are the
    # prediction targets selected as `outputs` in Section 2.
    # NOTE(review): `steps=1` presumably takes the value one sample ahead -
    # confirm in _helper_functions.ShiftInTime.
    | ShiftInTime(
        features=["/turtle1/pose/x", "/turtle1/pose/y", "/turtle1/pose/theta"],
        steps=1,
        suffix="_next",
    )
)

# Both environments read the same `topics` and share the same transform pipeline;
# only the rosbag path differs.
training_environment = (
    DataFrame.from_rosbag(config.rosbag.training_path, topics=topics) | transforms
)
evaluation_environment = (
    DataFrame.from_rosbag(config.rosbag.evaluation_path, topics=topics) | transforms
)
# Inspect the transformed training data (time, inputs and `_next` targets).
print(training_environment.observe().collect())

2025-09-09 18:43:56,795 [flowcean.ros.rosbag][INFO] Loading data from cache...
2025-09-09 18:43:56,796 [flowcean.ros.rosbag][INFO] Loading data from cache...


shape: (24_220, 9)
┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
│ time      ┆ /turtle1/ ┆ /turtle1/ ┆ /turtle1/ ┆ … ┆ /turtle1/ ┆ /turtle1/ ┆ /turtle1/ ┆ /turtle1 │
│ ---       ┆ cmd_vel/l ┆ cmd_vel/a ┆ pose/x    ┆   ┆ pose/thet ┆ pose/x_ne ┆ pose/y_ne ┆ /pose/th │
│ i64       ┆ inear.x   ┆ ngular.z  ┆ ---       ┆   ┆ a         ┆ xt        ┆ xt        ┆ eta_next │
│           ┆ ---       ┆ ---       ┆ f64       ┆   ┆ ---       ┆ ---       ┆ ---       ┆ ---      │
│           ┆ f64       ┆ f64       ┆           ┆   ┆ f64       ┆ f64       ┆ f64       ┆ f64      │
╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
│ 175569840 ┆ 2.0       ┆ 0.0       ┆ 5.544445  ┆ … ┆ 0.0       ┆ 5.576445  ┆ 5.544445  ┆ 0.0      │
│ 891641557 ┆           ┆           ┆           ┆   ┆           ┆           ┆           ┆          │
│ 4         ┆           ┆           ┆           ┆   ┆           ┆       

## Section 2: Select Learners across Libraries

In [17]:
# Columns holding the current pose and the command sent to the turtle.
_pose_columns = [
    "/turtle1/pose/x",
    "/turtle1/pose/y",
    "/turtle1/pose/theta",
]
_command_columns = [
    "/turtle1/cmd_vel/linear.x",
    "/turtle1/cmd_vel/angular.z",
]

# Model inputs: pose columns first, followed by the command columns.
inputs = _pose_columns + _command_columns

# Model outputs: the `_next` pose columns.
outputs = [
    "/turtle1/pose/x_next",
    "/turtle1/pose/y_next",
    "/turtle1/pose/theta_next",
]

### Task 2.1 Learner configuration

In [18]:
from flowcean.sklearn import RandomForestRegressorLearner, RegressionTree
from flowcean.torch import LightningLearner, MultilayerPerceptron
from flowcean.xgboost import XGBoostRegressorLearner

# Single regression tree (from flowcean.sklearn); its size is limited through the
# configured maximum number of leaf nodes.
regression_tree = RegressionTree(max_leaf_nodes=config.training.tree.max_leaf_nodes)

# Random forest (from flowcean.sklearn) with the configured number of estimators
# and maximum depth.
random_forest = RandomForestRegressorLearner(
    n_estimators=config.training.forest.n_estimators,
    max_depth=config.training.forest.max_depth,
)

# Multilayer perceptron wrapped in the Lightning learner (from flowcean.torch).
# `output_size` follows the number of target columns in `outputs`; learning rate,
# batch size and epoch count come from the config; training is pinned to the CPU.
mlp = LightningLearner(
    module=MultilayerPerceptron(
        learning_rate=config.training.mlp.learning_rate,
        output_size=len(outputs),
    ),
    batch_size=config.training.mlp.batch_size,
    max_epochs=config.training.mlp.max_epochs,
    accelerator="cpu",
)

# XGBoost regressor, constructed without arguments (nothing is read from `config`).
xgb = XGBoostRegressorLearner()

### Task 2.2 Prepare Sequential Learning

In [19]:
# Learners to train one after another. The training loop appends one model per
# learner in this order, so positions in `models` mirror positions in this list.
learners = [
    regression_tree,
    random_forest,
    mlp,
    xgb,
]

## Section 3: Training of the Models

### Task 3.1 Create a Sequential Learning Loop

In [20]:
from flowcean.core import learn_offline

# Train every learner on the same training environment with the same input and
# output columns. Resetting `models` here keeps the cell safe to re-run; the
# resulting list has the same order as `learners`.
models = []
for learner in learners:
    print(f"Training model: {learner.name}")
    model = learn_offline(
        training_environment,
        learner,
        inputs=inputs,
        outputs=outputs,
    )
    models.append(model)



2025-09-09 18:43:56,907 [flowcean.core.strategies.offline][INFO] Learning with offline strategy
2025-09-09 18:43:56,908 [flowcean.core.strategies.offline][INFO] Selecting input and output features
2025-09-09 18:43:56,909 [flowcean.core.strategies.offline][INFO] Fitting transforms and applying them to features
2025-09-09 18:43:56,909 [flowcean.core.strategies.offline][INFO] Fitting output transform and applying it to output features
2025-09-09 18:43:56,909 [flowcean.core.strategies.offline][INFO] Learning model
2025-09-09 18:43:56,955 [flowcean.core.strategies.offline][INFO] Learning with offline strategy
2025-09-09 18:43:56,956 [flowcean.core.strategies.offline][INFO] Selecting input and output features
2025-09-09 18:43:56,956 [flowcean.core.strategies.offline][INFO] Fitting transforms and applying them to features


Training model: RegressionTree
Training model: RandomForestRegressorLearner


2025-09-09 18:43:56,956 [flowcean.core.strategies.offline][INFO] Fitting output transform and applying it to output features
2025-09-09 18:43:56,957 [flowcean.core.strategies.offline][INFO] Learning model
2025-09-09 18:43:59,218 [flowcean.sklearn.random_forest][INFO] Using Random Forest Regressor
  output_names=outputs.columns,
2025-09-09 18:43:59,219 [flowcean.core.strategies.offline][INFO] Learning with offline strategy
2025-09-09 18:43:59,220 [flowcean.core.strategies.offline][INFO] Selecting input and output features
2025-09-09 18:43:59,220 [flowcean.core.strategies.offline][INFO] Fitting transforms and applying them to features
2025-09-09 18:43:59,221 [flowcean.core.strategies.offline][INFO] Fitting output transform and applying it to output features
2025-09-09 18:43:59,221 [flowcean.core.strategies.offline][INFO] Learning model
INFO: 💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, whic

Training model: LightningLearner
Epoch 1: 100%|██████████| 379/379 [00:01<00:00, 311.37it/s, v_num=7, train_loss=4.420]

INFO: `Trainer.fit` stopped: `max_epochs=2` reached.
2025-09-09 18:44:01,718 [lightning.pytorch.utilities.rank_zero][INFO] `Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 379/379 [00:01<00:00, 310.51it/s, v_num=7, train_loss=4.420]


2025-09-09 18:44:01,746 [flowcean.core.strategies.offline][INFO] Learning with offline strategy


Training model: XGBoostRegressorLearner


2025-09-09 18:44:01,748 [flowcean.core.strategies.offline][INFO] Selecting input and output features
2025-09-09 18:44:01,748 [flowcean.core.strategies.offline][INFO] Fitting transforms and applying them to features
2025-09-09 18:44:01,748 [flowcean.core.strategies.offline][INFO] Fitting output transform and applying it to output features
2025-09-09 18:44:01,749 [flowcean.core.strategies.offline][INFO] Learning model


## Section 4: Evaluation and Model Comparison

### Task 4.1 Choose Metrics for Evaluation

In [21]:
from euclidean_distance import MeanEuclideanDistance
from flowcean.sklearn import MeanAbsoluteError, MeanSquaredError, R2Score

# Metrics computed for every model during evaluation. The first three come from
# flowcean.sklearn and are reported per output column; MeanEuclideanDistance comes
# from the `euclidean_distance` module and is restricted to the x/y position
# targets (theta_next is not part of it).
metrics = [
    MeanAbsoluteError(),
    MeanSquaredError(),
    R2Score(),
    MeanEuclideanDistance(
        features=["/turtle1/pose/x_next", "/turtle1/pose/y_next"],
    ),
]

### Task 4.2 Create an Evaluation Loop

In [22]:
from flowcean.core import evaluate_offline

# Evaluate all trained models on the evaluation environment with the `metrics`
# list, using the same input and output columns as in training.
report = evaluate_offline(
    models,
    environment=evaluation_environment,
    metrics=metrics,
    inputs=inputs,
    outputs=outputs,
)
# Last expression of the cell: renders the report as a table with one row per model.
report.great_table()

2025-09-09 18:44:01,955 [euclidean_distance][INFO] Computed EuclideanDistance over columns ['/turtle1/pose/x_next', '/turtle1/pose/y_next']: 0.699647
2025-09-09 18:44:01,998 [euclidean_distance][INFO] Computed EuclideanDistance over columns ['/turtle1/pose/x_next', '/turtle1/pose/y_next']: 0.510113
2025-09-09 18:44:02,183 [euclidean_distance][INFO] Computed EuclideanDistance over columns ['/turtle1/pose/x_next', '/turtle1/pose/y_next']: 2.894146
2025-09-09 18:44:02,215 [euclidean_distance][INFO] Computed EuclideanDistance over columns ['/turtle1/pose/x_next', '/turtle1/pose/y_next']: 0.028081


Model Evaluation Report,Model Evaluation Report,Model Evaluation Report,Model Evaluation Report,Model Evaluation Report,Model Evaluation Report,Model Evaluation Report,Model Evaluation Report,Model Evaluation Report,Model Evaluation Report,Model Evaluation Report
Metrics for each trained model,Metrics for each trained model,Metrics for each trained model,Metrics for each trained model,Metrics for each trained model,Metrics for each trained model,Metrics for each trained model,Metrics for each trained model,Metrics for each trained model,Metrics for each trained model,Metrics for each trained model
Model,MeanAbsoluteError,MeanAbsoluteError,MeanAbsoluteError,MeanSquaredError,MeanSquaredError,MeanSquaredError,R2Score,R2Score,R2Score,MeanEuclideanDistance
Model,/turtle1/pose/x_next,/turtle1/pose/y_next,/turtle1/pose/theta_next,/turtle1/pose/x_next,/turtle1/pose/y_next,/turtle1/pose/theta_next,/turtle1/pose/x_next,/turtle1/pose/y_next,/turtle1/pose/theta_next,MeanEuclideanDistance
DecisionTreeRegressor,0.39,0.45,0.48,0.31,0.49,0.51,0.96,0.94,0.8,0.7
RandomForestRegressor,0.31,0.32,0.36,0.19,0.22,0.29,0.98,0.97,0.88,0.51
PyTorchModel,1.78,2.13,1.26,4.53,6.73,2.23,0.45,0.22,0.13,2.89
XGBoostRegressorModel,0.02,0.02,0.02,0.0,0.0,0.03,1.0,1.0,0.99,0.03


### Task 4.3 Select a Model and Visualization

In [23]:
from _helper_functions import plot_predictions_vs_ground_truth

# Pick the model to keep. `models` was filled in the same order as `learners`, so
# the position is looked up through the learner instead of a hard-coded index,
# which would silently point at a different model once `learners` is reordered.
# XGBoost is selected because it shows the lowest errors in the evaluation report.
best_model = models[learners.index(xgb)]
print(f"Best model: {best_model.name}")

# Plots are saved under plots/
# Note that all trained models are passed for plotting, not only the selected one.
plot_predictions_vs_ground_truth(
    environment=evaluation_environment,
    input_names=inputs,
    output_names=outputs,
    models=models,
)

# save model to disk
best_model.save("model.fml")

Best model: XGBoostRegressorModel
