In [None]:
# @markdown <center><img src="https://raw.githubusercontent.com/flowcean/flowcean/d5a716008071e5d1174b4851e59695a6170b8ca3/docs/assets/logo_text_blue.svg" height="150" /></center>
# @markdown <center><h1>IPIN 2025 - Flowcean Hands-On Session</h1></center>
# @markdown <center>Run this cell to install and setup</center>
! pip install --quiet flowcean==0.7.0b2

! [ -d "ipin2025-workshop" ] || git clone --quiet https://github.com/flowcean/ipin2025-workshop.git
import os
os.chdir("ipin2025-workshop")

## Section 1: Load and Prepare the Training Data

In [None]:
import flowcean.cli

# Call flowcean's CLI initialisation and keep the returned configuration object.
# Later cells read the rosbag paths (config.rosbag.*) and the learner
# hyper-parameters (config.training.*) from it.
# NOTE(review): presumably this loads the workshop's configuration file from the
# current working directory — confirm against the flowcean.cli documentation.
config = flowcean.cli.initialize()


### Task 1.1 Load Rosbags and Choose Inputs

In [None]:
from flowcean.polars import DataFrame

# Per-topic selection of the message fields to read from the rosbag.
topics = {
    "/turtle1/cmd_vel": ["linear.x", "angular.z"],
    "/turtle1/pose": ["x", "y", "theta"],
}

# Load the training recording without any transforms and print one collected
# observation to inspect the current data structure.
rosbag_train = DataFrame.from_rosbag(
    config.rosbag.training_path,
    topics=topics,
)
raw_observation = rosbag_train.observe().collect()
print(raw_observation)

### Task 1.2 Create Training Data Frame

In [None]:
from _helper_functions import ShiftInTime
from flowcean.polars import DataFrame, ExplodeTimeSeries, ZeroOrderHold

# Preprocessing stages, chained left to right with `|`:
#   1. ZeroOrderHold over both topics, producing a feature named "measurements"
#      (presumably resampling them onto a common time base — confirm),
#   2. ExplodeTimeSeries applied to that "measurements" feature,
#   3. ShiftInTime over the pose columns with steps=1, writing the shifted
#      values to columns carrying the "_next" suffix.
hold_measurements = ZeroOrderHold(
    features=["/turtle1/cmd_vel", "/turtle1/pose"],
    name="measurements",
)
explode_measurements = ExplodeTimeSeries("measurements")
shift_pose = ShiftInTime(
    features=["/turtle1/pose/x", "/turtle1/pose/y", "/turtle1/pose/theta"],
    steps=1,
    suffix="_next",
)
transforms = hold_measurements | explode_measurements | shift_pose


def _load_environment(rosbag_path):
    """Read the selected topics from ``rosbag_path`` and attach ``transforms``."""
    return DataFrame.from_rosbag(rosbag_path, topics=topics) | transforms


# Training and evaluation recordings go through the identical pipeline.
training_environment = _load_environment(config.rosbag.training_path)
evaluation_environment = _load_environment(config.rosbag.evaluation_path)
print(training_environment.observe().collect())

## Section 2: Select Learners across Libraries

In [None]:
# Current pose columns of the turtle.
pose_features = [
    "/turtle1/pose/x",
    "/turtle1/pose/y",
    "/turtle1/pose/theta",
]
# Velocity command columns.
command_features = [
    "/turtle1/cmd_vel/linear.x",
    "/turtle1/cmd_vel/angular.z",
]

# Model inputs: the current pose followed by the velocity command.
inputs = pose_features + command_features

# Model outputs: the pose columns carrying the "_next" suffix.
outputs = [
    "/turtle1/pose/x_next",
    "/turtle1/pose/y_next",
    "/turtle1/pose/theta_next",
]

### Task 2.1 Learner Configuration

In [None]:
from flowcean.sklearn import RandomForestRegressorLearner, RegressionTree
from flowcean.torch import LightningLearner, MultilayerPerceptron
from flowcean.xgboost import XGBoostRegressorLearner

# All tunable hyper-parameters are read from the `training` section of the
# configuration object.
training_cfg = config.training

regression_tree = RegressionTree(max_leaf_nodes=training_cfg.tree.max_leaf_nodes)

random_forest = RandomForestRegressorLearner(
    n_estimators=training_cfg.forest.n_estimators,
    max_depth=training_cfg.forest.max_depth,
)

# The network gets one output per target column; training is pinned to the CPU.
mlp_module = MultilayerPerceptron(
    learning_rate=training_cfg.mlp.learning_rate,
    output_size=len(outputs),
)
mlp = LightningLearner(
    module=mlp_module,
    batch_size=training_cfg.mlp.batch_size,
    max_epochs=training_cfg.mlp.max_epochs,
    accelerator="cpu",
)

# Constructed without arguments, i.e. with the learner's own defaults.
xgb = XGBoostRegressorLearner()

### Task 2.2 Prepare Sequential Learning

In [None]:
# Order matters: the training loop appends one model per learner in this order,
# and the model selection cell later picks its model by position.
learners = [regression_tree, random_forest, mlp, xgb]

## Section 3: Training of the Models

### Task 3.1 Create a Sequential Learning Loop

In [None]:
from flowcean.core import learn_offline

# Train every learner on the same training environment with the same
# input/output columns. `models` is rebuilt from scratch on each run of this
# cell and ends up in the same order as `learners`.
models = []
for learner in learners:
    print(f"Training model: {learner.name}")
    models.append(
        learn_offline(
            training_environment,
            learner,
            inputs=inputs,
            outputs=outputs,
        )
    )


## Section 4: Evaluation and Model Comparison

### Task 4.1 Choose Metrics for Evaluation

In [None]:
from euclidean_distance import MeanEuclideanDistance
from flowcean.sklearn import MeanAbsoluteError, MeanSquaredError, R2Score

# The Euclidean distance metric is restricted to the x/y position columns;
# the heading (theta) is not passed to it.
position_features = ["/turtle1/pose/x_next", "/turtle1/pose/y_next"]

metrics = [
    MeanAbsoluteError(),
    MeanSquaredError(),
    R2Score(),
    MeanEuclideanDistance(features=position_features),
]

### Task 4.2 Create an Evaluation Loop

In [None]:
from flowcean.core import evaluate_offline

# Evaluate all trained models on the evaluation environment with the chosen
# metrics, using the same input/output column names that were passed to
# training.
report = evaluate_offline(
    models,
    environment=evaluation_environment,
    metrics=metrics,
    inputs=inputs,
    outputs=outputs,
)
# Keep this call as the last expression of the cell so that its return value is
# shown as the cell output.
# NOTE(review): presumably renders one row per model and one column per
# metric — confirm against the flowcean report documentation.
report.great_table()

### Task 4.3 Select a Model and Visualize the Predictions

In [None]:
from _helper_functions import plot_predictions_vs_ground_truth

# Position of the chosen model in `models`. `models` follows the order of
# `learners`, so index 3 is the model trained by the XGBoost learner.
best_model_index = 3
best_model = models[best_model_index]
print(f"Best model: {best_model.name}")

# Compare the predictions of all models (not only the selected one) against the
# ground truth of the evaluation environment.
# Plots are saved under plots/
plot_predictions_vs_ground_truth(
    environment=evaluation_environment,
    input_names=inputs,
    output_names=outputs,
    models=models,
)

# Persist only the selected model to disk.
best_model.save("model.fml")