In [None]:
###################################################################################
#                                                                                 #
#                               ###########################                       #
#                               IPIN 2025 FLOWCEAN WORKSHOP                       #
#                               ###########################                       #
#                                                                                 #
# ------------------------------------------------------------------------------- #
#                                                                                 #
# This is the tutorial file for the IPIN 2025 workshop. In this Jupyter notebook, #
# we will go through the steps of training machine learning models with the       #
# Flowcean framework.                                                             #
#                                                                                 #
###################################################################################

# Standard-library logging is used for the progress messages in this notebook
import logging

# Module-level logger used by the loading, training and evaluation cells below
logger = logging.getLogger(__name__)

In [None]:
##################################################
# SECTION 1 : LOAD AND PREPARE THE TRAINING DATA #
##################################################

# Import flowcean and cli
import flowcean
import flowcean.cli

# The function below looks for a config.yaml in the current directory
# In the config.yaml, we specify settings for our training run
config = flowcean.cli.initialize()

# Import some helper functions for loading ROS data
# NOTE: Iterable comes from the public `collections.abc` module; the
# underscore-prefixed `_collections_abc` is a CPython implementation detail.
from os import PathLike
from collections.abc import Iterable
from flowcean.ros import load_rosbag
from examples.turtlesim.deinemudda import shift_in_time

# Import the transforms that are chained onto the loaded data
from flowcean.polars import DataFrame, ExplodeTimeSeries, ZeroOrderHold
from flowcean.core.transform import Lambda


In [None]:
##################################################
# SECTION 1 : LOAD AND PREPARE THE TRAINING DATA #
##################################################

def load_and_process_rosbag(
    path: str | PathLike,
    message_paths: Iterable[str | PathLike] | None = None,
) -> DataFrame:
    """Load one rosbag recording and chain the preprocessing transforms onto it.

    The bag is read with ``load_rosbag`` for the ``/turtle1/cmd_vel`` fields
    (``linear.x``, ``angular.z``) and the ``/turtle1/pose`` fields (``x``,
    ``y``, ``theta``). The result is wrapped in a ``DataFrame`` and piped
    through ``ZeroOrderHold``, ``ExplodeTimeSeries`` and
    ``Lambda(shift_in_time)``.

    Args:
        path: Location of the rosbag to load.
        message_paths: Forwarded unchanged to ``load_rosbag``; presumably the
            locations of additional ROS message definitions (TODO confirm).

    Returns:
        The ``DataFrame`` built from the rosbag with the three transforms
        chained on.
    """
    logger.info("Loading rosbag from: %s", path)

    ##############################################
    # TASK 1.1 LOAD ROSBAGS AND CHOOSE INPUTS    #
    ##############################################
    #                                            #
    # Call the load_rosbag function and pass:    #
    #   - the path of the rosbag                 #
    #   - required topics and their fields       #
    #   - the message_paths                      #
    #                                            #
    # The topics and fields we load are:         #
    #   /turtle1/cmd_vel                         #
    #       - linear.x                           #
    #       - angular.z                          #
    #   /turtle1/pose                            #
    #       - x                                  #
    #       - y                                  #
    #       - theta                              #
    ##############################################

    rosbag = load_rosbag(
        path=path,
        topics={
            "/turtle1/cmd_vel": [
                "linear.x",
                "angular.z",
            ],
            "/turtle1/pose": [
                "x",
                "y",
                "theta",
            ],
        },
        message_paths=message_paths,
    )

    ##############################################
    # TASK 1.2 CREATE TRAINING DATA FRAME        #
    ##############################################
    #                                            #
    # Call the ZeroOrderHold Transform:          #
    #   - our features are our topics            #
    #   - name the new column "measurements"     #
    #                                            #
    # Chain the ExplodeTimeSeries Transform:     #
    #   - Apply the ExplodeTimeSeries transform  #
    #     to the measurements column             #
    #                                            #
    # Chain the Lambda Transform:                #
    #   - pass the function shift_in_time, which #
    #     is imported in the previous cell       #
    #                                            #
    # ------------------------------------------ #
    #                                            #
    # HINT: You can concatenate/chain transforms #
    #       to a dataframe with the "|" operator #
    #                                            #
    ##############################################

    # The column name given to ZeroOrderHold must match the one that
    # ExplodeTimeSeries is applied to.
    return (
        DataFrame(rosbag)
        | ZeroOrderHold(
            features=[
                "/turtle1/cmd_vel",
                "/turtle1/pose",
            ],
            name="measurements",
        )
        | ExplodeTimeSeries("measurements")
        | Lambda(shift_in_time)
    )


# Build the training and the evaluation samples from the rosbags named in the
# loaded config; both share the same message paths.
samples_train = load_and_process_rosbag(
    path=config.rosbag.training_path,
    message_paths=config.rosbag.message_paths,
)
samples_eval = load_and_process_rosbag(
    path=config.rosbag.evaluation_path,
    message_paths=config.rosbag.message_paths,
)

In [None]:
##################################################
# SECTION 2 : SELECT LEARNERS ACROSS LIBRARIES   #
##################################################

# we load all the learners for our training loop
from flowcean.sklearn import RandomForestRegressorLearner, RegressionTree
from flowcean.torch import LightningLearner, MultilayerPerceptron

In [None]:
##################################################
# SECTION 2 : SELECT LEARNERS ACROSS LIBRARIES   #
##################################################

##############################################
# TASK 2.1 CONFIGURE A NUMBER OF LEARNERS    #
##############################################
#                                            #
# Initialize a regression tree, a random     #
# forest, and a Lightning Learner:           #
#   - pass the tree configuration to the     #
#     regression tree                        #
#   - pass the forest configuration to the   #
#     random forest                          #
#   - pass a multilayer perceptron instance  #
#     to the lightning learner and pass both #
#     their according configurations         #
#                                            #
# ------------------------------------------ #
#                                            #
# HINT: We defined our configurations in the #
#       config.yaml file                     #
#                                            #
##############################################


# Regression tree, configured from the `training.tree` entries of the config
regression_tree = RegressionTree(
    **config.training.tree,
)

# Random forest, configured from the `training.forest` entries of the config
random_forest = RandomForestRegressorLearner(
    **config.training.forest,
)

# Lightning learner wrapping a multilayer perceptron; the perceptron receives
# the learning rate, the learner receives batch size and epoch limit, all
# taken from the `training.mlp` entries of the config
mlp = LightningLearner(
    module=MultilayerPerceptron(
        learning_rate=config.training.mlp.learning_rate,
    ),
    batch_size=config.training.mlp.batch_size,
    max_epochs=config.training.mlp.max_epochs,
)

##############################################
# TASK 2.2 PREPARE SEQUENTIAL LEARNING       #
##############################################
#                                            #
# We want to train all of our models in a    #
# looped fashion:                            #
#   - Create dictionary for the learners     #
#   - Create a list that contains all fields #
#     of a topic that are part of the input  #
#   - Create a list that contains all fields #
#     of a topic that are part of the output #
#                                            #
##############################################


# Learners keyed by the name that is used for log messages, trained models
# and evaluation reports in the following cells
learners = {
    "regression_tree": regression_tree,
    "random_forest": random_forest,
    "multilayer_perceptron": mlp,
}

# Input columns handed to every learner: the pose fields first, then the
# velocity command fields
inputs = [
    "/turtle1/pose/x", "/turtle1/pose/y", "/turtle1/pose/theta",
] + [
    "/turtle1/cmd_vel/linear.x", "/turtle1/cmd_vel/angular.z",
]

# Output columns every learner has to predict: the `_next` pose fields
# (presumably produced by shift_in_time; verify against the helper)
outputs = ["/turtle1/pose/x_next", "/turtle1/pose/y_next", "/turtle1/pose/theta_next"]

In [None]:
##################################################
# SECTION 3 : TRAINING OF THE MODELS             #
##################################################

# we load our learning strategy
from flowcean.core import learn_offline

In [None]:
##################################################
# SECTION 3 : TRAINING OF THE MODELS             #
##################################################

##############################################
# TASK 3.1 CREATE A SEQUENTIAL LEARNING LOOP #
##############################################
#                                            #
# Implement the training loop:               #
#   - call the learn_offline function and    #
#     pass the required parameters           #
#   - store the trained models in a dict     #
#                                            #
# ------------------------------------------ #
#                                            #
# HINT: Our environment consists of samples  #
#                                            #
##############################################


# Train every configured learner on the training samples and keep the
# resulting model under the learner's name. Each learner is trained (and
# logged) exactly once.
models = {}
for learner_name, learner in learners.items():
    logger.info("Training model: %s", learner_name)

    model = learn_offline(
        samples_train,
        learner,
        inputs=inputs,
        outputs=outputs,
    )
    models[learner_name] = model

In [None]:
##################################################
# SECTION 4 : EVALUATION AND MODEL COMPARISON    #
##################################################

# we load our metrics for comparison
from flowcean.sklearn import MaxError, MeanAbsoluteError, MeanSquaredError, R2Score
from custom_metrics.euclidean_distance import MeanEuclideanDistance

# import functions for comparison and visualization
from flowcean.core import evaluate_offline
from flowcean.core.strategies.offline import print_report_table, select_best_model
from examples.turtlesim.deinemudda import plot_predictions_vs_ground_truth

In [None]:
##################################################
# SECTION 4 : EVALUATION AND MODEL COMPARISON    #
##################################################

##############################################
# TASK 4.1 CHOOSE METRICS FOR COMPARISON     #
##############################################
#                                            #
# Define a list of metrics for evaluation:   #
#   - we want to evaluate the maximum error, #
#     mean absolute error, mean squared      #
#     error, R2 score, and mean euclidean    #
#     distance                               #
#                                            #
# ------------------------------------------ #
#                                            #
# HINT: an euclidean distance is calculated  #
#       between two points                   #
#                                            #
##############################################


# Metrics computed for every model during evaluation
metrics = [
    MaxError(),
    MeanAbsoluteError(),
    MeanSquaredError(),
    R2Score(),
    # The euclidean distance is restricted to the x/y position outputs
    MeanEuclideanDistance(
        columns=[
            "/turtle1/pose/x_next",
            "/turtle1/pose/y_next",
        ],
    ),
]


##############################################
# TASK 4.2 CREATE AN EVALUATION LOOP         #
##############################################
#                                            #
# Implement the evaluation loop:             #
#   - call the evaluate_offline function and #
#     pass the required parameters           #
#   - store the reports in a dict for later  #
#                                            #
# ------------------------------------------ #
#                                            #
# HINT: Our environment consists of samples  #
#                                            #
##############################################


# Evaluate every trained model on the evaluation samples, keep the report
# under the model's name and print it both raw and as a table. Only real
# reports are ever passed to print_report_table.
reports = {}
for model_name, model in models.items():
    logger.info("Evaluating model: %s", model_name)

    report = evaluate_offline(
        model=model,
        environment=samples_eval,
        metrics=metrics,
        inputs=inputs,
        outputs=outputs,
    )
    reports[model_name] = report

    print(report)
    print_report_table(report)


##############################################
# TASK 4.3 SELECT A MODEL AND VISUALIZATION  #
##############################################
#                                            #
# Select the best model:                     #
#   - call the select_best_model function    #
#     and pass the required parameters       #
#   - we want to compare the mean euclidean  #
#     distance                               #
#   - call plot_predictions_vs_ground_truth  #
#     and pass the required parameters       #
#                                            #
# ------------------------------------------ #
#                                            #
# HINT: we can observe and collect samples   #
#                                            #
##############################################


# Select the best model from the collected reports, comparing the
# MeanEuclideanDistance metric reported under the "multi_output" entry
best_model_name = select_best_model(
    reports,
    output_name="multi_output",
    metric_name="MeanEuclideanDistance",
)

logger.info("Best model: %s", best_model_name)

# Plot the predictions of all trained models against the evaluation data
plot_predictions_vs_ground_truth(
    # TODO(review): check whether .observe().collect() is actually required here
    samples_eval=samples_eval.observe().collect(),
    input_names=inputs,
    output_names=outputs,
    models=models,
)

In [None]:
##################################################
# SECTION 5 : ONLY AFTER YOU HAVE DONE ALL TASKS #
##################################################

# The recorded run of this cell ended in an ImportError because the helper
# module did not provide `surprise`. Guard the import so the notebook still
# runs to completion when the helper is unavailable.
try:
    from examples.turtlesim.deinemudda import surprise
except ImportError as import_error:
    logger.warning("Skipping the surprise: %s", import_error)
else:
    surprise()

ImportError: cannot import name 'surprise' from '_helper_functions' (/ros2_ws/src/flowcean/examples/turtlesim/_helper_functions.py)