#### Export Metrics for Visualization in TikZ

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd

# Input folder with the per-run metric files and output folder for the exported tables.
base_folder = Path("G:/3D-GeoInfo-2025/data/4_predictions")
output_folder = Path("G:/3D-GeoInfo-2025/data/5_evaluation")
output_folder.mkdir(exist_ok=True, parents=True)

# Evaluation grid: seeds 0..4, epoch counts 1..20, and the evaluated resolutions.
seeds = np.arange(0, 5)
epochs = np.arange(1, 21)
resolutions = ["2_5_cm", "5_cm", "7_5_cm", "10_cm"]

# Experiment folder name -> short prefix used in the exported column names.
experiment_abbreviations = {
    "manual_labeling_small": "MLS",
    "manual_labeling_ext": "MLE",
    "manual_correction_small": "MCS",
    "manual_correction_ext": "MCE",
    "automatic_labeling_small": "ALS",
    "automatic_labeling_ext": "ALE",
}
# Experiments are processed in the insertion order of the mapping above.
experiments = list(experiment_abbreviations)

# Name fragments used to build the metric column names of every exported table.
_METRIC_NAMES = ("FScore", "Precision", "Recall")
_SUBSET_NAMES = ("Train", "Test")


def _load_metrics_row(metrics_file: Path, subset_name: str) -> pd.DataFrame:
    """Load a metrics CSV as a single-row frame with one column per metric.

    The file is read through its "metric" and "score" columns. Every metric
    name is capitalized and prefixed with *subset_name*; a resulting
    ``<subset_name>F1`` column is renamed to ``<subset_name>FScore``.
    """
    long_metrics = pd.read_csv(metrics_file)
    column_names = [
        f"{subset_name}{metric_name.capitalize()}"
        for metric_name in long_metrics["metric"].to_list()
    ]
    wide_metrics = pd.DataFrame([long_metrics["score"].to_numpy()], columns=column_names)
    return wide_metrics.rename(columns={f"{subset_name}F1": f"{subset_name}FScore"})


def _load_seed_metrics(run_folder: Path, epoch, seed) -> pd.DataFrame:
    """Return one row with the train and test metrics of a single seed.

    Column order is: train metrics, "Epochs", "Seed", test metrics.
    """
    train_row = _load_metrics_row(run_folder / f"train_metrics_seed_{seed}.csv", "Train")
    train_row["Epochs"] = epoch
    train_row["Seed"] = seed
    test_row = _load_metrics_row(run_folder / f"test_metrics_seed_{seed}.csv", "Test")
    return pd.concat([train_row, test_row], axis=1)


def _summarize(seed_metrics: pd.DataFrame, prefix: str = "") -> dict:
    """Return mean and standard deviation over all rows for every subset/metric.

    Keys are ``<prefix><Subset><Metric>`` for the mean and the same name with
    a ``Std`` suffix for the standard deviation (NumPy default, ``ddof=0``).
    """
    summary = {}
    for metric_name in _METRIC_NAMES:
        for subset_name in _SUBSET_NAMES:
            values = seed_metrics[f"{subset_name}{metric_name}"].to_numpy()
            summary[f"{prefix}{subset_name}{metric_name}"] = values.mean()
            summary[f"{prefix}{subset_name}{metric_name}Std"] = values.std()
    return summary


# For every resolution, export three kinds of tables:
#   * one box-plot CSV per experiment and epoch count (one row per seed),
#   * one line-chart CSV (one row per epoch count, mean/std columns per experiment),
#   * one bar-chart CSV (one row per experiment, taken at the largest epoch count).
for resolution in resolutions:
    # One row per epoch count; every experiment extends the row with its own
    # prefixed columns, so "Epochs" appears exactly once in the exported table
    # instead of once per experiment.
    line_chart_rows = [{"Epochs": epoch} for epoch in epochs]
    metrics_barchart = []

    for idx, experiment in enumerate(experiments):
        abbreviation = experiment_abbreviations[experiment]

        # Best single-run test F-score over all epochs and seeds (first maximum wins).
        best_f1_score = -1
        best_epoch = -1
        best_seed = -1

        for line_chart_row, epoch in zip(line_chart_rows, epochs):
            run_folder = base_folder / resolution / experiment / f"{epoch}_epochs"

            seed_rows = []
            for seed in seeds:
                seed_row = _load_seed_metrics(run_folder, epoch, seed)
                test_f1_score = seed_row["TestFScore"].iloc[0]
                if test_f1_score > best_f1_score:
                    best_f1_score = test_f1_score
                    best_epoch = epoch
                    best_seed = seed
                seed_rows.append(seed_row)

            metrics_boxplot = pd.concat(seed_rows)
            metrics_boxplot.to_csv(
                output_folder / f"{resolution}_{experiment}_{epoch}_epochs_boxplot.csv",
                index=False,
            )

            line_chart_row.update(_summarize(metrics_boxplot, prefix=abbreviation))

            # The bar chart only shows the run with the largest number of epochs.
            if epoch == epochs.max():
                metrics_barchart.append({
                    "Index": idx,
                    "Experiment": experiment,
                    "Epochs": epoch,
                    **_summarize(metrics_boxplot),
                })

        print("----------------")
        print(experiment, resolution)
        print("best F1-score", best_f1_score)
        print("best epoch:", best_epoch)
        print("best seed:", best_seed)
        print("----------------")

    metrics_line_chart_df = pd.DataFrame(line_chart_rows)

    # Add the baseline scores as constant columns; only the test split is exported.
    for subset in ["test"]:
        subset_name = subset.capitalize()
        baseline_metrics = _load_metrics_row(
            base_folder / resolution / "baseline" / f"{subset}_metrics.csv",
            subset_name,
        )
        for metric_name in _METRIC_NAMES:
            metrics_line_chart_df[f"NoFT{subset_name}{metric_name}"] = (
                baseline_metrics[f"{subset_name}{metric_name}"].iloc[0]
            )

    metrics_line_chart_df.to_csv(output_folder / f"linechart_{resolution}.csv", index=False)

    metrics_barchart_df = pd.DataFrame(metrics_barchart)
    metrics_barchart_df.to_csv(output_folder / f"barchart_{resolution}.csv", index=False)

In [None]:
# Per resolution, print the rounded baseline test F-score value(s) and, for the
# three "small" experiments, the rounded maximum of the test F-score column
# of the exported line-chart table.
small_test_columns = ["MLSTestFScore", "MCSTestFScore", "ALSTestFScore"]
for resolution in ("2_5_cm", "5_cm", "7_5_cm", "10_cm"):
    metrics = pd.read_csv(output_folder / f"linechart_{resolution}.csv")
    baseline_scores = metrics["NoFTTestFScore"].unique()
    print("baseline:", np.round(baseline_scores, 2))
    best_scores = metrics[small_test_columns].max()
    print("metrics", np.round(best_scores, 2))



In [None]:
# Per resolution, print the rounded maximum of the test F-score column of the
# exported line-chart table for the three "ext" experiments.
ext_test_columns = ["MLETestFScore", "MCETestFScore", "ALETestFScore"]
for resolution in ("2_5_cm", "5_cm", "7_5_cm", "10_cm"):
    metrics = pd.read_csv(output_folder / f"linechart_{resolution}.csv")
    best_scores = metrics[ext_test_columns].max()
    print("metrics", np.round(best_scores, 2))


In [None]:
# Per resolution, print the rounded maximum of the train F-score column of the
# exported line-chart table for the three "small" experiments.
small_train_columns = ["MLSTrainFScore", "MCSTrainFScore", "ALSTrainFScore"]
for resolution in ("2_5_cm", "5_cm", "7_5_cm", "10_cm"):
    metrics = pd.read_csv(output_folder / f"linechart_{resolution}.csv")
    best_scores = metrics[small_train_columns].max()
    print("metrics", np.round(best_scores, 2))