# Evaluation Regression

This notebook provides tools for evaluating regression model results from Octopus studies.

In [None]:
import os
from pathlib import Path

import altair as alt
import duckdb
import polars as pl
from IPython.display import display
from ipywidgets import Dropdown, interact

## Select Study Directory

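Set the `STUDIES_PATH` environment variable to the folder that contains your studies (it defaults to `./studies`), then update the `study_path` variable below so that it points to your study directory: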

In [None]:
# Root folder that holds all studies: taken from the STUDIES_PATH environment
# variable, falling back to "./studies" (relative to the current working directory).
studies_root = os.getenv("STUDIES_PATH", "./studies")

# Folder of the study to evaluate -- change the last component to select another study.
study_path = os.path.join(studies_root, "basic_regression")

# Resolve once to an absolute path so the glob patterns built from it later
# do not depend on the working directory.
study_path_abs = Path(study_path).resolve()
print(f"Using study path: {study_path_abs}")

# Stop right away, with an actionable message, when the study folder is missing.
if not study_path_abs.exists():
    raise ValueError(
        f"Path does not exist: {study_path_abs}. "
        "Please update the study_path variable above to point to your actual study directory."
    )

## Load Data Using DuckDB

In [None]:
# Open a DuckDB connection backed by an in-memory database (DuckDB's default);
# the cells below use it to query the study's parquet files.
con = duckdb.connect(database=":memory:")
print("DuckDB connection established")

In [None]:
# Load Optuna data
# The glob matches every optuna*.parquet file two directory levels below the
# study folder; hive_partitioning=true is passed so DuckDB can expose
# key=value directory names as columns.
# Single quotes are doubled so that a study path containing an apostrophe
# cannot terminate the SQL string literal early.
_optuna_glob = f"{study_path_abs}/*/*/optuna*.parquet".replace("'", "''")
df_optuna = con.execute(
    f"SELECT * FROM read_parquet('{_optuna_glob}', hive_partitioning=true)"
).pl()
print(f"Loaded {len(df_optuna)} optuna records")

In [None]:
# Load predictions data
# The glob matches every predictions*.parquet file two directory levels below
# the study folder; hive_partitioning=true is passed so DuckDB can expose
# key=value directory names as columns.
# Single quotes are doubled so that a study path containing an apostrophe
# cannot terminate the SQL string literal early.
_predictions_glob = f"{study_path_abs}/*/*/predictions*.parquet".replace("'", "''")
df_predictions = con.execute(
    f"SELECT * FROM read_parquet('{_predictions_glob}', hive_partitioning=true)"
).pl()

# pandas copy of the same records, kept available for pandas-based tooling.
df_predictions_pandas = df_predictions.to_pandas()
print(f"Loaded {len(df_predictions)} prediction records")

In [None]:
# Load feature importances
# The glob matches every feature-importance*.parquet file two directory levels
# below the study folder; hive_partitioning=true is passed so DuckDB can expose
# key=value directory names as columns.
# Single quotes are doubled so that a study path containing an apostrophe
# cannot terminate the SQL string literal early.
_feature_importance_glob = f"{study_path_abs}/*/*/feature-importance*.parquet".replace("'", "''")
df_feature_importances = (
    con.execute(
        f"SELECT * FROM read_parquet('{_feature_importance_glob}', hive_partitioning=true)"
    )
    .pl()
    # The plotting cells compare these id columns against Python ints, so both
    # are normalised to 64-bit integers here (one pass instead of two).
    .with_columns(
        pl.col("experiment_id").cast(pl.Int64),
        pl.col("task_id").cast(pl.Int64),
    )
)
print(f"Loaded {len(df_feature_importances)} feature importance records")

## Setup Interactive Controls

In [None]:
# Extract unique values for dropdowns
def _dropdown_options(df, columns):
    """Build the option lists shown in the interactive dropdowns.

    Args:
        df: polars DataFrame to read the values from.
        columns: names of the columns that need a dropdown.

    Returns:
        dict mapping each column name to the sorted list of its distinct,
        non-null values rendered as strings.

    Values are de-duplicated per column. De-duplicating rows over several
    columns at once would keep one entry per *combination*, so every single
    column would still contain repeated values and the dropdowns would list
    the same id many times.
    """

    def _natural_key(label):
        # Purely numeric labels are ordered by value ("2" before "10");
        # every other label is ordered alphabetically after the numeric ones.
        if label.isdecimal():
            return (0, int(label), label)
        return (1, 0, label)

    options = {}
    for name in columns:
        # Only the requested column is cast to string; nulls are dropped
        # because they cannot be sorted together with strings.
        labels = df.get_column(name).cast(pl.Utf8).drop_nulls().unique().to_list()
        options[name] = sorted(labels, key=_natural_key)
    return options


unique_id_values = _dropdown_options(df_predictions, ["experiment_id", "task_id", "training_id"])

unique_id_values_feature_importance = _dropdown_options(df_feature_importances, ["fi_type"])

unique_id_values_optuna = _dropdown_options(df_optuna, ["experiment_id", "task_id", "model_type"])

# Prediction vs Ground Truth

In [None]:
@interact(
    experiment_id=Dropdown(options=unique_id_values["experiment_id"], description="Experiment ID:"),
    task_id=Dropdown(options=unique_id_values["task_id"], description="Task ID:"),
    training_id=Dropdown(options=unique_id_values["training_id"], description="Split ID:"),
)
def plot_predictions_vs_ground_truth(experiment_id, task_id, training_id):
    """Scatter predictions against ground truth for one selection.

    Args:
        experiment_id: dropdown value; converted with ``int`` before filtering.
        task_id: dropdown value; converted with ``int`` before filtering.
        training_id: dropdown value; compared to the column as given.

    Displays a layered Altair chart: the selected points coloured by the
    ``split`` column, plus a dashed diagonal as the perfect-prediction reference.
    """
    selection = (
        (pl.col("experiment_id") == int(experiment_id))
        & (pl.col("task_id") == int(task_id))
        & (pl.col("training_id") == training_id)
    )
    points_df = df_predictions.filter(selection)

    # End points of the diagonal: the target range of ALL loaded predictions
    # (not only the current selection).
    all_targets = df_predictions["target"]
    lowest, highest = all_targets.min(), all_targets.max()
    diagonal_df = pl.DataFrame({"x": [lowest, highest], "y": [lowest, highest]})

    # Scatter layer for the selected predictions.
    scatter_layer = alt.Chart(points_df).mark_point()
    scatter_layer = scatter_layer.encode(
        x=alt.X("target", title="Ground truth"),
        y=alt.Y("prediction", title="Prediction"),
        color="split",
    )

    # Dashed black y = x reference layer.
    diagonal_layer = alt.Chart(diagonal_df).mark_line(strokeDash=[6, 4], color="black")
    diagonal_layer = diagonal_layer.encode(x="x", y="y")

    # Overlay both layers, then apply size and axis font settings to the result.
    layered = scatter_layer + diagonal_layer
    layered = layered.properties(width=600, height=400)
    layered = layered.configure_axis(titleFontSize=14, labelFontSize=12)

    display(layered)

# Feature Importance

In [None]:
@interact(
    experiment_id=Dropdown(options=unique_id_values["experiment_id"], description="Experiment ID:"),
    task_id=Dropdown(options=unique_id_values["task_id"], description="Task ID:"),
    training_id=Dropdown(options=unique_id_values["training_id"], description="Split ID:"),
    fi_type=Dropdown(options=unique_id_values_feature_importance["fi_type"], description="FI Type:"),
)
def plot_feature_importance(experiment_id, task_id, training_id, fi_type):
    """Show a bar chart of feature importances for one selection.

    Args:
        experiment_id: dropdown value; converted with ``int`` before filtering.
        task_id: dropdown value; converted with ``int`` before filtering.
        training_id: dropdown value; compared to the column as given.
        fi_type: feature-importance type to display.

    Bars are ordered along the x axis by the ``importance`` field, descending.
    """
    selection = (
        (pl.col("experiment_id") == int(experiment_id))
        & (pl.col("task_id") == int(task_id))
        & (pl.col("training_id") == training_id)
        & (pl.col("fi_type") == fi_type)
    )
    importances_df = df_feature_importances.filter(selection)

    # One bar per feature, ordered by importance (largest first).
    feature_axis = alt.X(
        "feature",
        title="Feature",
        sort=alt.SortField("importance", order="descending"),
    )
    importance_axis = alt.Y("importance", title="Importance")

    bars = alt.Chart(importances_df).mark_bar()
    bars = bars.encode(x=feature_axis, y=importance_axis, tooltip=["feature", "importance"])
    bars = bars.properties(title="Feature Importance", width=600, height=400)

    display(bars)

# Optuna Insights

### Number of Unique Trials by Model Type

In [None]:
# Count the distinct trials of every (experiment, task, model type) combination.
df_chart_optuna_count = (
    df_optuna.group_by(["experiment_id", "task_id", "model_type"])
    .agg(trial_count=pl.col("trial").n_unique())
    .sort(["task_id", "experiment_id"])
)

# Single-panel template: one bar per model type, coloured by model type.
# The legend is switched off because the x axis already names the model types.
base = (
    alt.Chart(df_chart_optuna_count)
    .mark_bar()
    .encode(
        color=alt.Color("model_type:N", legend=None),
        x=alt.X("model_type:N", axis=alt.Axis(labelAngle=-45), title="Model Type"),
        y=alt.Y("trial_count:Q", title="Number of Unique Trials"),
    )
    .properties(height=120, width=180)
)

# Repeat the template in a grid (rows = tasks, columns = experiments) and
# set the spacing between the panels.
chart_optuna_count = (
    base.facet(row="task_id:N", column="experiment_id:N")
    .properties(title="Number of Unique Trials by Model Type, Task ID, and Experiment ID")
    .configure_facet(spacing=10)
)
display(chart_optuna_count)

### Optuna Trials: Objective Value and Best Value

In [None]:
def get_best_optuna_trials(df, direction="maximize", order_by="trial"):
    """Return the rows whose ``value`` equals the best value seen so far.

    Args:
        df: polars DataFrame with a ``value`` column.
        direction: ``"maximize"`` tracks the running maximum; any other value
            tracks the running minimum.
        order_by: column that defines the chronological order of the rows.
            The frame is sorted by it before the running best is computed,
            because rows loaded from several parquet files are not guaranteed
            to arrive in trial order. Pass ``None`` to keep the incoming row
            order; the sort is also skipped when the column is absent.

    Returns:
        polars DataFrame with the same columns as ``df``, restricted to the
        rows that match the running best (ties with the current best are kept).
    """
    if order_by is not None and order_by in df.columns:
        df = df.sort(order_by)

    value = pl.col("value")
    running_best = value.cum_max() if direction == "maximize" else value.cum_min()

    # Rows with a null value never equal the running best and are dropped.
    return df.filter(value == running_best)


@interact(
    experiment_id=Dropdown(options=unique_id_values["experiment_id"], description="Experiment ID:"),
    task_id=Dropdown(options=unique_id_values["task_id"], description="Task ID:"),
)
def plot_optuna_trials(experiment_id, task_id):
    """Plot every trial's objective value and the running best value.

    Args:
        experiment_id: dropdown value; converted with ``int`` before filtering.
        task_id: dropdown value; converted with ``int`` before filtering.

    Displays a scatter of ``value`` per ``trial`` (coloured by ``model_type``)
    overlaid with a green line through the trials that match the running
    minimum.
    """
    # Only the three plotted columns are kept and de-duplicated, so each
    # distinct (trial, value, model_type) point is embedded in the chart once
    # even when the source holds several rows per trial. Sorting by trial makes
    # the running best follow trial order rather than file/row order.
    df_trials = (
        df_optuna.filter(
            (pl.col("experiment_id") == int(experiment_id)) & (pl.col("task_id") == int(task_id))
        )
        .select(["trial", "value", "model_type"])
        .unique()
        .sort("trial")
    )

    # The objective is treated as a quantity to minimise.
    df_best_trials = get_best_optuna_trials(df_trials, "minimize")

    # NOTE(review): the log scale assumes strictly positive objective values -- confirm for your metric.
    value_axis = alt.Y("value:Q", scale=alt.Scale(type="log"))

    # Scatter plot of all objective values
    scatter = (
        alt.Chart(df_trials)
        .mark_point(size=60)
        .encode(
            x="trial:Q",
            y=value_axis,
            color=alt.Color("model_type:N", legend=alt.Legend(title="Model Type")),
            tooltip=["trial", "value", "model_type"],
        )
        .properties(width=600, height=400)
    )

    # Line through the best values found so far
    line = alt.Chart(df_best_trials).mark_line(color="green").encode(x="trial:Q", y=value_axis)

    # Combine the scatter and line plots
    chart_optuna_best_value = (scatter + line).properties(title="Optuna Trials: Objective Value and Best Value")

    display(chart_optuna_best_value)

### Optuna Hyperparameters

In [None]:
@interact(
    experiment_id=Dropdown(options=unique_id_values["experiment_id"], description="Experiment ID:"),
    task_id=Dropdown(options=unique_id_values["task_id"], description="Task ID:"),
    model_type=Dropdown(options=unique_id_values_optuna["model_type"], description="Model:"),
)
def plot_optuna_hyperparameters(experiment_id, task_id, model_type):
    """Plot the objective value against each hyperparameter of one model type.

    Args:
        experiment_id: dropdown value; converted with ``int`` before filtering.
        task_id: dropdown value; converted with ``int`` before filtering.
        model_type: model type whose hyperparameters are shown.

    Displays a grid (two panels per row) with one scatter panel per
    hyperparameter, coloured by trial number. When the selection has no
    records, a short message is printed instead of an empty chart.
    """
    df_optuna_hp = df_optuna.filter(
        (pl.col("experiment_id") == int(experiment_id))
        & (pl.col("task_id") == int(task_id))
        & (pl.col("model_type") == model_type)
    )

    # Null names are dropped: they cannot be sorted together with strings and
    # would not yield a meaningful panel.
    param_list = sorted(df_optuna_hp.get_column("hyper_param").drop_nulls().unique().to_list())

    # The model dropdown lists every model type in the study, so a given
    # experiment/task may simply have no trials for the selected model.
    if not param_list:
        print("No Optuna hyperparameter records found for the selected experiment, task and model.")
        return

    plots_per_row = 2

    base_optuna_hp = (
        alt.Chart(df_optuna_hp.to_pandas())  # convert to pandas for Altair
        .mark_point()
        .encode(
            x=alt.X("param_value:Q", title="Parameter Value"),
            y=alt.Y("value:Q", title="Target Metric"),
            color=alt.Color(
                "trial:Q",
                scale=alt.Scale(scheme="blues"),
                legend=alt.Legend(title="Trial"),
            ),
            tooltip=["hyper_param", "param_value", "value", "trial"],
        )
    )

    # One panel per hyperparameter, each showing only that hyperparameter's rows.
    panels = [
        base_optuna_hp.transform_filter(alt.datum.hyper_param == param).properties(
            title=param, width=300, height=200
        )
        for param in param_list
    ]

    # Lay the panels out row by row, `plots_per_row` panels per row.
    grid_rows = [
        alt.hconcat(*panels[start : start + plots_per_row])
        for start in range(0, len(panels), plots_per_row)
    ]

    # Each panel gets its own colour scale.
    final_chart_optuna_hp = alt.vconcat(*grid_rows).resolve_scale(color="independent")

    display(final_chart_optuna_hp)