In [None]:
%load_ext autoreload 
%autoreload 2

from tqdm import tqdm
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import shap
import mlflow

import utils as U

In [None]:
# Identifier of the model under review; it is interpolated into the plot titles further down.
model_name = "LGBMRegressor/11"

# Per-row predictions for that model (presumably its exported test-set evaluation; confirm).
# Later cells rely on the columns `attraction`, `y_true` and `y_pred`.
df = pd.read_csv("../../models/evaluation/LGBMRegressor_11.csv")

## End-to-End (E2E) Performance

In [None]:
# Absolute deviation between prediction and ground truth, stored as an extra column on df.
df["absolute_error"] = (df["y_pred"] - df["y_true"]).abs()

# Long format: one row per (attraction, variable) pair, where variable is y_true or y_pred.
value_columns = ["attraction", "y_true", "y_pred"]
melted_df = pd.melt(df[value_columns], id_vars=["attraction"])

# Keep only the rows where both the target and the prediction are present.
has_both = df[["y_true", "y_pred"]].notna().all(axis="columns")
nonan_df = df.loc[has_both]

In [None]:
# Metrics come from the project helper and use only rows with both y_true and y_pred present.
# The result is indexed below with the keys 'r2', 'rmse' and 'mae'.
reg_metrics = U.regression_metrics(nonan_df.y_true, nonan_df.y_pred)

# Availability of target / prediction per row, reported as a share of ALL rows in df.
true_missing = df.y_true.isna()
pred_missing = df.y_pred.isna()
n_rows = len(df)

print(f"missing prediction: {(~true_missing & pred_missing).sum() / n_rows:.2%}")
print(f"spurious prediction: {(true_missing & ~pred_missing).sum() / n_rows:.2%}")
print(f"y_true and y_pred NaN: {(true_missing & pred_missing).sum() / n_rows:.2%}")

print(f"r2 score: {reg_metrics['r2']:.3f}")
print(f"root mean squared error: {reg_metrics['rmse']:.2f}")
print(f"mean absolute error: {reg_metrics['mae']:.2f}")

In [None]:
# Box plot of the absolute error column, one box per attraction.
abs_error_title = (
    f"Absolute E2E Prediction Errors of {model_name} on the test set<br>"
    "<sup>each point is the absolute prediction error for a specific date, time and attraction</sup>"
)
abs_error_fig = px.box(
    df,
    x="attraction",
    y="absolute_error",
    title=abs_error_title,
    labels={"absolute_error": "absolute error (min)"},
)
# Leave the figure as the cell's last expression so the notebook renders it.
abs_error_fig


In [None]:
# Side-by-side boxes of y_true and y_pred per attraction, built from the long-format frame.
pred_vs_actual_fig = px.box(
    melted_df,
    x="attraction",
    y="value",
    color="variable",
    title="E2E predicted vs actual waiting times per attraction",
    labels={"value": "waiting time (min)", "variable": "type"},
    height=500,
)
# Leave the figure as the cell's last expression so the notebook renders it.
pred_vs_actual_fig


In [None]:
# First group of attractions handed to the project's scatter-plot helper
# (presumably the attractions to draw, one panel each; see utils.regression_scatter_plot).
scatter_attractions_first = [
    "Chiapas DIE Wasserbahn",
    "River Quest",
    "Crazy Bats",
    "Taron",
    "F.L.Y.",
]
U.regression_scatter_plot(
    nonan_df,
    scatter_attractions_first,
    f"E2E {model_name} (test set)",
    col_wrap=3,
    width=1200,
    height=800,
)

In [None]:
# Second group of attractions handed to the project's scatter-plot helper
# (presumably the attractions to draw, one panel each; see utils.regression_scatter_plot).
scatter_attractions_second = [
    "Bolles Flugschule",
    "Feng Ju Palace",
    "Verrücktes Hotel Tartüff",
    "Würmling Express",
    "Black Mamba",
    "Wellenflug",
]
U.regression_scatter_plot(
    nonan_df,
    scatter_attractions_second,
    f"E2E {model_name} (test set)",
    col_wrap=3,
    width=1200,
    height=800,
)
