In [None]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# project modules (e.g. `utils`, `src.features.build_features`) are picked up
# without restarting the kernel.
%load_ext autoreload 
%autoreload 2

from tqdm import tqdm
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import shap
import mlflow

import utils as U
from src.features.build_features import FEATURE_MATRIX_COLUMNS

In [None]:
# Registry identifier ("<name>/<version>") of the model whose evaluation dump
# is analysed in this notebook.
model_name = "LGBMRegressor/9"

# Evaluation output for that model; the path is relative to the notebook's
# starting working directory.
df = pd.read_csv("../../models/evaluation/LGBMRegressor_9.csv")

# Magnitude of the residual for every row.
df["absolute_error"] = (df["y_pred"] - df["y_true"]).abs()

# Long-format view: one row per (attraction, variable) with variable being
# either "y_true" or "y_pred".
melted_df = pd.melt(
    df[["attraction", "y_true", "y_pred"]],
    id_vars=["attraction"],
)

# Keep only rows where both target and prediction are present, and renumber
# them 0..n-1 so row labels coincide with row positions.
nonan_df = (
    df
    .dropna(subset=["y_true", "y_pred"])
    .reset_index(drop=True)
)

In [None]:
%cd ../..
mlflow.set_tracking_uri(U.MLFLOW_TRACKING_URI)

model = mlflow.sklearn.load_model("models:/LGBMRegressor/9")
preprocessing = model[:-1]
lgbm = model[-1]

In [None]:
# Columns added for evaluation only; they are not model inputs and must be
# removed before the rows go through the preprocessing steps.
non_feature_columns = ["y_true", "y_pred", "absolute_error"]
raw_features = nonan_df.drop(columns=non_feature_columns)

# Preprocessed feature matrix, one row per row of `nonan_df`.
X_test_p = preprocessing.transform(raw_features)


In [None]:
# Sanity check: dimensions of the preprocessed feature matrix.
X_test_p.shape

In [None]:
# Load SHAP's JavaScript bundle into the notebook. The interactive force
# plots rendered further down show "Visualization omitted, Javascript library
# not loaded!" unless this has been called in the current session.
shap.initjs()

# Explain the final estimator only; it therefore has to be fed the already
# preprocessed matrix, not the raw evaluation frame.
explainer = shap.Explainer(lgbm)

# Per-row, per-feature contributions for the preprocessed evaluation rows.
shap_values = explainer.shap_values(X_test_p)

In [None]:
# The cells below use the same row number to index `shap_values` and the
# evaluation rows, so both must contain exactly the same number of rows.
assert shap_values.shape[0] == nonan_df.shape[0], (
    "shap_values and nonan_df are misaligned: "
    f"{shap_values.shape[0]} vs {nonan_df.shape[0]} rows"
)
nonan_df.shape, shap_values.shape

In [None]:
# Identification and error columns shown when inspecting individual
# predictions (kept as a list so it can be used directly as a column selector).
COLS_NO_WEATHER = [
    "attraction",
    "date",
    "half_hour_time",
    "y_pred",
    "y_true",
    "absolute_error",
]

In [None]:
# Rank the evaluation rows from largest to smallest absolute error and keep
# the 20 worst ones, restricted to the identification/error columns.
# The original row labels are preserved, so they can be used to look the rows
# up again later.
ranked_by_error = nonan_df.sort_values("absolute_error", ascending=False)
worst_predictions_df = ranked_by_error.iloc[:20].loc[:, COLS_NO_WEATHER]

In [None]:
# Display the 20 predictions with the largest absolute error.
worst_predictions_df

In [None]:
# Local explanation for evaluation row 25406 (presumably picked from the
# worst-predictions table -- confirm).
# The feature row is selected with `.loc`, exactly as the other force-plot
# cells do: on a DataFrame, `X_test_p[25406]` is a *column* lookup and raises
# KeyError instead of returning the row.
shap.force_plot(
    explainer.expected_value,
    shap_values[25406],
    features=X_test_p.loc[25406],
    feature_names=FEATURE_MATRIX_COLUMNS,
)

In [None]:
# Local explanation for evaluation row 9155: how each feature pushes the
# prediction away from the explainer's expected value.
shap.force_plot(
    base_value=explainer.expected_value,
    shap_values=shap_values[9155],
    features=X_test_p.loc[9155],
    feature_names=FEATURE_MATRIX_COLUMNS,
)

In [None]:
# Local explanation for evaluation row 9156: how each feature pushes the
# prediction away from the explainer's expected value.
shap.force_plot(
    base_value=explainer.expected_value,
    shap_values=shap_values[9156],
    features=X_test_p.loc[9156],
    feature_names=FEATURE_MATRIX_COLUMNS,
)

In [None]:
# Local explanation for evaluation row 9158: how each feature pushes the
# prediction away from the explainer's expected value.
shap.force_plot(
    base_value=explainer.expected_value,
    shap_values=shap_values[9158],
    features=X_test_p.loc[9158],
    feature_names=FEATURE_MATRIX_COLUMNS,
)

In [None]:
# Local explanation for evaluation row 9157: how each feature pushes the
# prediction away from the explainer's expected value.
shap.force_plot(
    base_value=explainer.expected_value,
    shap_values=shap_values[9157],
    features=X_test_p.loc[9157],
    feature_names=FEATURE_MATRIX_COLUMNS,
)