In [None]:
import polars as pl
import plotly.express as px
from src.settings import Settings
from pathlib import Path

# Project settings object; its `artifacts_dir` attribute is read further down
# to locate the saved predictions-vs-actuals parquet files.
settings = Settings()

In [None]:
# Load the cached feature and target tables from the local parquet cache.
features_df = pl.read_parquet(source="cache/df_features.parquet")
targets_df = pl.read_parquet(source="cache/df_targets.parquet")

In [None]:
# One frame per distinct `episode_id`, in order of first appearance, with the
# `episode_id` key column removed from every frame.
episodes = targets_df.partition_by(
    "episode_id",
    maintain_order=True,
    include_key=False,
)

# 1) Target distribution over each episode in the dataset

In [None]:
# Half-open window [FIRST_EPISODE_PLOTTED, LAST_EPISODE_PLOTTED) of episode
# indices to plot. Adjust these to look at a different range of episodes.
FIRST_EPISODE_PLOTTED = 0
LAST_EPISODE_PLOTTED = 5

# Slice the window up front rather than enumerating every episode and
# filtering on the index; `start=` keeps each title on the episode's original
# index in `episodes`.
for i, episode in enumerate(
    episodes[FIRST_EPISODE_PLOTTED:LAST_EPISODE_PLOTTED],
    start=FIRST_EPISODE_PLOTTED,
):
    # No `x` is given, so plotly treats the frame as wide-form data and
    # histograms its columns together.
    px.histogram(episode, nbins=1000, title=f"Episode {i}").show()

# 2) Target distribution across actions for any observation in any episode

In [None]:
# Selection knobs: which episode to inspect, how many rows form one chunk,
# and which chunk the next cell plots.
EPISODE_INDEX = 0
NUMBER_ACTIONS_SAMPLED = 50
OBSERVATION_INDEX = 5

episode_df = episodes[EPISODE_INDEX]

# Cut the episode into consecutive chunks of NUMBER_ACTIONS_SAMPLED rows; the
# final chunk is shorter when the height is not an exact multiple.
# NOTE(review): this assumes every observation occupies a contiguous run of
# NUMBER_ACTIONS_SAMPLED rows in the targets table — confirm against the
# code that wrote the cache.
chunk_starts = range(0, episode_df.height, NUMBER_ACTIONS_SAMPLED)
partitions = [
    episode_df.slice(start, NUMBER_ACTIONS_SAMPLED) for start in chunk_starts
]

In [None]:
# Histogram of the chunk selected by OBSERVATION_INDEX within the chosen
# episode; the title records both indices so the figure stands on its own.
observation_frame = partitions[OBSERVATION_INDEX]
observation_title = f"Episode {EPISODE_INDEX} - Observation {OBSERVATION_INDEX}"

observation_fig = px.histogram(observation_frame, nbins=50, title=observation_title)
observation_fig.show()

# 3) Visualise predictions vs actuals (test and train)

In [None]:
# Both predictions-vs-actuals tables sit in the same sub-folder of the
# artifacts directory, so build that folder path once.
preds_dir = settings.artifacts_dir / Path("sequential_regressor")

df_test = pl.read_parquet(preds_dir / "preds_vs_actuals_test.parquet")
df_train = pl.read_parquet(preds_dir / "preds_vs_actuals_train.parquet")

In [None]:
fig = px.histogram(
    data_frame=df_test,
    x=["target", "y_pred"],
    nbins=1000,
    opacity=0.5,
    title="Histogram of predictions & actuals test distributions",
)

fig.update_layout(barmode="overlay")

fig.show()

In [None]:
fig = px.histogram(
    data_frame=df_train,
    x=["target", "y_pred"],
    nbins=1000,
    opacity=0.5,
    title="Histogram of predictions & actuals train distributions",
)

fig.update_layout(barmode="overlay")

fig.show()