In [None]:
from parser import get_records
import pandas as pd
from pathlib import Path

# Directory holding the joined-log fixture files; each log loaded in this
# notebook is resolved relative to it (the path is relative to the notebook's
# working directory).
root = Path("../../external_parser/unit_tests/test_files/valid_joined_logs")

# CB

## Load records

In [None]:
# Load everything get_records yields for the CB fixture log into a DataFrame
# and preview the first rows.
cb_log_path = root / "average_reward_100_interactions.fb"
records = pd.DataFrame(get_records(cb_log_path))
records.head()

## Get decisions and outcomes

In [None]:
# Split the parsed log by record type: "CB" rows are decisions, "Outcome" rows
# carry rewards.
decisions = records[records["type"] == "CB"]
outcomes = records[records["type"] == "Outcome"]

# Expand each record's "message" payload into columns. The column is read
# directly instead of looping over iterrows(), which would build a throwaway
# Series for every row just to pick out one field.
decisions = (
    pd.DataFrame(decisions["message"].tolist())
    # Keep the row position among the decision records as an explicit "order"
    # column before re-indexing by id.
    .reset_index()
    .rename(columns={"index": "order"})
    .set_index(["id"])
    .sort_values("order")
)
# Collapse the outcomes to one mean reward per id.
outcomes = (
    pd.DataFrame(outcomes["message"].tolist())[["id", "reward"]]
    .groupby("id")
    .mean()
)

In [None]:
# Preview the first five decisions (indexed by id, sorted by "order").
decisions.head(n=5)

In [None]:
# Preview the first five per-id mean rewards.
outcomes.head(n=5)

## Join

In [None]:
# Attach the per-id mean reward to every decision. The left join keeps
# decisions that have no matching outcome (their reward is NaN).
joined = (
    decisions.join(outcomes[["reward"]], how="left")
    .sort_values("order")
    .assign(
        # "a": first entry of "actions" shifted down by one; "p": first entry of
        # "probs". Derived column-wise with Series.map rather than a row-wise
        # DataFrame.apply, which would materialise every row as a Series.
        # NOTE(review): presumably the chosen action converted from 1-based to
        # 0-based and its probability — confirm against the log schema.
        a=lambda df: df["actions"].map(lambda actions: actions[0] - 1),
        p=lambda df: df["probs"].map(lambda probs: probs[0]),
    )
)
joined.head()

# Multistep

## Load records

In [None]:
# Load everything get_records yields for the multistep fixture log into a
# DataFrame and preview the first rows.
multistep_log_path = root / "multistep_2_episodes.fb"
records = pd.DataFrame(get_records(multistep_log_path))
records.head()

In [None]:
# Split the parsed log by record type: "MultiStep" rows are decisions,
# "Outcome" rows carry rewards.
decisions = records[records["type"] == "MultiStep"]
outcomes = records[records["type"] == "Outcome"]

# Expand each record's "message" payload into columns. The column is read
# directly instead of looping over iterrows(), which would build a throwaway
# Series for every row just to pick out one field.
decisions = (
    pd.DataFrame(decisions["message"].tolist())
    # The payload is indexed below by its own "index" column, so the positional
    # index produced by reset_index() is the "level_0" column; keep it as
    # "order".
    .reset_index()
    .rename(columns={"level_0": "order"})
    .set_index(["id", "index"])
    .sort_values("order")
)
outcomes = pd.DataFrame(outcomes["message"].tolist())

# Outcomes with no "index" value are averaged per id; outcomes that do have one
# are averaged per (id, index).
is_episodic = outcomes["index"].isna()
outcomes_episodic = (
    outcomes.loc[is_episodic, ["id", "reward"]].groupby(["id"]).mean()
)
outcomes_per_step = (
    outcomes.loc[~is_episodic, ["id", "index", "reward"]]
    .groupby(["id", "index"])
    .mean()
)

In [None]:
# Display the full decisions frame, indexed by (id, index) and sorted by "order".
decisions

In [None]:
# Display the mean reward per id for outcomes that have no "index" value.
outcomes_episodic

In [None]:
# Display the mean reward per (id, index) for outcomes that have an "index" value.
outcomes_per_step

## Join

In [None]:
# Attach both reward views to every decision step:
#   * outcomes_episodic is keyed by id, so it is matched on the "id" index level;
#   * outcomes_per_step is keyed by (id, index), so it is matched on the full index.
# Left joins keep steps that have no matching outcome (their reward is NaN).
# rsuffix only applies when a column name clashes: unless decisions already has
# a "reward" column, the episodic reward stays "reward" and the per-step one
# becomes "reward_per_step".
joined = (
    decisions.join(
        outcomes_episodic[["reward"]], on="id", how="left", rsuffix="_episodic"
    )
    .join(outcomes_per_step[["reward"]], how="left", rsuffix="_per_step")
    .sort_values("order")
    .assign(
        # "a": first entry of "actions" shifted down by one; "p": first entry of
        # "probs". Derived column-wise with Series.map rather than a row-wise
        # DataFrame.apply, which would materialise every row as a Series.
        # NOTE(review): presumably the chosen action converted from 1-based to
        # 0-based and its probability — confirm against the log schema.
        a=lambda df: df["actions"].map(lambda actions: actions[0] - 1),
        p=lambda df: df["probs"].map(lambda probs: probs[0]),
    )
)
joined.head()