In [None]:
# Enable IPython's autoreload extension in mode 2, which re-imports all loaded
# modules before each cell runs so that edits to local packages take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import truman
from pachinko import time_period_step_agent as agents

import numpy as np
import pandas as pd
import glob

import matplotlib.pyplot as plt

# Default size (width, height in inches) for every figure created in this notebook.
plt.rcParams.update({"figure.figsize": [24, 8]})

In [None]:
# Registry holding the agent configurations that are later passed to truman.run.
agent_registry = truman.agent_registration.AgentRegistry()

# One entry per agent; each dict is expanded into the keyword arguments of a
# single register() call. Entries without "kwargs" omit that argument entirely,
# so the entry point is registered with whatever defaults register() applies.
agent_specs = [
    {
        "id": "Periodic:7step-v0",
        "entry_point": agents.Periodic,
        "kwargs": {"period_length": 7},
    },
    {
        "id": "ForgettingPeriodic:7step-defaultdecay-v0",
        "entry_point": agents.ForgettingPeriodic,
        "kwargs": {"period_length": 7},
    },
    {
        "id": "ForgettingPeriodic:7step-lessdecay-v0",
        "entry_point": agents.ForgettingPeriodic,
        "kwargs": {"period_length": 7, "exponential_decay_constant": 1 / 50},
    },
    {
        "id": "Random-v0",
        "entry_point": agents.RandomAgent,
    },
    {
        "id": "EpsilonGreedy-v0",
        "entry_point": agents.EpsilonGreedy,
    },
]

# Register in the listed order.
for agent_spec in agent_specs:
    agent_registry.register(**agent_spec)

In [None]:
# Settings handed to truman.run. Later cells read summary CSVs and parquet
# histories from this same directory.
run_settings = {"output_directory": "../../truman_store"}

# Invoke truman with the registered agents and truman's own registry
# (presumably its built-in environments — confirm against the truman docs).
truman.run(agent_registry, [truman.registry], run_settings)

In [None]:
# Shell escape: list the directory that was given to truman.run as
# output_directory, to check which files are present.
!ls ../../truman_store

In [None]:
# Gather every summary CSV found in the output directory. glob returns paths in
# arbitrary order, so they are sorted to make the row order of summary_df
# repeatable between runs and machines.
summary_paths = sorted(glob.glob("../../truman_store/*__summary.csv"))

if not summary_paths:
    # Without this guard pd.concat fails with the opaque
    # "No objects to concatenate" error.
    raise FileNotFoundError(
        "No '*__summary.csv' files found in ../../truman_store; "
        "run the truman.run cell first."
    )

# Stack all summaries into one frame with a fresh 0..n-1 index.
summary_df = pd.concat(
    [pd.read_csv(summary_path) for summary_path in summary_paths],
    ignore_index=True,
)

In [None]:
# Show the combined summary ordered by env_id first, then agent_id. The sorted
# copy is only displayed; summary_df itself is left unchanged.
summary_df.sort_values(["env_id", "agent_id"])

In [None]:
# Plot every parquet history in the store whose file name matches the
# "Matching_sin7:conv_1:0.002" pattern. glob returns paths in arbitrary order,
# so they are sorted to keep the sequence of figures stable between runs.
for parquet in sorted(glob.glob("../../truman_store/*Matching_sin7:conv_1:0.002*.parquet")):
    print(parquet)  # label the figure that follows with its source file
    history_df = pd.read_parquet(parquet)
    # Add the running total of the reward column so it can be plotted too.
    history_df["cum_reward"] = history_df["reward"].cumsum()
    truman.plot.plot(history_df, use_cols=["action", "reward", "cum_reward", "agent_ucb_selected_action"])
    plt.show()

In [None]:
# Plot every parquet history in the store whose file name matches the
# "Matching_sin7:conv_1:0.2" pattern. glob returns paths in arbitrary order,
# so they are sorted to keep the sequence of figures stable between runs.
for parquet in sorted(glob.glob("../../truman_store/*Matching_sin7:conv_1:0.2*.parquet")):
    print(parquet)  # label the figure that follows with its source file
    history_df = pd.read_parquet(parquet)
    # Add the running total of the reward column so it can be plotted too.
    history_df["cum_reward"] = history_df["reward"].cumsum()
    truman.plot.plot(history_df, use_cols=["action", "cum_reward", "agent_ucb_selected_action", "reward"])
    plt.show()

In [None]:
# Plot every parquet history in the store whose file name matches the
# "NonStationaryTrend:conv_1:0.02" pattern. glob returns paths in arbitrary
# order, so they are sorted to keep the sequence of figures stable between runs.
for parquet in sorted(glob.glob("../../truman_store/*NonStationaryTrend:conv_1:0.02*.parquet")):
    print(parquet)  # label the figure that follows with its source file
    history_df = pd.read_parquet(parquet)
    # Add the running total of the reward column so it can be plotted too.
    history_df["cum_reward"] = history_df["reward"].cumsum()
    # use_cols="all" is passed instead of an explicit column list here.
    truman.plot.plot(history_df, use_cols="all")
    plt.show()