In [2]:
import torch
from gym_runner.gym_runner import GymRunner
from gym_runner.q_func_approx import QFuncMedThreelayer, QFuncFree
from gym_runner.agents.sarsa_agent import SarsaAgent
from gym_runner.agents.q_learning_agent import QLearningAgent, QLearningAgentExperienceReplay
import pandas as pd
import altair as alt



In [3]:
# Build the runner around the CartPole-v1 environment, with metric display on.
runner = GymRunner("CartPole-v1", display_metrics=True)

# Sizes the function approximator and the agent are constructed with:
# first entry of the observation-space shape, and the discrete action count.
state_dim = runner.env.observation_space.shape[0]
num_actions = runner.env.action_space.n

In [4]:
# Q-function approximator sized to the environment. The optimizer and loss are
# selected by name; `alpha` is presumably the learning rate -- confirm against
# QFuncMedThreelayer.
q_func = QFuncMedThreelayer(
    state_dim=state_dim,
    num_actions=num_actions,
    optimizer="Adam",
    loss_func="mse",
    alpha=0.01,
)

In [5]:
# Agent wrapping the approximator above; `batch_size` is passed through to
# QLearningAgentExperienceReplay (presumably the replay mini-batch size --
# confirm against the agent implementation).
agent = QLearningAgentExperienceReplay(
    q_func,
    state_dim=state_dim,
    num_actions=num_actions,
    batch_size=32,
)

In [6]:
# Train the agent with the runner's default settings and keep whatever
# `train` returns for the plotting cells further down.
train_results = runner.train(agent=agent)

Epsilon:  0.1375201748252333
Current Reward:  37.0
Episode:  990


In [7]:
# Run the agent for 100 episodes via `runner.attempt` and keep the returned
# rewards for plotting.
test_rewards = runner.attempt(
    agent,
    num_episodes=100,
)

In [8]:
def _as_reward_frame(rewards):
    """Return `rewards` as a DataFrame with columns ``episode`` and ``reward``.

    A fresh result is wrapped in a DataFrame and its positional index is
    turned into the ``episode`` column. A frame that already has exactly
    those two columns is returned untouched, so re-running this cell is a
    no-op instead of failing with a "Length mismatch" ValueError when the
    index is reset a second time.

    Assumes `rewards` holds one value per episode (a single data column
    once wrapped in a DataFrame) -- TODO confirm against GymRunner.
    """
    already_converted = (
        isinstance(rewards, pd.DataFrame)
        and list(rewards.columns) == ["episode", "reward"]
    )
    if already_converted:
        return rewards

    frame = pd.DataFrame(rewards).reset_index()
    frame.columns = ["episode", "reward"]
    return frame


# The names are intentionally kept: the plotting cells read `train_results`
# and `test_rewards` as DataFrames with `episode` / `reward` columns.
train_results = _as_reward_frame(train_results)
test_rewards = _as_reward_frame(test_rewards)

In [30]:
def _reward_scatter(rewards, title):
    """Point chart of `reward` against `episode` for `rewards`, titled `title`."""
    points = alt.Chart(rewards).mark_point()
    return points.encode(x="episode", y="reward").properties(title=title)


train_chart = _reward_scatter(train_results, 'Accumulated rewards while training')
test_chart = _reward_scatter(test_rewards, 'Accumulated rewards while attempting')

# `|` places the two charts side by side in one compound chart.
charts = train_chart | test_chart

In [33]:
# Apply a shared title style to the combined chart. This stays the last
# expression in the cell so the notebook renders the configured chart.
charts.configure_title(
    font='Courier',
    fontSize=16,
    color='black',
    anchor='middle',
)