In [1]:
import torch
import pandas as pd 
import numpy as np 
import altair as alt 
print(torch.__version__)

import os
from gym_runner.gym_runner import GymRunner
from gym_runner.q_func_approx import QFuncSmallThreelayer, QFuncLargeTwoLayer, QFuncMedThreelayer
from gym_runner.agents.q_learning_agent import  QLearningAgentExperienceReplay


1.10.0


In [2]:
# Seed the global PyTorch and NumPy generators before anything stochastic is
# built, so a Restart & Run All starts from the same RNG state every time.
# NOTE(review): whether GymRunner, the environment and the agent draw all of
# their randomness from these two generators is not visible in this notebook;
# confirm, and seed the environment too if gym_runner exposes a way to do so.
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

# Runner for the LunarLander-v2 environment, with metric display switched on.
runner = GymRunner("LunarLander-v2", display_metrics=True)

# Sizes read from the environment: `action_space.n` and the first dimension of
# the observation space. Both are passed to the Q-function and the agent.
num_actions = runner.env.action_space.n
state_dim = runner.env.observation_space.shape[0]

In [3]:
# Q-function approximator sized to the environment's action/state dimensions.
# The optimizer and loss are selected by name ('Adam', 'mse').
# NOTE(review): `alpha` is presumably the optimizer's learning rate; confirm
# against QFuncMedThreelayer.
q_func = QFuncMedThreelayer(
    num_actions=num_actions,
    state_dim=state_dim,
    optimizer="Adam",
    loss_func="mse",
    alpha=0.01,
)

In [4]:
# Q-learning agent with experience replay, wrapping the approximator built
# earlier in the notebook.
# NOTE(review): `batch_size` is presumably the number of replayed transitions
# per update; confirm against QLearningAgentExperienceReplay.
agent = QLearningAgentExperienceReplay(
    q_func,
    num_actions=num_actions,
    state_dim=state_dim,
    batch_size=32,
)

In [5]:
# Train the agent with the runner's default settings and keep what `train`
# returns; a later cell turns it into an (episode, reward) DataFrame.
train_results = runner.train(agent=agent)

[H[2JEpsilon:  0.1375201748252333
Current Reward:  -565.0524969790993
Episode:  990


In [6]:
# Run the agent for 100 episodes via `runner.attempt` and keep what it returns;
# a later cell turns it into an (episode, reward) DataFrame.
test_rewards = runner.attempt(agent, num_episodes=100)

In [7]:
def _to_reward_frame(rewards):
    """Return `rewards` as a DataFrame with columns ``episode`` and ``reward``.

    The input is wrapped in a DataFrame and its index is promoted to the first
    column, which is then labelled ``episode``; the value column is labelled
    ``reward``.

    A frame that already has exactly these two columns is returned unchanged.
    This keeps the cell idempotent: without the guard, a second run would call
    ``reset_index()`` on the converted frame, produce three columns, and fail
    when two column names are assigned.
    """
    expected_columns = ["episode", "reward"]
    if isinstance(rewards, pd.DataFrame) and list(rewards.columns) == expected_columns:
        return rewards

    frame = pd.DataFrame(rewards).reset_index()
    frame.columns = expected_columns
    return frame


# The names are rebound in place because the plotting cell reads these two
# variables; the guard in the helper makes re-running this cell safe.
train_results = _to_reward_frame(train_results)
test_rewards = _to_reward_frame(test_rewards)

In [8]:
def _reward_scatter(rewards, title):
    """Build a point chart of ``reward`` against ``episode`` with the given title."""
    return (
        alt.Chart(rewards)
        .mark_point()
        .encode(x="episode", y="reward")
        .properties(title=title)
    )


train_chart = _reward_scatter(train_results, 'Accumulated rewards while training')
test_chart = _reward_scatter(test_rewards, 'Accumulated rewards while attempting')

# `|` is Altair's horizontal concatenation: training on the left, attempts on
# the right.
charts = train_chart | test_chart



In [9]:
# Style the titles of the combined chart. This call is the cell's last
# expression, so the configured chart is what the notebook renders.
charts.configure_title(
    fontSize=16,
    font="Courier",
    anchor="middle",
    color="black",
)