In [1]:
import torch
from gym_runner.gym_runner import GymRunner
from gym_runner.q_func_approx import QFuncMedThreelayer, QFuncFree
from gym_runner.agents.sarsa_agent import SarsaAgent
from gym_runner.agents.q_learning_agent import QLearningAgent, QLearningAgentExperienceReplay
import pandas as pd
import altair as alt



In [2]:
# Build the runner for the "CartPole-v1" environment with metric display on.
runner = GymRunner("CartPole-v1", display_metrics=True)

# Both sizes are read from the runner's environment and reused below when the
# Q-function and the agent are constructed.
num_actions, state_dim = (
    runner.env.action_space.n,
    runner.env.observation_space.shape[0],
)

In [3]:
# Q-function approximator sized from the environment dimensions read above.
# NOTE(review): `alpha` is presumably the optimizer step size -- confirm
# against QFuncMedThreelayer before tuning it.
q_func = QFuncMedThreelayer(
    num_actions=num_actions,
    state_dim=state_dim,
    optimizer="Adam",
    loss_func="mse",
    alpha=0.01,
)

In [4]:
# Q-learning agent with experience replay, wrapping the Q-function built above.
# NOTE(review): `batch_size` presumably sets how many stored transitions are
# drawn per update -- confirm against QLearningAgentExperienceReplay.
agent = QLearningAgentExperienceReplay(
    q_func,
    num_actions=num_actions,
    state_dim=state_dim,
    batch_size=32,
)

In [5]:
# Train the agent with the runner's default settings; the returned value is
# kept for the reward table and plot built further down.
train_results = runner.train(agent=agent)

Epsilon:  0.1375201748252333
Current Reward:  237.0
Episode:  990


In [6]:
# Run the trained agent for a fixed number of episodes and keep the result.
# NOTE(review): presumably `attempt` evaluates without further learning --
# confirm against GymRunner.
n_test_episodes = 100
test_rewards = runner.attempt(agent, num_episodes=n_test_episodes)

In [7]:
def rewards_to_frame(rewards):
    """Return per-episode rewards as an ``episode``/``reward`` DataFrame.

    Parameters
    ----------
    rewards : sequence or pandas.DataFrame
        Either the raw one-value-per-episode rewards, or a frame that this
        function has already produced.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``episode`` (the positional index of each entry) and
        ``reward`` (the entry itself).
    """
    expected_columns = ["episode", "reward"]
    # This cell rebinds its own inputs, so running it a second time feeds the
    # converted frame back in. Without this guard reset_index() would add a
    # third column and the two-name rename below would raise a ValueError.
    if isinstance(rewards, pd.DataFrame) and list(rewards.columns) == expected_columns:
        return rewards
    frame = pd.DataFrame(rewards).reset_index()
    frame.columns = expected_columns
    return frame


# Same names as before so the plotting cell below keeps working unchanged.
train_results = rewards_to_frame(train_results)
test_rewards = rewards_to_frame(test_rewards)

In [8]:
# Both panels use identical axes, so each one gets a title; without them the
# training and test scatter plots cannot be told apart when shown side by side.
train_chart = (
    alt.Chart(train_results)
    .mark_point()
    .encode(x="episode", y="reward")
    .properties(title="Training reward per episode")
)
test_chart = (
    alt.Chart(test_rewards)
    .mark_point()
    .encode(x="episode", y="reward")
    .properties(title="Test reward per episode")
)

# `|` concatenates the two charts horizontally; as the last expression in the
# cell, the combined chart is what gets rendered.
train_chart | test_chart


In [9]:
# Scratch example: three length-3 tensors collected in a plain Python list.
# The name `a` is kept because the next cell stacks this list.
a = [torch.Tensor(row) for row in ([1, 2, 3], [4, 5, 6], [7, 8, 9])]
a

[tensor([1., 2., 3.]), tensor([4., 5., 6.]), tensor([7., 8., 9.])]

In [10]:
# Join the list of equally shaped tensors along a new leading dimension
# (dim=0 is torch.stack's default, written out here for clarity).
torch.stack(a, dim=0)

tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])