## Pessimism Experiments

In [None]:
# Uncomment the lines below and run this cell to install neuronav and its requirements on Colab.

#!git clone https://github.com/awjuliani/neuro-nav
#!pip install ./neuro-nav[experiments_remote]

In [None]:
from neuronav.envs.grid_env import GridEnv, GridSize, GridObsType, OrientationType
from neuronav.envs.graph_env import GraphEnv
from neuronav.agents.td_agents import TDSR, TDQ
from neuronav.agents.pessimistic_agents import PessimisticTDQ, PessimisticMBV
from neuronav.envs.grid_topographies import GridTopography
from neuronav.envs.graph_structures import GraphStructure
from neuronav.utils import run_episode
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Reward layout: one +1.0 cell plus three neighbouring -1.0 cells.
reward_map = {(1, 5): 1.0}
reward_map.update({cell: -1.0 for cell in [(5, 5), (5, 4), (5, 6)]})

# Position the agent is placed at on reset.
start_pos = (9, 5)

# Create the small grid environment, reset it with the reward layout and
# agent position defined above, then draw the resulting state.
env = GridEnv(grid_size=GridSize.small)
obs = env.reset(reward_locs=reward_map, agent_pos=start_pos)
env.render()

In [None]:
# Train a PessimisticMBV agent on the grid task defined above and plot its
# learning curves (return and episode length), smoothed over fixed windows.
num_episodes = 200
num_steps = 50  # passed to run_episode as max_steps
w_value = 0.75  # forwarded to PessimisticMBV as its w_value argument
window = 20  # number of consecutive episodes averaged into one plotted point

agent = PessimisticMBV(
    env.state_size,
    env.action_space.n,
    lr=5e-2,
    poltype="egreedy",
    epsilon=0.25,
    gamma=0.95,
    w_value=w_value,
)

# Per-episode step counts and returns as reported by run_episode.
total_steps = []
tot_returns = []
for i in range(num_episodes):
    agent, steps, tot_return = run_episode(
        env, agent, max_steps=num_steps, reward_locs=reward_map, start_pos=start_pos
    )
    total_steps.append(steps)
    tot_returns.append(tot_return)

# Average over non-overlapping windows of `window` episodes. Trailing episodes
# that do not fill a whole window are dropped so the reshape always succeeds;
# with fewer than `window` episodes the curves are empty.
usable = (num_episodes // window) * window
tot_returns = np.array(tot_returns[:usable]).reshape(-1, window).mean(-1)
total_steps = np.array(total_steps[:usable]).reshape(-1, window).mean(-1)

# Plot each window mean at the episode count that closes the window, so the
# "Episodes" axis is in real episodes rather than window indices.
episode_axis = np.arange(1, len(tot_returns) + 1) * window

plt.plot(episode_axis, tot_returns)
plt.xlabel("Episodes")
plt.ylabel("Return")
plt.show()
plt.plot(episode_axis, total_steps)
plt.xlabel("Episodes")
plt.ylabel("Timesteps")
plt.show()

In [None]:
# Average agent.Q over its first axis to get one value per remaining entry
# (presumably the mean over actions for each state -- confirm against the
# agent's Q layout), and report the range of the result.
V = agent.Q.mean(axis=0)
print(V.min(), V.max())

# Draw the values as a grid_size x grid_size heatmap using the RdBu colormap
# with fixed colour limits of -1.0 and 1.0.
plt.imshow(
    V.reshape((env.grid_size, env.grid_size)),
    cmap="RdBu",
    vmin=-1.0,
    vmax=1.0,
)
plt.colorbar()