In [66]:
import gym
import numpy as np

import time

In [67]:
import sys
import os

# Make the parent of the current working directory importable; the
# `models`, `control` and `utils` packages imported further down are
# presumably located there (verify against the repository layout).
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path.
_parent_dir = os.path.abspath('../')
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

# Keep the notebook namespace clean.
del sys, os, _parent_dir

In [68]:
import matplotlib.pyplot as plt

In [69]:
# LaTeX rendering in graphs
from distutils.spawn import find_executable
if find_executable('latex'):
    plt.rc('text', usetex=True)

plt.rc('font', family='serif')

# High resolution graphs
%config InlineBackend.figure_format = 'retina'

In [70]:
import torch

In [71]:
# Enable IPython's autoreload extension in mode 2 so that edits to the
# imported project modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [72]:
import models.rnn as rnns
import models.mlp as mlps
import models.linear as linears
import control.agents as agents
import control.environments as env

In [73]:
from utils.notifications import Slack

In [74]:
import copy

# Setup

In [80]:
# Gym environment id handed to gym.make() when the Environment wrapper is built.
# NOTE(review): recent gym releases register Taxi-v3 rather than Taxi-v2 —
# confirm that the installed gym version still provides this id.
env_name = 'Taxi-v2'
# Alternative environment (disabled):
#env_name = 'Breakout-ram-v0'

In [81]:
# Wrap the gym environment in the project's Environment helper.
# The agent is attached later, once the model has been built from the
# input dimension and action count that this wrapper exposes.
environment = env.Environment(
    environment=gym.make(env_name),  # raw gym environment
    agent=None,  # assigned after the agent is constructed
    verbose=True,
    max_steps=200,  # read back as environment.max_steps to cap manual rollouts
    capacity=100,  # presumably the replay-memory capacity — TODO confirm
    representation_method='one_hot_encoding'
)

In [82]:
# Linear Q-function approximator sized from the environment wrapper.
model = linears.Linear(
    input_dimension=environment.get_input_dimension(),
    n_actions=environment.n_actions,
)

# Plain SGD without momentum; the Adam variant is kept for reference.
optimiser = torch.optim.SGD(model.parameters(), lr=100, momentum=0)
#optimiser = torch.optim.Adam(model.parameters(), lr=1)

# SARSA agent with an undiscounted return (gamma = 1) and temperature 10.
agent = agents.DQNAgent(
    model,
    optimiser,
    gamma=1.,
    temperature=10,
    algorithm='sarsa',
    n_actions=environment.n_actions,
)

# Attach the agent to the environment wrapper, which was created without one.
environment.agent = agent

In [18]:
# Compare the Q-values of ONE start state before and after loading the saved
# weights. The state is reset and encoded a single time: the previous version
# called reset() inside every print, so each printout could describe a
# different start state and the before/after comparison was confounded.
initial_state = environment.state_representation(environment.environment.reset())

print(environment.agent.q(initial_state))
# NOTE(review): the checkpoint lives under 'mlp' while `model` is a
# linears.Linear — confirm that this state dict matches the model.
model.load_state_dict(torch.load('../saved/taxi/mlp/state_dict.pth'))
agent.commit()
print(environment.agent.q(initial_state))
# Boltzmann output for the same state.
print(environment.boltzmann(initial_state))

[-152.5814  -151.62679 -152.77357 -152.7423  -151.98988 -152.57753]
[-152.5814  -151.62679 -152.77357 -152.7423  -151.98988 -152.57753]
[0.123688   0.32130238 0.10206328 0.10530633 0.22347164 0.12416831]


# Experiment

## Boltzmann

In [83]:
# Ten rounds of: commit the agent, run an exploration segment of size 100,
# then run as many batch updates of size 100 as the replay memory holds.
q_estimation = []  # NOTE(review): never filled in this cell, but read by a later plotting cell
returns = []

iterator = environment.tqdm(range(10), ascii=True, ncols=100)

with iterator as progress:
    for _round in progress:

        environment.agent.commit()
        segment_result = environment.exploration_segment(100)
        returns.append(segment_result)

        # Number of batches is fixed before the updates start.
        n_batches = len(environment.replay_memory) // 100
        for _batch in range(n_batches):
            environment.batch(100)


  0%|                                                                        | 0/10 [00:00<?, ?it/s]

tensor([ 0.0179,  0.0171, -0.0369,  0.0416, -0.0014,  0.0317],
       grad_fn=<SqueezeBackward3>)
[-0.96207259 -1.03343709 -0.99063242 -1.03538522 -0.97150672 -0.99074415]
2
tensor([ 0.0179,  0.0171, -0.9973,  0.0416, -0.0014,  0.0317],
       grad_fn=<SqueezeBackward3>)
[-0.97159221 -0.99026757 -0.98455094 -1.0332068  -1.04196193 -0.99212728]
2
tensor([ 0.0179,  0.0171, -1.0023,  0.0416, -0.0014,  0.0317],
       grad_fn=<SqueezeBackward3>)
[ -9.96996519 -10.01584831 -10.0068027  -10.0414812   -9.97307436
 -10.03748195]
4
tensor([ 0.0179,  0.0171, -1.0023,  0.0416, -1.0014,  0.0317],
       grad_fn=<SqueezeBackward3>)
[ -9.97640599 -10.01985015  -9.96128758  -9.96761143  -9.98607564
  -9.99611175]
5
tensor([ 0.0179,  0.0171, -1.0023,  0.0416, -1.0014, -0.9683],
       grad_fn=<SqueezeBackward3>)
[-0.9829094  -0.99245035 -1.014271   -0.98509856 -1.03267581 -1.03107423]
3
tensor([ 0.0179,  0.0171, -1.0023, -0.9584, -1.0014, -0.9683],
       grad_fn=<SqueezeBackward3>)
[ -9.9829094   -9.

 10%|######4                                                         | 1/10 [00:03<00:27,  3.10s/it]


0
tensor([-1.0033, -0.9737, -0.9981, -0.9814, -9.9932, -9.9938],
       grad_fn=<SqueezeBackward3>)
[-0.96207259 -1.03343709 -0.99063242 -1.03538522 -0.97150672 -0.99074415]
0


 20%|############8                                                   | 2/10 [00:05<00:23,  2.97s/it]

tensor([-0.9973, -0.9737, -0.9981, -0.9814, -9.9932, -9.9938],
       grad_fn=<SqueezeBackward3>)
[ -2.00438106  -1.9814291   -1.99373287  -1.99377388 -10.9937973
  -8.97786713]
1
tensor([ -0.9973,  -1.9737,  -0.9981,  -0.9814,  -9.9932, -10.3355],
       grad_fn=<SqueezeBackward3>)
[-1.99020624 -2.01022875 -2.0053755  -1.99034154 -8.02492237 -6.03153419]
0
tensor([ -1.9950,  -1.9737,  -0.9981,  -1.3314,  -9.9932, -10.3355],
       grad_fn=<SqueezeBackward3>)
[ -1.98299634  -2.00537229  -1.99379724  -1.9923507  -11.00018692
  -8.9780817 ]
0
tensor([ -2.3274,  -1.9737,  -0.9981,  -1.3314,  -9.9932, -10.3355],
       grad_fn=<SqueezeBackward3>)
[ -2.00438106  -1.9814291   -1.99373287  -1.99377388 -10.9937973
  -8.97786713]
2
tensor([ -2.3274,  -1.9737,  -1.9938,  -1.3314,  -9.9932, -10.3355],
       grad_fn=<SqueezeBackward3>)
[ -1.99431062  -2.00622702  -1.98605126  -1.99218613  -8.97274828
 -10.98605156]
1
tensor([ -2.3274,  -2.3210,  -1.9938,  -1.3314,  -9.9932, -10.3355],
       grad

 30%|###################2                                            | 3/10 [00:08<00:19,  2.75s/it]

tensor([ -2.4723,  -2.4919,  -2.4778,  -2.0165, -10.6512, -10.3355],
       grad_fn=<SqueezeBackward3>)
[ -3.32475543  -3.2699573   -3.21537089  -3.40432787 -10.98052502
  -7.03883505]
2
tensor([ -2.4723,  -2.4919,  -3.3617,  -2.0165, -10.6512, -10.3355],
       grad_fn=<SqueezeBackward3>)
[-12.32475543 -12.2699573  -12.21537089 -12.40432787 -19.98052502
 -16.03883505]
5
tensor([ -2.4723,  -2.4919,  -3.3617,  -2.0165, -11.6512, -11.3355],
       grad_fn=<SqueezeBackward3>)
[-10.99384701 -11.00151372 -11.00325668  -9.970619   -11.02420485
 -10.00516883]
5
tensor([ -2.4723,  -2.4919,  -3.3617,  -2.0165, -11.6512, -10.2291],
       grad_fn=<SqueezeBackward3>)
[-10.99384701 -11.00151372 -11.00325668  -9.970619   -11.02420485
 -10.00516883]
5
tensor([ -2.4723,  -2.4919,  -3.5850,  -2.0165, -11.6512, -10.6664],
       grad_fn=<SqueezeBackward3>)
[-11.89959979 -12.3569715  -12.05792093 -12.45619988 -21.2365551
 -20.98528099]
4
tensor([ -2.4723,  -2.4919,  -3.5850,  -2.0165, -12.3455, -10.6664

 40%|#########################6                                      | 4/10 [00:10<00:15,  2.56s/it]

tensor([ -3.2955,  -2.4919,  -3.2526,  -2.6979, -12.0122, -10.6664],
       grad_fn=<SqueezeBackward3>)
[-4.42699814 -3.79743171 -3.72416282 -2.83877122 -7.99114037 -6.02328968]
2
tensor([ -3.2955,  -2.4919,  -3.8530,  -2.6979, -12.0122, -10.6664],
       grad_fn=<SqueezeBackward3>)
[ -4.33536696  -3.62695217  -4.70980144  -3.61470008  -3.48044348
 -12.00377369]
2
tensor([ -3.2955,  -2.4919,  -4.1034,  -2.6979, -12.0122, -10.6664],
       grad_fn=<SqueezeBackward3>)
[ -4.36791468  -4.10691857  -4.34365368  -4.48067164 -13.51244164
 -12.43747997]
1
tensor([ -3.2955,  -3.4919,  -4.1034,  -2.6979, -12.0122, -10.6664],
       grad_fn=<SqueezeBackward3>)
[ -4.40190411  -4.02730536  -4.81847334  -4.4457705  -13.27981472
 -11.72342873]
2
tensor([ -3.2955,  -3.4919,  -4.6500,  -2.6979, -12.0122, -10.6664],
       grad_fn=<SqueezeBackward3>)
[ -4.40190411  -4.02730536  -4.81847334  -4.4457705  -13.27981472
 -11.72342873]
0


 50%|################################                                | 5/10 [00:12<00:12,  2.55s/it]

tensor([ -4.2508,  -3.4919,  -4.6500,  -2.6979, -12.0122, -10.6664],
       grad_fn=<SqueezeBackward3>)
[-4.32992697 -3.01303077 -4.21582341 -2.33394098 -4.9547255  -6.03572655]
3
tensor([ -4.2508,  -3.4919,  -4.6500,  -3.3564, -12.0122, -10.6664],
       grad_fn=<SqueezeBackward3>)
[ -5.03548527  -5.40680933  -4.00966144  -5.18695211 -11.99377441
 -13.74424458]
2
tensor([ -4.2508,  -3.4919,  -5.1565,  -3.3564, -12.0122, -10.6664],
       grad_fn=<SqueezeBackward3>)
[-14.45217037 -12.71421814 -14.43859768 -14.21963406 -21.02434444
 -23.06114578]
5
tensor([ -4.2508,  -3.4919,  -5.1565,  -3.3564, -12.0122, -11.6664],
       grad_fn=<SqueezeBackward3>)
[ -5.58132029  -5.39091969  -5.73089504  -4.84982896 -10.97138882
 -11.00168324]
3
tensor([ -4.2508,  -3.4919,  -5.1565,  -4.3564, -12.0122, -11.6664],
       grad_fn=<SqueezeBackward3>)
[ -3.48080134  -4.79577732  -2.83728313  -3.77461457 -10.97450733
 -12.94515991]
0
tensor([ -4.3006,  -3.4919,  -5.1565,  -4.3564, -12.0122, -11.6664],
   

 60%|######################################4                         | 6/10 [00:14<00:09,  2.47s/it]

tensor([ -4.2226,  -5.2234,  -5.1565,  -4.3564, -12.0122, -11.6664],
       grad_fn=<SqueezeBackward3>)
[-14.37650728 -14.65556908 -15.04046869 -15.80024576 -19.99132919
 -20.97054195]
5
tensor([ -4.2226,  -5.2234,  -5.1565,  -4.3564, -12.0122, -12.6664],
       grad_fn=<SqueezeBackward3>)
[ -6.53044128  -6.24065971  -5.80093527  -6.98840237 -14.54314137
 -12.03274155]
0
tensor([ -5.2226,  -5.2234,  -5.1565,  -4.3564, -12.0122, -12.6664],
       grad_fn=<SqueezeBackward3>)
[-13.41071415 -10.99377388 -11.0077014  -12.32188368 -16.02184486
 -17.97669744]
5
tensor([ -5.2226,  -5.2234,  -5.1565,  -4.3564, -12.0122, -12.7331],
       grad_fn=<SqueezeBackward3>)
[ -4.7610805   -5.51906395  -5.84268713  -4.97319102 -10.99509048
 -11.99341011]
1
tensor([ -5.2226,  -5.5905,  -5.1565,  -4.3564, -12.0122, -12.7331],
       grad_fn=<SqueezeBackward3>)
[ -7.04162836  -6.9701438   -6.83094454  -6.85673904 -10.99511147
 -14.03395653]
0
tensor([ -6.2226,  -5.5905,  -5.1565,  -4.3564, -12.0122, -12.733

 70%|############################################8                   | 7/10 [00:17<00:07,  2.52s/it]

tensor([ -7.1998,  -7.0768,  -6.6205,  -4.8983, -14.3513, -15.4462],
       grad_fn=<SqueezeBackward3>)
[-2.67137372 -4.11631441 -3.42951798 -3.99855709 -9.03431034 -7.03336906]
1
tensor([ -7.1998,  -6.5695,  -6.6205,  -4.8983, -14.3513, -15.4462],
       grad_fn=<SqueezeBackward3>)
[ -2.16703653  -5.44837189  -3.39439487  -3.44041491 -13.57333565
 -12.93572426]
1
tensor([ -7.1998,  -6.2362,  -6.6205,  -4.8983, -14.3513, -15.4462],
       grad_fn=<SqueezeBackward3>)
[ -6.97852373  -6.9195075   -4.6609509   -4.96625853 -14.23791885
 -12.95784187]
1
tensor([ -7.1998,  -6.4738,  -6.6205,  -4.8983, -14.3513, -15.4462],
       grad_fn=<SqueezeBackward3>)
[ -5.11658621  -4.08440614  -3.99391437  -3.79289222  -7.96647167
 -11.95944691]
2
tensor([ -7.1998,  -6.4738,  -6.2872,  -4.8983, -14.3513, -15.4462],
       grad_fn=<SqueezeBackward3>)
[ -6.78404617  -6.67534161  -6.09040356  -6.32221031 -16.35254002
 -14.18592548]
0


 80%|###################################################2            | 8/10 [00:19<00:04,  2.48s/it]

tensor([ -7.0635,  -6.4738,  -6.2872,  -4.8983, -14.3513, -15.4462],
       grad_fn=<SqueezeBackward3>)
[ -7.73499918  -7.95159101  -5.35261631  -7.76408339 -13.02970982
  -9.9946022 ]
3
tensor([ -7.0635,  -6.4738,  -6.2872,  -5.8073, -14.3513, -15.4462],
       grad_fn=<SqueezeBackward3>)
[-15.65279007 -16.47212458 -17.12094212 -15.87608433 -23.08955479
 -22.95127869]
5
tensor([ -7.0635,  -6.4738,  -6.2872,  -5.8073, -14.3513, -16.2189],
       grad_fn=<SqueezeBackward3>)
[ -6.60701752  -7.67869282  -7.34293413  -7.70548964 -14.76137257
 -16.68464375]
0
tensor([ -7.5769,  -6.4738,  -6.2872,  -5.8073, -14.3513, -16.2189],
       grad_fn=<SqueezeBackward3>)
[ -7.30608368  -7.14953899  -7.23261309  -7.97120619 -14.02409077
 -10.00884247]
1
tensor([ -7.5769,  -7.3516,  -6.2872,  -5.8073, -14.3513, -16.2189],
       grad_fn=<SqueezeBackward3>)
[-15.65279007 -16.47212458 -17.12094212 -15.87608433 -23.08955479
 -22.95127869]
5
tensor([ -7.5769,  -7.3516,  -6.2872,  -5.8073, -14.3513, -16.593

 90%|#########################################################6      | 9/10 [00:22<00:02,  2.45s/it]

tensor([ -7.7080,  -7.8237,  -7.4823,  -7.8274, -14.3513, -16.7533],
       grad_fn=<SqueezeBackward3>)
[ -8.43062735  -9.09108639  -8.92885923  -8.54824877 -17.14206886
 -13.34205818]
3
tensor([ -7.7080,  -7.8237,  -7.4823,  -8.7147, -14.3513, -16.7533],
       grad_fn=<SqueezeBackward3>)
[ -9.07594299  -8.56095362  -9.00840282  -8.99640083 -17.4834919
 -17.55815315]
1
tensor([ -7.7080,  -8.7799,  -7.4823,  -8.7147, -14.3513, -16.7533],
       grad_fn=<SqueezeBackward3>)
[ -8.3654933   -8.91656399  -7.88962841  -8.63383722 -16.9597578
 -11.82638645]
2
tensor([ -7.7080,  -8.7799,  -8.3641,  -8.7147, -14.3513, -16.7533],
       grad_fn=<SqueezeBackward3>)
[ -8.00485849  -8.20733118  -7.08977842  -8.50148821 -15.27268696
 -13.99038887]
1
tensor([ -7.7080,  -8.6756,  -8.3641,  -8.7147, -14.3513, -16.7533],
       grad_fn=<SqueezeBackward3>)
[-17.89432573 -17.78849459 -17.39315033 -17.87327194 -27.09758949
 -26.55487442]
4
tensor([ -7.7080,  -8.6756,  -8.3641,  -8.7147, -15.3513, -16.7533]

100%|###############################################################| 10/10 [00:25<00:00,  2.52s/it]

tensor([ -8.8999,  -8.9361,  -9.2072,  -9.1079, -16.3365, -17.3209],
       grad_fn=<SqueezeBackward3>)
[ -7.86847067  -9.25383091  -9.58302498 -10.03838539 -14.60417652
 -17.02233887]
1
tensor([ -8.8999,  -9.4302,  -9.2072,  -9.1079, -16.3365, -17.3209],
       grad_fn=<SqueezeBackward3>)
[ -9.7943697   -7.09196615  -7.83079863  -7.38142252  -6.25300121
 -18.52843094]
3
tensor([ -8.8999,  -9.4302,  -9.2072,  -8.7223, -16.3365, -17.3209],
       grad_fn=<SqueezeBackward3>)
[ -9.72178555  -8.88359976  -7.68906498  -7.99149656 -13.69407654
 -15.49077892]
2
tensor([ -8.8999,  -9.4302,  -9.2390,  -8.7223, -16.3365, -17.3209],
       grad_fn=<SqueezeBackward3>)
[ -8.75641203  -8.92146826  -6.7844348   -8.28318262 -15.66818142
 -17.7232933 ]
0
tensor([ -8.9435,  -9.4302,  -9.2390,  -8.7223, -16.3365, -17.3209],
       grad_fn=<SqueezeBackward3>)
[ -9.70413589  -9.06889057 -10.17998886  -9.7267704  -14.95728683
 -18.40656853]
0
tensor([ -9.7217,  -9.4302,  -9.2390,  -8.7223, -16.3365, -17.320




In [36]:
# Roll out one episode by hand, printing the visited state and its Q-values.
environment.reset()

done = False
full_return = 0.
# Running discount factor gamma ** counter. The previous recursion
# `full_return = gamma * full_return + reward` discounted the EARLIEST rewards
# the most, which only equals the discounted return when gamma == 1.
discount = 1.

counter = 0
while not done and counter < environment.max_steps:

    # Apply the action selected on the previous iteration.
    s, reward, done, info = environment.step(environment.action)

    # Boltzmann action probabilities for the new state, then sample from them.
    p, q = environment.boltzmann(s, return_q=True)
    a = environment.sample_action(p)

    environment.state, environment.action = s, a

    full_return += discount * reward
    discount *= environment.agent.gamma
    counter += 1

    # With a one-hot state representation the argmax is the state's index.
    print(np.argmax(s))
    print(environment.agent.q(s))


329
[-30.709248 -31.531038 -30.673918 -31.595798 -32.558308 -31.055958]
429
[-31.233753 -30.915483 -18.555916 -29.451986 -35.169575 -38.4846  ]
449
[-31.233753 -30.93541  -30.999357 -32.537136 -32.77127  -38.474052]
349
[-30.96778  -30.647896 -28.851017 -13.912269 -29.423548 -28.960613]
329
[-30.709248 -31.531038 -30.673918 -31.595798 -32.558308 -31.055958]
429
[-31.233753 -30.915483 -18.555916 -29.451986 -35.169575 -38.4846  ]
449
[-31.233753 -30.93541  -30.999357 -32.537136 -32.77127  -38.474052]
349
[-30.96778  -30.647896 -28.851017 -13.912269 -29.423548 -28.960613]
329
[-30.709248 -31.531038 -30.673918 -31.595798 -32.558308 -31.055958]
349
[-30.96778  -30.647896 -28.851017 -13.912269 -29.423548 -28.960613]
329
[-30.709248 -31.531038 -30.673918 -31.595798 -32.558308 -31.055958]
429
[-31.233753 -30.915483 -18.555916 -29.451986 -35.169575 -38.4846  ]
449
[-31.233753 -30.93541  -30.999357 -32.537136 -32.77127  -38.474052]
449
[-31.233753 -30.93541  -30.999357 -32.537136 -32.77127  -38.

In [26]:
# Inspect how the Q-values evolve along an evaluation episode.
n_episodes = 3
# NOTE: this mutates the shared agent; later cells see temperature 0.1
# until they set it again.
agent.temperature = 0.1

plt.figure()

for i in range(n_episodes):

    full_return, counter, observations = environment.evaluation_episode(render=False,return_observations=True)

    q = []

    observation_old = None
    q_old = None

    for observation in observations:

        observation_new = environment.state_representation(observation)
        # Reuse the representation computed above instead of encoding the
        # same observation a second time.
        q_new = environment.agent.q(observation_new)

        if observation_old is not None:
            same_obs = np.array_equal(observation_old, observation_new)
            same_q = np.array_equal(q_old, q_new)
            # Report steps where both the state and its Q-values changed.
            if not same_obs and not same_q:
                print("great")

        observation_old = observation_new
        q_old = q_new

        q.append(q_new)

    # One row per step, one column per Q-value returned by the agent.
    q = np.asarray(q)
    print(np.std(q, axis=0))
    print(q[:,0])
    plt.plot(q[:,0], label='0')
    # Only the first episode is inspected; remove this `break` to overlay all
    # n_episodes. Further columns can be added with plt.plot(q[:,k], label=str(k)).
    break

# The curve carries a label, so draw the legend and name the axes.
plt.xlabel('Step')
plt.ylabel('Q-value')
plt.legend(title='Action')
plt.show()

AttributeError: 'TaxiEnv' object has no attribute '_get_obs'

<Figure size 432x288 with 0 Axes>

In [15]:
# Plot column 1 of the recorded Q estimates, one curve per recorded episode.
plt.figure()
# Slice rather than index by range(n_episodes): if fewer than n_episodes
# entries were recorded (including none), plot what exists instead of
# raising IndexError.
for episode_q in q_estimation[:n_episodes]:
    x = np.asarray(episode_q)
    plt.plot(x[:,1])

plt.xlabel('Step')
plt.ylabel('Q estimate (column 1)')
plt.show()

NameError: name 'q_estimation' is not defined

<Figure size 432x288 with 0 Axes>

## Testing

In [55]:
# Render a single exploration episode at temperature 100.
# NOTE: this mutates the shared agent; the new temperature persists for any
# cell executed afterwards.
# presumably a high temperature flattens the Boltzmann policy towards
# uniform action choice — confirm against environment.boltzmann
agent.temperature = 100
for _ in range(1):
    environment.exploration_episode(render=True)

+---------+
|R: | : :G|
| : : : :[43m [0m|
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+

+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :G|
| : : : :[43m [0m|
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (South)
191
+---------+
|R: | : :G|
| : : : :[43m [0m|
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (East)
191
+---------+
|R: | : :G|
| : : : : |
| : : : :[43m [0m|
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (South)
291
+---------+
|R: | : :G|
| : : : : |
| : : : :[43m [0m|
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (Pickup)
291
+---------+
|R: | : :G|
| : : : : |
| : : : : |
| | : | :[43m [0m|
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (South)
391
+---------+
|R: | : :G|
| : : : : |
| : : : : |
| | : | :[43m [0m|
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (East)
391

+---------+
|R: | : :G|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m|[43m [0m: |[35mB[0m: |
+---------+
  (Dropoff)
431
+---------+
|R: | : :G|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m|[43m [0m: |[35mB[0m: |
+---------+
  (South)
431
+---------+
|R: | : :G|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m|[43m [0m: |[35mB[0m: |
+---------+
  (Dropoff)
431
+---------+
|R: | : :G|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m|[43m [0m: |[35mB[0m: |
+---------+
  (Dropoff)
431
+---------+
|R: | : :G|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m|[43m [0m: |[35mB[0m: |
+---------+
  (West)
431
+---------+
|R: | : :G|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| :[43m [0m|[35mB[0m: |
+---------+
  (East)
451
+---------+
|R: | : :G|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| :[43m [0m|[35mB[0m: |
+---------+
  (South)
451
+---------+
|R: | : :G|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| :[43m [0m|[35mB[0m: |
+-----

+---------+
|R: | : :G|
| : : : :[43m [0m|
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (East)
191
+---------+
|R: | : :G|
| : : : :[43m [0m|
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (Pickup)
191
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (East)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (Pickup)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (Dropoff)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
 

In [50]:
# Render five evaluation episodes using whatever temperature the agent
# currently has (set by previously executed cells).
for _ in range(5):
    environment.evaluation_episode(render=True)

+---------+
|[35mR[0m: | :[43m [0m:G|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+

+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92
+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92
+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92
+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92
+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92
+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92
+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92
+---------+

+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92
+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92
+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92
+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92
+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92
+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92
+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92
+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92


+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92
+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92
+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92
+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92
+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92
+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92
+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92
+---------+
|[35mR[0m: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (East)
92


+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (No

+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (No

+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (North)
91
+---------+
|R: | : :[43mG[0m|
| : : : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+
  (No

+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
212
+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
212
+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
212
+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------

+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
212
+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
212
+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
212
+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
212
+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------

+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
212
+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
212
+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
212
+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------

+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
212
+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
212
+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
212
+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------

+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
212
+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
212
+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
212
+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------

+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
212
+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
212
+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
212
+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (North)
112
+---------+
|[35mR[0m: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
212
+---------+
|[35mR[0m: | : :G|
|[43m [0m: : : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------

+---------+
|R: | : :[35mG[0m|
| : :[43m [0m: : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (North)
149
+---------+
|R: | : :[35mG[0m|
| : : : : |
| : :[43m [0m: : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (South)
249
+---------+
|R: | : :[35mG[0m|
| : :[43m [0m: : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (North)
149
+---------+
|R: | : :[35mG[0m|
| : : : : |
| : :[43m [0m: : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (South)
249
+---------+
|R: | : :[35mG[0m|
| : :[43m [0m: : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (North)
149
+---------+
|R: | : :[35mG[0m|
| : : : : |
| : :[43m [0m: : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (South)
249
+---------+
|R: | : :[35mG[0m|
| : :[43m [0m: : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (North)
149
+---------+
|R: | : :[35mG[0m|
| : : : : |
| : :[43m [0m: : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------

+---------+
|R: | : :[35mG[0m|
| : :[43m [0m: : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (North)
149
+---------+
|R: | : :[35mG[0m|
| : : : : |
| : :[43m [0m: : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (South)
249
+---------+
|R: | : :[35mG[0m|
| : :[43m [0m: : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (North)
149
+---------+
|R: | : :[35mG[0m|
| : : : : |
| : :[43m [0m: : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (South)
249
+---------+
|R: | : :[35mG[0m|
| : :[43m [0m: : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (North)
149
+---------+
|R: | : :[35mG[0m|
| : : : : |
| : :[43m [0m: : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (South)
249
+---------+
|R: | : :[35mG[0m|
| : :[43m [0m: : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (North)
149
+---------+
|R: | : :[35mG[0m|
| : : : : |
| : :[43m [0m: : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------

+---------+
|R: | : :[35mG[0m|
| : :[43m [0m: : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (North)
149
+---------+
|R: | : :[35mG[0m|
| : : : : |
| : :[43m [0m: : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (South)
249
+---------+
|R: | : :[35mG[0m|
| : :[43m [0m: : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (North)
149
+---------+
|R: | : :[35mG[0m|
| : : : : |
| : :[43m [0m: : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (South)
249
+---------+
|R: | : :[35mG[0m|
| : :[43m [0m: : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (North)
149
+---------+
|R: | : :[35mG[0m|
| : : : : |
| : :[43m [0m: : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (South)
249
+---------+
|R: | : :[35mG[0m|
| : :[43m [0m: : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+
  (North)
149
+---------+
|R: | : :[35mG[0m|
| : : : : |
| : :[43m [0m: : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------