In [9]:
import agent
from environment import Agent, Environment
from planner import RoutePlanner
from simulator import Simulator

In [10]:
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(agent.LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(e, update_delay=0, display=False)  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    sim.run(n_trials=100)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line
    # for i in a.qt.items(): print(str(i).replace('(','').replace(')','').replace('\'','').replace(' ',''))

Simulator.run(): Trial 0
Environment.reset(): Trial set up with start = (4, 6), destination = (1, 1), deadline = 40
RoutePlanner.route_to(): destination = (1, 1)
Environment.step(): Primary agent ran out of time! Trial aborted.
Simulator.run(): Trial 1
Environment.reset(): Trial set up with start = (3, 5), destination = (3, 1), deadline = 20
RoutePlanner.route_to(): destination = (3, 1)
Environment.act(): Primary agent has reached destination!
Simulator.run(): Trial 2
Environment.reset(): Trial set up with start = (4, 2), destination = (6, 6), deadline = 30
RoutePlanner.route_to(): destination = (6, 6)
Environment.act(): Primary agent has reached destination!
Simulator.run(): Trial 3
Environment.reset(): Trial set up with start = (4, 3), destination = (6, 5), deadline = 20
RoutePlanner.route_to(): destination = (6, 5)
Environment.act(): Primary agent has reached destination!
Simulator.run(): Trial 4
Environment.reset(): Trial set up with start = (2, 6), destination = (8, 2), deadline =

In [14]:
import numpy as np
for i in range(0,100,10):
    print ('reward ' + str(np.mean(a.reward_tracker[i:i+10])))
    print ('penalty ' + str(np.mean(a.penalty_tracker[i:i+10])))

reward 20.8
penalty 2.5
reward 21.5
penalty 0.4
reward 22.1
penalty 1.7
reward 20.1
penalty 2.5
reward 21.6
penalty 1.7
reward 25.75
penalty 3.5
reward 29.05
penalty 4.1
reward 24.6
penalty 3.1
reward 23.95
penalty 2.2
reward 21.95
penalty 1.7


In [13]:
import numpy as np
print (np.mean(a.penalty_tracker[0:10]))
print (np.mean(a.penalty_tracker[10:20]))
print (np.mean(a.penalty_tracker[20:30]))
print (np.mean(a.penalty_tracker[30:40]))
print (np.mean(a.penalty_tracker[40:50]))
print (np.mean(a.penalty_tracker[50:60]))
print (np.mean(a.penalty_tracker[60:70]))
print (np.mean(a.penalty_tracker[70:80]))
print (np.mean(a.penalty_tracker[90:100]))

2.5
0.4
1.7
2.5
1.7
3.5
4.1
3.1
1.7


In [5]:
# turn qt into df
for i in a.qt.items():
a.qt

{(('forward', 'red', None, 'forward'), None): 3.3480934210936932,
 (('left', 'red', 'left', None), 'forward'): 0.1250118608076429,
 (('left', 'red', 'left', None), None): 4.264597805442614,
 (('forward', 'green', 'left', 'right'), None): 0,
 (('forward', 'red', 'forward', 'left'), 'right'): 0,
 (('left', 'green', None, 'left'), None): 0.0,
 (('left', 'green', 'right', 'forward'), 'forward'): 0,
 (('forward', 'red', 'right', 'forward'), 'left'): 0,
 (('forward', 'green', 'forward', 'right'), 'left'): 0,
 (('right', 'green', None, 'forward'), 'left'): 0,
 (('left', 'red', 'left', None), 'right'): 0.7672508103102624,
 (('left', 'red', 'forward', 'left'), None): 0,
 (('forward', 'red', 'forward', None), None): 0,
 (('right', 'red', 'left', 'left'), 'forward'): 0,
 (('forward', 'red', 'right', None), 'forward'): 0.7507782436370812,
 (('forward', 'green', None, 'left'), 'forward'): 9.948527457861813,
 (('right', 'green', 'forward', 'right'), None): 0,
 (('left', 'green', 'right', 'forward'),

In [9]:
to_str = lambda x: str(x).replace('(','').replace(')','').replace('\'','').replace(' ','')

In [22]:
arr = []
for i in map(to_str, a.qt.items()):
    arr.append(i.split(','))

In [23]:
arr

[['forward', 'red', 'None', 'forward', 'None', '3.678748210927025'],
 ['left', 'red', 'left', 'None', 'forward', '0'],
 ['left', 'red', 'left', 'None', 'None', '4.025578411370011'],
 ['forward', 'green', 'left', 'right', 'None', '0'],
 ['forward', 'red', 'forward', 'left', 'right', '0'],
 ['left', 'green', 'None', 'left', 'None', '0.49005492241889165'],
 ['left', 'green', 'right', 'forward', 'forward', '0'],
 ['forward', 'red', 'right', 'forward', 'left', '0'],
 ['forward', 'green', 'forward', 'right', 'left', '0'],
 ['right', 'green', 'None', 'forward', 'left', '1.0355756747201827'],
 ['left', 'red', 'left', 'None', 'right', '0'],
 ['left', 'red', 'forward', 'left', 'None', '0'],
 ['forward', 'red', 'forward', 'None', 'None', '7.037651954668828'],
 ['right', 'red', 'left', 'left', 'forward', '0'],
 ['forward', 'red', 'right', 'None', 'forward', '0'],
 ['forward', 'green', 'None', 'left', 'forward', '0'],
 ['right', 'green', 'forward', 'right', 'None', '0'],
 ['left', 'green', 'right',

In [21]:
map(to_str, a.qt)

<map at 0x8a4e0f0>

In [24]:
pd.DataFrame(arr)

Unnamed: 0,0,1,2,3,4,5
0,forward,red,,forward,,3.678748210927025
1,left,red,left,,forward,0
2,left,red,left,,,4.025578411370011
3,forward,green,left,right,,0
4,forward,red,forward,left,right,0
5,left,green,,left,,0.49005492241889165
6,left,green,right,forward,forward,0
7,forward,red,right,forward,left,0
8,forward,green,forward,right,left,0
9,right,green,,forward,left,1.0355756747201827
