In [16]:
import random
import math
from environment import Agent, Environment
from planner import RoutePlanner
from simulator import Simulator
# import numpy as np

class LearningAgent(Agent):
    """ An agent that learns to drive in the Smartcab world.

        When 'learning' is enabled the agent keeps a tabular Q-function
        (self.Q: state tuple -> {action: Q-value}), chooses actions
        epsilon-greedily and updates the table from the immediate reward
        only (no discount factor / future rewards).

        Parameters:
          env           - the environment the agent lives in
          learning      - whether the agent is expected to learn
          epsilon       - initial exploration factor; it decays exponentially
                          with the number of trials during training
          alpha         - learning rate used while training
          epsilon_decay - decay constant 'a' in
                          epsilon = epsilon_0 * exp(-a * trial) """

    def __init__(self, env, learning=False, epsilon=1.0, alpha=0.5,
                 epsilon_decay=0.015):
        super(LearningAgent, self).__init__(env)     # Set the agent in the environment
        self.planner = RoutePlanner(self.env, self)  # Create a route planner
        self.valid_actions = self.env.valid_actions  # The set of valid actions

        # Set parameters of the learning agent
        self.learning = learning # Whether the agent is expected to learn
        self.Q = dict()          # Q-table: {state: {action: Q-value}}
        self.epsilon = epsilon   # Current random exploration factor
        self.alpha = alpha       # Current learning factor

        # Additional class parameters.
        # The decay constant is deliberately kept separate from the learning
        # rate so that tuning one never silently changes the other.
        self.epsilon_decay = epsilon_decay
        # reset() zeroes epsilon/alpha for testing trials; remember the
        # configured values so that training trials can (re)apply them.
        self._initial_epsilon = epsilon
        self._train_alpha = alpha
        self.trial = 0           # Number of trials started so far

    def reset(self, destination=None, testing=True):
        """ The reset function is called at the beginning of each trial.
            'testing' is set to True if testing trials are being used
            once training trials have completed.

            Testing trials run with epsilon = alpha = 0 (no exploration, no
            learning). Training trials use the configured learning rate and
            epsilon = epsilon_0 * exp(-epsilon_decay * trial), where 'trial'
            counts every call to reset() (training and testing alike). """

        # Select the destination as the new location to route to
        self.planner.route_to(destination)

        self.trial += 1

        if testing:
            self.epsilon = 0.0
            self.alpha = 0.0
        else:
            self.alpha = self._train_alpha
            self.epsilon = self._initial_epsilon * math.exp(
                -(self.epsilon_decay * self.trial))

        return None

    def build_state(self):
        """ The build_state function is called when the agent requests data from the
            environment. The state is the tuple
            (next waypoint, light, oncoming traffic, traffic from the left).
            The deadline and the traffic on the right are deliberately left
            out of the state. When learning, the state is also registered in
            the Q-table before it is returned. """

        # Collect data about the environment
        waypoint = self.planner.next_waypoint() # The next waypoint
        inputs = self.env.sense(self)           # Visual input - intersection light and traffic

        state = (waypoint, inputs['light'], inputs['oncoming'], inputs['left'])

        if self.learning:
            self.createQ(state)

        return state

    def get_maxQ(self, state):
        """ The get_max_Q function is called when the agent is asked to find the
            maximum Q-value of all actions based on the 'state' the smartcab is in.
            Raises KeyError if 'state' has not been added to the Q-table. """

        return max(self.Q[state].values())

    def createQ(self, state):
        """ The createQ function is called when a state is generated by the agent.
            When learning, a state that is not yet in the Q-table gets an entry
            with an initial Q-value of 0.0 for every valid action. Known states
            are left untouched, so calling this repeatedly is harmless. """

        if self.learning and state not in self.Q:
            self.Q[state] = {action: 0.0 for action in self.valid_actions}

        return

    def choose_action(self, state):
        """ The choose_action function is called when the agent is asked to choose
            which action to take, based on the 'state' the smartcab is in.

            Not learning: a uniformly random valid action.
            Learning: with probability epsilon a uniformly random valid action,
            otherwise an action with the highest Q-value for 'state' (ties are
            broken at random). """

        # Set the agent state
        self.state = state
        self.next_waypoint = self.planner.next_waypoint()

        # Explore. The random draw is only made when learning (short-circuit).
        if not self.learning or self.epsilon > random.random():
            return random.choice(self.valid_actions)

        # Exploit: pick randomly among all actions sharing the best Q-value so
        # that ties do not always resolve to the same action.
        maxQ = self.get_maxQ(state)
        q_values = self.Q[state]
        best_actions = [candidate for candidate in self.valid_actions
                        if q_values[candidate] == maxQ]
        return random.choice(best_actions)

    def learn(self, state, action, reward):
        """ The learn function is called after the agent completes an action and
            receives a reward. This function does not consider future rewards
            when conducting learning:

                Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * reward """

        if self.learning:
            previous = self.Q[state][action]
            self.Q[state][action] = (1 - self.alpha) * previous + self.alpha * reward

        return

    def update(self):
        """ The update function is called when a time step is completed in the
            environment for a given trial. This function will build the agent
            state, choose an action, receive a reward, and learn if enabled. """

        state = self.build_state()          # Get current state
        self.createQ(state)                 # Create 'state' in Q-table (no-op if already present)
        action = self.choose_action(state)  # Choose an action
        reward = self.env.act(self, action) # Receive a reward
        self.learn(state, action, reward)   # Q-learn

        return
        

def run():
    """ Driving function for running the simulation.
        Press ESC to close the simulation, or [SPACE] to pause the simulation.

        Builds the environment, creates a Q-learning driving agent, makes it
        the primary agent with an enforced deadline and runs the simulator
        headless with metric logging, followed by 20 testing trials. """

    ##############
    # Create the environment
    # Flags:
    #   verbose     - set to True to display additional output from the simulation
    #   num_dummies - discrete number of dummy agents in the environment, default is 100
    #   grid_size   - discrete number of intersections (columns, rows), default is (8, 6)
    env = Environment(verbose=True)

    ##############
    # Create the driving agent
    # Flags:
    #   learning   - set to True to force the driving agent to use Q-learning
    #    * epsilon - continuous value for the exploration factor, default is 1
    #    * alpha   - continuous value for the learning rate, default is 0.5
    # alpha is passed explicitly: 0.015 is the learning rate the agent is
    # trained with (the simulator reports "alpha = 0.0150"), and stating it
    # here keeps that hyperparameter visible at the call site.
    agent = env.create_agent(LearningAgent, learning=True, alpha=0.015)

    ##############
    # Follow the driving agent
    # Flags:
    #   enforce_deadline - set to True to enforce a deadline metric
    env.set_primary_agent(agent, enforce_deadline=True)

    ##############
    # Create the simulation
    # Flags:
    #   update_delay - continuous time (in seconds) between actions, default is 2.0 seconds
    #   display      - set to False to disable the GUI if PyGame is enabled
    #   log_metrics  - set to True to log trial and simulation results to /logs
    #   optimized    - set to True to change the default log file name
    sim = Simulator(env, update_delay=.00015, log_metrics=True,
                    display=False, optimized=True)

    ##############
    # Run the simulator
    # Flags:
    #   tolerance  - epsilon tolerance before beginning testing, default is 0.05
    #   n_test     - discrete number of testing trials to perform, default is 0
    sim.run(n_test=20, tolerance=.02)


# Script entry point: start the simulation only when executed directly,
# not when this module is imported.
if "__main__" == __name__:
    run()



/-------------------------
| Training trial 1
\-------------------------

Environment.reset(): Trial set up with start = (5, 7), destination = (2, 4), deadline = 30
Simulating trial. . . 
epsilon = 0.9851; alpha = 0.0150

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
Environment.act() [POST]: location: (5, 2), heading: (0, 1), action: right, reward: 2.30655995489
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': 'forward', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', 'left', None), 'deadline': 30, 't': 0, 'action': 'right', 'reward': 2.30655995489312, 'waypoint': 'right'}
Agent previous state: ('right', 'green', 'left', None)
Agent followed the waypoint right. (rewarded 2.31)
97% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (5, 2), heading: (0, 1), action: for

Agent previous state: ('forward', 'green', 'left', None)
Agent idled at a green light with no oncoming traffic. (rewarded -4.45)
57% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (3, 3), heading: (0, -1), action: right, reward: 1.46112438566
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'forward', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 17, 't': 13, 'action': 'right', 'reward': 1.461124385655295, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent drove right instead of forward. (rewarded 1.46)
53% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
Environment.act() [POST]: location: (3, 3), heading: (0, -1), action: None, reward: 2.55070756475
Environment.act(): St

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.04)
7% of time remaining to reach destination.

/-------------------
| Step 28 Results
\-------------------

Environment.step(): t = 28
Environment.act() [POST]: location: (3, 3), heading: (-1, 0), action: left, reward: 0.498762442416
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'forward'), 'deadline': 2, 't': 28, 'action': 'left', 'reward': 0.49876244241608414, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'forward')
Agent followed the waypoint left. (rewarded 0.50)
3% of time remaining to reach destination.

/-------------------
| Step 29 Results
\-------------------

Environment.step(): t = 29
Environment.act() [POST]: location: (3, 2), heading: (0, -1), action: right, reward: 0.786353124052
Environment.act(): Step data: {'inputs': {

Agent previous state: ('right', 'red', None, 'right')
Agent properly idled at a red light. (rewarded 1.59)
35% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (5, 5), heading: (0, -1), action: forward, reward: -0.0978244945172
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', 'forward', None), 'deadline': 7, 't': 13, 'action': 'forward', 'reward': -0.0978244945172061, 'waypoint': 'right'}
Agent previous state: ('right', 'green', 'forward', None)
Agent drove forward instead of right. (rewarded -0.10)
30% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
Environment.act() [POST]: location: (5, 5), heading: (0, -1), action: None, reward: -5.40109278527
Environment.act(): 

Agent previous state: ('left', 'red', None, 'forward')
Agent attempted driving right through traffic and cause a minor accident. (rewarded -19.64)
72% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (7, 7), heading: (0, 1), action: forward, reward: -40.4238102744
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'forward', 'left': 'left'}, 'violation': 4, 'light': 'red', 'state': ('left', 'red', None, 'left'), 'deadline': 18, 't': 7, 'action': 'forward', 'reward': -40.42381027442065, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'left')
Agent attempted driving forward through a red light with traffic and cause a major accident. (rewarded -40.42)
68% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (7, 7), hea

Agent previous state: ('forward', 'red', None, None)
Agent attempted driving forward through a red light. (rewarded -10.08)
16% of time remaining to reach destination.

/-------------------
| Step 21 Results
\-------------------

Environment.step(): t = 21
Environment.act() [POST]: location: (8, 6), heading: (0, 1), action: right, reward: 0.00864924201037
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'right', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 4, 't': 21, 'action': 'right', 'reward': 0.008649242010366875, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent drove right instead of forward. (rewarded 0.01)
12% of time remaining to reach destination.

/-------------------
| Step 22 Results
\-------------------

Environment.step(): t = 22
Environment.act() [POST]: location: (7, 6), heading: (-1, 0), action: right, reward: 0.0167668637803
Environment.a

Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 2.22)
57% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (4, 4), heading: (-1, 0), action: right, reward: -0.00536520596642
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 17, 't': 13, 'action': 'right', 'reward': -0.005365205966415099, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent drove right instead of left. (rewarded -0.01)
53% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
Environment.act() [POST]: location: (4, 4), heading: (-1, 0), action: forward, reward: -39.527194234
Environment.act(): Step data: {'inputs': {'li

Agent previous state: ('forward', 'red', 'left', None)
Agent attempted driving forward through a red light. (rewarded -10.27)
13% of time remaining to reach destination.

/-------------------
| Step 26 Results
\-------------------

Environment.step(): t = 26
Environment.act() [POST]: location: (2, 5), heading: (0, 1), action: left, reward: 0.963368247729
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': 'right'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', 'right'), 'deadline': 4, 't': 26, 'action': 'left', 'reward': 0.9633682477285008, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'left', 'right')
Agent drove left instead of forward. (rewarded 0.96)
10% of time remaining to reach destination.

/-------------------
| Step 27 Results
\-------------------

Environment.step(): t = 27
Environment.act() [POST]: location: (3, 5), heading: (1, 0), action: left, reward: 0.119528002572
Environme

Agent previous state: ('left', 'green', 'left', None)
Agent followed the waypoint left. (rewarded 1.57)
67% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act() [POST]: location: (2, 4), heading: (0, -1), action: left, reward: 1.73611541207
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', None), 'deadline': 20, 't': 10, 'action': 'left', 'reward': 1.736115412074212, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'left', None)
Agent drove left instead of forward. (rewarded 1.74)
63% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
Environment.act() [POST]: location: (2, 4), heading: (0, -1), action: right, reward: -20.0205850153
Environment.act(): Step data: {'inputs

Agent previous state: ('left', 'green', 'left', 'left')
Agent followed the waypoint left. (rewarded 0.38)
7% of time remaining to reach destination.

/-------------------
| Step 28 Results
\-------------------

Environment.step(): t = 28
Environment.act() [POST]: location: (3, 2), heading: (0, -1), action: left, reward: -0.470347324142
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 2, 't': 28, 'action': 'left', 'reward': -0.47034732414152647, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent drove left instead of forward. (rewarded -0.47)
3% of time remaining to reach destination.

/-------------------
| Step 29 Results
\-------------------

Environment.step(): t = 29
Environment.act() [POST]: location: (2, 2), heading: (-1, 0), action: left, reward: 0.518922584751
Environment.act(): Step data: {'inputs':

Agent previous state: ('forward', 'red', None, 'forward')
Agent attempted driving forward through a red light with traffic and cause a major accident. (rewarded -40.98)
30% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
Environment.act() [POST]: location: (8, 3), heading: (-1, 0), action: forward, reward: -39.9561460176
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'forward', 'left': 'forward'}, 'violation': 4, 'light': 'red', 'state': ('forward', 'red', None, 'forward'), 'deadline': 6, 't': 14, 'action': 'forward', 'reward': -39.95614601764617, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, 'forward')
Agent attempted driving forward through a red light with traffic and cause a major accident. (rewarded -39.96)
25% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
En

Agent previous state: ('left', 'red', 'forward', None)
Agent drove right instead of left. (rewarded 0.44)
80% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (4, 5), heading: (0, -1), action: right, reward: 0.0600491731949
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'right', None), 'deadline': 28, 't': 7, 'action': 'right', 'reward': 0.06004917319492942, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'right', None)
Agent drove right instead of forward. (rewarded 0.06)
77% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (4, 5), heading: (0, -1), action: None, reward: -5.08652602056
Environment.act(): Step data: {'inputs':

Agent previous state: ('left', 'red', 'right', None)
Agent attempted driving left through a red light with traffic and cause a major accident. (rewarded -40.57)
40% of time remaining to reach destination.

/-------------------
| Step 21 Results
\-------------------

Environment.step(): t = 21
Environment.act() [POST]: location: (5, 5), heading: (-1, 0), action: None, reward: 1.30886790863
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 14, 't': 21, 'action': None, 'reward': 1.3088679086306267, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.31)
37% of time remaining to reach destination.

/-------------------
| Step 22 Results
\-------------------

Environment.step(): t = 22
Environment.act() [POST]: location: (5, 6), heading: (0, 1), action: left, reward: 1.24232024513
Environm

Agent previous state: ('right', 'green', None, None)
Agent idled at a green light with no oncoming traffic. (rewarded -4.60)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (4, 5), heading: (0, 1), action: left, reward: 1.34551451117
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, 'left'), 'deadline': 18, 't': 2, 'action': 'left', 'reward': 1.345514511174175, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, 'left')
Agent drove left instead of right. (rewarded 1.35)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (3, 5), heading: (-1, 0), action: right, reward: 1.61900318291
Environment.act(): Step dat

Agent previous state: ('left', 'red', 'left', None)
Agent attempted driving forward through a red light. (rewarded -10.45)
20% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
Environment.act() [POST]: location: (1, 4), heading: (1, 0), action: right, reward: 0.148170614797
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', 'left', None), 'deadline': 4, 't': 16, 'action': 'right', 'reward': 0.14817061479729232, 'waypoint': 'left'}
Agent previous state: ('left', 'green', 'left', None)
Agent drove right instead of left. (rewarded 0.15)
15% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Environment.step(): t = 17
Environment.act() [POST]: location: (2, 4), heading: (1, 0), action: forward, reward: 0.621697953466
Environment.act(): Step da

\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (2, 2), heading: (1, 0), action: None, reward: 1.94238622374
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 18, 't': 12, 'action': None, 'reward': 1.942386223739556, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.94)
57% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (2, 3), heading: (0, 1), action: right, reward: 1.52124605252
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 17, 't': 13, 'action': 'right', 'reward': 1.5212460525164182

Agent previous state: ('left', 'red', None, 'right')
Agent properly idled at a red light. (rewarded 2.07)
13% of time remaining to reach destination.

/-------------------
| Step 26 Results
\-------------------

Environment.step(): t = 26
Environment.act() [POST]: location: (5, 5), heading: (0, 1), action: None, reward: 1.19516595597
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 4, 't': 26, 'action': None, 'reward': 1.1951659559698624, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.20)
10% of time remaining to reach destination.

/-------------------
| Step 27 Results
\-------------------

Environment.step(): t = 27
Environment.act() [POST]: location: (5, 5), heading: (0, 1), action: left, reward: -39.4195952412
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncom

Agent previous state: ('forward', 'green', None, 'forward')
Agent followed the waypoint forward. (rewarded 2.25)
40% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (2, 2), heading: (-1, 0), action: forward, reward: -9.4406251862
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': 'left', 'left': None}, 'violation': 2, 'light': 'red', 'state': ('forward', 'red', 'right', None), 'deadline': 8, 't': 12, 'action': 'forward', 'reward': -9.440625186203729, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'right', None)
Agent attempted driving forward through a red light. (rewarded -9.44)
35% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (2, 7), heading: (0, -1), action: right, reward: 0.517796738123
Environme

Agent previous state: ('left', 'red', None, 'left')
Agent attempted driving forward through a red light. (rewarded -9.29)
77% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (8, 4), heading: (0, 1), action: None, reward: 1.1323207877
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 23, 't': 7, 'action': None, 'reward': 1.1323207877031627, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.13)
73% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (8, 4), heading: (0, 1), action: left, reward: -39.9659449716
Environment.act(): Step data: {'inputs': {'light': 'r


/-------------------
| Step 20 Results
\-------------------

Environment.step(): t = 20
Environment.act() [POST]: location: (6, 4), heading: (0, 1), action: None, reward: -5.6139129938
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': 'left'}, 'violation': 1, 'light': 'green', 'state': ('right', 'green', None, 'left'), 'deadline': 10, 't': 20, 'action': None, 'reward': -5.61391299380086, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, 'left')
Agent idled at a green light with no oncoming traffic. (rewarded -5.61)
30% of time remaining to reach destination.

/-------------------
| Step 21 Results
\-------------------

Environment.step(): t = 21
Environment.act() [POST]: location: (6, 4), heading: (0, 1), action: None, reward: -0.434719581726
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'left', 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', None, '

Agent previous state: ('left', 'red', None, 'right')
Agent properly idled at a red light. (rewarded 1.41)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (8, 2), heading: (-1, 0), action: forward, reward: -9.47364441407
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 2, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 16, 't': 4, 'action': 'forward', 'reward': -9.473644414071764, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent attempted driving forward through a red light. (rewarded -9.47)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (7, 2), heading: (-1, 0), action: forward, reward: 1.8322532451
Environment.act(): Step data: {'inputs'

Agent previous state: ('right', 'green', 'right', 'left')
Agent idled at a green light with no oncoming traffic. (rewarded -4.38)
5% of time remaining to reach destination.

/-------------------
| Step 19 Results
\-------------------

Environment.step(): t = 19
Environment.act() [POST]: location: (1, 2), heading: (1, 0), action: right, reward: 1.10609977771
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': 'left', 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', 'forward', 'left'), 'deadline': 1, 't': 19, 'action': 'right', 'reward': 1.1060997777142711, 'waypoint': 'right'}
Environment.step(): Primary agent ran out of time! Trial aborted.
Agent previous state: ('right', 'green', 'forward', 'left')
Agent followed the waypoint right. (rewarded 1.11)
0% of time remaining to reach destination.

Trial Aborted!
Agent did not reach the destination.

/-------------------------
| Training trial 13
\-------------------------

Agent followed the waypoint forward. (rewarded 1.37)
48% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (7, 5), heading: (1, 0), action: None, reward: 1.84164766498
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': 'right'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', 'right'), 'deadline': 12, 't': 13, 'action': None, 'reward': 1.8416476649801365, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', 'right')
Agent properly idled at a red light. (rewarded 1.84)
44% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
Environment.act() [POST]: location: (7, 6), heading: (0, 1), action: right, reward: 0.226406334366
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None

Agent properly idled at a red light. (rewarded 1.72)
90% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (3, 7), heading: (0, -1), action: None, reward: 2.01923260695
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 27, 't': 3, 'action': None, 'reward': 2.0192326069476305, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.02)
87% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (4, 7), heading: (1, 0), action: right, reward: 1.6687360085
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 

Agent previous state: ('right', 'green', 'right', None)
Agent idled at a green light with no oncoming traffic. (rewarded -4.11)
40% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Environment.step(): t = 18
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (1, 5), heading: (-1, 0), action: right, reward: 2.13744376189
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': 'left', 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', 'left', 'forward'), 'deadline': 12, 't': 18, 'action': 'right', 'reward': 2.1374437618912063, 'waypoint': 'right'}
Agent previous state: ('right', 'green', 'left', 'forward')
Agent followed the waypoint right. (rewarded 2.14)
37% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 15
\-------------------------

Environm

Agent previous state: ('right', 'red', 'right', None)
Agent properly idled at a red light. (rewarded 1.51)
50% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
Environment.act() [POST]: location: (1, 6), heading: (0, -1), action: None, reward: 1.16311290385
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', None, None), 'deadline': 15, 't': 15, 'action': None, 'reward': 1.1631129038522707, 'waypoint': 'right'}
Agent previous state: ('right', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.16)
47% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
Environment.act() [POST]: location: (1, 6), heading: (0, -1), action: left, reward: -10.4781567251
Environment.act(): Step data: {'inputs': {'light': 'red',

Agent previous state: ('right', 'red', None, None)
Agent attempted driving forward through a red light. (rewarded -9.40)
83% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (4, 3), heading: (0, -1), action: forward, reward: -9.71845343193
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 2, 'light': 'red', 'state': ('right', 'red', None, 'left'), 'deadline': 29, 't': 6, 'action': 'forward', 'reward': -9.718453431934904, 'waypoint': 'right'}
Agent previous state: ('right', 'red', None, 'left')
Agent attempted driving forward through a red light. (rewarded -9.72)
80% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (3, 3), heading: (-1, 0), action: left, reward: 1.83946027302
Environment.act()

Agent previous state: ('right', 'red', 'forward', None)
Agent attempted driving forward through a red light. (rewarded -9.41)
40% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (6, 2), heading: (-1, 0), action: right, reward: 1.2489125891
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', 'forward', None), 'deadline': 8, 't': 12, 'action': 'right', 'reward': 1.248912589098624, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'forward', None)
Agent followed the waypoint right. (rewarded 1.25)
35% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (6, 2), heading: (-1, 0), action: None, reward: 0.769738689096
Environment.act(): Step 

60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (7, 4), heading: (1, 0), action: forward, reward: 1.02021135755
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, 'forward'), 'deadline': 12, 't': 8, 'action': 'forward', 'reward': 1.020211357548369, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, 'forward')
Agent drove forward instead of right. (rewarded 1.02)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (7, 4), heading: (1, 0), action: None, reward: -4.82079719006
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 1, 'light': 'gr

Agent previous state: ('right', 'red', 'left', 'right')
Agent attempted driving left through a red light. (rewarded -9.88)
97% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (3, 7), heading: (1, 0), action: None, reward: 1.17348381676
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': 'left', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', 'left', None), 'deadline': 29, 't': 1, 'action': None, 'reward': 1.1734838167574853, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.17)
93% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (3, 2), heading: (0, 1), action: right, reward: 2.30201679518
Environment.act(): Step data: {'inputs':

Agent previous state: ('right', 'green', 'right', None)
Agent drove forward instead of right. (rewarded 0.95)
37% of time remaining to reach destination.

/-------------------
| Step 19 Results
\-------------------

Environment.step(): t = 19
Environment.act() [POST]: location: (3, 2), heading: (0, -1), action: left, reward: -0.151819041859
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, None), 'deadline': 11, 't': 19, 'action': 'left', 'reward': -0.1518190418586386, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, None)
Agent drove left instead of right. (rewarded -0.15)
33% of time remaining to reach destination.

/-------------------
| Step 20 Results
\-------------------

Environment.step(): t = 20
Environment.act() [POST]: location: (3, 2), heading: (0, -1), action: None, reward: 2.37502887292
Environment.act(): Step data: {'inputs': {'

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.14)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (8, 2), heading: (1, 0), action: left, reward: -9.6133543346
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 2, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 17, 't': 3, 'action': 'left', 'reward': -9.61335433460112, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent attempted driving left through a red light. (rewarded -9.61)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (8, 2), heading: (1, 0), action: left, reward: -10.0890839444
Environment.act(): Step data: {'inputs': {'light': 're

Agent previous state: ('forward', 'red', 'forward', 'forward')
Agent attempted driving left through a red light with traffic and cause a major accident. (rewarded -39.46)
10% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Environment.step(): t = 18
Environment.act() [POST]: location: (6, 5), heading: (0, -1), action: right, reward: -0.069803480027
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'forward', None), 'deadline': 2, 't': 18, 'action': 'right', 'reward': -0.06980348002702619, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'forward', None)
Agent drove right instead of forward. (rewarded -0.07)
5% of time remaining to reach destination.

/-------------------
| Step 19 Results
\-------------------

Environment.step(): t = 19
Environment.act() [POST]: location: (6, 5), heading: (0, -1), a

Agent previous state: ('right', 'green', None, None)
Agent drove forward instead of right. (rewarded 0.84)
63% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
Environment.act() [POST]: location: (6, 7), heading: (0, -1), action: None, reward: -4.66355570935
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'right', 'left': None}, 'violation': 1, 'light': 'green', 'state': ('right', 'green', None, None), 'deadline': 19, 't': 11, 'action': None, 'reward': -4.663555709354386, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, None)
Agent idled at a green light with no oncoming traffic. (rewarded -4.66)
60% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (7, 7), heading: (1, 0), action: right, reward: 1.18529527887
Environment.act(): Step dat

Agent previous state: ('forward', 'red', 'forward', 'forward')
Agent properly idled at a red light. (rewarded 2.24)
10% of time remaining to reach destination.

/-------------------
| Step 27 Results
\-------------------

Environment.step(): t = 27
Environment.act() [POST]: location: (6, 5), heading: (1, 0), action: left, reward: -9.38770219609
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': 'left', 'left': 'right'}, 'violation': 2, 'light': 'red', 'state': ('forward', 'red', 'left', 'right'), 'deadline': 3, 't': 27, 'action': 'left', 'reward': -9.387702196086273, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', 'right')
Agent attempted driving left through a red light. (rewarded -9.39)
7% of time remaining to reach destination.

/-------------------
| Step 28 Results
\-------------------

Environment.step(): t = 28
Environment.act() [POST]: location: (6, 5), heading: (1, 0), action: left, reward: -9.41816030095
Environment.

Agent previous state: ('left', 'green', None, 'forward')
Agent drove forward instead of left. (rewarded 0.24)
40% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (3, 6), heading: (-1, 0), action: left, reward: 0.919857395693
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'forward'), 'deadline': 8, 't': 12, 'action': 'left', 'reward': 0.9198573956926654, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'forward')
Agent followed the waypoint left. (rewarded 0.92)
35% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (2, 6), heading: (-1, 0), action: forward, reward: 1.42788094362
Environment.act(): Step data

Agent previous state: ('left', 'green', None, 'forward')
Agent drove right instead of left. (rewarded 1.54)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (4, 3), heading: (-1, 0), action: right, reward: 1.08660115185
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, None), 'deadline': 15, 't': 5, 'action': 'right', 'reward': 1.0866011518534964, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, None)
Agent followed the waypoint right. (rewarded 1.09)
70% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (4, 2), heading: (0, -1), action: right, reward: 0.32233059508
Environment.act(): Step data: {'inputs': {'lig

Agent previous state: ('forward', 'red', 'left', 'right')
Agent drove right instead of forward. (rewarded 0.83)
97% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (3, 5), heading: (1, 0), action: right, reward: 1.55457158329
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'forward', None), 'deadline': 29, 't': 1, 'action': 'right', 'reward': 1.5545715832920677, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'forward', None)
Agent drove right instead of left. (rewarded 1.55)
93% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (3, 5), heading: (1, 0), action: None, reward: -4.68780871174
Environment.act(): Step data: {'inputs': {'li

Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 0.89)
43% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Environment.step(): t = 17
Environment.act() [POST]: location: (6, 6), heading: (0, 1), action: right, reward: 1.7247581434
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', 'left', None), 'deadline': 13, 't': 17, 'action': 'right', 'reward': 1.7247581434032149, 'waypoint': 'right'}
Agent previous state: ('right', 'green', 'left', None)
Agent followed the waypoint right. (rewarded 1.72)
40% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Environment.step(): t = 18
Environment.act() [POST]: location: (6, 6), heading: (0, 1), action: forward, reward: -10.1552683547
Environment.act(): Step data: {'inputs

Agent previous state: ('left', 'red', 'forward', 'left')
Agent properly idled at a red light. (rewarded 1.12)
95% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (7, 4), heading: (0, 1), action: left, reward: -9.83948646284
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': 'right'}, 'violation': 2, 'light': 'red', 'state': ('left', 'red', 'forward', 'right'), 'deadline': 19, 't': 1, 'action': 'left', 'reward': -9.839486462844514, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'forward', 'right')
Agent attempted driving left through a red light. (rewarded -9.84)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (7, 4), heading: (0, 1), action: left, reward: -10.7814392928
Environment.act(): Step

Agent previous state: ('right', 'green', None, None)
Agent drove left instead of right. (rewarded 0.84)
20% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
Environment.act() [POST]: location: (6, 5), heading: (-1, 0), action: left, reward: -40.2988166764
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'forward', 'left': None}, 'violation': 4, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 4, 't': 16, 'action': 'left', 'reward': -40.29881667638122, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent attempted driving left through a red light with traffic and cause a major accident. (rewarded -40.30)
15% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Environment.step(): t = 17
Environment.act() [POST]: location: (6, 5), heading: (-1, 0), action: None, reward: 0.835376278447
E

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (6, 7), heading: (1, 0), action: None, reward: 1.82582023499
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', None), 'deadline': 8, 't': 12, 'action': None, 'reward': 1.8258202349869361, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.83)
35% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (6, 2), heading: (0, 1), action: right, reward: 0.713849610733
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', 'forward'), 

Agent previous state: ('forward', 'red', None, None)
Agent drove right instead of forward. (rewarded 0.78)
70% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (7, 3), heading: (-1, 0), action: left, reward: 2.72105983223
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'forward'), 'deadline': 14, 't': 6, 'action': 'left', 'reward': 2.7210598322324397, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'forward')
Agent followed the waypoint left. (rewarded 2.72)
65% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (7, 3), heading: (-1, 0), action: None, reward: 1.69927407296
Environment.act(): Step data: {'inputs': {'l

Agent previous state: ('left', 'red', None, 'right')
Agent attempted driving forward through a red light. (rewarded -9.74)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (8, 4), heading: (0, 1), action: None, reward: 1.01276271467
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 16, 't': 4, 'action': None, 'reward': 1.0127627146695954, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.01)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (8, 4), heading: (0, 1), action: None, reward: 2.56358649827
Environment.act(): Step data: {'inputs': {'light': '

Agent previous state: ('right', 'red', None, 'left')
Agent followed the waypoint right. (rewarded 1.91)
10% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Environment.step(): t = 18
Environment.act() [POST]: location: (8, 3), heading: (0, -1), action: None, reward: -0.634321608702
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': 'forward', 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', 'forward', 'left'), 'deadline': 2, 't': 18, 'action': None, 'reward': -0.6343216087017576, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'forward', 'left')
Agent properly idled at a red light. (rewarded -0.63)
5% of time remaining to reach destination.

/-------------------
| Step 19 Results
\-------------------

Environment.step(): t = 19
Environment.act() [POST]: location: (1, 3), heading: (1, 0), action: right, reward: 0.34030535359
Environment.act(): Step data: {'i

Agent previous state: ('right', 'red', 'right', 'forward')
Agent properly idled at a red light. (rewarded 0.48)
52% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (5, 4), heading: (0, 1), action: right, reward: 1.5440950443
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': 'left', 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', 'forward', 'left'), 'deadline': 13, 't': 12, 'action': 'right', 'reward': 1.5440950443019468, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'forward', 'left')
Agent followed the waypoint right. (rewarded 1.54)
48% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (5, 5), heading: (0, 1), action: forward, reward: 1.13820197027
Environment.act(): Step data

Agent properly idled at a red light. (rewarded 2.61)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (4, 5), heading: (1, 0), action: right, reward: 1.38359515625
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'left', None), 'deadline': 16, 't': 4, 'action': 'right', 'reward': 1.3835951562526354, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'left', None)
Agent drove right instead of left. (rewarded 1.38)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (4, 6), heading: (0, 1), action: right, reward: 1.99264374889
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'viol

Environment.act() [POST]: location: (4, 4), heading: (0, -1), action: right, reward: 0.399086212758
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'forward', None), 'deadline': 20, 't': 0, 'action': 'right', 'reward': 0.39908621275786316, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'forward', None)
Agent drove right instead of forward. (rewarded 0.40)
95% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (4, 4), heading: (0, -1), action: forward, reward: -40.5995470827
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'right', 'left': 'forward'}, 'violation': 4, 'light': 'red', 'state': ('left', 'red', None, 'forward'), 'deadline': 19, 't': 1, 'action': 'forward', 'reward': -40.59954708271274, 'wayp

Agent previous state: ('right', 'red', 'left', 'forward')
Agent properly idled at a red light. (rewarded 1.02)
10% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Environment.step(): t = 18
Environment.act() [POST]: location: (5, 6), heading: (0, -1), action: left, reward: -9.76166073508
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 2, 'light': 'red', 'state': ('right', 'red', 'left', None), 'deadline': 2, 't': 18, 'action': 'left', 'reward': -9.761660735077548, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'left', None)
Agent attempted driving left through a red light. (rewarded -9.76)
5% of time remaining to reach destination.

/-------------------
| Step 19 Results
\-------------------

Environment.step(): t = 19
Environment.act() [POST]: location: (5, 6), heading: (0, -1), action: None, reward: -4.00053256951
Environment.act(): Step data: {'

Agent previous state: ('left', 'green', None, None)
Agent drove right instead of left. (rewarded 1.65)
40% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (8, 4), heading: (-1, 0), action: left, reward: -10.1480884029
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': 'left'}, 'violation': 2, 'light': 'red', 'state': ('right', 'red', 'left', 'left'), 'deadline': 8, 't': 12, 'action': 'left', 'reward': -10.14808840285795, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'left', 'left')
Agent attempted driving left through a red light. (rewarded -10.15)
35% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (8, 3), heading: (0, -1), action: right, reward: 1.22080458874
Environment.act(): Step data: {'

Agent previous state: ('right', 'green', 'left', None)
Agent idled at a green light with no oncoming traffic. (rewarded -4.01)
83% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (1, 6), heading: (1, 0), action: right, reward: 1.64128609037
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, 'left'), 'deadline': 25, 't': 5, 'action': 'right', 'reward': 1.6412860903726039, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, 'left')
Agent followed the waypoint right. (rewarded 1.64)
80% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (1, 6), heading: (1, 0), action: None, reward: 2.84674442346
Environment.act(): Step data:

Agent previous state: ('left', 'red', None, None)
Agent attempted driving left through a red light. (rewarded -9.92)
23% of time remaining to reach destination.

/-------------------
| Step 23 Results
\-------------------

Environment.step(): t = 23
Environment.act() [POST]: location: (5, 5), heading: (0, -1), action: forward, reward: -10.4055253174
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': None, 'left': None}, 'violation': 2, 'light': 'red', 'state': ('left', 'red', 'right', None), 'deadline': 7, 't': 23, 'action': 'forward', 'reward': -10.40552531739623, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'right', None)
Agent attempted driving forward through a red light. (rewarded -10.41)
20% of time remaining to reach destination.

/-------------------
| Step 24 Results
\-------------------

Environment.step(): t = 24
Environment.act() [POST]: location: (5, 5), heading: (0, -1), action: None, reward: 1.46136261167
Environment.act(

Agent previous state: ('forward', 'red', 'left', 'left')
Agent drove right instead of forward. (rewarded 0.43)
64% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (7, 5), heading: (0, 1), action: left, reward: -10.6462731782
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 2, 'light': 'red', 'state': ('left', 'red', 'forward', None), 'deadline': 16, 't': 9, 'action': 'left', 'reward': -10.64627317816888, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'forward', None)
Agent attempted driving left through a red light. (rewarded -10.65)
60% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act() [POST]: location: (6, 5), heading: (-1, 0), action: right, reward: 1.1328869799
Environment.act(): Step data

Agent previous state: ('forward', 'red', 'right', None)
Agent attempted driving left through a red light with traffic and cause a major accident. (rewarded -40.67)
8% of time remaining to reach destination.

/-------------------
| Step 23 Results
\-------------------

Environment.step(): t = 23
Environment.act() [POST]: location: (3, 6), heading: (-1, 0), action: None, reward: 0.956038958471
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'right', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 2, 't': 23, 'action': None, 'reward': 0.9560389584712776, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 0.96)
4% of time remaining to reach destination.

/-------------------
| Step 24 Results
\-------------------

Environment.step(): t = 24
Environment.act() [POST]: location: (3, 6), heading: (-1, 0), action: None, reward: 1.932457

Agent previous state: ('right', 'green', None, 'right')
Agent drove forward instead of right. (rewarded 1.61)
30% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
Environment.act() [POST]: location: (2, 6), heading: (1, 0), action: right, reward: 1.27665569084
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, 'left'), 'deadline': 6, 't': 14, 'action': 'right', 'reward': 1.2766556908375164, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, 'left')
Agent followed the waypoint right. (rewarded 1.28)
25% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
Environment.act() [POST]: location: (3, 6), heading: (1, 0), action: forward, reward: 1.42296587241
Environment.act(): Step data: {'in

Agent previous state: ('left', 'green', 'right', 'forward')
Agent attempted driving left through traffic and cause a minor accident. (rewarded -19.99)
72% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (2, 2), heading: (0, -1), action: None, reward: 2.89909811164
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'left', 'forward'), 'deadline': 18, 't': 7, 'action': None, 'reward': 2.8990981116383177, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'left', 'forward')
Agent properly idled at a red light. (rewarded 2.90)
68% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (2, 2), heading: (0, -1), action: None, reward: 2.86934290856
E

Environment.step(): t = 1
Environment.act() [POST]: location: (5, 3), heading: (-1, 0), action: right, reward: 1.54802991867
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 19, 't': 1, 'action': 'right', 'reward': 1.5480299186654407, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent drove right instead of left. (rewarded 1.55)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (5, 3), heading: (-1, 0), action: forward, reward: -9.79888842717
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'right'}, 'violation': 2, 'light': 'red', 'state': ('forward', 'red', None, 'right'), 'deadline': 18, 't': 2, 'action': 'forward', 'reward': -9.79888842716752, 'waypoint': '

Agent previous state: ('right', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 0.55)
15% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Environment.step(): t = 17
Environment.act() [POST]: location: (4, 2), heading: (0, 1), action: forward, reward: 0.989189953355
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', 'left', None), 'deadline': 3, 't': 17, 'action': 'forward', 'reward': 0.9891899533552625, 'waypoint': 'right'}
Agent previous state: ('right', 'green', 'left', None)
Agent drove forward instead of right. (rewarded 0.99)
10% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Environment.step(): t = 18
Environment.act() [POST]: location: (4, 2), heading: (0, 1), action: None, reward: 0.0771987951959
Environment.act(): Step data: {'inpu

Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.85)
71% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act() [POST]: location: (6, 5), heading: (-1, 0), action: None, reward: 1.13337554576
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'right', None), 'deadline': 25, 't': 10, 'action': None, 'reward': 1.1333755457584946, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'right', None)
Agent properly idled at a red light. (rewarded 1.13)
69% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
Environment.act() [POST]: location: (6, 4), heading: (0, -1), action: right, reward: 0.200925360197
Environment.act(): Step data: {'inputs': {

Agent previous state: ('left', 'red', None, 'forward')
Agent attempted driving forward through a red light with traffic and cause a major accident. (rewarded -40.56)
31% of time remaining to reach destination.

/-------------------
| Step 24 Results
\-------------------

Environment.step(): t = 24
Environment.act() [POST]: location: (6, 4), heading: (0, -1), action: None, reward: 1.20777110483
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 11, 't': 24, 'action': None, 'reward': 1.2077711048297628, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.21)
29% of time remaining to reach destination.

/-------------------
| Step 25 Results
\-------------------

Environment.step(): t = 25
Environment.act() [POST]: location: (7, 4), heading: (1, 0), action: right, reward: 0.0132734869413


Agent previous state: ('forward', 'red', 'left', 'forward')
Agent attempted driving left through a red light with traffic and cause a major accident. (rewarded -40.23)
94% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (1, 6), heading: (-1, 0), action: None, reward: 1.87611736611
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', None), 'deadline': 33, 't': 2, 'action': None, 'reward': 1.876117366113566, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.88)
91% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (1, 5), heading: (0, -1), action: right, reward: 0.41

Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.35)
46% of time remaining to reach destination.

/-------------------
| Step 19 Results
\-------------------

Environment.step(): t = 19
Environment.act() [POST]: location: (7, 5), heading: (0, 1), action: left, reward: 1.21865530304
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 16, 't': 19, 'action': 'left', 'reward': 1.2186553030449385, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent drove left instead of forward. (rewarded 1.22)
43% of time remaining to reach destination.

/-------------------
| Step 20 Results
\-------------------

Environment.step(): t = 20
Environment.act() [POST]: location: (7, 5), heading: (0, 1), action: None, reward: 0.27209865854
Environment.act(): Step data: {'inputs': {'li

Agent previous state: ('left', 'green', None, None)
Agent drove right instead of left. (rewarded 0.04)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (6, 7), heading: (0, 1), action: left, reward: 0.528473303879
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, None), 'deadline': 18, 't': 2, 'action': 'left', 'reward': 0.528473303878863, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, None)
Agent drove left instead of right. (rewarded 0.53)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (6, 7), heading: (0, 1), action: None, reward: 2.31351207544
Environment.act(): Step data: {'inputs': {'light': 'red', 'on

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (3, 6), heading: (-1, 0), action: left, reward: -10.3643542096
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 2, 'light': 'red', 'state': ('forward', 'red', None, 'left'), 'deadline': 29, 't': 1, 'action': 'left', 'reward': -10.364354209551774, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, 'left')
Agent attempted driving left through a red light. (rewarded -10.36)
93% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (3, 5), heading: (0, -1), action: right, reward: 0.543191733388
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', 'lef

Agent previous state: ('left', 'green', None, 'forward')
Agent followed the waypoint left. (rewarded 1.29)
50% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
Environment.act() [POST]: location: (8, 6), heading: (1, 0), action: None, reward: 2.70538689112
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 15, 't': 15, 'action': None, 'reward': 2.70538689111755, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.71)
47% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
Environment.act() [POST]: location: (8, 7), heading: (0, 1), action: right, reward: 0.645852189815
Environment.act(): Step data: {'inputs': {'light': 'gr

Agent previous state: ('right', 'green', None, 'forward')
Agent followed the waypoint right. (rewarded 2.78)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (5, 6), heading: (-1, 0), action: left, reward: -10.3293496235
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 2, 'light': 'red', 'state': ('left', 'red', None, 'left'), 'deadline': 18, 't': 2, 'action': 'left', 'reward': -10.329349623493407, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'left')
Agent attempted driving left through a red light. (rewarded -10.33)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (5, 6), heading: (-1, 0), action: left, reward: -10.4465770245
Environment.act(): Step data: {'inputs

Environment.act() [POST]: location: (3, 2), heading: (-1, 0), action: right, reward: -0.467486363648
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', 'left', 'forward'), 'deadline': 5, 't': 15, 'action': 'right', 'reward': -0.4674863636482649, 'waypoint': 'left'}
Agent previous state: ('left', 'green', 'left', 'forward')
Agent drove right instead of left. (rewarded -0.47)
20% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
Environment.act() [POST]: location: (3, 2), heading: (-1, 0), action: left, reward: -10.5549823052
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': 'left', 'left': 'right'}, 'violation': 2, 'light': 'red', 'state': ('right', 'red', 'forward', 'right'), 'deadline': 4, 't': 16, 'action': 'left', 'reward': -10.554982305203369

Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'left': 'left'}, 'violation': 1, 'light': 'green', 'state': ('left', 'green', None, 'left'), 'deadline': 12, 't': 8, 'action': None, 'reward': -5.329265243195721, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'left')
Agent idled at a green light with no oncoming traffic. (rewarded -5.33)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (4, 5), heading: (-1, 0), action: left, reward: 2.14320037697
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'left'), 'deadline': 11, 't': 9, 'action': 'left', 'reward': 2.1432003769653782, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'left')
Agent followed the waypoint

Agent previous state: ('forward', 'green', None, 'forward')
Agent drove left instead of forward. (rewarded 1.39)
25% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
Environment.act() [POST]: location: (1, 6), heading: (0, -1), action: left, reward: -40.5599805634
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': 'forward', 'left': None}, 'violation': 4, 'light': 'red', 'state': ('right', 'red', 'left', None), 'deadline': 5, 't': 15, 'action': 'left', 'reward': -40.55998056344377, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'left', None)
Agent attempted driving left through a red light with traffic and cause a major accident. (rewarded -40.56)
20% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
Environment.act() [POST]: location: (2, 6), heading: (1, 0), action: right, reward

Agent previous state: ('forward', 'green', 'right', None)
Agent followed the waypoint forward. (rewarded 2.19)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (7, 7), heading: (1, 0), action: forward, reward: -9.29763167739
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 2, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 12, 't': 8, 'action': 'forward', 'reward': -9.29763167739228, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent attempted driving forward through a red light. (rewarded -9.30)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (7, 7), heading: (1, 0), action: None, reward: 1.22330099575
Environment.act(): Step data: 

Agent previous state: ('forward', 'green', None, 'left')
Agent drove right instead of forward. (rewarded 0.78)
90% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (6, 2), heading: (1, 0), action: right, reward: 1.34489122158
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 27, 't': 3, 'action': 'right', 'reward': 1.3448912215805984, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent drove right instead of left. (rewarded 1.34)
87% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (6, 2), heading: (1, 0), action: None, reward: -4.25665264432
Environment.act(): Step data: {'inputs': {'light': 'g

Agent previous state: ('right', 'green', None, None)
Agent followed the waypoint right. (rewarded 1.43)
43% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Environment.step(): t = 17
Environment.act() [POST]: location: (2, 5), heading: (1, 0), action: None, reward: 1.3063732319
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, 'left'), 'deadline': 13, 't': 17, 'action': None, 'reward': 1.306373231896625, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 1.31)
40% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Environment.step(): t = 18
Environment.act() [POST]: location: (2, 5), heading: (1, 0), action: None, reward: 1.55303363638
Environment.act(): Step data: {'inputs': {'light': 'red', 'onc

Agent previous state: ('left', 'red', None, 'left')
Agent attempted driving left through a red light. (rewarded -9.27)
35% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (7, 3), heading: (-1, 0), action: forward, reward: 1.25744993232
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', 'forward', None), 'deadline': 7, 't': 13, 'action': 'forward', 'reward': 1.2574499323222335, 'waypoint': 'left'}
Agent previous state: ('left', 'green', 'forward', None)
Agent drove forward instead of left. (rewarded 1.26)
30% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
Environment.act() [POST]: location: (7, 4), heading: (0, 1), action: left, reward: 1.92514138976
Environment.act(): S

Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 1.08)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (8, 2), heading: (-1, 0), action: None, reward: 2.52706558596
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 12, 't': 8, 'action': None, 'reward': 2.527065585960049, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.53)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (7, 2), heading: (-1, 0), action: forward, reward: -0.0576759401674
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncom

Agent previous state: ('left', 'red', 'forward', None)
Agent drove right instead of left. (rewarded 1.48)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (7, 4), heading: (1, 0), action: right, reward: -20.2982090969
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'left', 'left': 'forward'}, 'violation': 3, 'light': 'red', 'state': ('left', 'red', None, 'forward'), 'deadline': 17, 't': 3, 'action': 'right', 'reward': -20.29820909688832, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'forward')
Agent attempted driving right through traffic and cause a minor accident. (rewarded -20.30)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (7, 4), heading: (1, 0), action: None, reward: 2.06782461072
Environm

Environment.step(): Primary agent ran out of time! Trial aborted.
Agent previous state: ('forward', 'green', 'left', None)
Agent followed the waypoint forward. (rewarded 0.96)
0% of time remaining to reach destination.

Trial Aborted!
Agent did not reach the destination.

/-------------------------
| Training trial 50
\-------------------------

Environment.reset(): Trial set up with start = (5, 7), destination = (8, 3), deadline = 25
Simulating trial. . . 
epsilon = 0.4724; alpha = 0.0150

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
Environment.act() [POST]: location: (5, 7), heading: (0, -1), action: None, reward: 0.0707861845031
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'right', 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', None, 'left'), 'deadline': 25, 't': 0, 'action': None, 'reward': 0.07078618450306196, 'waypoint': 'right'}
Agent previous state: ('right', 'red', Non

Agent previous state: ('forward', 'red', None, 'forward')
Agent properly idled at a red light. (rewarded 2.09)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (6, 4), heading: (1, 0), action: forward, reward: 2.26863211459
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 15, 't': 5, 'action': 'forward', 'reward': 2.268632114590891, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 2.27)
70% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (6, 4), heading: (1, 0), action: None, reward: 2.66454763991
Environment.act(): Step data: {'inputs':

Agent previous state: ('forward', 'green', None, 'forward')
Agent followed the waypoint forward. (rewarded 2.84)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (2, 6), heading: (1, 0), action: forward, reward: -9.29480165976
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 2, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 18, 't': 2, 'action': 'forward', 'reward': -9.294801659759983, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent attempted driving forward through a red light. (rewarded -9.29)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (3, 6), heading: (1, 0), action: forward, reward: 2.09162978649
Environment.act(): Step 

Environment.step(): Primary agent ran out of time! Trial aborted.
Agent previous state: ('forward', 'green', 'forward', None)
Agent drove right instead of forward. (rewarded -0.83)
0% of time remaining to reach destination.

Trial Aborted!
Agent did not reach the destination.

/-------------------------
| Training trial 53
\-------------------------

Environment.reset(): Trial set up with start = (6, 7), destination = (3, 4), deadline = 30
Simulating trial. . . 
epsilon = 0.4516; alpha = 0.0150

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
Environment.act() [POST]: location: (6, 2), heading: (0, 1), action: right, reward: 1.10306806305
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': 'forward', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', 'forward', None), 'deadline': 30, 't': 0, 'action': 'right', 'reward': 1.1030680630460798, 'waypoint': 'right'}
Agent previous state: ('right

Agent previous state: ('forward', 'green', 'left', None)
Agent drove right instead of forward. (rewarded 1.43)
53% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
Environment.act() [POST]: location: (4, 5), heading: (1, 0), action: None, reward: 0.834797335931
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, 'left'), 'deadline': 16, 't': 14, 'action': None, 'reward': 0.8347973359312546, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 0.83)
50% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
Environment.act() [POST]: location: (4, 5), heading: (1, 0), action: None, reward: 1.99340590541
Environment.act(): Step data: {'inputs': {'light': '

Agent previous state: ('left', 'green', None, 'forward')
Agent followed the waypoint left. (rewarded 2.84)
83% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (3, 7), heading: (0, -1), action: left, reward: 0.579527480984
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', None), 'deadline': 29, 't': 6, 'action': 'left', 'reward': 0.5795274809841906, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'left', None)
Agent drove left instead of forward. (rewarded 0.58)
80% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (3, 7), heading: (0, -1), action: None, reward: 0.00794106973347
Environment.act(): Step data: {'input

Agent followed the waypoint right. (rewarded 2.28)
37% of time remaining to reach destination.

/-------------------
| Step 22 Results
\-------------------

Environment.step(): t = 22
Environment.act() [POST]: location: (4, 3), heading: (-1, 0), action: None, reward: 0.780580101728
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'forward', 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, 'forward'), 'deadline': 13, 't': 22, 'action': None, 'reward': 0.7805801017280263, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'forward')
Agent properly idled at a red light. (rewarded 0.78)
34% of time remaining to reach destination.

/-------------------
| Step 23 Results
\-------------------

Environment.step(): t = 23
Environment.act() [POST]: location: (4, 3), heading: (-1, 0), action: None, reward: 1.21034124067
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'le

Agent previous state: ('right', 'red', 'left', None)
Agent followed the waypoint right. (rewarded 2.20)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (5, 2), heading: (1, 0), action: None, reward: 2.76427399128
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': 'forward', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'left', None), 'deadline': 17, 't': 3, 'action': None, 'reward': 2.764273991284141, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 2.76)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (5, 2), heading: (1, 0), action: None, reward: 2.8033897237
Environment.act(): Step data: {'inputs': {'light': 'red', 'onc

Agent previous state: ('left', 'green', 'left', None)
Agent followed the waypoint left. (rewarded 2.02)
76% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (6, 7), heading: (1, 0), action: None, reward: 1.58906582315
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', 'left'), 'deadline': 19, 't': 6, 'action': None, 'reward': 1.5890658231543477, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', 'left')
Agent properly idled at a red light. (rewarded 1.59)
72% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (6, 7), heading: (1, 0), action: None, reward: -5.31256536328
Environment.act(): Step data: {'inputs': {'light'

Agent previous state: ('right', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 1.11)
80% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (8, 2), heading: (0, -1), action: right, reward: 1.1835001847
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', None, None), 'deadline': 24, 't': 6, 'action': 'right', 'reward': 1.1835001847043678, 'waypoint': 'right'}
Agent previous state: ('right', 'red', None, None)
Agent followed the waypoint right. (rewarded 1.18)
77% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (1, 2), heading: (1, 0), action: right, reward: 2.47738116637
Environment.act(): Step data: {'inputs': {'light': 'red', 'onco

Agent previous state: ('left', 'green', 'forward', 'left')
Agent drove right instead of left. (rewarded -0.32)
27% of time remaining to reach destination.

/-------------------
| Step 22 Results
\-------------------

Environment.step(): t = 22
Environment.act() [POST]: location: (2, 5), heading: (0, 1), action: None, reward: 0.685583649347
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', None), 'deadline': 8, 't': 22, 'action': None, 'reward': 0.6855836493469072, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 0.69)
23% of time remaining to reach destination.

/-------------------
| Step 23 Results
\-------------------

Environment.step(): t = 23
Environment.act() [POST]: location: (1, 5), heading: (-1, 0), action: right, reward: 0.402471912752
Environment.act(): Step data: {'inputs': 

Agent previous state: ('right', 'red', 'left', None)
Agent followed the waypoint right. (rewarded 2.72)
80% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (8, 3), heading: (0, -1), action: forward, reward: -40.6306864155
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 4, 'light': 'red', 'state': ('right', 'red', None, 'forward'), 'deadline': 24, 't': 6, 'action': 'forward', 'reward': -40.63068641553333, 'waypoint': 'right'}
Agent previous state: ('right', 'red', None, 'forward')
Agent attempted driving forward through a red light with traffic and cause a major accident. (rewarded -40.63)
77% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (1, 3), heading: (1, 0), action: right, reward

Agent previous state: ('right', 'red', None, None)
Agent properly idled at a red light. (rewarded -0.13)
30% of time remaining to reach destination.

/-------------------
| Step 21 Results
\-------------------

Environment.step(): t = 21
Environment.act() [POST]: location: (5, 3), heading: (0, 1), action: None, reward: -4.99838084699
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 1, 'light': 'green', 'state': ('right', 'green', None, None), 'deadline': 9, 't': 21, 'action': None, 'reward': -4.998380846989167, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, None)
Agent idled at a green light with no oncoming traffic. (rewarded -5.00)
27% of time remaining to reach destination.

/-------------------
| Step 22 Results
\-------------------

Environment.step(): t = 22
Environment.act() [POST]: location: (4, 3), heading: (-1, 0), action: right, reward: 2.02908993047
Environment.act(): Step data: {'i

Agent previous state: ('left', 'green', None, 'forward')
Agent drove right instead of left. (rewarded 1.39)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (7, 4), heading: (0, 1), action: left, reward: -9.94150191295
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 2, 'light': 'red', 'state': ('right', 'red', None, None), 'deadline': 16, 't': 4, 'action': 'left', 'reward': -9.941501912949649, 'waypoint': 'right'}
Agent previous state: ('right', 'red', None, None)
Agent attempted driving left through a red light. (rewarded -9.94)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (8, 4), heading: (1, 0), action: left, reward: 1.76143946379
Environment.act(): Step data: {'inputs': {'ligh

Agent previous state: ('left', 'green', 'forward', None)
Agent drove forward instead of left. (rewarded 1.02)
10% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Environment.step(): t = 18
Environment.act() [POST]: location: (2, 4), heading: (1, 0), action: None, reward: 1.81573779207
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, 'forward'), 'deadline': 2, 't': 18, 'action': None, 'reward': 1.8157377920726872, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'forward')
Agent properly idled at a red light. (rewarded 1.82)
5% of time remaining to reach destination.

/-------------------
| Step 19 Results
\-------------------

Environment.step(): t = 19
Environment.act() [POST]: location: (2, 4), heading: (1, 0), action: left, reward: -9.37683544517
Environment.act(): Step data: {'inputs': {'lig

Agent previous state: ('left', 'green', None, None)
Agent drove right instead of left. (rewarded -0.20)
25% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
Environment.act() [POST]: location: (1, 5), heading: (0, -1), action: right, reward: 0.831746260537
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'left': 'right'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'right'), 'deadline': 5, 't': 15, 'action': 'right', 'reward': 0.8317462605367798, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'right')
Agent drove right instead of left. (rewarded 0.83)
20% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
Environment.act() [POST]: location: (2, 5), heading: (1, 0), action: right, reward: 1.96815136603
Environment.act(): Step data: {'inputs':

Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.80)
68% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (5, 6), heading: (0, -1), action: None, reward: 2.30850728556
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 17, 't': 8, 'action': None, 'reward': 2.3085072855603217, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.31)
64% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (5, 6), heading: (0, -1), action: None, reward: 1.47256794647
Environment.act(): Step data: {'inputs': {'light': 'red', 

Agent drove right instead of left. (rewarded 0.76)
65% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (3, 2), heading: (-1, 0), action: forward, reward: 0.273663171584
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, 'left'), 'deadline': 13, 't': 7, 'action': 'forward', 'reward': 0.27366317158442843, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, 'left')
Agent drove forward instead of right. (rewarded 0.27)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (3, 2), heading: (-1, 0), action: right, reward: -20.6058370833
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None,

Agent previous state: ('forward', 'red', None, 'right')
Agent properly idled at a red light. (rewarded 1.41)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (4, 2), heading: (-1, 0), action: left, reward: -10.1888352402
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 2, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 17, 't': 3, 'action': 'left', 'reward': -10.188835240235717, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent attempted driving left through a red light. (rewarded -10.19)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (4, 2), heading: (-1, 0), action: forward, reward: -10.0670320353
Environment.act(): Step data: 

Agent previous state: ('forward', 'green', 'left', None)
Agent drove right instead of forward. (rewarded 1.81)
84% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (2, 4), heading: (0, 1), action: forward, reward: 1.61503689925
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'left'), 'deadline': 21, 't': 4, 'action': 'forward', 'reward': 1.6150368992519089, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'left')
Agent drove forward instead of left. (rewarded 1.62)
80% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (2, 4), heading: (0, 1), action: left, reward: -9.21013044974
Environment.act(): Step data: {'inputs': 

| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act() [POST]: location: (7, 3), heading: (1, 0), action: None, reward: 0.832470859272
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'right', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 10, 't': 10, 'action': None, 'reward': 0.8324708592722945, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 0.83)
45% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
Environment.act() [POST]: location: (7, 3), heading: (1, 0), action: forward, reward: -9.4915279671
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'right'}, 'violation': 2, 'light': 'red', 'state': ('left', 'red', None, 'right'), 'deadline': 9, 't': 11, 'action': 'forward', 'r

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.06)
77% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (4, 5), heading: (0, -1), action: forward, reward: 1.00688578044
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 23, 't': 7, 'action': 'forward', 'reward': 1.0068857804397324, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent drove forward instead of left. (rewarded 1.01)
73% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (4, 5), heading: (0, -1), action: None, reward: 1.90145910161
Environment.act(): Step data: {'inputs': {'light': 're

30% of time remaining to reach destination.

/-------------------
| Step 21 Results
\-------------------

Environment.step(): t = 21
Environment.act() [POST]: location: (3, 6), heading: (-1, 0), action: None, reward: 0.861836315262
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'left', None), 'deadline': 9, 't': 21, 'action': None, 'reward': 0.8618363152622488, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 0.86)
27% of time remaining to reach destination.

/-------------------
| Step 22 Results
\-------------------

Environment.step(): t = 22
Environment.act() [POST]: location: (2, 6), heading: (-1, 0), action: forward, reward: -0.0237952605793
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': 'right', 'left': None}, 'violation': 0, 'light': 'green', 'state': 

Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.44)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (6, 6), heading: (0, -1), action: right, reward: -0.0178874797009
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', 'forward'), 'deadline': 16, 't': 4, 'action': 'right', 'reward': -0.017887479700873343, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'left', 'forward')
Agent drove right instead of forward. (rewarded -0.02)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (6, 6), heading: (0, -1), action: left, reward: -10.1457458962
Environment.ac

Agent previous state: ('left', 'green', None, None)
Agent drove forward instead of left. (rewarded 0.36)
76% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (7, 6), heading: (0, -1), action: right, reward: 1.1601375576
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'forward', None), 'deadline': 19, 't': 6, 'action': 'right', 'reward': 1.1601375576017228, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'forward', None)
Agent drove right instead of forward. (rewarded 1.16)
72% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (7, 6), heading: (0, -1), action: right, reward: -19.5226902112
Environment.act(): Step data: 

Agent previous state: ('forward', 'green', 'left', None)
Agent idled at a green light with no oncoming traffic. (rewarded -5.28)
4% of time remaining to reach destination.

/-------------------
| Step 24 Results
\-------------------

Environment.step(): t = 24
Environment.act() [POST]: location: (4, 7), heading: (0, 1), action: forward, reward: 0.748277699416
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', None), 'deadline': 1, 't': 24, 'action': 'forward', 'reward': 0.7482776994155107, 'waypoint': 'forward'}
Environment.step(): Primary agent ran out of time! Trial aborted.
Agent previous state: ('forward', 'green', 'left', None)
Agent followed the waypoint forward. (rewarded 0.75)
0% of time remaining to reach destination.

Trial Aborted!
Agent did not reach the destination.

/-------------------------
| Training trial 69
\-------------------------

Env

Agent previous state: ('left', 'green', 'left', None)
Agent followed the waypoint left. (rewarded 1.19)
30% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 70
\-------------------------

Environment.reset(): Trial set up with start = (2, 2), destination = (4, 4), deadline = 20
Simulating trial. . . 
epsilon = 0.3499; alpha = 0.0150

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
Environment.act() [POST]: location: (2, 7), heading: (0, -1), action: right, reward: 1.25047505294
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'right', 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', 'right', None), 'deadline': 20, 't': 0, 'action': 'right', 'reward': 1.2504750529391844, 'waypoint': 'left'}
Agent previous state: ('left', 'green', 'right', None)
Agent drove right instead of left. (rewarded 1.25)
95% o

Agent previous state: ('left', 'red', 'forward', 'forward')
Agent properly idled at a red light. (rewarded 2.56)
87% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (8, 5), heading: (0, -1), action: forward, reward: -10.5797172766
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': 'right'}, 'violation': 2, 'light': 'red', 'state': ('left', 'red', 'forward', 'right'), 'deadline': 26, 't': 4, 'action': 'forward', 'reward': -10.57971727664354, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'forward', 'right')
Agent attempted driving forward through a red light. (rewarded -10.58)
83% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (1, 5), heading: (1, 0), action: right, reward: 0.859046799999
Environm

Agent previous state: ('right', 'red', 'left', 'forward')
Agent properly idled at a red light. (rewarded 1.67)
96% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (3, 7), heading: (0, 1), action: right, reward: 2.4877245881
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', 'left', None), 'deadline': 24, 't': 1, 'action': 'right', 'reward': 2.48772458810189, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'left', None)
Agent followed the waypoint right. (rewarded 2.49)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (2, 7), heading: (-1, 0), action: right, reward: 1.39330698327
Environment.act(): Step data: {'inputs': {'light': 're

Agent previous state: ('left', 'red', 'right', 'left')
Agent attempted driving left through a red light with traffic and cause a major accident. (rewarded -40.05)
76% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (8, 7), heading: (1, 0), action: right, reward: 1.58363146859
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'right', 'right': 'forward', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', 'right', None), 'deadline': 19, 't': 6, 'action': 'right', 'reward': 1.5836314685892692, 'waypoint': 'left'}
Agent previous state: ('left', 'green', 'right', None)
Agent drove right instead of left. (rewarded 1.58)
72% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (8, 2), heading: (0, 1), action: right, reward: 1

Agent previous state: ('forward', 'red', None, 'forward')
Agent properly idled at a red light. (rewarded 0.56)
16% of time remaining to reach destination.

/-------------------
| Step 21 Results
\-------------------

Environment.step(): t = 21
Environment.act() [POST]: location: (4, 2), heading: (1, 0), action: forward, reward: 1.25457181797
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 4, 't': 21, 'action': 'forward', 'reward': 1.254571817973039, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 1.25)
12% of time remaining to reach destination.

/-------------------
| Step 22 Results
\-------------------

Environment.step(): t = 22
Environment.act() [POST]: location: (4, 2), heading: (1, 0), action: left, reward: -40.5839301174
Environment.act(): Step data: {'inp

Agent previous state: ('right', 'green', 'left', None)
Agent followed the waypoint right. (rewarded 1.75)
50% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act() [POST]: location: (4, 7), heading: (-1, 0), action: forward, reward: -10.7241305714
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'right', 'left': 'left'}, 'violation': 2, 'light': 'red', 'state': ('forward', 'red', None, 'left'), 'deadline': 10, 't': 10, 'action': 'forward', 'reward': -10.724130571406413, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, 'left')
Agent attempted driving forward through a red light. (rewarded -10.72)
45% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
Environment.act() [POST]: location: (4, 6), heading: (0, -1), action: right, reward: 1.69471830925
Environment.act

Agent previous state: ('forward', 'green', None, 'left')
Agent idled at a green light with no oncoming traffic. (rewarded -5.99)
68% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (2, 7), heading: (0, 1), action: right, reward: 0.723273188697
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, 'left'), 'deadline': 17, 't': 8, 'action': 'right', 'reward': 0.7232731886966582, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, 'left')
Agent drove right instead of forward. (rewarded 0.72)
64% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (3, 7), heading: (1, 0), action: left, reward: 2.60792983345
Environment.act()

Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.28)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (7, 3), heading: (-1, 0), action: forward, reward: 2.1578675701
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', None), 'deadline': 11, 't': 9, 'action': 'forward', 'reward': 2.1578675700991816, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'left', None)
Agent followed the waypoint forward. (rewarded 2.16)
50% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act() [POST]: location: (7, 4), heading: (0, 1), action: left, reward: 2.49811742655
Environment.act(): Step data: {'in

| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (6, 3), heading: (1, 0), action: None, reward: 2.63740042504
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'forward', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 17, 't': 3, 'action': None, 'reward': 2.6374004250423546, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.64)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (6, 3), heading: (1, 0), action: None, reward: 1.39440334472
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 16, 't': 4, 'action': None, 'reward': 1.39440334

Agent previous state: ('right', 'green', None, None)
Agent idled at a green light with no oncoming traffic. (rewarded -4.60)
95% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (1, 6), heading: (-1, 0), action: right, reward: 2.25323024433
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', 'left', None), 'deadline': 19, 't': 1, 'action': 'right', 'reward': 2.253230244325854, 'waypoint': 'right'}
Agent previous state: ('right', 'green', 'left', None)
Agent followed the waypoint right. (rewarded 2.25)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (1, 5), heading: (0, -1), action: right, reward: 0.996108123457
Environment.act(): Step da

Agent previous state: ('forward', 'green', 'right', None)
Agent followed the waypoint forward. (rewarded 2.03)
50% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (4, 6), heading: (0, -1), action: right, reward: 1.4280543969
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'forward', 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', None, 'left'), 'deadline': 10, 't': 10, 'action': 'right', 'reward': 1.4280543968953336, 'waypoint': 'right'}
Agent previous state: ('right', 'red', None, 'left')
Agent followed the waypoint right. (rewarded 1.43)
45% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 80
\-------------------------

Environment.reset(): Trial set up with start =

Agent previous state: ('left', 'green', 'right', 'right')
Agent drove forward instead of left. (rewarded 1.69)
96% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (1, 5), heading: (0, -1), action: None, reward: 1.69542745925
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 24, 't': 1, 'action': None, 'reward': 1.6954274592510872, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.70)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (1, 5), heading: (0, -1), action: None, reward: 2.11567698704
Environment.act(): Step data: {'inputs': {'light': 'red', 'onc

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.68)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (1, 6), heading: (0, -1), action: None, reward: 1.35200978325
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 18, 't': 2, 'action': None, 'reward': 1.3520097832462905, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.35)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (1, 6), heading: (0, -1), action: forward, reward: -10.0744840874
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncomin

Agent previous state: ('right', 'green', None, None)
Agent followed the waypoint right. (rewarded 0.90)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (3, 5), heading: (-1, 0), action: right, reward: 1.17751250381
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'right'}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', None, 'right'), 'deadline': 12, 't': 8, 'action': 'right', 'reward': 1.1775125038100815, 'waypoint': 'right'}
Agent previous state: ('right', 'red', None, 'right')
Agent followed the waypoint right. (rewarded 1.18)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (3, 6), heading: (0, 1), action: left, reward: 2.0126402362

Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.40)
88% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (1, 4), heading: (1, 0), action: None, reward: 1.55442814593
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 22, 't': 3, 'action': None, 'reward': 1.5544281459265143, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.55)
84% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (1, 4), heading: (1, 0), action: None, reward: 1.25248565296
Environment.act(): Step data: {'inputs': {'light': 'red', 'o

Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'right', 'left': 'right'}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', None, 'right'), 'deadline': 25, 't': 0, 'action': 'right', 'reward': 1.5078801390519827, 'waypoint': 'right'}
Agent previous state: ('right', 'red', None, 'right')
Agent followed the waypoint right. (rewarded 1.51)
96% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (2, 2), heading: (0, -1), action: right, reward: 1.58441914677
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'forward', None), 'deadline': 24, 't': 1, 'action': 'right', 'reward': 1.5844191467732602, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'forward', None)
Agent drove right instead of forward. (rew

Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, 'left'), 'deadline': 11, 't': 14, 'action': None, 'reward': 1.7307109791271635, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 1.73)
40% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
Environment.act() [POST]: location: (8, 6), heading: (0, -1), action: None, reward: 1.53299617081
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, 'forward'), 'deadline': 10, 't': 15, 'action': None, 'reward': 1.5329961708056596, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'forward')
Agent properly idled at a red light. (rewarded 1.53)
36% of t

Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 2.74)
70% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (5, 7), heading: (1, 0), action: None, reward: 1.44988286855
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 14, 't': 6, 'action': None, 'reward': 1.4498828685509064, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.45)
65% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (5, 7), heading: (1, 0), action: None, reward: 1.51964546511
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncomi

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.58)
40% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (7, 2), heading: (1, 0), action: forward, reward: -39.9971521619
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 4, 'light': 'red', 'state': ('left', 'red', None, 'forward'), 'deadline': 8, 't': 12, 'action': 'forward', 'reward': -39.99715216189791, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'forward')
Agent attempted driving forward through a red light with traffic and cause a major accident. (rewarded -40.00)
35% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (7, 7), heading: (0, -1), action: left, reward:

Agent previous state: ('left', 'green', None, 'left')
Agent followed the waypoint left. (rewarded 1.52)
60% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (2, 4), heading: (-1, 0), action: None, reward: 1.19097774
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 18, 't': 12, 'action': None, 'reward': 1.1909777400030832, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.19)
57% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (2, 4), heading: (-1, 0), action: None, reward: 2.3108331332
Environment.act(): Step data: {'inputs': {'light': 'red',

Agent previous state: ('left', 'red', 'left', None)
Agent attempted driving forward through a red light. (rewarded -10.87)
7% of time remaining to reach destination.

/-------------------
| Step 28 Results
\-------------------

Environment.step(): t = 28
Environment.act() [POST]: location: (1, 2), heading: (0, -1), action: forward, reward: -10.2849194308
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 2, 'light': 'red', 'state': ('left', 'red', 'left', None), 'deadline': 2, 't': 28, 'action': 'forward', 'reward': -10.284919430797295, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'left', None)
Agent attempted driving forward through a red light. (rewarded -10.28)
3% of time remaining to reach destination.

/-------------------
| Step 29 Results
\-------------------

Environment.step(): t = 29
Environment.act() [POST]: location: (8, 2), heading: (-1, 0), action: left, reward: 0.130555503568
Environment.a

Agent previous state: ('forward', 'red', 'forward', None)
Agent drove right instead of forward. (rewarded 0.05)
52% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (4, 7), heading: (-1, 0), action: left, reward: 0.919530099587
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 13, 't': 12, 'action': 'left', 'reward': 0.9195300995868272, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 0.92)
48% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 91
\-------------------------

Environment.reset(): Trial set up with start = (7,

75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (8, 5), heading: (0, -1), action: left, reward: 2.87802345106
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 15, 't': 5, 'action': 'left', 'reward': 2.8780234510622167, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 2.88)
70% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 93
\-------------------------

Environment.reset(): Trial set up with start = (7, 6), destination = (4, 2), deadline = 25
Simulating trial. . . 
epsilon = 0.2478; alpha = 0.0150

/-------------------

Agent previous state: ('left', 'green', 'left', None)
Agent followed the waypoint left. (rewarded 1.00)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (2, 3), heading: (-1, 0), action: left, reward: 1.37675342077
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 23, 't': 2, 'action': 'left', 'reward': 1.37675342076905, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 1.38)
88% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (2, 3), heading: (-1, 0), action: None, reward: 1.46715340816
Environment.act(): Step data: {'inputs': {'light': 'red', 'onc

Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.28)
24% of time remaining to reach destination.

/-------------------
| Step 19 Results
\-------------------

Environment.step(): t = 19
Environment.act() [POST]: location: (8, 3), heading: (0, -1), action: forward, reward: -9.74729446929
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 2, 'light': 'red', 'state': ('forward', 'red', 'left', None), 'deadline': 6, 't': 19, 'action': 'forward', 'reward': -9.747294469287691, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', None)
Agent attempted driving forward through a red light. (rewarded -9.75)
20% of time remaining to reach destination.

/-------------------
| Step 20 Results
\-------------------

Environment.step(): t = 20
Environment.act() [POST]: location: (8, 3), heading: (0, -1), action: None, reward: 0.943878917593
Environment.act(): 

Agent previous state: ('forward', 'red', 'right', 'forward')
Agent attempted driving left through a red light with traffic and cause a major accident. (rewarded -39.78)
64% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (8, 4), heading: (-1, 0), action: None, reward: 1.01238680007
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, 'forward'), 'deadline': 16, 't': 9, 'action': None, 'reward': 1.0123868000686786, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, 'forward')
Agent properly idled at a red light. (rewarded 1.01)
60% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act() [POST]: location: (8, 4), heading: (-1, 0), action: None, 

\-------------------

Environment.step(): t = 0
Environment.act() [POST]: location: (6, 3), heading: (0, 1), action: forward, reward: 0.843563042305
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', 'left', None), 'deadline': 20, 't': 0, 'action': 'forward', 'reward': 0.8435630423049296, 'waypoint': 'left'}
Agent previous state: ('left', 'green', 'left', None)
Agent drove forward instead of left. (rewarded 0.84)
95% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (5, 3), heading: (-1, 0), action: right, reward: 1.81870209165
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'forward', None), 'deadline': 19, 't': 1, 'action': 'right', 'r

Agent previous state: ('left', 'red', None, None)
Agent drove right instead of left. (rewarded 1.00)
15% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Environment.step(): t = 17
Environment.act() [POST]: location: (7, 2), heading: (0, -1), action: None, reward: 0.572927693882
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', 'left', 'left'), 'deadline': 3, 't': 17, 'action': None, 'reward': 0.5729276938824756, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'left', 'left')
Agent properly idled at a red light. (rewarded 0.57)
10% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Environment.step(): t = 18
Environment.act() [POST]: location: (8, 2), heading: (1, 0), action: right, reward: 1.2901487916
Environment.act(): Step data: {'inputs': {'light': 'r

Agent previous state: ('left', 'green', None, None)
Agent drove forward instead of left. (rewarded 0.88)
52% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (4, 7), heading: (-1, 0), action: left, reward: 2.60858609557
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'forward'), 'deadline': 13, 't': 12, 'action': 'left', 'reward': 2.608586095568172, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'forward')
Agent followed the waypoint left. (rewarded 2.61)
48% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (3, 7), heading: (-1, 0), action: forward, rew

Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.77)
45% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
Environment.act() [POST]: location: (8, 7), heading: (1, 0), action: None, reward: 2.43451423006
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', None), 'deadline': 9, 't': 11, 'action': None, 'reward': 2.4345142300574802, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 2.43)
40% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (8, 7), heading: (1, 0), action: None, reward: 1.2349677435
Environment.act(): Step data: {'inputs': {'light'

Agent previous state: ('forward', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 1.87)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (8, 7), heading: (-1, 0), action: None, reward: 1.26845123557
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 12, 't': 8, 'action': None, 'reward': 1.268451235571995, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.27)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (7, 7), heading: (-1, 0), action: forward, reward: 1.88772124

Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.17)
25% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
Environment.act() [POST]: location: (6, 3), heading: (-1, 0), action: None, reward: 1.43530101923
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'right', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 5, 't': 15, 'action': None, 'reward': 1.4353010192304494, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.44)
20% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (5, 3), heading: (-1, 0), action: forward, reward: 0.68

Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 2.08)
30% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
Environment.act() [POST]: location: (5, 6), heading: (1, 0), action: left, reward: 0.568077633099
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 6, 't': 14, 'action': 'left', 'reward': 0.5680776330992823, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 0.57)
25% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (5, 7), heading: (0, 1), action: right, reward: 0.489672250

Agent previous state: ('forward', 'green', None, 'right')
Agent drove left instead of forward. (rewarded 1.46)
56% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
Environment.act() [POST]: location: (6, 3), heading: (-1, 0), action: right, reward: 1.77803202288
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, 'left'), 'deadline': 14, 't': 11, 'action': 'right', 'reward': 1.7780320228796513, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, 'left')
Agent followed the waypoint right. (rewarded 1.78)
52% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (6, 2), heading: (0, -1), action: right, reward: 0.447231269355
Environment.act(): Step data: {'inpu

\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (2, 7), heading: (-1, 0), action: right, reward: 0.079187315589
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': 'forward', 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', 'forward'), 'deadline': 18, 't': 2, 'action': 'right', 'reward': 0.07918731558898562, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'left', 'forward')
Agent drove right instead of forward. (rewarded 0.08)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (1, 7), heading: (-1, 0), action: forward, reward: 1.05518044017
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', 'forward', None), 'deadline':

Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.39)
20% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
Environment.act() [POST]: location: (2, 5), heading: (0, 1), action: right, reward: 0.0231204745614
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', None), 'deadline': 4, 't': 16, 'action': 'right', 'reward': 0.023120474561350246, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'left', None)
Agent drove right instead of forward. (rewarded 0.02)
15% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Environment.step(): t = 17
Environment.act() [POST]: location: (2, 5), heading: (0, 1), action: None, reward: 1.32211497318
Environment.act(): Step data: {

Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 2.37)
67% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act() [POST]: location: (8, 4), heading: (-1, 0), action: forward, reward: -9.49621954196
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 2, 'light': 'red', 'state': ('forward', 'red', 'forward', None), 'deadline': 20, 't': 10, 'action': 'forward', 'reward': -9.496219541964635, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'forward', None)
Agent attempted driving forward through a red light. (rewarded -9.50)
63% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
Environment.act() [POST]: location: (8, 4), heading: (-1, 0), action: left, reward: -20.850810973
Environment.act(

Environment.step(): t = 1
Environment.act() [POST]: location: (2, 5), heading: (-1, 0), action: None, reward: 1.37321346291
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': 'left', 'left': 'right'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'forward', 'right'), 'deadline': 29, 't': 1, 'action': None, 'reward': 1.3732134629121968, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'forward', 'right')
Agent properly idled at a red light. (rewarded 1.37)
93% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (2, 5), heading: (-1, 0), action: None, reward: 1.40804826163
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'forward', 'left'), 'deadline': 28, 't': 2, 'action': None, 'reward': 

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.57)
40% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Environment.step(): t = 18
Environment.act() [POST]: location: (6, 7), heading: (0, -1), action: right, reward: 0.2171117222
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'left', None), 'deadline': 12, 't': 18, 'action': 'right', 'reward': 0.21711172219996366, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'left', None)
Agent drove right instead of left. (rewarded 0.22)
37% of time remaining to reach destination.

/-------------------
| Step 19 Results
\-------------------

Environment.step(): t = 19
Environment.act() [POST]: location: (6, 7), heading: (0, -1), action: None, reward: -0.171573019048
Environment.act(): Step data: {'inputs': {'light': 'red

Agent previous state: ('forward', 'red', None, None)
Agent attempted driving forward through a red light. (rewarded -10.47)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (1, 4), heading: (-1, 0), action: None, reward: 1.84955553727
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', None), 'deadline': 23, 't': 2, 'action': None, 'reward': 1.849555537265809, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.85)
88% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (1, 4), heading: (-1, 0), action: None, reward: 2.8788705247
Environment.act(): Step data: {'inpu

Agent previous state: ('right', 'red', 'right', None)
Agent followed the waypoint right. (rewarded 1.60)
95% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (2, 5), heading: (-1, 0), action: None, reward: -5.52858738331
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': 'left', 'left': None}, 'violation': 1, 'light': 'green', 'state': ('forward', 'green', 'forward', None), 'deadline': 19, 't': 1, 'action': None, 'reward': -5.528587383306589, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'forward', None)
Agent idled at a green light with no oncoming traffic. (rewarded -5.53)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (1, 5), heading: (-1, 0), action: forward, reward: 2.32167115304
Environment

Agent previous state: ('forward', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 1.20)
65% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (1, 2), heading: (0, -1), action: right, reward: 0.775240903186
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, 'left'), 'deadline': 13, 't': 7, 'action': 'right', 'reward': 0.7752409031857304, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, 'left')
Agent drove right instead of forward. (rewarded 0.78)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (2, 2), heading: (1, 0), action: right, reward: 0.778533585026
Environment.act(): Step data: {'inp


Environment.step(): t = 7
Environment.act() [POST]: location: (2, 2), heading: (1, 0), action: left, reward: 1.74999925671
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 18, 't': 7, 'action': 'left', 'reward': 1.7499992567128793, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 1.75)
68% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (3, 2), heading: (1, 0), action: forward, reward: 1.16502101768
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'right', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 17, 't': 8, 'action': 'forward', 'reward': 1.1650210176818834, 'waypo

Agent previous state: ('right', 'red', None, None)
Agent followed the waypoint right. (rewarded 1.02)
73% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (8, 3), heading: (-1, 0), action: left, reward: 0.246205371627
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, 'left'), 'deadline': 22, 't': 8, 'action': 'left', 'reward': 0.24620537162743605, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, 'left')
Agent drove left instead of right. (rewarded 0.25)
70% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (8, 2), heading: (0, -1), action: right, reward: 2.14661448624
Environment.act(): Step data: {'inputs': {'lig

Agent previous state: ('forward', 'green', 'left', None)
Agent followed the waypoint forward. (rewarded 2.81)
73% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (6, 6), heading: (-1, 0), action: forward, reward: 2.5107617233
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': None, 'left': 'right'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'forward', 'right'), 'deadline': 22, 't': 8, 'action': 'forward', 'reward': 2.510761723303405, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'forward', 'right')
Agent followed the waypoint forward. (rewarded 2.51)
70% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (6, 6), heading: (-1, 0), action: None, reward: 0.986621325664
Environment.act

Agent previous state: ('right', 'red', 'right', None)
Agent followed the waypoint right. (rewarded 0.70)
17% of time remaining to reach destination.

/-------------------
| Step 25 Results
\-------------------

Environment.step(): t = 25
Environment.act() [POST]: location: (6, 4), heading: (0, -1), action: right, reward: 0.693879165316
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'forward', None), 'deadline': 5, 't': 25, 'action': 'right', 'reward': 0.6938791653156351, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'forward', None)
Agent drove right instead of forward. (rewarded 0.69)
13% of time remaining to reach destination.

/-------------------
| Step 26 Results
\-------------------

Environment.step(): t = 26
Environment.act() [POST]: location: (6, 4), heading: (0, -1), action: None, reward: 1.98530430793
Environment.act(): Step data: {'in

Agent previous state: ('right', 'green', 'right', 'forward')
Agent drove forward instead of right. (rewarded 1.52)
52% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (6, 3), heading: (-1, 0), action: None, reward: 0.506280566792
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': 'left', 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', 'forward', 'forward'), 'deadline': 13, 't': 12, 'action': None, 'reward': 0.5062805667924601, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'forward', 'forward')
Agent properly idled at a red light. (rewarded 0.51)
48% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (6, 2), heading: (0, -1), action: right, reward: 1.13795716879
Environment.act(

Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 1.77)
50% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (4, 6), heading: (0, 1), action: right, reward: 2.57745068017
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, None), 'deadline': 10, 't': 10, 'action': 'right', 'reward': 2.577450680172362, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, None)
Agent followed the waypoint right. (rewarded 2.58)
45% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 114
\-------------------------

Environment.reset(): Trial set up with start = (8, 2), des

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.61)
83% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (5, 2), heading: (1, 0), action: right, reward: 0.31111545286
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'right', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', 'right', None), 'deadline': 25, 't': 5, 'action': 'right', 'reward': 0.31111545285953557, 'waypoint': 'left'}
Agent previous state: ('left', 'green', 'right', None)
Agent drove right instead of left. (rewarded 0.31)
80% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (6, 2), heading: (1, 0), action: forward, reward: 2.02981218536
Environment.act(): Step data: {'inputs': {'light'

Agent previous state: ('right', 'green', 'forward', 'forward')
Agent drove forward instead of right. (rewarded 1.60)
95% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (4, 5), heading: (0, 1), action: left, reward: 1.46754640688
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, 'left'), 'deadline': 19, 't': 1, 'action': 'left', 'reward': 1.4675464068772355, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, 'left')
Agent drove left instead of right. (rewarded 1.47)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (5, 5), heading: (1, 0), action: left, reward: 0.978275107804
Environment.act(): Step data: {'inputs':

\-------------------

Environment.step(): t = 18
Environment.act() [POST]: location: (5, 7), heading: (1, 0), action: None, reward: 0.416602133676
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'forward', 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, 'forward'), 'deadline': 2, 't': 18, 'action': None, 'reward': 0.4166021336760348, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, 'forward')
Agent properly idled at a red light. (rewarded 0.42)
5% of time remaining to reach destination.

/-------------------
| Step 19 Results
\-------------------

Environment.step(): t = 19
Environment.act() [POST]: location: (5, 7), heading: (1, 0), action: None, reward: 1.83010304996
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'right', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 1, 't': 19, 'action': None, 'rew

Agent previous state: ('left', 'green', 'left', None)
Agent followed the waypoint left. (rewarded 0.96)
53% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
Environment.act() [POST]: location: (4, 7), heading: (1, 0), action: left, reward: 0.813586847537
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'forward'), 'deadline': 16, 't': 14, 'action': 'left', 'reward': 0.813586847536939, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'forward')
Agent followed the waypoint left. (rewarded 0.81)
50% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (5, 7), heading: (1, 0), action: forward, rewar

Agent previous state: ('left', 'green', 'forward', None)
Agent drove forward instead of left. (rewarded 0.51)
56% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
Environment.act() [POST]: location: (1, 6), heading: (1, 0), action: forward, reward: 1.23899855914
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 14, 't': 11, 'action': 'forward', 'reward': 1.2389985591408053, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent drove forward instead of left. (rewarded 1.24)
52% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (2, 6), heading: (1, 0), action: forward, reward: 0.496890424159
Environment.act(): Step data: {'inputs

Agent previous state: ('right', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 0.62)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (7, 2), heading: (0, -1), action: right, reward: 1.47227797714
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', None, None), 'deadline': 18, 't': 2, 'action': 'right', 'reward': 1.472277977144766, 'waypoint': 'right'}
Agent previous state: ('right', 'red', None, None)
Agent followed the waypoint right. (rewarded 1.47)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (7, 2), heading: (0, -1), action: forward, reward: -9.15407174738
Environment.act(): Step data: {'inputs': {'light': 'red', '

Agent previous state: ('right', 'green', None, None)
Agent followed the waypoint right. (rewarded 1.44)
20% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
Environment.act() [POST]: location: (7, 7), heading: (0, -1), action: forward, reward: 1.28680893586
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'right', 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', 'right', 'left'), 'deadline': 4, 't': 16, 'action': 'forward', 'reward': 1.2868089358646448, 'waypoint': 'right'}
Agent previous state: ('right', 'green', 'right', 'left')
Agent drove forward instead of right. (rewarded 1.29)
15% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Environment.step(): t = 17
Environment.act() [POST]: location: (8, 7), heading: (1, 0), action: right, reward: 1.96521840987
Environment.act(): Step data: 

Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 1.45)
45% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
Environment.act() [POST]: location: (1, 3), heading: (1, 0), action: left, reward: 0.979702936846
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 9, 't': 11, 'action': 'left', 'reward': 0.9797029368456269, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 0.98)
40% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (1, 3), heading: (1, 0), action: None, reward: 1.2437014254
Environment.act(): Step data: {'inputs': {'light': 'red',

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.28)
80% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (2, 5), heading: (-1, 0), action: left, reward: 2.51570651384
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', 'left', None), 'deadline': 20, 't': 5, 'action': 'left', 'reward': 2.5157065138406547, 'waypoint': 'left'}
Agent previous state: ('left', 'green', 'left', None)
Agent followed the waypoint left. (rewarded 2.52)
76% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (2, 5), heading: (-1, 0), action: None, reward: 1.74807379182
Environment.act(): Step data: {'inputs': {'light': 'red',

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.17)
84% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (8, 4), heading: (0, -1), action: None, reward: 1.04018391431
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 21, 't': 4, 'action': None, 'reward': 1.0401839143101286, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.04)
80% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (8, 4), heading: (0, -1), action: None, reward: 1.71315369126
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.76)
16% of time remaining to reach destination.

/-------------------
| Step 21 Results
\-------------------

Environment.step(): t = 21
Environment.act() [POST]: location: (5, 2), heading: (0, 1), action: left, reward: 1.34734551601
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'forward'), 'deadline': 4, 't': 21, 'action': 'left', 'reward': 1.3473455160056238, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'forward')
Agent followed the waypoint left. (rewarded 1.35)
12% of time remaining to reach destination.

/-------------------
| Step 22 Results
\-------------------

Environment.step(): t = 22
Environment.act() [POST]: location: (5, 2), heading: (0, 1), action: None, reward: 0.680053315064
Environment.act(): Step data: {'inputs': {'

Agent previous state: ('forward', 'red', 'forward', None)
Agent drove right instead of forward. (rewarded 1.19)
64% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (8, 5), heading: (0, 1), action: None, reward: 2.43004247144
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 16, 't': 9, 'action': None, 'reward': 2.430042471440534, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.43)
60% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act() [POST]: location: (8, 5), heading: (0, 1), action: left, reward: -10.0981303053
Environment.act(): Step data: {'inputs': {'light': 'red', 'on

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.76)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (3, 2), heading: (0, 1), action: None, reward: 2.80454308434
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'right'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, 'right'), 'deadline': 23, 't': 2, 'action': None, 'reward': 2.804543084336302, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'right')
Agent properly idled at a red light. (rewarded 2.80)
88% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (3, 2), heading: (0, 1), action: None, reward: 1.51508925398
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncom


Environment.step(): t = 19
Environment.act() [POST]: location: (7, 5), heading: (-1, 0), action: None, reward: 0.576435320561
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', None), 'deadline': 6, 't': 19, 'action': None, 'reward': 0.5764353205608232, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 0.58)
20% of time remaining to reach destination.

/-------------------
| Step 20 Results
\-------------------

Environment.step(): t = 20
Environment.act() [POST]: location: (7, 5), heading: (-1, 0), action: None, reward: 1.40979533732
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': 'right'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', 'right'), 'deadline': 5, 't': 20, 'action': None, 'reward': 1.4097953373153733,

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.50)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (8, 7), heading: (0, 1), action: None, reward: 2.93142296045
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, 'left'), 'deadline': 16, 't': 4, 'action': None, 'reward': 2.931422960446394, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 2.93)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (1, 7), heading: (1, 0), action: left, reward: 2.5954673599
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncomin

Agent previous state: ('left', 'red', None, 'right')
Agent properly idled at a red light. (rewarded 2.68)
95% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (8, 4), heading: (0, 1), action: None, reward: 1.70724514485
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 19, 't': 1, 'action': None, 'reward': 1.707245144851598, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.71)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (8, 4), heading: (0, 1), action: None, reward: 1.33482405377
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.32)
25% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
Environment.act() [POST]: location: (3, 2), heading: (0, -1), action: left, reward: 0.541061706941
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'left'), 'deadline': 5, 't': 15, 'action': 'left', 'reward': 0.5410617069412753, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'left')
Agent followed the waypoint left. (rewarded 0.54)
20% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
Environment.act() [POST]: location: (2, 2), heading: (-1, 0), action: left, reward: 1.87202151642
Environment.act(): Step data: {'inputs': {'light':

Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.66)
64% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (6, 4), heading: (1, 0), action: forward, reward: 1.54818725692
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 16, 't': 9, 'action': 'forward', 'reward': 1.548187256922615, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 1.55)
60% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act() [POST]: location: (6, 4), heading: (1, 0), action: None, reward: 1.59351286597
Environment.act(): Step data: {'inputs': {'

Agent previous state: ('right', 'green', None, 'left')
Agent followed the waypoint right. (rewarded 2.64)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (4, 7), heading: (0, -1), action: right, reward: 1.77100806355
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, 'left'), 'deadline': 23, 't': 2, 'action': 'right', 'reward': 1.771008063553546, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, 'left')
Agent drove right instead of forward. (rewarded 1.77)
88% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (4, 7), heading: (0, -1), action: None, reward: 1.45857971812
Environment.act(): Step data: {'inputs':

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.34)
36% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
Environment.act() [POST]: location: (3, 5), heading: (0, -1), action: None, reward: 0.91591053265
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 9, 't': 16, 'action': None, 'reward': 0.9159105326500854, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 0.92)
32% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Environment.step(): t = 17
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (2, 5), heading: (-1, 0), action: left, reward: 0.958839524646
Environ

Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.37)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (6, 4), heading: (-1, 0), action: None, reward: 1.9363990755
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', None), 'deadline': 16, 't': 4, 'action': None, 'reward': 1.9363990754974114, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.94)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (6, 4), heading: (-1, 0), action: None, reward: 1.18391145417
Environment.act(): Step data: {'inputs': {'light': 

Agent previous state: ('left', 'green', 'forward', None)
Agent drove forward instead of left. (rewarded 0.54)
65% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (8, 4), heading: (1, 0), action: right, reward: -0.0379141456199
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 13, 't': 7, 'action': 'right', 'reward': -0.03791414561986817, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent drove right instead of left. (rewarded -0.04)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (8, 4), heading: (1, 0), action: forward, reward: -39.6669167615
Environment.act(): Step data: {'inputs': {'li

Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.24)
87% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (6, 5), heading: (1, 0), action: forward, reward: 1.06904048763
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', None), 'deadline': 26, 't': 4, 'action': 'forward', 'reward': 1.0690404876263626, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'left', None)
Agent followed the waypoint forward. (rewarded 1.07)
83% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (6, 6), heading: (0, 1), action: right, reward: 0.995027661651
Environment.act(): Step data: {'in

Agent previous state: ('right', 'green', 'left', None)
Agent followed the waypoint right. (rewarded 1.76)
27% of time remaining to reach destination.

/-------------------
| Step 22 Results
\-------------------

Environment.step(): t = 22
Environment.act() [POST]: location: (1, 7), heading: (0, 1), action: None, reward: 0.377440052889
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': 'right'}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', 'left', 'right'), 'deadline': 8, 't': 22, 'action': None, 'reward': 0.3774400528892865, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'left', 'right')
Agent properly idled at a red light. (rewarded 0.38)
23% of time remaining to reach destination.

/-------------------
| Step 23 Results
\-------------------

Environment.step(): t = 23
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (8, 7), heading: (-1, 0), action: right, reward: 

Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.70)
57% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (2, 4), heading: (1, 0), action: forward, reward: 2.47697130324
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', None), 'deadline': 17, 't': 13, 'action': 'forward', 'reward': 2.476971303238744, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'left', None)
Agent followed the waypoint forward. (rewarded 2.48)
53% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
Environment.act() [POST]: location: (2, 5), heading: (0, 1), action: right, reward: 0.701539021571
Environment.act(): Step data: 

Environment.act() [POST]: location: (2, 2), heading: (1, 0), action: left, reward: 1.41407591131
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'forward'), 'deadline': 16, 't': 4, 'action': 'left', 'reward': 1.41407591130868, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'forward')
Agent followed the waypoint left. (rewarded 1.41)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (2, 2), heading: (1, 0), action: None, reward: -5.33671163861
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 1, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 15, 't': 5, 'action': None, 'reward': -5.336711638605578, 'waypoint': 'forward'}
Agent

Agent previous state: ('right', 'red', None, None)
Agent properly idled at a red light. (rewarded 0.64)
60% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (2, 7), heading: (0, 1), action: right, reward: 1.10392705641
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'left', 'left': 'right'}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', None, 'right'), 'deadline': 15, 't': 10, 'action': 'right', 'reward': 1.1039270564099297, 'waypoint': 'right'}
Agent previous state: ('right', 'red', None, 'right')
Agent followed the waypoint right. (rewarded 1.10)
56% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 138
\-------------------------

Environment.reset(): Trial set up with start = (2, 6


/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (4, 2), heading: (0, 1), action: None, reward: 1.37684987422
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 13, 't': 12, 'action': None, 'reward': 1.376849874219728, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.38)
48% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (4, 3), heading: (0, 1), action: forward, reward: 0.821015681007
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 's

Agent previous state: ('forward', 'green', 'left', None)
Agent followed the waypoint forward. (rewarded 1.21)
56% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
Environment.act() [POST]: location: (7, 6), heading: (1, 0), action: forward, reward: 1.75624452994
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 14, 't': 11, 'action': 'forward', 'reward': 1.7562445299407035, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 1.76)
52% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (7, 7), heading: (0, 1), action: right

Agent previous state: ('right', 'red', None, 'forward')
Agent properly idled at a red light. (rewarded -0.31)
35% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (3, 4), heading: (1, 0), action: right, reward: 2.40453808775
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'forward', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', None, None), 'deadline': 7, 't': 13, 'action': 'right', 'reward': 2.4045380877463103, 'waypoint': 'right'}
Agent previous state: ('right', 'red', None, None)
Agent followed the waypoint right. (rewarded 2.40)
30% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 141
\-------------------------

Environment.reset(): Trial set up with start = (2, 3)

Agent previous state: ('left', 'red', None, 'forward')
Agent properly idled at a red light. (rewarded 1.87)
44% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
Environment.act() [POST]: location: (5, 3), heading: (1, 0), action: None, reward: 1.37055232408
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 11, 't': 14, 'action': None, 'reward': 1.3705523240808501, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.37)
40% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
Environment.act() [POST]: location: (5, 4), heading: (0, 1), action: right, reward: 0.329034098536
Environment.act(): Step data: {'inputs': {'light': 'green', 

Agent followed the waypoint right. (rewarded 2.09)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (5, 5), heading: (-1, 0), action: right, reward: 2.32134497554
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, None), 'deadline': 18, 't': 2, 'action': 'right', 'reward': 2.3213449755432842, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, None)
Agent followed the waypoint right. (rewarded 2.32)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (5, 5), heading: (-1, 0), action: None, reward: 1.84940706981
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'righ

| Step 18 Results
\-------------------

Environment.step(): t = 18
Environment.act() [POST]: location: (4, 2), heading: (1, 0), action: None, reward: 1.56312692103
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'right', None), 'deadline': 2, 't': 18, 'action': None, 'reward': 1.5631269210268353, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'right', None)
Agent properly idled at a red light. (rewarded 1.56)
5% of time remaining to reach destination.

/-------------------
| Step 19 Results
\-------------------

Environment.step(): t = 19
Environment.act() [POST]: location: (4, 2), heading: (1, 0), action: None, reward: 0.487326858612
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 1, 't': 19, 'action': None, 'reward': 0.

Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 1.33)
80% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (7, 4), heading: (1, 0), action: None, reward: 1.02380548046
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'right', None), 'deadline': 28, 't': 7, 'action': None, 'reward': 1.0238054804575394, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'right', None)
Agent properly idled at a red light. (rewarded 1.02)
77% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (7, 4), heading: (1, 0), action: None, reward: 2.55091659812
Environment.act(): Step data: {'inputs': {'light': 'red', 

| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (5, 2), heading: (-1, 0), action: forward, reward: 1.10552464215
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 16, 't': 4, 'action': 'forward', 'reward': 1.105524642153422, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 1.11)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (5, 7), heading: (0, -1), action: right, reward: 1.25244446644
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'forward', None), 'deadline': 15, 't'

Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.56)
80% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (8, 3), heading: (1, 0), action: forward, reward: 1.02977410409
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 20, 't': 5, 'action': 'forward', 'reward': 1.0297741040866484, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 1.03)
76% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (1, 3), heading: (1, 0), action: forward, reward: 2.56880142381
Environment.act(): Step data: {'inputs': 

Agent previous state: ('left', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.85)
70% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (7, 5), heading: (-1, 0), action: left, reward: 1.15259897854
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', 'left', 'left'), 'deadline': 14, 't': 6, 'action': 'left', 'reward': 1.1525989785378916, 'waypoint': 'left'}
Agent previous state: ('left', 'green', 'left', 'left')
Agent followed the waypoint left. (rewarded 1.15)
65% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (7, 6), heading: (0, 1), action: left, reward: 1.76708106863
Environment.act(): Step data: {'inputs': {'light':

Agent previous state: ('forward', 'red', 'forward', None)
Agent drove right instead of forward. (rewarded 1.05)
65% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (5, 5), heading: (0, 1), action: forward, reward: 1.28681160763
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': 'left', 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', 'left', 'forward'), 'deadline': 13, 't': 7, 'action': 'forward', 'reward': 1.2868116076275637, 'waypoint': 'left'}
Agent previous state: ('left', 'green', 'left', 'forward')
Agent drove forward instead of left. (rewarded 1.29)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (5, 5), heading: (0, 1), action: None, reward: 2.36811227425
Environment.act(): Step 

70% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 150
\-------------------------

Environment.reset(): Trial set up with start = (5, 3), destination = (8, 5), deadline = 25
Simulating trial. . . 
epsilon = 0.1054; alpha = 0.0150

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
Environment.act() [POST]: location: (5, 3), heading: (0, 1), action: None, reward: 1.68468549634
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': 'forward', 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'forward', 'left'), 'deadline': 25, 't': 0, 'action': None, 'reward': 1.6846854963381488, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'forward', 'left')
Agent properly idled at a red light. (rewarded 1.68)
96% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------


Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 2.08)
36% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
Environment.act() [POST]: location: (1, 5), heading: (0, -1), action: right, reward: 1.16781772325
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, 'left'), 'deadline': 9, 't': 16, 'action': 'right', 'reward': 1.167817723254291, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, 'left')
Agent drove right instead of forward. (rewarded 1.17)
32% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Environment.step(): t = 17
Environment.act() [POST]: location: (1, 5), heading: (0, -1), action: None, reward: 0.589140897739
Environment.act(): Step data: {'i

Agent previous state: ('forward', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 2.65)
45% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
Environment.act() [POST]: location: (1, 3), heading: (-1, 0), action: None, reward: 1.62787447597
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 9, 't': 11, 'action': None, 'reward': 1.62787447597113, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.63)
40% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (1, 3), heading: (-1, 0), action: None, reward: 0.879807358446
Environment.act(): Step data: {'inputs': {'light': 'r

Agent previous state: ('left', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.42)
87% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (7, 7), heading: (-1, 0), action: None, reward: 1.57206010162
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'left', None), 'deadline': 26, 't': 4, 'action': None, 'reward': 1.5720601016210554, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.57)
83% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (7, 7), heading: (-1, 0), action: None, reward: 1.85328835277
Environment.act(): Step data: {'inputs': {'light': 'red', 'onc

Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 0.63)
27% of time remaining to reach destination.

/-------------------
| Step 22 Results
\-------------------

Environment.step(): t = 22
Environment.act() [POST]: location: (1, 5), heading: (1, 0), action: forward, reward: 1.2891850471
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, 'forward'), 'deadline': 8, 't': 22, 'action': 'forward', 'reward': 1.289185047096637, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, 'forward')
Agent followed the waypoint forward. (rewarded 1.29)
23% of time remaining to reach destination.

/-------------------
| Step 23 Results
\-------------------

Environment.step(): t = 23
Environment.act() [POST]: location: (1, 6), heading: (0, 1), action: right, reward: 1.24238974582
Environment.act(): Step data: 

Agent previous state: ('forward', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 2.42)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (8, 7), heading: (0, -1), action: forward, reward: 1.82699018907
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 11, 't': 9, 'action': 'forward', 'reward': 1.8269901890666342, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 1.83)
50% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 154
\-------------------------

Environment.reset(): Trial set up with st

Agent previous state: ('forward', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 1.81)
50% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
Environment.act() [POST]: location: (7, 3), heading: (-1, 0), action: None, reward: 1.70332378769
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'right'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, 'right'), 'deadline': 15, 't': 15, 'action': None, 'reward': 1.7033237876858613, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, 'right')
Agent properly idled at a red light. (rewarded 1.70)
47% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
Environment.act() [POST]: location: (7, 3), heading: (-1, 0), action: None, reward: 2.51790324958
Environment.act(): Step data: {'inputs': {

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.56)
60% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (8, 6), heading: (0, -1), action: None, reward: 1.62839197721
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, 'left'), 'deadline': 18, 't': 12, 'action': None, 'reward': 1.6283919772057431, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 1.63)
57% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (7, 6), heading: (-1, 0), action: left, reward: 1.7314978557
Environment.act(): Step data: {'inputs': {'light': 'green', 

Agent previous state: ('left', 'green', 'left', None)
Agent followed the waypoint left. (rewarded 1.39)
70% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (6, 4), heading: (1, 0), action: None, reward: 2.54010143844
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 14, 't': 6, 'action': None, 'reward': 2.5401014384448355, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.54)
65% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (6, 4), heading: (1, 0), action: None, reward: 2.32106591251
Environment.act(): Step data: {'inputs': {'light': 'red', 'o

Agent previous state: ('left', 'red', 'forward', None)
Agent drove right instead of left. (rewarded 1.07)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (4, 7), heading: (0, 1), action: left, reward: 1.6122129901
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'left'), 'deadline': 18, 't': 2, 'action': 'left', 'reward': 1.6122129901044906, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'left')
Agent followed the waypoint left. (rewarded 1.61)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (4, 7), heading: (0, 1), action: None, reward: -4.03261392011
Environment.act(): Step data: {'inputs': {'light': 'gree

Agent previous state: ('forward', 'red', 'forward', None)
Agent drove right instead of forward. (rewarded 0.15)
90% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (4, 2), heading: (0, 1), action: None, reward: 1.59352603677
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'right', None), 'deadline': 27, 't': 3, 'action': None, 'reward': 1.593526036773777, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'right', None)
Agent properly idled at a red light. (rewarded 1.59)
87% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (3, 2), heading: (-1, 0), action: right, reward: 1.14702667466
Environment.act(): Step data: {'inputs': {'light': 'r

Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 2.10)
40% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Environment.step(): t = 18
Environment.act() [POST]: location: (6, 7), heading: (1, 0), action: left, reward: 1.81360354203
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'forward'), 'deadline': 12, 't': 18, 'action': 'left', 'reward': 1.8136035420282854, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'forward')
Agent followed the waypoint left. (rewarded 1.81)
37% of time remaining to reach destination.

/-------------------
| Step 19 Results
\-------------------

Environment.step(): t = 19
Environment.act() [POST]: location: (6, 7), heading: (1, 0), action: left, reward: -9.77713931395
Environment.act(): Step data: {'inputs':

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 0.98)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (8, 7), heading: (1, 0), action: None, reward: 1.85501082123
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, 'left'), 'deadline': 17, 't': 3, 'action': None, 'reward': 1.8550108212279217, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 1.86)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (8, 2), heading: (0, 1), action: right, reward: 0.234146296867
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncom

\-------------------------

Environment.reset(): Trial set up with start = (6, 5), destination = (3, 6), deadline = 20
Simulating trial. . . 
epsilon = 0.0907; alpha = 0.0150
Simulating trial. . . 
epsilon = 0.0907; alpha = 0.0150

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
Environment.act() [POST]: location: (6, 5), heading: (-1, 0), action: None, reward: 2.05188811387
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 20, 't': 0, 'action': None, 'reward': 2.051888113874564, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.05)
95% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (6, 5), heading: (-1, 0), action: None

Agent previous state: ('forward', 'green', 'left', None)
Agent followed the waypoint forward. (rewarded 1.11)
30% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
Environment.act() [POST]: location: (3, 4), heading: (-1, 0), action: forward, reward: 1.03584573002
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 6, 't': 14, 'action': 'forward', 'reward': 1.0358457300210362, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 1.04)
25% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
Environment.act() [POST]: location: (3, 4), heading: (-1, 0), action: None, reward: 1.52810376062
Environment.act(): Step data: {'in

Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.88)
65% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (8, 5), heading: (0, -1), action: forward, reward: 1.4707896136
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 13, 't': 7, 'action': 'forward', 'reward': 1.470789613596352, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 1.47)
60% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 162
\-------------------------

Environment.reset(): Trial set up with start 

Agent previous state: ('left', 'green', 'left', None)
Agent followed the waypoint left. (rewarded 2.70)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (1, 4), heading: (0, -1), action: right, reward: 2.22275831428
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'right', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, None), 'deadline': 12, 't': 8, 'action': 'right', 'reward': 2.2227583142794636, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, None)
Agent followed the waypoint right. (rewarded 2.22)
55% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 164
\-------------------------

Environment.reset(): Trial set up with start = (5, 6),

Agent previous state: ('right', 'green', 'left', None)
Agent followed the waypoint right. (rewarded 2.41)
44% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
Environment.act() [POST]: location: (8, 7), heading: (0, 1), action: left, reward: -9.33535982957
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 2, 'light': 'red', 'state': ('forward', 'red', 'forward', None), 'deadline': 11, 't': 14, 'action': 'left', 'reward': -9.335359829568645, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'forward', None)
Agent attempted driving left through a red light. (rewarded -9.34)
40% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
Environment.act() [POST]: location: (8, 7), heading: (0, 1), action: None, reward: 1.16987710562
Environment.act(): Step

Agent previous state: ('left', 'green', 'left', None)
Agent followed the waypoint left. (rewarded 1.57)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (6, 3), heading: (1, 0), action: forward, reward: 2.27769783149
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 18, 't': 2, 'action': 'forward', 'reward': 2.277697831489289, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 2.28)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (6, 4), heading: (0, 1), action: right, rewar

Agent previous state: ('left', 'green', 'left', None)
Agent followed the waypoint left. (rewarded 1.04)
56% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
Environment.act() [POST]: location: (1, 4), heading: (0, 1), action: right, reward: 1.46271339116
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'right'}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', None, 'right'), 'deadline': 14, 't': 11, 'action': 'right', 'reward': 1.4627133911647057, 'waypoint': 'right'}
Agent previous state: ('right', 'red', None, 'right')
Agent followed the waypoint right. (rewarded 1.46)
52% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (2, 4), heading: (1, 0), action: left, reward: 1.28765167737
Environment.act(): Step data: {'inputs': {'light': '

Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 2.39)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (6, 2), heading: (-1, 0), action: None, reward: 1.01625381114
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': 'right'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', 'right'), 'deadline': 17, 't': 3, 'action': None, 'reward': 1.0162538111389567, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', 'right')
Agent properly idled at a red light. (rewarded 1.02)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (5, 2), heading: (-1, 0), action: forward, reward: 1.2500120865
Environment.act(): Step data: {'inputs'

Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 2.42)
67% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act() [POST]: location: (1, 5), heading: (-1, 0), action: forward, reward: 2.45326111203
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 20, 't': 10, 'action': 'forward', 'reward': 2.453261112033099, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 2.45)
63% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
Environment.act() [POST]: location: (1, 5), heading: (-1, 0), action: None, reward: 1.77594646235
Environment.act(): Step data: {'inpu


Environment.step(): t = 6
Environment.act() [POST]: location: (7, 3), heading: (1, 0), action: left, reward: 2.85974266537
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 19, 't': 6, 'action': 'left', 'reward': 2.8597426653682634, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 2.86)
72% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (8, 3), heading: (1, 0), action: forward, reward: 1.23376169014
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'right', 'right': 'left', 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'right', 'forward'), 'deadline': 18, 't': 7, 'action': 'forward', 'reward': 1.233761690

Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', 'forward'), 'deadline': 9, 't': 11, 'action': None, 'reward': 0.9999589179351112, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', 'forward')
Agent properly idled at a red light. (rewarded 1.00)
40% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (5, 6), heading: (1, 0), action: None, reward: 0.99209010493
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', None), 'deadline': 8, 't': 12, 'action': None, 'reward': 0.9920901049304722, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (re

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.57)
68% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (4, 5), heading: (0, 1), action: None, reward: 1.45728130865
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 17, 't': 8, 'action': None, 'reward': 1.4572813086498921, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.46)
64% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (4, 5), heading: (0, 1), action: None, reward: 1.76821411617
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': No

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.59)
86% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (4, 3), heading: (0, 1), action: None, reward: 1.17020161621
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'right'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, 'right'), 'deadline': 30, 't': 5, 'action': None, 'reward': 1.1702016162056343, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'right')
Agent properly idled at a red light. (rewarded 1.17)
83% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (4, 3), heading: (0, 1), action: None, reward: 2.66510494676
Environment.act(): Step data: {'inputs': {'light': 'red', 'onco

Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 2.57)
80% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 174
\-------------------------

Environment.reset(): Trial set up with start = (4, 7), destination = (1, 6), deadline = 20
Simulating trial. . . 
epsilon = 0.0735; alpha = 0.0150

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
Environment.act() [POST]: location: (5, 7), heading: (1, 0), action: forward, reward: 1.05164934413
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', 'forward', None), 'deadline': 20, 't': 0, 'action': 'forward', 'reward': 1.0516493441334234, 'waypoint': 'left'}
Agent previous state: ('left', 'green', 'forward', None)
Agent drove forward instead of left. (reward

\-------------------

Environment.step(): t = 15
Environment.act() [POST]: location: (1, 5), heading: (-1, 0), action: None, reward: 2.00966849322
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, 'forward'), 'deadline': 5, 't': 15, 'action': None, 'reward': 2.009668493216008, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'forward')
Agent properly idled at a red light. (rewarded 2.01)
20% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
Environment.act() [POST]: location: (1, 5), heading: (-1, 0), action: None, reward: 2.11880500989
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 4, 't': 16, 'action': None, 'reward': 2.11880500988

Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.57)
87% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (2, 5), heading: (-1, 0), action: forward, reward: -9.23362837136
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'right'}, 'violation': 2, 'light': 'red', 'state': ('forward', 'red', None, 'right'), 'deadline': 26, 't': 4, 'action': 'forward', 'reward': -9.233628371362569, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, 'right')
Agent attempted driving forward through a red light. (rewarded -9.23)
83% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (2, 4), heading: (0, -1), action: right, reward: 0.135044668447
Environment.act(): St

Agent previous state: ('left', 'green', 'forward', None)
Agent drove forward instead of left. (rewarded 0.68)
33% of time remaining to reach destination.

/-------------------
| Step 20 Results
\-------------------

Environment.step(): t = 20
Environment.act() [POST]: location: (3, 3), heading: (1, 0), action: None, reward: 1.46369609261
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 10, 't': 20, 'action': None, 'reward': 1.4636960926122706, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.46)
30% of time remaining to reach destination.

/-------------------
| Step 21 Results
\-------------------

Environment.step(): t = 21
Environment.act() [POST]: location: (3, 4), heading: (0, 1), action: right, reward: 0.565183791233
Environment.act(): Step data: {'inputs': {'light': 'green'


Environment.step(): t = 3
Environment.act() [POST]: location: (5, 5), heading: (0, 1), action: forward, reward: 2.56751300449
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 17, 't': 3, 'action': 'forward', 'reward': 2.567513004486939, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 2.57)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (5, 6), heading: (0, 1), action: forward, reward: 2.05032943143
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', None), 'de


Environment.step(): t = 6
Environment.act() [POST]: location: (6, 7), heading: (-1, 0), action: forward, reward: 1.18157538906
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', None), 'deadline': 14, 't': 6, 'action': 'forward', 'reward': 1.1815753890565888, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'left', None)
Agent followed the waypoint forward. (rewarded 1.18)
65% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (6, 7), heading: (-1, 0), action: None, reward: -0.103881324479
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': 'forward', 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', 'forward', 'forward'), 'deadline': 13, 't': 7, 'action': None, 'r

Agent previous state: ('right', 'green', None, None)
Agent followed the waypoint right. (rewarded 2.35)
30% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
Environment.act() [POST]: location: (6, 2), heading: (-1, 0), action: right, reward: 1.37548969095
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, 'forward'), 'deadline': 6, 't': 14, 'action': 'right', 'reward': 1.3754896909506809, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, 'forward')
Agent followed the waypoint right. (rewarded 1.38)
25% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
Environment.act() [POST]: location: (6, 7), heading: (0, -1), action: right, reward: 1.35778592542
Environment.act(): Step data: {'inpu

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (7, 7), heading: (0, -1), action: forward, reward: 2.63454617752
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 12, 't': 8, 'action': 'forward', 'reward': 2.6345461775174304, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 2.63)
55% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 182
\-------------------------

Environment.reset(): Trial set up with start = (2, 4), destination = (7, 7), deadline = 30
Simulating trial. . . 
epsilon = 0.0652; alpha = 0.0150
Simulating trial. . . 
epsilon = 0.0652; alph

Agent previous state: ('right', 'green', 'left', 'left')
Agent drove forward instead of right. (rewarded 0.93)
43% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Environment.step(): t = 17
Environment.act() [POST]: location: (6, 2), heading: (0, -1), action: right, reward: 1.93517193061
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', 'forward', None), 'deadline': 13, 't': 17, 'action': 'right', 'reward': 1.935171930610967, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'forward', None)
Agent followed the waypoint right. (rewarded 1.94)
40% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Environment.step(): t = 18
Environment.act() [POST]: location: (6, 2), heading: (0, -1), action: None, reward: 0.87890231045
Environment.act(): Step data: {'inputs

Agent previous state: ('left', 'green', 'left', None)
Agent followed the waypoint left. (rewarded 1.82)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (5, 6), heading: (1, 0), action: None, reward: 1.42351107782
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': 'forward', 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'right', 'left'), 'deadline': 16, 't': 4, 'action': None, 'reward': 1.423511077823718, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'right', 'left')
Agent properly idled at a red light. (rewarded 1.42)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (5, 6), heading: (1, 0), action: None, reward: 2.49153841034
Environment.act(): Step data: {'inputs': {'

Agent previous state: ('left', 'green', 'right', None)
Agent drove right instead of left. (rewarded 0.62)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (1, 4), heading: (-1, 0), action: right, reward: 1.60890627172
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, None), 'deadline': 12, 't': 8, 'action': 'right', 'reward': 1.608906271721116, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, None)
Agent followed the waypoint right. (rewarded 1.61)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (1, 3), heading: (0, -1), action: right, reward: 0.96448762953
Environment.act(): Step data: {'inputs': {'light'

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.47)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (6, 7), heading: (0, -1), action: None, reward: 2.15725275201
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 11, 't': 9, 'action': None, 'reward': 2.1572527520108915, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.16)
50% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act() [POST]: location: (7, 7), heading: (1, 0), action: right, reward: 1.52447421889
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncomin

Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.84)
77% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (5, 2), heading: (-1, 0), action: None, reward: 2.8607965578
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'right', 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, 'forward'), 'deadline': 23, 't': 7, 'action': None, 'reward': 2.8607965578013324, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, 'forward')
Agent properly idled at a red light. (rewarded 2.86)
73% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (5, 2), heading: (-1, 0), action: None, reward: 1.72820835473
Environment.act(): Step data: {'inputs': 

Agent previous state: ('forward', 'green', 'left', 'forward')
Agent drove right instead of forward. (rewarded -0.24)
23% of time remaining to reach destination.

/-------------------
| Step 23 Results
\-------------------

Environment.step(): t = 23
Environment.act() [POST]: location: (6, 4), heading: (1, 0), action: right, reward: 1.24641821865
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'forward', None), 'deadline': 7, 't': 23, 'action': 'right', 'reward': 1.2464182186462591, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'forward', None)
Agent drove right instead of left. (rewarded 1.25)
20% of time remaining to reach destination.

/-------------------
| Step 24 Results
\-------------------

Environment.step(): t = 24
Environment.act() [POST]: location: (6, 5), heading: (0, 1), action: right, reward: 2.1896079375
Environment.act(): Step data: {'input

Agent previous state: ('left', 'green', None, 'forward')
Agent drove forward instead of left. (rewarded -0.04)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (3, 6), heading: (1, 0), action: None, reward: 2.65476631994
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, 'left'), 'deadline': 11, 't': 9, 'action': None, 'reward': 2.6547663199396245, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 2.65)
50% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act() [POST]: location: (3, 5), heading: (0, -1), action: left, reward: 2.09979519413
Environment.act(): Step data: {'inputs': {'light': 'gre

Agent previous state: ('right', 'green', None, 'forward')
Agent followed the waypoint right. (rewarded 1.72)
60% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act() [POST]: location: (3, 3), heading: (-1, 0), action: forward, reward: -10.1938186137
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': None, 'left': None}, 'violation': 2, 'light': 'red', 'state': ('right', 'red', 'right', None), 'deadline': 15, 't': 10, 'action': 'forward', 'reward': -10.193818613673606, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'right', None)
Agent attempted driving forward through a red light. (rewarded -10.19)
56% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
Environment.act() [POST]: location: (3, 2), heading: (0, -1), action: right, reward: 1.45857208759
Environment.act():

Agent previous state: ('right', 'green', None, 'forward')
Agent followed the waypoint right. (rewarded 1.25)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (4, 4), heading: (0, -1), action: right, reward: 1.83408658969
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'forward', None), 'deadline': 17, 't': 3, 'action': 'right', 'reward': 1.8340865896927472, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'forward', None)
Agent drove right instead of forward. (rewarded 1.83)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (4, 4), heading: (0, -1), action: forward, reward: -10.059645892
Environment.act(): Step data: {'

Environment.step(): Primary agent ran out of time! Trial aborted.
Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 0.40)
0% of time remaining to reach destination.

Trial Aborted!
Agent did not reach the destination.

/-------------------------
| Training trial 190
\-------------------------

Environment.reset(): Trial set up with start = (8, 5), destination = (5, 4), deadline = 20
Simulating trial. . . 
epsilon = 0.0578; alpha = 0.0150
Simulating trial. . . 
epsilon = 0.0578; alpha = 0.0150
Simulating trial. . . 
epsilon = 0.0578; alpha = 0.0150

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
Environment.act() [POST]: location: (7, 5), heading: (-1, 0), action: forward, reward: 1.17706113793
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, 'forward'), 'deadline':

Agent previous state: ('right', 'red', 'left', 'left')
Agent properly idled at a red light. (rewarded 1.68)
96% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (1, 5), heading: (0, -1), action: None, reward: 1.65974404122
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', 'left', 'left'), 'deadline': 24, 't': 1, 'action': None, 'reward': 1.6597440412160487, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'left', 'left')
Agent properly idled at a red light. (rewarded 1.66)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (2, 5), heading: (1, 0), action: right, reward: 2.18795466524
Environment.act(): Step data: {'inputs': {'light':

| Training trial 192
\-------------------------

Environment.reset(): Trial set up with start = (5, 3), destination = (3, 6), deadline = 25
Simulating trial. . . 
epsilon = 0.0561; alpha = 0.0150

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
Environment.act() [POST]: location: (4, 3), heading: (-1, 0), action: right, reward: 1.50444620183
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, 'forward'), 'deadline': 25, 't': 0, 'action': 'right', 'reward': 1.504446201828292, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, 'forward')
Agent followed the waypoint right. (rewarded 1.50)
96% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (3, 3), heading: (-1, 0), action: forward, reward: 2.0

Agent previous state: ('forward', 'red', 'right', None)
Agent properly idled at a red light. (rewarded 2.58)
91% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (5, 2), heading: (1, 0), action: None, reward: 2.75642999705
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', 'forward'), 'deadline': 32, 't': 3, 'action': None, 'reward': 2.75642999704895, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', 'forward')
Agent properly idled at a red light. (rewarded 2.76)
89% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (5, 2), heading: (1, 0), action: None, reward: 1.49312771561
Environment.act(): Step data: {'inputs

Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.64)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (1, 5), heading: (1, 0), action: None, reward: 1.23280449685
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'right', None), 'deadline': 12, 't': 8, 'action': None, 'reward': 1.2328044968494736, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'right', None)
Agent properly idled at a red light. (rewarded 1.23)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (2, 5), heading: (1, 0), action: forward, reward: 0.932651159089
Environment.act(): Step data: {'inputs': {'ligh

Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.20)
87% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (3, 2), heading: (-1, 0), action: None, reward: 1.80256365251
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', None), 'deadline': 26, 't': 4, 'action': None, 'reward': 1.8025636525143138, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.80)
83% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (3, 2), heading: (-1, 0), action: None, reward: 2.43331971887
Environment.act(): Step data: {'inputs': {'light':

Agent previous state: ('right', 'green', 'forward', 'right')
Agent drove forward instead of right. (rewarded 1.56)
96% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (6, 4), heading: (1, 0), action: right, reward: 2.74985869528
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, 'forward'), 'deadline': 24, 't': 1, 'action': 'right', 'reward': 2.749858695277458, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, 'forward')
Agent followed the waypoint right. (rewarded 2.75)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (6, 5), heading: (0, 1), action: right, reward: 0.617235672448
Environment.act(): Step da

Agent previous state: ('left', 'green', None, 'forward')
Agent followed the waypoint left. (rewarded 1.05)
40% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 198
\-------------------------

Environment.reset(): Trial set up with start = (4, 5), destination = (2, 2), deadline = 25
Simulating trial. . . 
epsilon = 0.0513; alpha = 0.0150

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
Environment.act() [POST]: location: (4, 6), heading: (0, 1), action: right, reward: 2.16971162155
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', None, None), 'deadline': 25, 't': 0, 'action': 'right', 'reward': 2.1697116215510572, 'waypoint': 'right'}
Agent previous state: ('right', 'red', None, None)
Agent followed the waypoint right. (rewarded 2.17)
96% of time remain

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.61)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (1, 7), heading: (-1, 0), action: left, reward: 2.41796911811
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'right', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 15, 't': 5, 'action': 'left', 'reward': 2.4179691181135494, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 2.42)
70% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (1, 7), heading: (-1, 0), action: None, reward: 2.32535331815
Environment.act(): Step data: {'inputs': {'light': 'red', 'o

Agent previous state: ('right', 'green', None, None)
Agent followed the waypoint right. (rewarded 2.35)
84% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (1, 2), heading: (-1, 0), action: right, reward: 1.5988916412
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, None), 'deadline': 21, 't': 4, 'action': 'right', 'reward': 1.598891641199298, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, None)
Agent followed the waypoint right. (rewarded 1.60)
80% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (8, 2), heading: (-1, 0), action: forward, reward: 2.41764152886
Environment.act(): Step data: {'inputs': {'light': 'gre

Agent previous state: ('forward', 'green', None, 'forward')
Agent followed the waypoint forward. (rewarded 2.47)
70% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 202
\-------------------------

Environment.reset(): Trial set up with start = (2, 5), destination = (5, 4), deadline = 20
Simulating trial. . . 
epsilon = 0.0483; alpha = 0.0150
Simulating trial. . . 
epsilon = 0.0483; alpha = 0.0150

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
Environment.act() [POST]: location: (2, 5), heading: (1, 0), action: None, reward: 1.10305732584
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 20, 't': 0, 'action': None, 'reward': 1.1030573258447185, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.61)
65% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (6, 7), heading: (0, -1), action: left, reward: 1.83419721294
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 13, 't': 7, 'action': 'left', 'reward': 1.8341972129446011, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 1.83)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (7, 7), heading: (1, 0), action: right, reward: 0.464527640135
Environment.act(): Step data: {'inputs': {'light': 'red', 'o

Environment.act() [POST]: location: (1, 2), heading: (-1, 0), action: None, reward: 2.88867176581
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': 'right', 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', 'left'), 'deadline': 18, 't': 2, 'action': None, 'reward': 2.8886717658121284, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', 'left')
Agent properly idled at a red light. (rewarded 2.89)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (1, 2), heading: (-1, 0), action: None, reward: 2.1285625678
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', 'left'), 'deadline': 17, 't': 3, 'action': None, 'reward': 2.1285625678041105, 'waypoint': 'forward'}
A

Agent previous state: ('forward', 'green', 'left', None)
Agent followed the waypoint forward. (rewarded 1.98)
10% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 205
\-------------------------

Environment.reset(): Trial set up with start = (3, 6), destination = (2, 3), deadline = 20
Simulating trial. . . 
epsilon = 0.0462; alpha = 0.0150
Simulating trial. . . 
epsilon = 0.0462; alpha = 0.0150

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
Environment.act() [POST]: location: (2, 6), heading: (-1, 0), action: forward, reward: 2.12992211759
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 20, 't': 0, 'action': 'forward', 'reward': 2.129922117589542, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None

Agent previous state: ('forward', 'red', 'left', 'left')
Agent properly idled at a red light. (rewarded 2.24)
20% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
Environment.act() [POST]: location: (1, 5), heading: (1, 0), action: None, reward: 1.2184557801
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', None), 'deadline': 4, 't': 16, 'action': None, 'reward': 1.2184557801036178, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.22)
15% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Environment.step(): t = 17
Environment.act() [POST]: location: (2, 5), heading: (1, 0), action: forward, reward: 1.53363569222
Environment.act(): Step data: {'inputs': {'l

Agent previous state: ('forward', 'red', 'right', None)
Agent properly idled at a red light. (rewarded 0.95)
65% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (3, 5), heading: (0, -1), action: left, reward: 0.425171390811
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': 'right'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', 'right'), 'deadline': 13, 't': 7, 'action': 'left', 'reward': 0.4251713908106016, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'left', 'right')
Agent drove left instead of forward. (rewarded 0.43)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (4, 5), heading: (1, 0), action: right, reward: 2.83105238631
Environment.act(): Step data:

Agent previous state: ('forward', 'green', 'forward', None)
Agent followed the waypoint forward. (rewarded 2.30)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (6, 7), heading: (-1, 0), action: right, reward: 1.81025231284
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, 'left'), 'deadline': 15, 't': 5, 'action': 'right', 'reward': 1.8102523128440464, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, 'left')
Agent drove right instead of forward. (rewarded 1.81)
70% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (6, 2), heading: (0, 1), action: left, reward: 2.83879765482
Environment.act(): Step data: {

Agent previous state: ('left', 'green', 'forward', None)
Agent drove forward instead of left. (rewarded 0.68)
48% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (3, 7), heading: (-1, 0), action: None, reward: 1.02074585262
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'forward', 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, 'left'), 'deadline': 12, 't': 13, 'action': None, 'reward': 1.020745852618907, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 1.02)
44% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
Environment.act() [POST]: location: (3, 7), heading: (-1, 0), action: None, reward: 0.734332285856
Environment.act(): Step data: {'inputs': {'ligh

Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.47)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (2, 6), heading: (-1, 0), action: None, reward: 1.39272390366
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'left', 'left': 'right'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, 'right'), 'deadline': 12, 't': 8, 'action': None, 'reward': 1.392723903661152, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, 'right')
Agent properly idled at a red light. (rewarded 1.39)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (1, 6), heading: (-1, 0), action: forward, reward: 0.861685587616
Environment.act(): Step data: {'inputs': {'l

Agent previous state: ('forward', 'red', None, 'forward')
Agent properly idled at a red light. (rewarded 2.36)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (8, 5), heading: (1, 0), action: None, reward: 2.74368254296
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 18, 't': 2, 'action': None, 'reward': 2.74368254295944, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.74)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (8, 5), heading: (1, 0), action: left, reward: -10.2329699147
Environment.act(): Step data: {'inputs': {'light': 're

Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.89)
84% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (2, 4), heading: (1, 0), action: None, reward: 1.83184341169
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', 'forward'), 'deadline': 21, 't': 4, 'action': None, 'reward': 1.8318434116882882, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', 'forward')
Agent properly idled at a red light. (rewarded 1.83)
80% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (2, 4), heading: (1, 0), action: None, reward: 2.57644630088
Environment.act(): Step data: {'input

Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 1.30)
28% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Environment.step(): t = 18
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (5, 2), heading: (0, 1), action: right, reward: 0.944881643101
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, None), 'deadline': 7, 't': 18, 'action': 'right', 'reward': 0.9448816431007925, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, None)
Agent followed the waypoint right. (rewarded 0.94)
24% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 213
\-------------------------

Environment.reset(): Trial set up with start = (1, 4), de

Agent previous state: ('right', 'green', 'left', None)
Agent followed the waypoint right. (rewarded 0.68)
30% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 214
\-------------------------

Environment.reset(): Trial set up with start = (4, 5), destination = (8, 2), deadline = 35
Simulating trial. . . 
epsilon = 0.0404; alpha = 0.0150

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
Environment.act() [POST]: location: (4, 6), heading: (0, 1), action: right, reward: 2.26590543919
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', 'forward', None), 'deadline': 35, 't': 0, 'action': 'right', 'reward': 2.2659054391932805, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'forward', None)
Agent followed the waypoint right. (rewarded 2.27)
97% 

Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 2.15)
60% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
Environment.act() [POST]: location: (2, 6), heading: (-1, 0), action: None, reward: 2.03524529011
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': 'left', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'right', None), 'deadline': 21, 't': 14, 'action': None, 'reward': 2.0352452901081106, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'right', None)
Agent properly idled at a red light. (rewarded 2.04)
57% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
Environment.act() [POST]: location: (2, 6), heading: (-1, 0), action: None, reward: -4.35315428815
Environment.act(): Step data: {'inputs'

Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.26)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (6, 2), heading: (-1, 0), action: forward, reward: 1.53440962462
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 16, 't': 4, 'action': 'forward', 'reward': 1.5344096246150478, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 1.53)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (6, 7), heading: (0, -1), action: right, rewar

Agent previous state: ('forward', 'green', None, 'left')
Agent drove right instead of forward. (rewarded 0.86)
45% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
Environment.act() [POST]: location: (1, 3), heading: (1, 0), action: right, reward: 1.25443501031
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'forward', None), 'deadline': 9, 't': 11, 'action': 'right', 'reward': 1.2544350103136754, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'forward', None)
Agent drove right instead of left. (rewarded 1.25)
40% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (1, 4), heading: (0, 1), action: right, reward: 2.42570242178
Environment.act(): Step data: {'inputs': {

Agent previous state: ('right', 'red', 'left', None)
Agent followed the waypoint right. (rewarded 2.00)
55% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 219
\-------------------------

Environment.reset(): Trial set up with start = (8, 4), destination = (2, 6), deadline = 20
Simulating trial. . . 
epsilon = 0.0374; alpha = 0.0150

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
Environment.act() [POST]: location: (8, 4), heading: (1, 0), action: None, reward: 1.84218574272
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, 'forward'), 'deadline': 20, 't': 0, 'action': None, 'reward': 1.8421857427220574, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, 'forward')
Agent properly idled at a red light. (rewarded 1.84)
95

Agent previous state: ('left', 'green', None, 'forward')
Agent followed the waypoint left. (rewarded 1.74)
95% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (3, 2), heading: (-1, 0), action: forward, reward: 1.7842962859
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'right', 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'right', 'forward'), 'deadline': 19, 't': 1, 'action': 'forward', 'reward': 1.7842962858973526, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'right', 'forward')
Agent followed the waypoint forward. (rewarded 1.78)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (3, 2), heading: (-1, 0), action: None, reward: 1.87483895727
Environment.act():

Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.26)
65% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (8, 3), heading: (-1, 0), action: forward, reward: 1.57859897179
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 13, 't': 7, 'action': 'forward', 'reward': 1.5785989717896018, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 1.58)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (8, 2), heading: (0, -1), action: right, reward: 0.545809287998
Environment.act(): Step data: {'inputs':

Agent previous state: ('left', 'green', None, 'left')
Agent followed the waypoint left. (rewarded 1.33)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (5, 7), heading: (0, -1), action: None, reward: 1.92066021514
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 18, 't': 2, 'action': None, 'reward': 1.9206602151406502, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.92)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (5, 7), heading: (0, -1), action: None, reward: 2.6177876688
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 

epsilon = 0.0353; alpha = 0.0150
Simulating trial. . . 
epsilon = 0.0353; alpha = 0.0150
Simulating trial. . . 
epsilon = 0.0353; alpha = 0.0150

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
Environment.act() [POST]: location: (3, 4), heading: (0, -1), action: forward, reward: 1.70623987215
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'forward', 'right': 'forward', 'left': 'right'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', 'forward', 'right'), 'deadline': 20, 't': 0, 'action': 'forward', 'reward': 1.706239872148981, 'waypoint': 'left'}
Agent previous state: ('left', 'green', 'forward', 'right')
Agent drove forward instead of left. (rewarded 1.71)
95% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (3, 4), heading: (0, -1), action: None, reward: 1.01521006195
Environment.act(): Step data

Agent previous state: ('forward', 'red', 'forward', 'right')
Agent properly idled at a red light. (rewarded 2.83)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (4, 6), heading: (0, -1), action: right, reward: 1.52745226184
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'forward', None), 'deadline': 23, 't': 2, 'action': 'right', 'reward': 1.527452261840878, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'forward', None)
Agent drove right instead of forward. (rewarded 1.53)
88% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (4, 6), heading: (0, -1), action: None, reward: 1.01798415754
Environment.act(): Step data: {

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.24)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (1, 4), heading: (0, 1), action: None, reward: 1.97906396974
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 23, 't': 2, 'action': None, 'reward': 1.9790639697436683, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.98)
88% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (1, 4), heading: (0, 1), action: None, reward: 1.44954160411
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': No

Agent previous state: ('right', 'green', None, None)
Agent followed the waypoint right. (rewarded 2.74)
96% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (3, 5), heading: (0, 1), action: None, reward: 0.931171931105
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', None, 'left'), 'deadline': 24, 't': 1, 'action': None, 'reward': 0.9311719311053717, 'waypoint': 'right'}
Agent previous state: ('right', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 0.93)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (2, 5), heading: (-1, 0), action: right, reward: 1.03578050958
Environment.act(): Step data: {'inputs': {'light': 'red', '

Agent previous state: ('left', 'green', 'left', None)
Agent followed the waypoint left. (rewarded 1.56)
32% of time remaining to reach destination.

/-------------------
| Step 17 Results
\-------------------

Environment.step(): t = 17
Environment.act() [POST]: location: (6, 5), heading: (0, 1), action: None, reward: 0.760838994666
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'right', None), 'deadline': 8, 't': 17, 'action': None, 'reward': 0.7608389946660159, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'right', None)
Agent properly idled at a red light. (rewarded 0.76)
28% of time remaining to reach destination.

/-------------------
| Step 18 Results
\-------------------

Environment.step(): t = 18
Environment.act() [POST]: location: (6, 5), heading: (0, 1), action: None, reward: 0.866357971577
Environment.act(): Step data: {'inputs': {'light': 'red'

Agent previous state: ('forward', 'green', 'forward', None)
Agent drove right instead of forward. (rewarded 1.80)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (2, 5), heading: (0, 1), action: left, reward: 1.45546698766
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'forward'), 'deadline': 15, 't': 5, 'action': 'left', 'reward': 1.4554669876610362, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'forward')
Agent followed the waypoint left. (rewarded 1.46)
70% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (3, 5), heading: (1, 0), action: left

Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 2.00)
45% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 229
\-------------------------

Environment.reset(): Trial set up with start = (1, 2), destination = (6, 7), deadline = 20
Simulating trial. . . 
epsilon = 0.0322; alpha = 0.0150
Simulating trial. . . 
epsilon = 0.0322; alpha = 0.0150
Simulating trial. . . 
epsilon = 0.0322; alpha = 0.0150

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
Environment.act() [POST]: location: (1, 2), heading: (-1, 0), action: None, reward: 2.5094771338
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'right'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, 'right'), 'deadline': 20, 't': 0, 'action': None, 'reward': 2.5094771338010293, 'waypoint': 'forward'}
A

Agent previous state: ('forward', 'green', 'right', 'forward')
Agent followed the waypoint forward. (rewarded 1.90)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (7, 6), heading: (0, 1), action: right, reward: 1.56258426952
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': 'left', 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'forward', 'left'), 'deadline': 16, 't': 4, 'action': 'right', 'reward': 1.5625842695210066, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'forward', 'left')
Agent drove right instead of forward. (rewarded 1.56)
75% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (8, 6), heading: (1, 0), action: left, reward: 1.40340952817
Environment.act(): Ste

Agent previous state: ('left', 'red', 'left', 'right')
Agent properly idled at a red light. (rewarded 0.96)
64% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (6, 7), heading: (1, 0), action: left, reward: 2.63269266177
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', 'left', None), 'deadline': 16, 't': 9, 'action': 'left', 'reward': 2.6326926617719497, 'waypoint': 'left'}
Agent previous state: ('left', 'green', 'left', None)
Agent followed the waypoint left. (rewarded 2.63)
60% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act() [POST]: location: (7, 7), heading: (1, 0), action: forward, reward: 1.67510171692
Environment.act(): Step data: {'inputs': {'light'

Agent previous state: ('left', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 0.36)
4% of time remaining to reach destination.

/-------------------
| Step 24 Results
\-------------------

Environment.step(): t = 24
Environment.act() [POST]: location: (7, 5), heading: (0, -1), action: None, reward: 0.246817349775
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'left', None), 'deadline': 1, 't': 24, 'action': None, 'reward': 0.24681734977523728, 'waypoint': 'left'}
Environment.step(): Primary agent ran out of time! Trial aborted.
Agent previous state: ('left', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 0.25)
0% of time remaining to reach destination.

Trial Aborted!
Agent did not reach the destination.

/-------------------------
| Training trial 232
\-------------------------

Environment.reset(): Trial set up with start = (7

Agent previous state: ('right', 'red', None, None)
Agent followed the waypoint right. (rewarded 1.74)
65% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (1, 7), heading: (0, -1), action: forward, reward: 0.965415808898
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'right', 'right': 'forward', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'right', None), 'deadline': 13, 't': 7, 'action': 'forward', 'reward': 0.9654158088978502, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'right', None)
Agent followed the waypoint forward. (rewarded 0.97)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (1, 6), heading: (0, -1), action: f

57% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (7, 7), heading: (1, 0), action: None, reward: 2.0440295209
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 17, 't': 13, 'action': None, 'reward': 2.044029520896732, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.04)
53% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
Environment.act() [POST]: location: (7, 7), heading: (1, 0), action: None, reward: 1.566377923
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red',

Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.82)
65% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (1, 2), heading: (-1, 0), action: forward, reward: 1.67846808923
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': 'forward', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', None), 'deadline': 13, 't': 7, 'action': 'forward', 'reward': 1.6784680892339578, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'left', None)
Agent followed the waypoint forward. (rewarded 1.68)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (8, 2), heading: (-1, 0), action: forward, reward: 2.27443688803
Environment.act(): Step da

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.89)
73% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (1, 4), heading: (0, 1), action: None, reward: 1.8293914411
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 22, 't': 8, 'action': None, 'reward': 1.8293914410999321, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.83)
70% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (2, 4), heading: (1, 0), action: left, reward: 2.43966425595
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': N

Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', None), 'deadline': 8, 't': 22, 'action': None, 'reward': 2.508348574110438, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 2.51)
23% of time remaining to reach destination.

/-------------------
| Step 23 Results
\-------------------

Environment.step(): t = 23
Environment.act() [POST]: location: (2, 7), heading: (0, 1), action: forward, reward: 2.04889130277
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', None), 'deadline': 7, 't': 23, 'action': 'forward', 'reward': 2.0488913027655817, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', 'left', None)
Agent followed the waypoint forward. (re

Agent previous state: ('forward', 'green', 'forward', None)
Agent drove right instead of forward. (rewarded 1.42)
84% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (8, 6), heading: (-1, 0), action: left, reward: 2.74308878374
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'left': 'left'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, 'left'), 'deadline': 21, 't': 4, 'action': 'left', 'reward': 2.743088783740494, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, 'left')
Agent followed the waypoint left. (rewarded 2.74)
80% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (8, 6), heading: (-1, 0), action: None, reward: 2.86130554351
Environment.act(): Step data: {'inputs': {

Agent previous state: ('right', 'red', 'left', None)
Agent followed the waypoint right. (rewarded 0.91)
16% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 239
\-------------------------

Environment.reset(): Trial set up with start = (6, 3), destination = (4, 7), deadline = 20
Simulating trial. . . 
epsilon = 0.0277; alpha = 0.0150

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
Environment.act() [POST]: location: (6, 3), heading: (0, 1), action: None, reward: 1.6629779807
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': 'forward', 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', 'left', 'left'), 'deadline': 20, 't': 0, 'action': None, 'reward': 1.6629779806950655, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'left', 'left')
Agent properly idled at a red light. (rewarded 1.66)
95% of 

Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 2.96)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (7, 6), heading: (-1, 0), action: None, reward: 2.71506205171
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, 'left'), 'deadline': 17, 't': 3, 'action': None, 'reward': 2.7150620517091215, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 2.72)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (7, 7), heading: (0, 1), action: left, reward: 1.3622542042
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncomi

Agent previous state: ('left', 'red', None, 'forward')
Agent properly idled at a red light. (rewarded 2.30)
80% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (4, 6), heading: (0, -1), action: left, reward: 1.94276770037
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 20, 't': 5, 'action': 'left', 'reward': 1.9427677003727342, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 1.94)
76% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (4, 6), heading: (0, -1), action: None, reward: 1.71263846154
Environment.act(): Step data: {'inputs': {'light': 'red', 

Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.38)
20% of time remaining to reach destination.

/-------------------
| Step 20 Results
\-------------------

Environment.step(): t = 20
Environment.act() [POST]: location: (8, 4), heading: (0, 1), action: None, reward: 2.18492929092
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'right', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 5, 't': 20, 'action': None, 'reward': 2.1849292909220885, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.18)
16% of time remaining to reach destination.

/-------------------
| Step 21 Results
\-------------------

Environment.step(): t = 21
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (8, 5), heading: (0, 1), action: forward, reward: 2.2424

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.14)
69% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
Environment.act() [POST]: location: (8, 7), heading: (1, 0), action: left, reward: 1.36314795837
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 24, 't': 11, 'action': 'left', 'reward': 1.3631479583666655, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 1.36)
66% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (8, 7), heading: (1, 0), action: None, reward: 1.77510355647
Environment.act(): Step data: {'inputs': {'light': 'red', 'o

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.70)
87% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act() [POST]: location: (5, 2), heading: (-1, 0), action: left, reward: 1.01278108983
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 26, 't': 4, 'action': 'left', 'reward': 1.0127810898261498, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 1.01)
83% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (4, 2), heading: (-1, 0), action: forward, reward: 2.14877712219
Environment.act(): Step data: {'inputs': {'light': 'green', 

Agent previous state: ('right', 'green', 'forward', None)
Agent followed the waypoint right. (rewarded 2.38)
70% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (4, 2), heading: (1, 0), action: right, reward: 2.01840137262
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'forward', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, None), 'deadline': 21, 't': 9, 'action': 'right', 'reward': 2.01840137261724, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, None)
Agent followed the waypoint right. (rewarded 2.02)
67% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act() [POST]: location: (5, 2), heading: (1, 0), action: forward, reward: 2.21556185445
Environment.act(): Step data: {'inputs': {'li

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.97)
97% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (3, 3), heading: (1, 0), action: None, reward: 2.46327722677
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, 'left'), 'deadline': 29, 't': 1, 'action': None, 'reward': 2.4632772267679526, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 2.46)
93% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (3, 3), heading: (1, 0), action: None, reward: 2.93892278225
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncomin


Environment.step(): t = 2
Environment.act() [POST]: location: (1, 2), heading: (0, -1), action: left, reward: 2.19799821512
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 18, 't': 2, 'action': 'left', 'reward': 2.1979982151172783, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 2.20)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (1, 2), heading: (0, -1), action: None, reward: 2.49840892337
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 17, 't': 3, 'action': None, 'reward': 2.4984089233730424, 'waypoint': 'forward'

Agent followed the waypoint left. (rewarded 2.16)
40% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Training trial 248
\-------------------------

Environment.reset(): Trial set up with start = (3, 2), destination = (8, 6), deadline = 25
Simulating trial. . . 
epsilon = 0.0242; alpha = 0.0150
Simulating trial. . . 
epsilon = 0.0242; alpha = 0.0150
Simulating trial. . . 
epsilon = 0.0242; alpha = 0.0150
Simulating trial. . . 
epsilon = 0.0242; alpha = 0.0150

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
Environment.act() [POST]: location: (3, 3), heading: (0, 1), action: forward, reward: 0.249165981459
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'right', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', 'right', None), 'deadline': 25, 't': 0, 'action': 'forward', 'reward': 0.24916598145874058, 'waypoint': 

Agent previous state: ('forward', 'green', 'left', None)
Agent followed the waypoint forward. (rewarded 2.88)
73% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (4, 3), heading: (-1, 0), action: None, reward: 2.44478342693
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 22, 't': 8, 'action': None, 'reward': 2.4447834269317195, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.44)
70% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (4, 3), heading: (-1, 0), action: None, reward: 2.75305557744
Environment.act(): Step data: {'inputs': {'light': 're

Environment.act() [POST]: location: (2, 5), heading: (1, 0), action: None, reward: 1.40549306009
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': 'left', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', None), 'deadline': 26, 't': 4, 'action': None, 'reward': 1.4054930600918167, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.41)
83% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (3, 5), heading: (1, 0), action: forward, reward: 2.05567477356
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', 'left', None), 'deadline': 25, 't': 5, 'action': 'forward', 'reward': 2.055674773557644, 'waypoint': 'forward'}


Agent previous state: ('right', 'green', 'left', None)
Agent followed the waypoint right. (rewarded 2.72)
68% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (2, 6), heading: (-1, 0), action: None, reward: 1.34201975494
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 17, 't': 8, 'action': None, 'reward': 1.3420197549415485, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.34)
64% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (1, 6), heading: (-1, 0), action: forward, reward: 1.62607353588
Environment.act(): Step data: {'inputs': {'light': 'gre

Agent previous state: ('forward', 'green', None, 'left')
Agent drove right instead of forward. (rewarded 0.79)
4% of time remaining to reach destination.

/-------------------
| Step 24 Results
\-------------------

Environment.step(): t = 24
Environment.act() [POST]: location: (2, 4), heading: (1, 0), action: None, reward: 1.887925366
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'forward', 'left'), 'deadline': 1, 't': 24, 'action': None, 'reward': 1.8879253660027635, 'waypoint': 'left'}
Environment.step(): Primary agent ran out of time! Trial aborted.
Agent previous state: ('left', 'red', 'forward', 'left')
Agent properly idled at a red light. (rewarded 1.89)
0% of time remaining to reach destination.

Trial Aborted!
Agent did not reach the destination.

/-------------------------
| Training trial 252
\-------------------------

Environment.reset(): Trial set up

Agent previous state: ('left', 'green', 'forward', None)
Agent drove forward instead of left. (rewarded 1.80)
52% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (7, 4), heading: (0, -1), action: None, reward: 1.70022316134
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 13, 't': 12, 'action': None, 'reward': 1.7002231613440417, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.70)
48% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (6, 4), heading: (-1, 0), action: left, reward: 2.62985120374
Environment.act(): Step data: {'inputs': {'light': 'green'

Agent previous state: ('forward', 'red', 'forward', None)
Agent drove right instead of forward. (rewarded 1.31)
45% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
Environment.act() [POST]: location: (1, 3), heading: (1, 0), action: right, reward: 0.926222252016
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': 'right', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'forward', None), 'deadline': 9, 't': 11, 'action': 'right', 'reward': 0.9262222520157031, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'forward', None)
Agent drove right instead of left. (rewarded 0.93)
40% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (1, 4), heading: (0, 1), action: right, reward: 2.32369321347
Environment.act(): Step data: {'input

Agent previous state: ('right', 'red', None, None)
Agent followed the waypoint right. (rewarded 1.04)
50% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act() [POST]: location: (3, 7), heading: (-1, 0), action: forward, reward: 1.34562521418
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 10, 't': 10, 'action': 'forward', 'reward': 1.345625214182236, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 1.35)
45% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
Environment.act() [POST]: location: (3, 6), heading: (0, -1), action: right, reward: 2.27948385297
Environment.act(): Step data: {'inputs'

Agent drove forward instead of right. (rewarded 0.76)
52% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (3, 5), heading: (0, 1), action: right, reward: 2.14013534509
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', 'right', None), 'deadline': 13, 't': 12, 'action': 'right', 'reward': 2.140135345085361, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'right', None)
Agent followed the waypoint right. (rewarded 2.14)
48% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (3, 5), heading: (0, 1), action: None, reward: 1.51410097763
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': None, 'left': '

Agent previous state: ('left', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 2.85)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (4, 3), heading: (0, 1), action: None, reward: 2.46649651769
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'left', None), 'deadline': 23, 't': 2, 'action': None, 'reward': 2.46649651768948, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 2.47)
88% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (4, 3), heading: (0, 1), action: None, reward: 2.3562945237
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming

Agent previous state: ('forward', 'green', None, 'left')
Agent drove right instead of forward. (rewarded 1.16)
65% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (1, 6), heading: (1, 0), action: forward, reward: 1.79837308045
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': 'left', 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', 'left', 'forward'), 'deadline': 13, 't': 7, 'action': 'forward', 'reward': 1.7983730804475146, 'waypoint': 'left'}
Agent previous state: ('left', 'green', 'left', 'forward')
Agent drove forward instead of left. (rewarded 1.80)
60% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (1, 5), heading: (0, -1), action: left, reward: 0.864107002902
Environment.act(): Step d

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.97)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (5, 3), heading: (0, -1), action: None, reward: 1.29690931664
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'forward', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 18, 't': 2, 'action': None, 'reward': 1.2969093166370385, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.30)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (4, 3), heading: (-1, 0), action: left, reward: 1.79588686173
Environment.act(): Step data: {'inputs': {'light': 'green', 'onco

Agent previous state: ('left', 'green', 'right', None)
Agent drove right instead of left. (rewarded 0.48)
76% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (5, 7), heading: (0, -1), action: left, reward: 1.97792303762
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 19, 't': 6, 'action': 'left', 'reward': 1.9779230376243488, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 1.98)
72% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (5, 7), heading: (0, -1), action: None, reward: 1.63394903278
Environment.act(): Step data: {'inputs': {'light': 'red', 'o

Agent previous state: ('forward', 'green', 'right', 'forward')
Agent followed the waypoint forward. (rewarded 2.34)
88% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (4, 2), heading: (0, -1), action: right, reward: 1.13787900816
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('right', 'green', None, None), 'deadline': 22, 't': 3, 'action': 'right', 'reward': 1.1378790081637198, 'waypoint': 'right'}
Agent previous state: ('right', 'green', None, None)
Agent followed the waypoint right. (rewarded 1.14)
84% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (4, 7), heading: (0, -1), action: forward, rew

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.16)
90% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (5, 3), heading: (0, -1), action: None, reward: 2.8810642251
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'right', None), 'deadline': 18, 't': 2, 'action': None, 'reward': 2.881064225099715, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'right', None)
Agent properly idled at a red light. (rewarded 2.88)
85% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (4, 3), heading: (-1, 0), action: left, reward: 2.94140238371
Environment.act(): Step data: {'inputs': {'light': 'green', 'on


/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (5, 7), heading: (0, 1), action: None, reward: 1.64759498806
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'left', 'left': 'left'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, 'left'), 'deadline': 16, 't': 9, 'action': None, 'reward': 1.6475949880567542, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, 'left')
Agent properly idled at a red light. (rewarded 1.65)
60% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act() [POST]: location: (5, 7), heading: (0, 1), action: None, reward: 1.23331114347
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': 'left', 'left': 'right'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', 'right'), 'deadline

Agent previous state: ('right', 'green', 'forward', None)
Agent followed the waypoint right. (rewarded 2.17)
96% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (1, 4), heading: (1, 0), action: None, reward: 2.07736444312
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'right', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'right', None), 'deadline': 24, 't': 1, 'action': None, 'reward': 2.077364443115069, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'right', None)
Agent properly idled at a red light. (rewarded 2.08)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (1, 4), heading: (1, 0), action: None, reward: 1.62924326839
Environment.act(): Step data: {'inputs': {'light'

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.07)
80% of time remaining to reach destination.

/-------------------
| Step 7 Results
\-------------------

Environment.step(): t = 7
Environment.act() [POST]: location: (5, 4), heading: (0, 1), action: left, reward: 1.5793837176
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 28, 't': 7, 'action': 'left', 'reward': 1.579383717597121, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 1.58)
77% of time remaining to reach destination.

/-------------------
| Step 8 Results
\-------------------

Environment.step(): t = 8
Environment.act() [POST]: location: (4, 4), heading: (-1, 0), action: right, reward: 0.165365864543
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncom

Agent previous state: ('right', 'green', None, 'forward')
Agent followed the waypoint right. (rewarded 1.67)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (7, 3), heading: (1, 0), action: None, reward: 2.28632521589
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, None), 'deadline': 23, 't': 2, 'action': None, 'reward': 2.286325215885305, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.29)
88% of time remaining to reach destination.

/-------------------
| Step 3 Results
\-------------------

Environment.step(): t = 3
Environment.act() [POST]: location: (7, 3), heading: (1, 0), action: None, reward: 2.9081384386
Environment.act(): Step data: {'inputs': {'light': 'red', '

Agent previous state: ('forward', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.60)
36% of time remaining to reach destination.

/-------------------
| Step 16 Results
\-------------------

Environment.step(): t = 16
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (2, 3), heading: (1, 0), action: forward, reward: 1.11398235072
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 9, 't': 16, 'action': 'forward', 'reward': 1.1139823507184619, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 1.11)
32% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Testing trial 8
\-------------------------

Environment.reset(): Trial set up with start 

Agent previous state: ('forward', 'green', 'left', None)
Agent followed the waypoint forward. (rewarded 1.60)
80% of time remaining to reach destination.

/-------------------
| Step 4 Results
\-------------------

Environment.step(): t = 4
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (6, 4), heading: (0, 1), action: right, reward: 2.27957610397
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', 'left', None), 'deadline': 16, 't': 4, 'action': 'right', 'reward': 2.279576103973143, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'left', None)
Agent followed the waypoint right. (rewarded 2.28)
75% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Testing trial 10
\-------------------------

Environment.reset(): Trial set up with start = (3, 4), de

Agent previous state: ('right', 'red', 'forward', None)
Agent followed the waypoint right. (rewarded 1.25)
60% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (8, 2), heading: (1, 0), action: None, reward: 2.37101955494
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, 'forward'), 'deadline': 18, 't': 12, 'action': None, 'reward': 2.3710195549386817, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, 'forward')
Agent properly idled at a red light. (rewarded 2.37)
57% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act(): Primary agent has reached destination!
Environment.act() [POST]: location: (8, 7), heading: (0, -1), action: left, reward: 1.3

Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 0.91)
48% of time remaining to reach destination.

/-------------------
| Step 13 Results
\-------------------

Environment.step(): t = 13
Environment.act() [POST]: location: (2, 3), heading: (0, 1), action: None, reward: 1.03339665822
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'right', 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 12, 't': 13, 'action': None, 'reward': 1.0333966582231962, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 1.03)
44% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
Environment.act() [POST]: location: (2, 3), heading: (0, 1), action: None, reward: 1.22610460424
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncomi

Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 2.22)
60% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act() [POST]: location: (2, 4), heading: (1, 0), action: None, reward: 1.77203166683
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', 'left', None), 'deadline': 15, 't': 10, 'action': None, 'reward': 1.7720316668280949, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', 'left', None)
Agent properly idled at a red light. (rewarded 1.77)
56% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
Environment.act() [POST]: location: (3, 4), heading: (1, 0), action: forward, reward: 1.09696149305
Environment.act(): Step data: {'inputs': {'l

Agent previous state: ('left', 'green', None, 'left')
Agent followed the waypoint left. (rewarded 2.03)
45% of time remaining to reach destination.

/-------------------
| Step 11 Results
\-------------------

Environment.step(): t = 11
Environment.act() [POST]: location: (1, 2), heading: (1, 0), action: forward, reward: 1.73914882775
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('forward', 'green', None, None), 'deadline': 9, 't': 11, 'action': 'forward', 'reward': 1.7391488277470522, 'waypoint': 'forward'}
Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 1.74)
40% of time remaining to reach destination.

/-------------------
| Step 12 Results
\-------------------

Environment.step(): t = 12
Environment.act() [POST]: location: (2, 2), heading: (1, 0), action: forward, reward: 1.07515949651
Environment.act(): Step data: {'input

Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.60)
86% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (8, 2), heading: (-1, 0), action: left, reward: 1.17277856699
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 30, 't': 5, 'action': 'left', 'reward': 1.1727785669859696, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 1.17)
83% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (8, 2), heading: (-1, 0), action: None, reward: 2.53601362523
Environment.act(): Step data: {'inputs': {'light': 'red', 'on

Agent previous state: ('forward', 'green', None, 'forward')
Agent followed the waypoint forward. (rewarded 2.52)
80% of time remaining to reach destination.

Trial Completed!
Agent reached the destination.

/-------------------------
| Testing trial 16
\-------------------------

Environment.reset(): Trial set up with start = (1, 7), destination = (6, 6), deadline = 20
Simulating trial. . . 
epsilon = 0.0000; alpha = 0.0000

/-------------------
| Step 0 Results
\-------------------

Environment.step(): t = 0
Environment.act() [POST]: location: (1, 7), heading: (1, 0), action: None, reward: 2.34286264942
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', None, None), 'deadline': 20, 't': 0, 'action': None, 'reward': 2.3428626494180773, 'waypoint': 'left'}
Agent previous state: ('left', 'red', None, None)
Agent properly idled at a red light. (rewarded 2.34)
95% of time remaini

Agent previous state: ('left', 'red', 'left', 'forward')
Agent properly idled at a red light. (rewarded 1.23)
30% of time remaining to reach destination.

/-------------------
| Step 14 Results
\-------------------

Environment.step(): t = 14
Environment.act() [POST]: location: (7, 5), heading: (0, -1), action: None, reward: 2.28859178962
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'left', 'right': None, 'left': 'right'}, 'violation': 0, 'light': 'red', 'state': ('left', 'red', 'left', 'right'), 'deadline': 6, 't': 14, 'action': None, 'reward': 2.288591789620517, 'waypoint': 'left'}
Agent previous state: ('left', 'red', 'left', 'right')
Agent properly idled at a red light. (rewarded 2.29)
25% of time remaining to reach destination.

/-------------------
| Step 15 Results
\-------------------

Environment.step(): t = 15
Environment.act() [POST]: location: (7, 5), heading: (0, -1), action: None, reward: 2.08414467828
Environment.act(): Step data: {'inputs': {'li

Agent previous state: ('right', 'green', None, None)
Agent followed the waypoint right. (rewarded 1.75)
96% of time remaining to reach destination.

/-------------------
| Step 1 Results
\-------------------

Environment.step(): t = 1
Environment.act() [POST]: location: (5, 2), heading: (-1, 0), action: right, reward: 1.46968816473
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': 'right'}, 'violation': 0, 'light': 'red', 'state': ('right', 'red', 'forward', 'right'), 'deadline': 24, 't': 1, 'action': 'right', 'reward': 1.4696881647297761, 'waypoint': 'right'}
Agent previous state: ('right', 'red', 'forward', 'right')
Agent followed the waypoint right. (rewarded 1.47)
92% of time remaining to reach destination.

/-------------------
| Step 2 Results
\-------------------

Environment.step(): t = 2
Environment.act() [POST]: location: (5, 7), heading: (0, -1), action: right, reward: 0.633177582925
Environment.act(): Step data: {'inputs

Agent previous state: ('forward', 'green', None, None)
Agent followed the waypoint forward. (rewarded 1.07)
80% of time remaining to reach destination.

/-------------------
| Step 5 Results
\-------------------

Environment.step(): t = 5
Environment.act() [POST]: location: (8, 3), heading: (1, 0), action: None, reward: 2.56436148151
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': None, 'right': 'forward', 'left': 'forward'}, 'violation': 0, 'light': 'red', 'state': ('forward', 'red', None, 'forward'), 'deadline': 20, 't': 5, 'action': None, 'reward': 2.5643614815125133, 'waypoint': 'forward'}
Agent previous state: ('forward', 'red', None, 'forward')
Agent properly idled at a red light. (rewarded 2.56)
76% of time remaining to reach destination.

/-------------------
| Step 6 Results
\-------------------

Environment.step(): t = 6
Environment.act() [POST]: location: (8, 3), heading: (1, 0), action: None, reward: 1.4155477778
Environment.act(): Step data: {'inputs'

Agent properly idled at a red light. (rewarded 1.36)
55% of time remaining to reach destination.

/-------------------
| Step 9 Results
\-------------------

Environment.step(): t = 9
Environment.act() [POST]: location: (5, 3), heading: (0, -1), action: left, reward: 1.86235757542
Environment.act(): Step data: {'inputs': {'light': 'green', 'oncoming': None, 'right': 'left', 'left': None}, 'violation': 0, 'light': 'green', 'state': ('left', 'green', None, None), 'deadline': 11, 't': 9, 'action': 'left', 'reward': 1.8623575754227448, 'waypoint': 'left'}
Agent previous state: ('left', 'green', None, None)
Agent followed the waypoint left. (rewarded 1.86)
50% of time remaining to reach destination.

/-------------------
| Step 10 Results
\-------------------

Environment.step(): t = 10
Environment.act() [POST]: location: (6, 3), heading: (1, 0), action: right, reward: 1.7314082884
Environment.act(): Step data: {'inputs': {'light': 'red', 'oncoming': 'forward', 'right': None, 'left': None},