In [1]:
from smartcab.simulator import Simulator
from smartcab.environment import Environment
from smartcab.agents import QLearningAgent

# Set up environment and agent
e = Environment()  # create environment (also adds some dummy traffic)
a = e.create_agent(QLearningAgent)  # create agent
e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track

# Now simulate it
sim = Simulator(e, update_delay=0.001, display=False)  # create simulator
sim.run(n_trials=1)  # run for a specified number of trials

a.stats_print()



Simulator.run(): Trial 0
Environment.reset(): Trial set up with start = (8, 2), destination = (5, 3), deadline = 20
RoutePlanner.route_to(): destination = (5, 3)
Environment.step(): Primary agent ran out of time! Trial aborted.
LearningAgent stats: q_values_count = 0, reward_cum = -9.5
0
1
   iteration  q_size  cum_reward success
0          1      15        -9.5   False


<h2>Improve the Q-Learning Driving Agent</h2>

Now let's tune the values for *learning rate (alpha)*, *the discount factor (gamma)* and the *exploration rate (epsilon)*. We will perform many simulations with many combinations of these parameters, and the we will report the results to see what is the best combination.

In [11]:
import numpy as np

for alpha_rate in np.arange(0.00, 1.00, 0.05):
    for epsilon_rate in np.arange(0.00, 1.00, 0.05):
        for gamma_rate in np.arange(0.00, 1.00, 0.05):
            e = Environment()
            a = QLearningAgent(e, alpha_rate=alpha_rate, epsilon_rate=epsilon_rate, gamma_rate=gamma_rate)
            e.set_primary_agent(a, enforce_deadline=True)
            s = Simulator(e, update_delay=0.001, display=False)
            s.run(n_trials=1)

Simulator.run(): Trial 0
Environment.reset(): Trial set up with start = (8, 1), destination = (3, 1), deadline = 25


KeyError: <smartcab.agents.QLearningAgent object at 0x7fd78aefbe10>