# Solving CartPole using Q Learning Tabular Method

In [1]:
import gym 
import numpy as np 
import math 
import random

In [2]:
env = gym.make('CartPole-v0')

In [3]:
result = []
observ = env.reset()
for t in range(300):
    env.render()
    action = env.action_space.sample()
    observ, reward, done, info = env.step(action)
    result.append([action, reward])



In [4]:
env.close()

In [5]:
#actions are move cart left or right 
print(env.action_space)

Discrete(2)


In [6]:
# observation space: has four values 
'''
position of cart
velocity of cart
angle of pole
angular velocity of pole 
'''
print(env.observation_space)

Box(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)


In [7]:
print(env.observation_space.low)

[-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]


In [8]:
print(env.observation_space.high)

[4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]


######  Discretize the state space so we can apply Q learning to a bounded space

In [9]:
num_buckets = [1,1,6,3]
num_actions = env.action_space.n
state_bounds = list(zip(env.observation_space.low, env.observation_space.high))

In [10]:
# amend the state bound ranges for velocity and pole angle 
state_bounds[1] = [-0.5, 0.5]
state_bounds[3] = [-math.radians(50),math.radians(50)]
state_bounds

[(-4.8, 4.8),
 [-0.5, 0.5],
 (-0.41887903, 0.41887903),
 [-0.8726646259971648, 0.8726646259971648]]

In [11]:
q_table = np.zeros(num_buckets +[num_actions])
print(q_table.shape)
q_table

(1, 1, 6, 3, 2)


array([[[[[0., 0.],
          [0., 0.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.],
          [0., 0.]]]]])

In [12]:
explore_rate_min = 0.01
learning_rate_min = 0.1

In [13]:
def get_explore_rate(t):
    return max(explore_rate_min, min(1, 1.0-math.log10((t+1)/25)))

In [14]:
def get_learning_rate(t):
    return max(learning_rate_min,min(1, 1.0-math.log10((t+1)/25)))

In [15]:
def select_action(state, explore_rate):
    if random.random() < explore_rate:
        action = env.action_space.sample()
    else: 
        action = np.argmax(q_table[state])
    return action

In [16]:
def state_to_bucket(state):
    bucket_indices =[]
    for i in range(len(state)):
        if state[i]<=state_bounds[i][0]:
            bucket_index = 0
        elif  state[i]>=state_bounds[i][1]:
            bucket_index = num_buckets[i]-1
        else:
            bound_width = state_bounds[i][1]-state_bounds[i][0]
            offset = (num_buckets[i]-1)* state_bounds[i][0]/bound_width
            scaling = (num_buckets[i]-1)/bound_width
            bucket_index = int(round(scaling*state[i]-offset))

        bucket_indices.append(bucket_index)

    return tuple(bucket_indices)

In [17]:
def simulate():
    learning_rate = get_learning_rate(0)
    explore_rate = get_explore_rate(0)
    
    discount_factor = 0.99
    num_streaks = 0
    
    for episode in range(1000):
        observ = env.reset()
        state_0 = state_to_bucket(observ)
        for t in range(300):
            env.render()
            action = select_action(state_0, explore_rate)
            observ, reward, done, info = env.step(action)
            state = state_to_bucket(observ)
            best_q = np.amax(q_table[state])
            
            q_table[state_0 + (action,)] += \
            learning_rate* (reward + discount_factor*(best_q) - q_table[state_0+ (action, ) ])
            
            state_0 = state
            
            print("\nEpisode = %d" % episode)
            print('t=%d' %t)
            print('Action: %d' %action)
            print('State:', state)
            print('Reward:', reward)
            print('Best Q:', best_q)
            print('Explore rate:', explore_rate)
            print('Learning rate:', learning_rate)
            print('Streaks:', num_streaks)
            
            print('')
            
            if done: 
                print("Episode %d finished after %f time steps" %(episode, t))
                
                if (t>=199):
                    num_streaks +=1
                else:
                    num_streaks = 0
                break
        if num_streaks >120:
            break
        explore_rate = get_explore_rate(episode)
        learning_rate = get_learning_rate(episode)


In [18]:
simulate()


Episode = 0
t=0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 0.0
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 0
t=1
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 1.0
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 0
t=2
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 1.99
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 0
t=3
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 2.9701
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 0
t=4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 3.9403989999999998
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 0
t=5
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 0.0
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 0
t=6
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 2.9701
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 0
t=7
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 2.9701
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 0
t=8
Action: 0
State: (0, 0, 3, 1)



Episode = 2
t=23
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 10.466174574128358
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 2
t=24
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 11.361512828387074
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 2
t=25
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 12.247897700103204
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 2
t=26
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 13.125418723102172
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 2
t=27
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 13.125418723102172
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 2
t=28
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 13.99416453587115
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 2
t=29
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 14.854222890512439
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 2
t=30
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 15.705680


Episode = 5
t=6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 28.22694674017248
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 5
t=7
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 15.705680661607312
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 5
t=8
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 28.22694674017248
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 5
t=9
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 28.944677272770754
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 5
t=10
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 16.54862385499124
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 5
t=11
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 16.54862385499124
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 5
t=12
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 17.383137616441328
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 5
t=13
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 18.2093062402769

State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 27.501966404214627
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 9
t=1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 22.995685419484467
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 9
t=2
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 27.501966404214627
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 9
t=3
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 27.501966404214627
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 9
t=4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 28.22694674017248
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 9
t=5
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 28.944677272770754
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 9
t=6
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 29.655230500043047
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 9
t=7
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 30.358678195042614
Explore rate: 1
Learning 


Episode = 10
t=30
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 41.29632180625153
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 10
t=31
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 41.29632180625153
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 10
t=32
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 41.88335858818901
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 10
t=33
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 42.46452500230712
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 10
t=34
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 43.03987975228405
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 10
t=35
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 43.60948095476121
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 10
t=36
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 44.1733861452136
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 10
t=37
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 44.73165

Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 12
t=24
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 43.60948095476121
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 12
t=25
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 43.60948095476121
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 12
t=26
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 44.1733861452136
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 12
t=27
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 44.73165228376146
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 12
t=28
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 45.284335760923845
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 12
t=29
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 44.1733861452136
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 12
t=30
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 44.73165228376146
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 12
t=31
Action: 0
Sta


Episode = 16
t=8
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 57.011098647610595
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 16
t=9
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 57.440987661134486
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 16
t=10
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 57.86657778452314
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 16
t=11
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 49.00142537504343
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 16
t=12
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 49.00142537504343
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 16
t=13
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 49.511411121292994
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 16
t=14
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 50.016297010080066
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 16
t=15
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 50.51

Episode = 19
t=1
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 51.01097269957947
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 19
t=2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 57.86657778452314
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 19
t=3
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 58.28791200667791
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 19
t=4
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 58.70503288661113
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 19
t=5
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 58.70503288661113
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 19
t=6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 59.117982557745016
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 19
t=7
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 59.52680273216757
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 19
t=8
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 59.931534704845

Episode = 21
t=10
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 65.19068855075575
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 21
t=11
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 65.19068855075575
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 21
t=12
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 65.5387816652482
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 21
t=13
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 65.88339384859572
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 21
t=14
Action: 0
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 10.466174574128356
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 21
t=15
Action: 1
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 10.466174574128356
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 21
t=16
Action: 0
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 11.361512828387072
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 21
t=17
Action: 1
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 12.247


Episode = 24
t=32
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 69.14555293653484
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 24
t=33
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 69.4540974071695
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 24
t=34
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 69.4540974071695
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 24
t=35
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 69.7595564330978
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 24
t=36
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 70.06196086876682
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 24
t=37
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 69.7595564330978
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 24
t=38
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 70.06196086876682
Explore rate: 1
Learning rate: 1
Streaks: 0


Episode = 24
t=39
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 70.06196086


Episode = 26
t=23
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 75.25351659369137
Explore rate: 0.9829666607012196
Learning rate: 0.9829666607012196
Streaks: 0


Episode = 26
t=24
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 75.49265150852662
Explore rate: 0.9829666607012196
Learning rate: 0.9829666607012196
Streaks: 0


Episode = 26
t=25
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 74.77110760713111
Explore rate: 0.9829666607012196
Learning rate: 0.9829666607012196
Streaks: 0


Episode = 26
t=26
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 74.77110760713111
Explore rate: 0.9829666607012196
Learning rate: 0.9829666607012196
Streaks: 0


Episode = 26
t=27
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 74.86184186793218
Explore rate: 0.9829666607012196
Learning rate: 0.9829666607012196
Streaks: 0


Episode = 26
t=28
Action: 0
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 75.10894158148476
Explore rate: 0.9829666607012196
Learning rate: 0.9829666607012196
Streaks: 0


Epi

State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 74.13689749652778
Explore rate: 0.9665762445130502
Learning rate: 0.9665762445130502
Streaks: 0


Episode = 27
t=44
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 74.38688410142039
Explore rate: 0.9665762445130502
Learning rate: 0.9665762445130502
Streaks: 0


Episode = 27
t=45
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 74.63445439517565
Explore rate: 0.9665762445130502
Learning rate: 0.9665762445130502
Streaks: 0


Episode = 27
t=46
Action: 1
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 16.534265717568736
Explore rate: 0.9665762445130502
Learning rate: 0.9665762445130502
Streaks: 0


Episode = 27
t=47
Action: 1
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 16.534265717568736
Explore rate: 0.9665762445130502
Learning rate: 0.9665762445130502
Streaks: 0

Episode 27 finished after 47.000000 time steps

Episode = 28
t=0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 75.59174942039141
Explore rate: 0.9507819773298184
Learning rate: 0.95078197732981


Episode = 30
t=30
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 80.60189215803993
Explore rate: 0.9208187539523752
Learning rate: 0.9208187539523752
Streaks: 0


Episode = 30
t=31
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 73.26730513113606
Explore rate: 0.9208187539523752
Learning rate: 0.9208187539523752
Streaks: 0


Episode = 30
t=32
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 73.26730513113606
Explore rate: 0.9208187539523752
Learning rate: 0.9208187539523752
Streaks: 0


Episode = 30
t=33
Action: 1
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 73.26730513113606
Explore rate: 0.9208187539523752
Learning rate: 0.9208187539523752
Streaks: 0


Episode = 30
t=34
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 80.43387929321342
Explore rate: 0.9208187539523752
Learning rate: 0.9208187539523752
Streaks: 0


Episode = 30
t=35
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 80.43387929321342
Explore rate: 0.9208187539523752
Learning rate: 0.9208187539523752
Streaks: 0


Epi

State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 81.1916282142032
Explore rate: 0.8927900303521317
Learning rate: 0.8927900303521317
Streaks: 0


Episode = 32
t=23
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 81.34286007811576
Explore rate: 0.8927900303521317
Learning rate: 0.8927900303521317
Streaks: 0


Episode = 32
t=24
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 81.49321559974733
Explore rate: 0.8927900303521317
Learning rate: 0.8927900303521317
Streaks: 0


Episode = 32
t=25
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 80.91780130205522
Explore rate: 0.8927900303521317
Learning rate: 0.8927900303521317
Streaks: 0


Episode = 32
t=26
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 80.91780130205522
Explore rate: 0.8927900303521317
Learning rate: 0.8927900303521317
Streaks: 0


Episode = 32
t=27
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 80.9959606422588
Explore rate: 0.8927900303521317
Learning rate: 0.8927900303521317
Streaks: 0


Episode = 32
t=28
Action: 0
State:

Episode = 35
t=1
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 83.46518379495302
Explore rate: 0.853871964321762
Learning rate: 0.853871964321762
Streaks: 0


Episode = 35
t=2
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 83.60636995488004
Explore rate: 0.853871964321762
Learning rate: 0.853871964321762
Streaks: 0


Episode = 35
t=3
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 83.74635056576994
Explore rate: 0.853871964321762
Learning rate: 0.853871964321762
Streaks: 0


Episode = 35
t=4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 83.74635056576994
Explore rate: 0.853871964321762
Learning rate: 0.853871964321762
Streaks: 0


Episode = 35
t=5
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 83.88513592146798
Explore rate: 0.853871964321762
Learning rate: 0.853871964321762
Streaks: 0


Episode = 35
t=6
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 83.98869205146681
Explore rate: 0.853871964321762
Learning rate: 0.853871964321762
Streaks: 0


Episode = 35
t=7
Actio


Episode = 37
t=8
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 84.93129084556807
Explore rate: 0.8297382846050426
Learning rate: 0.8297382846050426
Streaks: 0


Episode = 37
t=9
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 85.05949427154859
Explore rate: 0.8297382846050426
Learning rate: 0.8297382846050426
Streaks: 0


Episode = 37
t=10
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 85.05949427154859
Explore rate: 0.8297382846050426
Learning rate: 0.8297382846050426
Streaks: 0


Episode = 37
t=11
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 85.08306627992991
Explore rate: 0.8297382846050426
Learning rate: 0.8297382846050426
Streaks: 0


Episode = 37
t=12
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 85.20282437931219
Explore rate: 0.8297382846050426
Learning rate: 0.8297382846050426
Streaks: 0


Episode = 37
t=13
Action: 1
State: (0, 0, 4, 2)
Reward: 1.0
Best Q: 25.058572475290404
Explore rate: 0.8297382846050426
Learning rate: 0.8297382846050426
Streaks: 0


Epis

Episode = 41
t=11
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 76.98883987758005
Explore rate: 0.7851561519523022
Learning rate: 0.7851561519523022
Streaks: 0


Episode = 41
t=12
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 85.84182011943518
Explore rate: 0.7851561519523022
Learning rate: 0.7851561519523022
Streaks: 0


Episode = 41
t=13
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 85.84182011943518
Explore rate: 0.7851561519523022
Learning rate: 0.7851561519523022
Streaks: 0


Episode = 41
t=14
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 85.95298393977191
Explore rate: 0.7851561519523022
Learning rate: 0.7851561519523022
Streaks: 0


Episode = 41
t=15
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 85.03983019252101
Explore rate: 0.7851561519523022
Learning rate: 0.7851561519523022
Streaks: 0


Episode = 41
t=16
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 85.03983019252101
Explore rate: 0.7851561519523022
Learning rate: 0.7851561519523022
Streaks: 0


Epis


Episode = 42
t=36
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 86.8995013052793
Explore rate: 0.7746907182741372
Learning rate: 0.7746907182741372
Streaks: 0


Episode = 42
t=37
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 86.98341211124037
Explore rate: 0.7746907182741372
Learning rate: 0.7746907182741372
Streaks: 0


Episode = 42
t=38
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.08425040945059
Explore rate: 0.7746907182741372
Learning rate: 0.7746907182741372
Streaks: 0


Episode = 42
t=39
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.14268183476378
Explore rate: 0.7746907182741372
Learning rate: 0.7746907182741372
Streaks: 0


Episode = 42
t=40
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.24228628520883
Explore rate: 0.7746907182741372
Learning rate: 0.7746907182741372
Streaks: 0


Episode = 42
t=41
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 86.80417056135782
Explore rate: 0.7746907182741372
Learning rate: 0.7746907182741372
Streaks: 0


Epis

Episode = 44
t=12
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 38.20728187763633
Explore rate: 0.7544873321858502
Learning rate: 0.7544873321858502
Streaks: 0


Episode = 44
t=13
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 38.545613801569324
Explore rate: 0.7544873321858502
Learning rate: 0.7544873321858502
Streaks: 0


Episode = 44
t=14
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 39.009279360509055
Explore rate: 0.7544873321858502
Learning rate: 0.7544873321858502
Streaks: 0


Episode = 44
t=15
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 39.46944662154287
Explore rate: 0.7544873321858502
Learning rate: 0.7544873321858502
Streaks: 0


Episode = 44
t=16
Action: 1
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 25.488131590943485
Explore rate: 0.7544873321858502
Learning rate: 0.7544873321858502
Streaks: 0


Episode = 44
t=17
Action: 1
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 25.488131590943485
Explore rate: 0.7544873321858502
Learning rate: 0.7544873321858502
Streaks: 0

E

Action: 0
State: (0, 0, 4, 2)
Reward: 1.0
Best Q: 43.516394218271536
Explore rate: 0.7351821769904635
Learning rate: 0.7351821769904635
Streaks: 0

Episode 46 finished after 28.000000 time steps

Episode = 47
t=0
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.05949416459565
Explore rate: 0.7258421507363202
Learning rate: 0.7258421507363202
Streaks: 0


Episode = 47
t=1
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.14616338896015
Explore rate: 0.7258421507363202
Learning rate: 0.7258421507363202
Streaks: 0


Episode = 47
t=2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.14616338896015
Explore rate: 0.7258421507363202
Learning rate: 0.7258421507363202
Streaks: 0


Episode = 47
t=3
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 87.67842305300341
Explore rate: 0.7258421507363202
Learning rate: 0.7258421507363202
Streaks: 0


Episode = 47
t=4
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 87.67842305300341
Explore rate: 0.7258421507363202
Learning rate: 0.725842150


Episode = 49
t=5
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 84.73596536249414
Explore rate: 0.707743928643524
Learning rate: 0.707743928643524
Streaks: 0


Episode = 49
t=6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.62227502601186
Explore rate: 0.707743928643524
Learning rate: 0.707743928643524
Streaks: 0


Episode = 49
t=7
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 87.56700258939863
Explore rate: 0.707743928643524
Learning rate: 0.707743928643524
Streaks: 0


Episode = 49
t=8
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 87.56700258939863
Explore rate: 0.707743928643524
Learning rate: 0.707743928643524
Streaks: 0


Episode = 49
t=9
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 87.56700258939863
Explore rate: 0.707743928643524
Learning rate: 0.707743928643524
Streaks: 0


Episode = 49
t=10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.62227502601186
Explore rate: 0.707743928643524
Learning rate: 0.707743928643524
Streaks: 0


Episode = 49
t=11
Ac



Episode = 52
t=11
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.17589883558753
Explore rate: 0.6819366650372385
Learning rate: 0.6819366650372385
Streaks: 0


Episode = 52
t=12
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.1886664265582
Explore rate: 0.6819366650372385
Learning rate: 0.6819366650372385
Streaks: 0


Episode = 52
t=13
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.26239287417498
Explore rate: 0.6819366650372385
Learning rate: 0.6819366650372385
Streaks: 0


Episode = 52
t=14
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.3081059717461
Explore rate: 0.6819366650372385
Learning rate: 0.6819366650372385
Streaks: 0


Episode = 52
t=15
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.36647825704674
Explore rate: 0.6819366650372385
Learning rate: 0.6819366650372385
Streaks: 0


Episode = 52
t=16
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 89.22908099618682
Explore rate: 0.6819366650372385
Learning rate: 0.6819366650372385
Streaks: 0


Epis

State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 88.67867666133328
Explore rate: 0.6736641390712486
Learning rate: 0.6736641390712486
Streaks: 0


Episode = 53
t=39
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.70830540413473
Explore rate: 0.6736641390712486
Learning rate: 0.6736641390712486
Streaks: 0


Episode = 53
t=40
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 88.74649275777999
Explore rate: 0.6736641390712486
Learning rate: 0.6736641390712486
Streaks: 0


Episode = 53
t=41
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.8098416975302
Explore rate: 0.6736641390712486
Learning rate: 0.6736641390712486
Streaks: 0


Episode = 53
t=42
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.8098416975302
Explore rate: 0.6736641390712486
Learning rate: 0.6736641390712486
Streaks: 0


Episode = 53
t=43
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.8098416975302
Explore rate: 0.6736641390712486
Learning rate: 0.6736641390712486
Streaks: 0


Episode = 53
t=44
Action: 0
State: 

Streaks: 0


Episode = 54
t=35
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 89.75951406578376
Explore rate: 0.6655462488490691
Learning rate: 0.6655462488490691
Streaks: 0


Episode = 54
t=36
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 89.82766923578285
Explore rate: 0.6655462488490691
Learning rate: 0.6655462488490691
Streaks: 0


Episode = 54
t=37
Action: 0
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 27.08060171357642
Explore rate: 0.6655462488490691
Learning rate: 0.6655462488490691
Streaks: 0


Episode = 54
t=38
Action: 1
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 27.08060171357642
Explore rate: 0.6655462488490691
Learning rate: 0.6655462488490691
Streaks: 0


Episode = 54
t=39
Action: 1
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 27.565914033555025
Explore rate: 0.6655462488490691
Learning rate: 0.6655462488490691
Streaks: 0

Episode 54 finished after 39.000000 time steps

Episode = 55
t=0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.90225521235914
Explore rate: 0.65757731


Episode = 57
t=13
Action: 0
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 28.04799637559281
Explore rate: 0.6420651529995463
Learning rate: 0.6420651529995463
Streaks: 0


Episode = 57
t=14
Action: 0
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 28.04799637559281
Explore rate: 0.6420651529995463
Learning rate: 0.6420651529995463
Streaks: 0


Episode = 57
t=15
Action: 1
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 28.04799637559281
Explore rate: 0.6420651529995463
Learning rate: 0.6420651529995463
Streaks: 0

Episode 57 finished after 15.000000 time steps

Episode = 58
t=0
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.92896314778207
Explore rate: 0.6345120151091004
Learning rate: 0.6345120151091004
Streaks: 0


Episode = 58
t=1
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 89.85679271318018
Explore rate: 0.6345120151091004
Learning rate: 0.6345120151091004
Streaks: 0


Episode = 58
t=2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.9624149022065
Explore rate: 0.6345120151091004
Learnin


Episode = 59
t=29
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.5271056684089
Explore rate: 0.6270879970298935
Learning rate: 0.6270879970298935
Streaks: 0


Episode = 59
t=30
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.58650905173363
Explore rate: 0.6270879970298935
Learning rate: 0.6270879970298935
Streaks: 0


Episode = 59
t=31
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.61813817510226
Explore rate: 0.6270879970298935
Learning rate: 0.6270879970298935
Streaks: 0


Episode = 59
t=32
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.67697070450411
Explore rate: 0.6270879970298935
Learning rate: 0.6270879970298935
Streaks: 0


Episode = 59
t=33
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 90.44397334771529
Explore rate: 0.6270879970298935
Learning rate: 0.6270879970298935
Streaks: 0


Episode = 59
t=34
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.61258733216017
Explore rate: 0.6270879970298935
Learning rate: 0.6270879970298935
Streaks: 0


Epis


Episode = 62
t=1
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 90.10646968334598
Explore rate: 0.6055483191737837
Learning rate: 0.6055483191737837
Streaks: 0


Episode = 62
t=2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 90.16637978988543
Explore rate: 0.6055483191737837
Learning rate: 0.6055483191737837
Streaks: 0


Episode = 62
t=3
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.20224443378754
Explore rate: 0.6055483191737837
Learning rate: 0.6055483191737837
Streaks: 0


Episode = 62
t=4
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.20224443378754
Explore rate: 0.6055483191737837
Learning rate: 0.6055483191737837
Streaks: 0


Episode = 62
t=5
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 87.34406375875346
Explore rate: 0.6055483191737837
Learning rate: 0.6055483191737837
Streaks: 0


Episode = 62
t=6
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 87.34406375875346
Explore rate: 0.6055483191737837
Learning rate: 0.6055483191737837
Streaks: 0


Episode =


Episode = 65
t=8
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 83.93786340298482
Explore rate: 0.585026652029182
Learning rate: 0.585026652029182
Streaks: 0


Episode = 65
t=9
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 84.03183118296269
Explore rate: 0.585026652029182
Learning rate: 0.585026652029182
Streaks: 0


Episode = 65
t=10
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 84.12524922638337
Explore rate: 0.585026652029182
Learning rate: 0.585026652029182
Streaks: 0


Episode = 65
t=11
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 84.12524922638337
Explore rate: 0.585026652029182
Learning rate: 0.585026652029182
Streaks: 0


Episode = 65
t=12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 90.18422813678183
Explore rate: 0.585026652029182
Learning rate: 0.585026652029182
Streaks: 0


Episode = 65
t=13
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 90.18422813678183
Explore rate: 0.585026652029182
Learning rate: 0.585026652029182
Streaks: 0


Episode = 65
t=14


Episode = 67
t=16
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 88.55311862658952
Explore rate: 0.5718652059712112
Learning rate: 0.5718652059712112
Streaks: 0


Episode = 67
t=17
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 88.55311862658952
Explore rate: 0.5718652059712112
Learning rate: 0.5718652059712112
Streaks: 0


Episode = 67
t=18
Action: 0
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 66.95122048559028
Explore rate: 0.5718652059712112
Learning rate: 0.5718652059712112
Streaks: 0


Episode = 67
t=19
Action: 0
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 66.95122048559028
Explore rate: 0.5718652059712112
Learning rate: 0.5718652059712112
Streaks: 0


Episode = 67
t=20
Action: 1
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 67.14021495663133
Explore rate: 0.5718652059712112
Learning rate: 0.5718652059712112
Streaks: 0

Episode 67 finished after 20.000000 time steps

Episode = 68
t=0
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.59404731406637
Explore rate: 0.5654310959658013
Lear


Episode = 69
t=33
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 75.24610026367073
Explore rate: 0.5590909179347823
Learning rate: 0.5590909179347823
Streaks: 0


Episode = 69
t=34
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 75.24610026367073
Explore rate: 0.5590909179347823
Learning rate: 0.5590909179347823
Streaks: 0


Episode = 69
t=35
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 75.24610026367073
Explore rate: 0.5590909179347823
Learning rate: 0.5590909179347823
Streaks: 0


Episode = 69
t=36
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.52289254114564
Explore rate: 0.5590909179347823
Learning rate: 0.5590909179347823
Streaks: 0


Episode = 69
t=37
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 83.9012221366367
Explore rate: 0.5590909179347823
Learning rate: 0.5590909179347823
Streaks: 0


Episode = 69
t=38
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 83.9012221366367
Explore rate: 0.5590909179347823
Learning rate: 0.5590909179347823
Streaks: 0


Episo


Episode = 71
t=4
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.96762752895876
Explore rate: 0.5466816599529624
Learning rate: 0.5466816599529624
Streaks: 0


Episode = 71
t=5
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.96762752895876
Explore rate: 0.5466816599529624
Learning rate: 0.5466816599529624
Streaks: 0


Episode = 71
t=6
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.0224726693161
Explore rate: 0.5466816599529624
Learning rate: 0.5466816599529624
Streaks: 0


Episode = 71
t=7
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.0224726693161
Explore rate: 0.5466816599529624
Learning rate: 0.5466816599529624
Streaks: 0


Episode = 71
t=8
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.07701798134974
Explore rate: 0.5466816599529624
Learning rate: 0.5466816599529624
Streaks: 0


Episode = 71
t=9
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 88.03545293387663
Explore rate: 0.5466816599529624
Learning rate: 0.5466816599529624
Streaks: 0


Episode = 7

Episode = 73
t=5
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 87.41229934758302
Explore rate: 0.5346171485515817
Learning rate: 0.5346171485515817
Streaks: 0


Episode = 73
t=6
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.09171031055503
Explore rate: 0.5346171485515817
Learning rate: 0.5346171485515817
Streaks: 0


Episode = 73
t=7
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 87.83918785808078
Explore rate: 0.5346171485515817
Learning rate: 0.5346171485515817
Streaks: 0


Episode = 73
t=8
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 87.83918785808078
Explore rate: 0.5346171485515817
Learning rate: 0.5346171485515817
Streaks: 0


Episode = 73
t=9
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 87.83918785808078
Explore rate: 0.5346171485515817
Learning rate: 0.5346171485515817
Streaks: 0


Episode = 73
t=10
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 87.83918785808078
Explore rate: 0.5346171485515817
Learning rate: 0.5346171485515817
Streaks: 0


Episode =


Episode = 77
t=3
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 88.9409046606735
Explore rate: 0.5114492834995557
Learning rate: 0.5114492834995557
Streaks: 0


Episode = 77
t=4
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.00502933775498
Explore rate: 0.5114492834995557
Learning rate: 0.5114492834995557
Streaks: 0


Episode = 77
t=5
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.19613269578694
Explore rate: 0.5114492834995557
Learning rate: 0.5114492834995557
Streaks: 0


Episode = 77
t=6
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.19613269578694
Explore rate: 0.5114492834995557
Learning rate: 0.5114492834995557
Streaks: 0


Episode = 77
t=7
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 81.14133952099883
Explore rate: 0.5114492834995557
Learning rate: 0.5114492834995557
Streaks: 0


Episode = 77
t=8
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.00188274837352
Explore rate: 0.5114492834995557
Learning rate: 0.5114492834995557
Streaks: 0


Episode = 


Episode = 77
t=64
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.24330473996444
Explore rate: 0.5114492834995557
Learning rate: 0.5114492834995557
Streaks: 0


Episode = 77
t=65
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.29831978080013
Explore rate: 0.5114492834995557
Learning rate: 0.5114492834995557
Streaks: 0


Episode = 77
t=66
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.29831978080013
Explore rate: 0.5114492834995557
Learning rate: 0.5114492834995557
Streaks: 0


Episode = 77
t=67
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.35305344760364
Explore rate: 0.5114492834995557
Learning rate: 0.5114492834995557
Streaks: 0


Episode = 77
t=68
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.35305344760364
Explore rate: 0.5114492834995557
Learning rate: 0.5114492834995557
Streaks: 0


Episode = 77
t=69
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 89.21791571306905
Explore rate: 0.5114492834995557
Learning rate: 0.5114492834995557
Streaks: 0


Epi

Best Q: 89.47115324524843
Explore rate: 0.5114492834995557
Learning rate: 0.5114492834995557
Streaks: 0


Episode = 77
t=123
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 89.47115324524843
Explore rate: 0.5114492834995557
Learning rate: 0.5114492834995557
Streaks: 0


Episode = 77
t=124
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 89.47115324524843
Explore rate: 0.5114492834995557
Learning rate: 0.5114492834995557
Streaks: 0


Episode = 77
t=125
Action: 1
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 89.47115324524843
Explore rate: 0.5114492834995557
Learning rate: 0.5114492834995557
Streaks: 0


Episode = 77
t=126
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.52854919037277
Explore rate: 0.5114492834995557
Learning rate: 0.5114492834995557
Streaks: 0


Episode = 77
t=127
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.52854919037277
Explore rate: 0.5114492834995557
Learning rate: 0.5114492834995557
Streaks: 0


Episode = 77
t=128
Action: 0
State: (0, 0, 2, 1)
Reward: 1.

State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 89.05799718068812
Explore rate: 0.5003129173815961
Learning rate: 0.5003129173815961
Streaks: 0


Episode = 79
t=11
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 89.05799718068812
Explore rate: 0.5003129173815961
Learning rate: 0.5003129173815961
Streaks: 0


Episode = 79
t=12
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 89.11274143421339
Explore rate: 0.5003129173815961
Learning rate: 0.5003129173815961
Streaks: 0


Episode = 79
t=13
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 89.16721179516675
Explore rate: 0.5003129173815961
Learning rate: 0.5003129173815961
Streaks: 0


Episode = 79
t=14
Action: 1
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 68.01420510213207
Explore rate: 0.5003129173815961
Learning rate: 0.5003129173815961
Streaks: 0


Episode = 79
t=15
Action: 0
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 68.01420510213207
Explore rate: 0.5003129173815961
Learning rate: 0.5003129173815961
Streaks: 0

Episode 79 finished after 15.0000

State: (0, 0, 4, 2)
Reward: 1.0
Best Q: 50.358690769461376
Explore rate: 0.49485002168009395
Learning rate: 0.49485002168009395
Streaks: 0

Episode 80 finished after 52.000000 time steps

Episode = 81
t=0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 90.07797932412566
Explore rate: 0.48945498979338786
Learning rate: 0.48945498979338786
Streaks: 0


Episode = 81
t=1
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 90.08746877010759
Explore rate: 0.48945498979338786
Learning rate: 0.48945498979338786
Streaks: 0


Episode = 81
t=2
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 90.15593537820794
Explore rate: 0.48945498979338786
Learning rate: 0.48945498979338786
Streaks: 0


Episode = 81
t=3
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 90.16916235856638
Explore rate: 0.48945498979338786
Learning rate: 0.48945498979338786
Streaks: 0


Episode = 81
t=4
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 90.21052691511876
Explore rate: 0.48945498979338786
Learning rate: 0.48945498


Episode = 83
t=4
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 87.81346671442668
Explore rate: 0.47886191629596375
Learning rate: 0.47886191629596375
Streaks: 0


Episode = 83
t=5
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 87.81346671442668
Explore rate: 0.47886191629596375
Learning rate: 0.47886191629596375
Streaks: 0


Episode = 83
t=6
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.74530137096951
Explore rate: 0.47886191629596375
Learning rate: 0.47886191629596375
Streaks: 0


Episode = 83
t=7
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 88.81806648790972
Explore rate: 0.47886191629596375
Learning rate: 0.47886191629596375
Streaks: 0


Episode = 83
t=8
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 88.81806648790972
Explore rate: 0.47886191629596375
Learning rate: 0.47886191629596375
Streaks: 0


Episode = 83
t=9
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 88.81806648790972
Explore rate: 0.47886191629596375
Learning rate: 0.47886191629596375
Streaks: 0


Episode = 85
t=1
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.19346108594091
Explore rate: 0.46852108295774486
Learning rate: 0.46852108295774486
Streaks: 0


Episode = 85
t=2
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.19346108594091
Explore rate: 0.46852108295774486
Learning rate: 0.46852108295774486
Streaks: 0


Episode = 85
t=3
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 84.12860651546497
Explore rate: 0.46852108295774486
Learning rate: 0.46852108295774486
Streaks: 0


Episode = 85
t=4
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 84.12860651546497
Explore rate: 0.46852108295774486
Learning rate: 0.46852108295774486
Streaks: 0


Episode = 85
t=5
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 84.20296734009919
Explore rate: 0.46852108295774486
Learning rate: 0.46852108295774486
Streaks: 0


Episode = 85
t=6
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 84.27697976859255
Explore rate: 0.46852108295774486
Learning rate: 0.46852108295774486
Streaks: 0


Episode = 87
t=15
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.68421013341067
Explore rate: 0.45842075605341903
Learning rate: 0.45842075605341903
Streaks: 0


Episode = 87
t=16
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 89.74758679481792
Explore rate: 0.45842075605341903
Learning rate: 0.45842075605341903
Streaks: 0


Episode = 87
t=17
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.78587363241071
Explore rate: 0.45842075605341903
Learning rate: 0.45842075605341903
Streaks: 0


Episode = 87
t=18
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 89.81196195117265
Explore rate: 0.45842075605341903
Learning rate: 0.45842075605341903
Streaks: 0


Episode = 87
t=19
Action: 1
State: (0, 0, 4, 2)
Reward: 1.0
Best Q: 50.90699168738213
Explore rate: 0.45842075605341903
Learning rate: 0.45842075605341903
Streaks: 0


Episode = 87
t=20
Action: 1
State: (0, 0, 4, 1)
Reward: 1.0
Best Q: 49.50812212642145
Explore rate: 0.45842075605341903
Learning rate: 0.45842075605341903
Stre


Episode = 89
t=33
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 90.17673731271832
Explore rate: 0.4485500020271248
Learning rate: 0.4485500020271248
Streaks: 0


Episode = 89
t=34
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 90.22079955770124
Explore rate: 0.4485500020271248
Learning rate: 0.4485500020271248
Streaks: 0


Episode = 89
t=35
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 90.1952812925569
Explore rate: 0.4485500020271248
Learning rate: 0.4485500020271248
Streaks: 0


Episode = 89
t=36
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 90.27752154144676
Explore rate: 0.4485500020271248
Learning rate: 0.4485500020271248
Streaks: 0


Episode = 89
t=37
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 90.27578033368609
Explore rate: 0.4485500020271248
Learning rate: 0.4485500020271248
Streaks: 0


Episode = 89
t=38
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.53347169142707
Explore rate: 0.4485500020271248
Learning rate: 0.4485500020271248
Streaks: 0


Epis


Episode = 89
t=95
Action: 0
State: (0, 0, 1, 1)
Reward: 1.0
Best Q: 73.57671632011704
Explore rate: 0.4485500020271248
Learning rate: 0.4485500020271248
Streaks: 0


Episode = 89
t=96
Action: 0
State: (0, 0, 1, 1)
Reward: 1.0
Best Q: 73.57671632011704
Explore rate: 0.4485500020271248
Learning rate: 0.4485500020271248
Streaks: 0


Episode = 89
t=97
Action: 0
State: (0, 0, 1, 1)
Reward: 1.0
Best Q: 73.69523795959878
Explore rate: 0.4485500020271248
Learning rate: 0.4485500020271248
Streaks: 0


Episode = 89
t=98
Action: 0
State: (0, 0, 1, 1)
Reward: 1.0
Best Q: 73.81322797026424
Explore rate: 0.4485500020271248
Learning rate: 0.4485500020271248
Streaks: 0


Episode = 89
t=99
Action: 0
State: (0, 0, 1, 2)
Reward: 1.0
Best Q: 0.0
Explore rate: 0.4485500020271248
Learning rate: 0.4485500020271248
Streaks: 0


Episode = 89
t=100
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 89.50242251379296
Explore rate: 0.4485500020271248
Learning rate: 0.4485500020271248
Streaks: 0


Episode = 89
t=1

Episode = 92
t=4
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 89.07326790800344
Explore rate: 0.43415218132648237
Learning rate: 0.43415218132648237
Streaks: 0


Episode = 92
t=5
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 88.53221740457592
Explore rate: 0.43415218132648237
Learning rate: 0.43415218132648237
Streaks: 0


Episode = 92
t=6
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 88.53221740457592
Explore rate: 0.43415218132648237
Learning rate: 0.43415218132648237
Streaks: 0


Episode = 92
t=7
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 88.58200503286373
Explore rate: 0.43415218132648237
Learning rate: 0.43415218132648237
Streaks: 0


Episode = 92
t=8
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 88.63157650707731
Explore rate: 0.43415218132648237
Learning rate: 0.43415218132648237
Streaks: 0


Episode = 92
t=9
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 88.68093276565426
Explore rate: 0.43415218132648237
Learning rate: 0.43415218132648237
Streaks: 0



Episode = 95
t=3
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 80.01400837044552
Explore rate: 0.4202164033831899
Learning rate: 0.4202164033831899
Streaks: 0


Episode = 95
t=4
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 80.01400837044552
Explore rate: 0.4202164033831899
Learning rate: 0.4202164033831899
Streaks: 0


Episode = 95
t=5
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 80.01400837044552
Explore rate: 0.4202164033831899
Learning rate: 0.4202164033831899
Streaks: 0


Episode = 95
t=6
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 80.0979927856517
Explore rate: 0.4202164033831899
Learning rate: 0.4202164033831899
Streaks: 0


Episode = 95
t=7
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 80.1816242845689
Explore rate: 0.4202164033831899
Learning rate: 0.4202164033831899
Streaks: 0


Episode = 95
t=8
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.79046589468679
Explore rate: 0.4202164033831899
Learning rate: 0.4202164033831899
Streaks: 0


Episode = 9

Episode = 95
t=63
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.4709042532474
Explore rate: 0.4202164033831899
Learning rate: 0.4202164033831899
Streaks: 0


Episode = 95
t=64
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 88.29926373257095
Explore rate: 0.4202164033831899
Learning rate: 0.4202164033831899
Streaks: 0


Episode = 95
t=65
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 88.29926373257095
Explore rate: 0.4202164033831899
Learning rate: 0.4202164033831899
Streaks: 0


Episode = 95
t=66
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 88.29926373257095
Explore rate: 0.4202164033831899
Learning rate: 0.4202164033831899
Streaks: 0


Episode = 95
t=67
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.51094707666934
Explore rate: 0.4202164033831899
Learning rate: 0.4202164033831899
Streaks: 0


Episode = 95
t=68
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.51094707666934
Explore rate: 0.4202164033831899
Learning rate: 0.4202164033831899
Streaks: 0


Episo

Episode = 97
t=10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.2839959430381
Explore rate: 0.41116827440579273
Learning rate: 0.41116827440579273
Streaks: 0


Episode = 97
t=11
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.2839959430381
Explore rate: 0.41116827440579273
Learning rate: 0.41116827440579273
Streaks: 0


Episode = 97
t=12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.33628011749248
Explore rate: 0.41116827440579273
Learning rate: 0.41116827440579273
Streaks: 0


Episode = 97
t=13
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 85.55423243642686
Explore rate: 0.41116827440579273
Learning rate: 0.41116827440579273
Streaks: 0


Episode = 97
t=14
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 85.55423243642686
Explore rate: 0.41116827440579273
Learning rate: 0.41116827440579273
Streaks: 0


Episode = 97
t=15
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 85.61362884964268
Explore rate: 0.41116827440579273
Learning rate: 0.41116827440579273
Streaks

Episode = 101
t=0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.03874401628187
Explore rate: 0.3936186348893951
Learning rate: 0.3936186348893951
Streaks: 0


Episode = 101
t=1
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.0858257488006
Explore rate: 0.3936186348893951
Learning rate: 0.3936186348893951
Streaks: 0


Episode = 101
t=2
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.0858257488006
Explore rate: 0.3936186348893951
Learning rate: 0.3936186348893951
Streaks: 0


Episode = 101
t=3
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.13272215884652
Explore rate: 0.3936186348893951
Learning rate: 0.3936186348893951
Streaks: 0


Episode = 101
t=4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.13272215884652
Explore rate: 0.3936186348893951
Learning rate: 0.3936186348893951
Streaks: 0


Episode = 101
t=5
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 89.45985520982627
Explore rate: 0.3936186348893951
Learning rate: 0.3936186348893951
Streaks: 0


Episod


Episode = 102
t=27
Action: 1
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 72.0535487469955
Explore rate: 0.3893398369101201
Learning rate: 0.3893398369101201
Streaks: 0


Episode = 102
t=28
Action: 0
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 72.0535487469955
Explore rate: 0.3893398369101201
Learning rate: 0.3893398369101201
Streaks: 0


Episode = 102
t=29
Action: 0
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 72.16235541472612
Explore rate: 0.3893398369101201
Learning rate: 0.3893398369101201
Streaks: 0

Episode 102 finished after 29.000000 time steps

Episode = 103
t=0
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.96101906297618
Explore rate: 0.38510278396686537
Learning rate: 0.38510278396686537
Streaks: 0


Episode = 103
t=1
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 75.73046440980286
Explore rate: 0.38510278396686537
Learning rate: 0.38510278396686537
Streaks: 0


Episode = 103
t=2
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.01324427180005
Explore rate: 0.38510278396686

Learning rate: 0.38090666937325723
Streaks: 0


Episode = 104
t=8
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.77454514140989
Explore rate: 0.38090666937325723
Learning rate: 0.38090666937325723
Streaks: 0


Episode = 104
t=9
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 86.79630298780187
Explore rate: 0.38090666937325723
Learning rate: 0.38090666937325723
Streaks: 0


Episode = 104
t=10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.8331266127351
Explore rate: 0.38090666937325723
Learning rate: 0.38090666937325723
Streaks: 0


Episode = 104
t=11
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.8331266127351
Explore rate: 0.38090666937325723
Learning rate: 0.38090666937325723
Streaks: 0


Episode = 104
t=12
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.8331266127351
Explore rate: 0.38090666937325723
Learning rate: 0.38090666937325723
Streaks: 0


Episode = 104
t=13
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 86.86048285100945
Explore rate: 0.380906669


Episode = 106
t=6
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.4434937274515
Explore rate: 0.3726341434072673
Learning rate: 0.3726341434072673
Streaks: 0


Episode = 106
t=7
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.4434937274515
Explore rate: 0.3726341434072673
Learning rate: 0.3726341434072673
Streaks: 0


Episode = 106
t=8
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.4434937274515
Explore rate: 0.3726341434072673
Learning rate: 0.3726341434072673
Streaks: 0


Episode = 106
t=9
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 86.36758120061197
Explore rate: 0.3726341434072673
Learning rate: 0.3726341434072673
Streaks: 0


Episode = 106
t=10
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.49769729596423
Explore rate: 0.3726341434072673
Learning rate: 0.3726341434072673
Streaks: 0


Episode = 106
t=11
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 86.4663810903685
Explore rate: 0.3726341434072673
Learning rate: 0.3726341434072673
Streaks: 0


Episo


Episode = 107
t=29
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 78.33874305891888
Explore rate: 0.368556230986828
Learning rate: 0.368556230986828
Streaks: 0


Episode = 107
t=30
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.05353118675819
Explore rate: 0.368556230986828
Learning rate: 0.368556230986828
Streaks: 0


Episode = 107
t=31
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.05353118675819
Explore rate: 0.368556230986828
Learning rate: 0.368556230986828
Streaks: 0


Episode = 107
t=32
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.10124620426215
Explore rate: 0.368556230986828
Learning rate: 0.368556230986828
Streaks: 0


Episode = 107
t=33
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.14878536509599
Explore rate: 0.368556230986828
Learning rate: 0.368556230986828
Streaks: 0


Episode = 107
t=34
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.19614931739042
Explore rate: 0.368556230986828
Learning rate: 0.368556230986828
Streaks: 0


Episode =


Episode = 108
t=42
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 84.5846514738739
Explore rate: 0.36451625318508785
Learning rate: 0.36451625318508785
Streaks: 0


Episode = 108
t=43
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 84.5846514738739
Explore rate: 0.36451625318508785
Learning rate: 0.36451625318508785
Streaks: 0


Episode = 108
t=44
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 84.64084292473676
Explore rate: 0.36451625318508785
Learning rate: 0.36451625318508785
Streaks: 0


Episode = 108
t=45
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 84.64084292473676
Explore rate: 0.36451625318508785
Learning rate: 0.36451625318508785
Streaks: 0


Episode = 108
t=46
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 84.69682954862833
Explore rate: 0.36451625318508785
Learning rate: 0.36451625318508785
Streaks: 0


Episode = 108
t=47
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 84.69682954862833
Explore rate: 0.36451625318508785
Learning rate: 0.36451625318508785


Episode = 109
t=11
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 86.53542610936631
Explore rate: 0.3605135107314139
Learning rate: 0.3605135107314139
Streaks: 0


Episode = 109
t=12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.1994446529267
Explore rate: 0.3605135107314139
Learning rate: 0.3605135107314139
Streaks: 0


Episode = 109
t=13
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.1994446529267
Explore rate: 0.3605135107314139
Learning rate: 0.3605135107314139
Streaks: 0


Episode = 109
t=14
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.24919751950887
Explore rate: 0.3605135107314139
Learning rate: 0.3605135107314139
Streaks: 0


Episode = 109
t=15
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 86.22402968516421
Explore rate: 0.3605135107314139
Learning rate: 0.3605135107314139
Streaks: 0


Episode = 109
t=16
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 86.6008415753761
Explore rate: 0.3605135107314139
Learning rate: 0.3605135107314139
Streaks: 0


E

Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 82.42083311996663
Explore rate: 0.35261702988538013
Learning rate: 0.35261702988538013
Streaks: 0


Episode = 111
t=18
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 82.4828202560976
Explore rate: 0.35261702988538013
Learning rate: 0.35261702988538013
Streaks: 0


Episode = 111
t=19
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 82.54458881503022
Explore rate: 0.35261702988538013
Learning rate: 0.35261702988538013
Streaks: 0


Episode = 111
t=20
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 82.60613956750494
Explore rate: 0.35261702988538013
Learning rate: 0.35261702988538013
Streaks: 0


Episode = 111
t=21
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 82.66747328154442
Explore rate: 0.35261702988538013
Learning rate: 0.35261702988538013
Streaks: 0


Episode = 111
t=22
Action: 1
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 71.61626952014475
Explore rate: 0.35261702988538013
Learning rate: 0.35261702988538013
Streaks: 0


Episod

Episode = 113
t=38
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 86.18416046070924
Explore rate: 0.3448615651886179
Learning rate: 0.3448615651886179
Streaks: 0


Episode = 113
t=39
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 86.18416046070924
Explore rate: 0.3448615651886179
Learning rate: 0.3448615651886179
Streaks: 0


Episode = 113
t=40
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 86.23180598118839
Explore rate: 0.3448615651886179
Learning rate: 0.3448615651886179
Streaks: 0


Episode = 113
t=41
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 86.27928719057986
Explore rate: 0.3448615651886179
Learning rate: 0.3448615651886179
Streaks: 0


Episode = 113
t=42
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 86.27928719057986
Explore rate: 0.3448615651886179
Learning rate: 0.3448615651886179
Streaks: 0


Episode = 113
t=43
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 79.236568262559
Explore rate: 0.3448615651886179
Learning rate: 0.3448615651886179
Streaks: 0





Episode = 115
t=27
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 85.12682761277084
Explore rate: 0.3372421683184259
Learning rate: 0.3372421683184259
Streaks: 0


Episode = 115
t=28
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 85.17698622182726
Explore rate: 0.3372421683184259
Learning rate: 0.3372421683184259
Streaks: 0


Episode = 115
t=29
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 85.06789066119254
Explore rate: 0.3372421683184259
Learning rate: 0.3372421683184259
Streaks: 0


Episode = 115
t=30
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 85.22368286924213
Explore rate: 0.3372421683184259
Learning rate: 0.3372421683184259
Streaks: 0


Episode = 115
t=31
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 85.17026233553167
Explore rate: 0.3372421683184259
Learning rate: 0.3372421683184259
Streaks: 0


Episode = 115
t=32
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 84.93431121074326
Explore rate: 0.3372421683184259
Learning rate: 0.3372421683184259
Streaks: 0


Episode = 116
t=41
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 83.61415237002548
Explore rate: 0.3334820194451191
Learning rate: 0.3334820194451191
Streaks: 0


Episode = 116
t=42
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 83.61415237002548
Explore rate: 0.3334820194451191
Learning rate: 0.3334820194451191
Streaks: 0


Episode = 116
t=43
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 83.66879622560512
Explore rate: 0.3334820194451191
Learning rate: 0.3334820194451191
Streaks: 0


Episode = 116
t=44
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 83.72325785375168
Explore rate: 0.3334820194451191
Learning rate: 0.3334820194451191
Streaks: 0


Episode = 116
t=45
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 83.72325785375168
Explore rate: 0.3334820194451191
Learning rate: 0.3334820194451191
Streaks: 0


Episode = 116
t=46
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 83.77753786216086
Explore rate: 0.3334820194451191
Learning rate: 0.3334820194451191
Streaks: 0

State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 82.93311389959001
Explore rate: 0.3334820194451191
Learning rate: 0.3334820194451191
Streaks: 0


Episode = 116
t=101
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 84.96080065843314
Explore rate: 0.3334820194451191
Learning rate: 0.3334820194451191
Streaks: 0


Episode = 116
t=102
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 84.96080065843314
Explore rate: 0.3334820194451191
Learning rate: 0.3334820194451191
Streaks: 0


Episode = 116
t=103
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 85.01095368410577
Explore rate: 0.3334820194451191
Learning rate: 0.3334820194451191
Streaks: 0


Episode = 116
t=104
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 84.79068091013586
Explore rate: 0.3334820194451191
Learning rate: 0.3334820194451191
Streaks: 0


Episode = 116
t=105
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 85.02153343643434
Explore rate: 0.3334820194451191
Learning rate: 0.3334820194451191
Streaks: 0


Episode = 116
t=106
Ac


Episode = 117
t=19
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 85.2136800485803
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117
t=20
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 85.26243855179783
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117
t=21
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 85.40223759186283
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117
t=22
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 84.5434774281646
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117
t=23
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 84.5434774281646
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117
t=24
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 85.2309342222228
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117


Episode = 117
t=69
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 85.64216081771592
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117
t=70
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 85.64216081771592
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117
t=71
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 85.68950638782844
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117
t=72
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 85.73669583396014
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117
t=73
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 85.73669583396014
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117
t=74
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 85.78372967093631
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode =

Best Q: 86.33019841480272
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117
t=130
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 86.33019841480272
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117
t=131
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 86.09319455376871
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117
t=132
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 86.32811617290218
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117
t=133
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 86.32811617290218
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117
t=134
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 86.21574453576326
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117
t=135
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best


Episode = 117
t=187
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 86.64699824886587
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117
t=188
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 86.64699824886587
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117
t=189
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 86.69103032587932
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117
t=190
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 86.62457454137027
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117
t=191
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 86.62457454137027
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Episode = 117
t=192
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 86.62457454137027
Explore rate: 0.329754146925876
Learning rate: 0.329754146925876
Streaks: 0


Epi


Episode = 120
t=2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.42665767210727
Explore rate: 0.31875876262441283
Learning rate: 0.31875876262441283
Streaks: 0


Episode = 120
t=3
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 86.12469421170894
Explore rate: 0.31875876262441283
Learning rate: 0.31875876262441283
Streaks: 0


Episode = 120
t=4
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.40410765807576
Explore rate: 0.31875876262441283
Learning rate: 0.31875876262441283
Streaks: 0


Episode = 120
t=5
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 81.68710255058289
Explore rate: 0.31875876262441283
Learning rate: 0.31875876262441283
Streaks: 0


Episode = 120
t=6
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 81.68710255058289
Explore rate: 0.31875876262441283
Learning rate: 0.31875876262441283
Streaks: 0


Episode = 120
t=7
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 81.74547651589333
Explore rate: 0.31875876262441283
Learning rate: 0.31875876262441283
Stre


Episode = 121
t=38
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 85.76642786535398
Explore rate: 0.31515463835558755
Learning rate: 0.31515463835558755
Streaks: 0


Episode = 121
t=39
Action: 0
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 71.8149866747398
Explore rate: 0.31515463835558755
Learning rate: 0.31515463835558755
Streaks: 0


Episode = 121
t=40
Action: 1
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 71.8149866747398
Explore rate: 0.31515463835558755
Learning rate: 0.31515463835558755
Streaks: 0


Episode = 121
t=41
Action: 1
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 71.9038130515555
Explore rate: 0.31515463835558755
Learning rate: 0.31515463835558755
Streaks: 0

Episode 121 finished after 41.000000 time steps

Episode = 122
t=0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 85.79985249089077
Explore rate: 0.31158017799728943
Learning rate: 0.31158017799728943
Streaks: 0


Episode = 122
t=1
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 85.79985249089077
Explore rate: 0.31158017

Best Q: 86.51009629735476
Explore rate: 0.31158017799728943
Learning rate: 0.31158017799728943
Streaks: 0


Episode = 122
t=46
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 86.5884054841262
Explore rate: 0.31158017799728943
Learning rate: 0.31158017799728943
Streaks: 0


Episode = 122
t=47
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 86.60521932746106
Explore rate: 0.31158017799728943
Learning rate: 0.31158017799728943
Streaks: 0


Episode = 122
t=48
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 86.63537982588714
Explore rate: 0.31158017799728943
Learning rate: 0.31158017799728943
Streaks: 0


Episode = 122
t=49
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.40213072385994
Explore rate: 0.31158017799728943
Learning rate: 0.31158017799728943
Streaks: 0


Episode = 122
t=50
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.40213072385994
Explore rate: 0.31158017799728943
Learning rate: 0.31158017799728943
Streaks: 0


Episode = 122
t=51
Action: 0
State: (0, 0, 2, 1)

State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 86.53861361709896
Explore rate: 0.31158017799728943
Learning rate: 0.31158017799728943
Streaks: 0


Episode = 122
t=106
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 84.70437337703127
Explore rate: 0.31158017799728943
Learning rate: 0.31158017799728943
Streaks: 0


Episode = 122
t=107
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.04436299572652
Explore rate: 0.31158017799728943
Learning rate: 0.31158017799728943
Streaks: 0


Episode = 122
t=108
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.04436299572652
Explore rate: 0.31158017799728943
Learning rate: 0.31158017799728943
Streaks: 0


Episode = 122
t=109
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.08784599434509
Explore rate: 0.31158017799728943
Learning rate: 0.31158017799728943
Streaks: 0


Episode = 122
t=110
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 86.25534136704705
Explore rate: 0.31158017799728943
Learning rate: 0.31158017799728943
Streaks: 0


Episode = 

Learning rate: 0.31158017799728943
Streaks: 0


Episode = 122
t=166
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 86.09997472335021
Explore rate: 0.31158017799728943
Learning rate: 0.31158017799728943
Streaks: 0


Episode = 122
t=167
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.42668735029343
Explore rate: 0.31158017799728943
Learning rate: 0.31158017799728943
Streaks: 0


Episode = 122
t=168
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.42668735029343
Explore rate: 0.31158017799728943
Learning rate: 0.31158017799728943
Streaks: 0


Episode = 122
t=169
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.46897910200752
Explore rate: 0.31158017799728943
Learning rate: 0.31158017799728943
Streaks: 0


Episode = 122
t=170
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 86.52884689231242
Explore rate: 0.31158017799728943
Learning rate: 0.31158017799728943
Streaks: 0


Episode = 122
t=171
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 86.52884689231242
Explore rate: 

State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.846545906352
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966
Streaks: 1


Episode = 123
t=26
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.88706313515192
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966
Streaks: 1


Episode = 123
t=27
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 86.69968828182549
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966
Streaks: 1


Episode = 123
t=28
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.89826488915679
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966
Streaks: 1


Episode = 123
t=29
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 86.80121472294196
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966
Streaks: 1


Episode = 123
t=30
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.90902691584454
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966
Streaks: 1


Episode = 123
t=3


Episode = 123
t=74
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.07244160480336
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966
Streaks: 1


Episode = 123
t=75
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.07244160480336
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966
Streaks: 1


Episode = 123
t=76
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 86.9842680137722
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966
Streaks: 1


Episode = 123
t=77
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.11226299602069
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966
Streaks: 1


Episode = 123
t=78
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.11226299602069
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966
Streaks: 1


Episode = 123
t=79
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.15196172345651
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966


Episode = 123
t=132
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.29818239558664
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966
Streaks: 1


Episode = 123
t=133
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.29818239558664
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966
Streaks: 1


Episode = 123
t=134
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.33730842639106
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966
Streaks: 1


Episode = 123
t=135
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 87.23534723012554
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966
Streaks: 1


Episode = 123
t=136
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.3722108558463
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966
Streaks: 1


Episode = 123
t=137
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 87.34373233225963
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723


Episode = 123
t=193
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.77366979984711
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966
Streaks: 1


Episode = 123
t=194
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.77366979984711
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966
Streaks: 1


Episode = 123
t=195
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 87.63831242508026
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966
Streaks: 1


Episode = 123
t=196
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.81133116351448
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966
Streaks: 1


Episode = 123
t=197
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.81133116351448
Explore rate: 0.30803489723263966
Learning rate: 0.30803489723263966
Streaks: 1


Episode = 123
t=198
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.84887651703897
Explore rate: 0.30803489723263966
Learning rate: 0.3080348972


Episode = 124
t=43
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.06486534321384
Explore rate: 0.30451832350980257
Learning rate: 0.30451832350980257
Streaks: 2


Episode = 124
t=44
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 88.05270678935211
Explore rate: 0.30451832350980257
Learning rate: 0.30451832350980257
Streaks: 2


Episode = 124
t=45
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.12282159115547
Explore rate: 0.30451832350980257
Learning rate: 0.30451832350980257
Streaks: 2


Episode = 124
t=46
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 88.11022621582137
Explore rate: 0.30451832350980257
Learning rate: 0.30451832350980257
Streaks: 2


Episode = 124
t=47
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.15519260837145
Explore rate: 0.30451832350980257
Learning rate: 0.30451832350980257
Streaks: 2


Episode = 124
t=48
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 88.15998891518696
Explore rate: 0.30451832350980257
Learning rate: 0.3045183235098025


Episode = 124
t=103
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.4836832951482
Explore rate: 0.30451832350980257
Learning rate: 0.30451832350980257
Streaks: 2


Episode = 124
t=104
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.4836832951482
Explore rate: 0.30451832350980257
Learning rate: 0.30451832350980257
Streaks: 2


Episode = 124
t=105
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.51875258970789
Explore rate: 0.30451832350980257
Learning rate: 0.30451832350980257
Streaks: 2


Episode = 124
t=106
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.51875258970789
Explore rate: 0.30451832350980257
Learning rate: 0.30451832350980257
Streaks: 2


Episode = 124
t=107
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.55371509183972
Explore rate: 0.30451832350980257
Learning rate: 0.30451832350980257
Streaks: 2


Episode = 124
t=108
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.5885711267462
Explore rate: 0.30451832350980257
Learning rate: 0.3045183235098



Episode = 124
t=163
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.78720195031747
Explore rate: 0.30451832350980257
Learning rate: 0.30451832350980257
Streaks: 2


Episode = 124
t=164
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.78720195031747
Explore rate: 0.30451832350980257
Learning rate: 0.30451832350980257
Streaks: 2


Episode = 124
t=165
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.8213469749569
Explore rate: 0.30451832350980257
Learning rate: 0.30451832350980257
Streaks: 2


Episode = 124
t=166
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 88.67319360941873
Explore rate: 0.30451832350980257
Learning rate: 0.30451832350980257
Streaks: 2


Episode = 124
t=167
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.83439868567469
Explore rate: 0.30451832350980257
Learning rate: 0.30451832350980257
Streaks: 2


Episode = 124
t=168
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 88.36297357883659
Explore rate: 0.30451832350980257
Learning rate: 0.3045183235


Episode = 125
t=12
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 89.06768692308624
Explore rate: 0.30102999566398114
Learning rate: 0.30102999566398114
Streaks: 3


Episode = 125
t=13
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 88.51987864972503
Explore rate: 0.30102999566398114
Learning rate: 0.30102999566398114
Streaks: 3


Episode = 125
t=14
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.80551791810915
Explore rate: 0.30102999566398114
Learning rate: 0.30102999566398114
Streaks: 3


Episode = 125
t=15
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.80551791810915
Explore rate: 0.30102999566398114
Learning rate: 0.30102999566398114
Streaks: 3


Episode = 125
t=16
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.83921666703486
Explore rate: 0.30102999566398114
Learning rate: 0.30102999566398114
Streaks: 3


Episode = 125
t=17
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 88.78771527676336
Explore rate: 0.30102999566398114
Learning rate: 0.3010299956639811



Episode = 125
t=62
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 89.0156572004711
Explore rate: 0.30102999566398114
Learning rate: 0.30102999566398114
Streaks: 3


Episode = 125
t=63
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.12942261885674
Explore rate: 0.30102999566398114
Learning rate: 0.30102999566398114
Streaks: 3


Episode = 125
t=64
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.12942261885674
Explore rate: 0.30102999566398114
Learning rate: 0.30102999566398114
Streaks: 3


Episode = 125
t=65
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.16214631747584
Explore rate: 0.30102999566398114
Learning rate: 0.30102999566398114
Streaks: 3


Episode = 125
t=66
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 89.18397557665432
Explore rate: 0.30102999566398114
Learning rate: 0.30102999566398114
Streaks: 3


Episode = 125
t=67
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.13568657807824
Explore rate: 0.30102999566398114
Learning rate: 0.3010299956639811


Episode = 125
t=123
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 89.37618103486336
Explore rate: 0.30102999566398114
Learning rate: 0.30102999566398114
Streaks: 3


Episode = 125
t=124
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.48028941276432
Explore rate: 0.30102999566398114
Learning rate: 0.30102999566398114
Streaks: 3


Episode = 125
t=125
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 89.43918826373609
Explore rate: 0.30102999566398114
Learning rate: 0.30102999566398114
Streaks: 3


Episode = 125
t=126
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 89.43918826373609
Explore rate: 0.30102999566398114
Learning rate: 0.30102999566398114
Streaks: 3


Episode = 125
t=127
Action: 0
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 89.43918826373609
Explore rate: 0.30102999566398114
Learning rate: 0.30102999566398114
Streaks: 3


Episode = 125
t=128
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.49970794516233
Explore rate: 0.30102999566398114
Learning rate: 0.3010299956

Explore rate: 0.30102999566398114
Learning rate: 0.30102999566398114
Streaks: 3


Episode = 125
t=172
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.62920751810182
Explore rate: 0.30102999566398114
Learning rate: 0.30102999566398114
Streaks: 3


Episode = 125
t=173
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 89.55971545377311
Explore rate: 0.30102999566398114
Learning rate: 0.30102999566398114
Streaks: 3


Episode = 125
t=174
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 89.55971545377311
Explore rate: 0.30102999566398114
Learning rate: 0.30102999566398114
Streaks: 3


Episode = 125
t=175
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 89.55971545377311
Explore rate: 0.30102999566398114
Learning rate: 0.30102999566398114
Streaks: 3


Episode = 125
t=176
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.73514858100526
Explore rate: 0.30102999566398114
Learning rate: 0.30102999566398114
Streaks: 3


Episode = 125
t=177
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q


Episode = 126
t=31
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.02043311241628
Explore rate: 0.29756946355447467
Learning rate: 0.29756946355447467
Streaks: 4


Episode = 126
t=32
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 89.97386976196488
Explore rate: 0.29756946355447467
Learning rate: 0.29756946355447467
Streaks: 4


Episode = 126
t=33
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.81334515505966
Explore rate: 0.29756946355447467
Learning rate: 0.29756946355447467
Streaks: 4


Episode = 126
t=34
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.81334515505966
Explore rate: 0.29756946355447467
Learning rate: 0.29756946355447467
Streaks: 4


Episode = 126
t=35
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.8436575292359
Explore rate: 0.29756946355447467
Learning rate: 0.29756946355447467
Streaks: 4


Episode = 126
t=36
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 89.73698766570358
Explore rate: 0.29756946355447467
Learning rate: 0.29756946355447467

State: (0, 0, 4, 2)
Reward: 1.0
Best Q: 53.08161744011811
Explore rate: 0.2941362877160807
Learning rate: 0.2941362877160807
Streaks: 0

Episode 127 finished after 10.000000 time steps

Episode = 128
t=0
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.29803764595916
Explore rate: 0.2907300390241693
Learning rate: 0.2907300390241693
Streaks: 0


Episode = 128
t=1
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 83.87639093575135
Explore rate: 0.2907300390241693
Learning rate: 0.2907300390241693
Streaks: 0


Episode = 128
t=2
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 83.87639093575135
Explore rate: 0.2907300390241693
Learning rate: 0.2907300390241693
Streaks: 0


Episode = 128
t=3
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 83.87639093575135
Explore rate: 0.2907300390241693
Learning rate: 0.2907300390241693
Streaks: 0


Episode = 128
t=4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.11865315338487
Explore rate: 0.2907300390241693
Learning rate: 0.29073003902416


Episode = 130
t=10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.8503469073245
Explore rate: 0.2839966563652008
Learning rate: 0.2839966563652008
Streaks: 0


Episode = 130
t=11
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.8503469073245
Explore rate: 0.2839966563652008
Learning rate: 0.2839966563652008
Streaks: 0


Episode = 130
t=12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.87917158274036
Explore rate: 0.2839966563652008
Learning rate: 0.2839966563652008
Streaks: 0


Episode = 130
t=13
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 90.00972610782662
Explore rate: 0.2839966563652008
Learning rate: 0.2839966563652008
Streaks: 0


Episode = 130
t=14
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.96520062629904
Explore rate: 0.2839966563652008
Learning rate: 0.2839966563652008
Streaks: 0


Episode = 130
t=15
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 90.02557951464401
Explore rate: 0.2839966563652008
Learning rate: 0.2839966563652008
Streaks: 0




Episode = 131
t=5
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 89.27969717488057
Explore rate: 0.2806687130162734
Learning rate: 0.2806687130162734
Streaks: 0


Episode = 131
t=6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.27908668906774
Explore rate: 0.2806687130162734
Learning rate: 0.2806687130162734
Streaks: 0


Episode = 131
t=7
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.27908668906774
Explore rate: 0.2806687130162734
Learning rate: 0.2806687130162734
Streaks: 0


Episode = 131
t=8
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 89.30961608002654
Explore rate: 0.2806687130162734
Learning rate: 0.2806687130162734
Streaks: 0


Episode = 131
t=9
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.317659896902
Explore rate: 0.2806687130162734
Learning rate: 0.2806687130162734
Streaks: 0


Episode = 131
t=10
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 89.3418557142441
Explore rate: 0.2806687130162734
Learning rate: 0.2806687130162734
Streaks: 0


Episo

Reward: 1.0
Best Q: 72.54538959642706
Explore rate: 0.2740883677049518
Learning rate: 0.2740883677049518
Streaks: 0


Episode = 133
t=32
Action: 1
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 72.62063948994196
Explore rate: 0.2740883677049518
Learning rate: 0.2740883677049518
Streaks: 0

Episode 133 finished after 32.000000 time steps

Episode = 134
t=0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.80882808718327
Explore rate: 0.2708352103072299
Learning rate: 0.2708352103072299
Streaks: 0


Episode = 134
t=1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 89.41493367594586
Explore rate: 0.2708352103072299
Learning rate: 0.2708352103072299
Streaks: 0


Episode = 134
t=2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.07222653609277
Explore rate: 0.2708352103072299
Learning rate: 0.2708352103072299
Streaks: 0


Episode = 134
t=3
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 83.82486866121755
Explore rate: 0.2708352103072299
Learning rate: 0.2708352103072299
Streaks: 0


Epi


Episode = 136
t=18
Action: 1
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 72.84162370912682
Explore rate: 0.26440110030182007
Learning rate: 0.26440110030182007
Streaks: 0


Episode = 136
t=19
Action: 1
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 72.84162370912682
Explore rate: 0.26440110030182007
Learning rate: 0.26440110030182007
Streaks: 0


Episode = 136
t=20
Action: 1
State: (0, 0, 1, 0)
Reward: 1.0
Best Q: 72.913430754864
Explore rate: 0.26440110030182007
Learning rate: 0.26440110030182007
Streaks: 0

Episode 136 finished after 20.000000 time steps

Episode = 137
t=0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.58648895346951
Explore rate: 0.2612194415156308
Learning rate: 0.2612194415156308
Streaks: 0


Episode = 137
t=1
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.61369106886742
Explore rate: 0.2612194415156308
Learning rate: 0.2612194415156308
Streaks: 0


Episode = 137
t=2
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.61369106886742
Explore rate: 0.261219441515


Episode = 138
t=30
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.34995149678078
Explore rate: 0.25806092227080113
Learning rate: 0.25806092227080113
Streaks: 0


Episode = 138
t=31
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 89.31790266506233
Explore rate: 0.25806092227080113
Learning rate: 0.25806092227080113
Streaks: 0


Episode = 138
t=32
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.36924726461022
Explore rate: 0.25806092227080113
Learning rate: 0.25806092227080113
Streaks: 0


Episode = 138
t=33
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 89.35858651832856
Explore rate: 0.25806092227080113
Learning rate: 0.25806092227080113
Streaks: 0


Episode = 138
t=34
Action: 1
State: (0, 0, 4, 1)
Reward: 1.0
Best Q: 53.06640190305819
Explore rate: 0.25806092227080113
Learning rate: 0.25806092227080113
Streaks: 0


Episode = 138
t=35
Action: 0
State: (0, 0, 4, 2)
Reward: 1.0
Best Q: 53.21962142883618
Explore rate: 0.25806092227080113
Learning rate: 0.2580609222708011


Episode = 141
t=20
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 85.4709988494654
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0


Episode = 141
t=21
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 88.39664921051519
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0


Episode = 141
t=22
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.25467790567974
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0


Episode = 141
t=23
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 87.89808367660792
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0


Episode = 141
t=24
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.69352725625868
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0


Episode = 141
t=25
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 87.63158130267249
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0



Episode = 141
t=70
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.29793150955257
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0


Episode = 141
t=71
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.33201141707482
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0


Episode = 141
t=72
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.33201141707482
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0


Episode = 141
t=73
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 87.56113904931323
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0


Episode = 141
t=74
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.6520536704133
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0


Episode = 141
t=75
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.6520536704133
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0




Episode = 141
t=132
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.4961906179569
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0


Episode = 141
t=133
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 86.35242562413627
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0


Episode = 141
t=134
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 86.35242562413627
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0


Episode = 141
t=135
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 86.35242562413627
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0


Episode = 141
t=136
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.4961906179569
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0


Episode = 141
t=137
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 86.69350516029515
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streak


Episode = 141
t=182
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 86.37816744261038
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0


Episode = 141
t=183
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 86.37816744261038
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0


Episode = 141
t=184
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.5366040134743
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0


Episode = 141
t=185
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.5366040134743
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0


Episode = 141
t=186
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.56760308364609
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streaks: 0


Episode = 141
t=187
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.59852505265279
Explore rate: 0.2487208960166577
Learning rate: 0.2487208960166577
Streak

Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 81.7678765419116
Explore rate: 0.24260397120697585
Learning rate: 0.24260397120697585
Streaks: 0


Episode = 143
t=6
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 81.7678765419116
Explore rate: 0.24260397120697585
Learning rate: 0.24260397120697585
Streaks: 0


Episode = 143
t=7
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 81.81210839745629
Explore rate: 0.24260397120697585
Learning rate: 0.24260397120697585
Streaks: 0


Episode = 143
t=8
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 81.85623294476288
Explore rate: 0.24260397120697585
Learning rate: 0.24260397120697585
Streaks: 0


Episode = 143
t=9
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 81.90025044416542
Explore rate: 0.24260397120697585
Learning rate: 0.24260397120697585
Streaks: 0


Episode = 143
t=10
Action: 0
State: (0, 0, 4, 2)
Reward: 1.0
Best Q: 53.79143814774525
Explore rate: 0.24260397120697585
Learning rate: 0.24260397120697585
Streaks: 0


Episode = 1


Episode = 144
t=42
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 86.94071194686586
Explore rate: 0.23957751657678794
Learning rate: 0.23957751657678794
Streaks: 0


Episode = 144
t=43
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 86.94071194686586
Explore rate: 0.23957751657678794
Learning rate: 0.23957751657678794
Streaks: 0


Episode = 144
t=44
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 86.97199906486617
Explore rate: 0.23957751657678794
Learning rate: 0.23957751657678794
Streaks: 0


Episode = 144
t=45
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 86.35281596008595
Explore rate: 0.23957751657678794
Learning rate: 0.23957751657678794
Streaks: 0


Episode = 144
t=46
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.78259300717122
Explore rate: 0.23957751657678794
Learning rate: 0.23957751657678794
Streaks: 0


Episode = 144
t=47
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.78259300717122
Explore rate: 0.23957751657678794
Learning rate: 0.2395775165767879

Learning rate: 0.23957751657678794
Streaks: 0


Episode = 144
t=103
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 87.37624050164149
Explore rate: 0.23957751657678794
Learning rate: 0.23957751657678794
Streaks: 0


Episode = 144
t=104
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 87.37624050164149
Explore rate: 0.23957751657678794
Learning rate: 0.23957751657678794
Streaks: 0


Episode = 144
t=105
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 84.72114099946235
Explore rate: 0.23957751657678794
Learning rate: 0.23957751657678794
Streaks: 0


Episode = 144
t=106
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 84.72114099946235
Explore rate: 0.23957751657678794
Learning rate: 0.23957751657678794
Streaks: 0


Episode = 144
t=107
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 84.72114099946235
Explore rate: 0.23957751657678794
Learning rate: 0.23957751657678794
Streaks: 0


Episode = 144
t=108
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 84.75774571041711
Explore rate: 


Episode = 146
t=4
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 86.59052916180778
Explore rate: 0.23358715288760057
Learning rate: 0.23358715288760057
Streaks: 0


Episode = 146
t=5
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 78.10654842873676
Explore rate: 0.23358715288760057
Learning rate: 0.23358715288760057
Streaks: 0


Episode = 146
t=6
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 78.10654842873676
Explore rate: 0.23358715288760057
Learning rate: 0.23358715288760057
Streaks: 0


Episode = 146
t=7
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 78.1576887189309
Explore rate: 0.23358715288760057
Learning rate: 0.23358715288760057
Streaks: 0


Episode = 146
t=8
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.47971441401934
Explore rate: 0.23358715288760057
Learning rate: 0.23358715288760057
Streaks: 0


Episode = 146
t=9
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.47971441401934
Explore rate: 0.23358715288760057
Learning rate: 0.23358715288760057
Strea

Episode = 147
t=2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.63683203384205
Explore rate: 0.2306226739238615
Learning rate: 0.2306226739238615
Streaks: 0


Episode = 147
t=3
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.63683203384205
Explore rate: 0.2306226739238615
Learning rate: 0.2306226739238615
Streaks: 0


Episode = 147
t=4
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.66534430238731
Explore rate: 0.2306226739238615
Learning rate: 0.2306226739238615
Streaks: 0


Episode = 147
t=5
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 84.29093088858416
Explore rate: 0.2306226739238615
Learning rate: 0.2306226739238615
Streaks: 0


Episode = 147
t=6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.52158470604023
Explore rate: 0.2306226739238615
Learning rate: 0.2306226739238615
Streaks: 0


Episode = 147
t=7
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.52158470604023
Explore rate: 0.2306226739238615
Learning rate: 0.2306226739238615
Streaks: 0


Epis


Episode = 147
t=52
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.77719814623089
Explore rate: 0.2306226739238615
Learning rate: 0.2306226739238615
Streaks: 0


Episode = 147
t=53
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 83.44413726435552
Explore rate: 0.2306226739238615
Learning rate: 0.2306226739238615
Streaks: 0


Episode = 147
t=54
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.77719814623089
Explore rate: 0.2306226739238615
Learning rate: 0.2306226739238615
Streaks: 0


Episode = 147
t=55
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.77719814623089
Explore rate: 0.2306226739238615
Learning rate: 0.2306226739238615
Streaks: 0


Episode = 147
t=56
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.80538669869446
Explore rate: 0.2306226739238615
Learning rate: 0.2306226739238615
Streaks: 0


Episode = 147
t=57
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 87.4437940732156
Explore rate: 0.2306226739238615
Learning rate: 0.2306226739238615
Streaks: 0



Episode = 149
t=9
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.8202486832429
Explore rate: 0.22475374025976358
Learning rate: 0.22475374025976358
Streaks: 0


Episode = 149
t=10
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.84762312988164
Explore rate: 0.22475374025976358
Learning rate: 0.22475374025976358
Streaks: 0


Episode = 149
t=11
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 87.33266768387429
Explore rate: 0.22475374025976358
Learning rate: 0.22475374025976358
Streaks: 0


Episode = 149
t=12
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.78152951076714
Explore rate: 0.22475374025976358
Learning rate: 0.22475374025976358
Streaks: 0


Episode = 149
t=13
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 83.4773426418022
Explore rate: 0.22475374025976358
Learning rate: 0.22475374025976358
Streaks: 0


Episode = 149
t=14
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 85.92249736806185
Explore rate: 0.22475374025976358
Learning rate: 0.22475374025976358
S


Episode = 149
t=58
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 86.42851293897584
Explore rate: 0.22475374025976358
Learning rate: 0.22475374025976358
Streaks: 0


Episode = 149
t=59
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 87.32387490383223
Explore rate: 0.22475374025976358
Learning rate: 0.22475374025976358
Streaks: 0


Episode = 149
t=60
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 86.68188584545985
Explore rate: 0.22475374025976358
Learning rate: 0.22475374025976358
Streaks: 0


Episode = 149
t=61
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 87.20951842145159
Explore rate: 0.22475374025976358
Learning rate: 0.22475374025976358
Streaks: 0


Episode = 149
t=62
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 85.25882887275661
Explore rate: 0.22475374025976358
Learning rate: 0.22475374025976358
Streaks: 0


Episode = 149
t=63
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 85.25882887275661
Explore rate: 0.22475374025976358
Learning rate: 0.2247537402597635

Streaks: 0


Episode = 149
t=118
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.62366159065769
Explore rate: 0.22475374025976358
Learning rate: 0.22475374025976358
Streaks: 0


Episode = 149
t=119
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.6537254115425
Explore rate: 0.22475374025976358
Learning rate: 0.22475374025976358
Streaks: 0


Episode = 149
t=120
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 86.81585242727618
Explore rate: 0.22475374025976358
Learning rate: 0.22475374025976358
Streaks: 0


Episode = 149
t=121
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.74305041116116
Explore rate: 0.22475374025976358
Learning rate: 0.22475374025976358
Streaks: 0


Episode = 149
t=122
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 86.82928539190115
Explore rate: 0.22475374025976358
Learning rate: 0.22475374025976358
Streaks: 0


Episode = 149
t=123
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.79203371932435
Explore rate: 0.22475374025976358
Learning rate: 0


Episode = 149
t=175
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.09411227630244
Explore rate: 0.22475374025976358
Learning rate: 0.22475374025976358
Streaks: 0


Episode = 149
t=176
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.09411227630244
Explore rate: 0.22475374025976358
Learning rate: 0.22475374025976358
Streaks: 0


Episode = 149
t=177
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.12311874167517
Explore rate: 0.22475374025976358
Learning rate: 0.22475374025976358
Streaks: 0


Episode = 149
t=178
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 87.16404673360587
Explore rate: 0.22475374025976358
Learning rate: 0.22475374025976358
Streaks: 0


Episode = 149
t=179
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.18360335907641
Explore rate: 0.22475374025976358
Learning rate: 0.22475374025976358
Streaks: 0


Episode = 149
t=180
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 87.19724748914423
Explore rate: 0.22475374025976358
Learning rate: 0.2247537402


Episode = 150
t=24
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.44921408940279
Explore rate: 0.22184874961635637
Learning rate: 0.22184874961635637
Streaks: 1


Episode = 150
t=25
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.44921408940279
Explore rate: 0.22184874961635637
Learning rate: 0.22184874961635637
Streaks: 1


Episode = 150
t=26
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.47705785101247
Explore rate: 0.22184874961635637
Learning rate: 0.22184874961635637
Streaks: 1


Episode = 150
t=27
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.47705785101247
Explore rate: 0.22184874961635637
Learning rate: 0.22184874961635637
Streaks: 1


Episode = 150
t=28
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.50483984158518
Explore rate: 0.22184874961635637
Learning rate: 0.22184874961635637
Streaks: 1


Episode = 150
t=29
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.53256019815919
Explore rate: 0.22184874961635637
Learning rate: 0.2218487496163563


Episode = 150
t=83
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.52690796880174
Explore rate: 0.22184874961635637
Learning rate: 0.22184874961635637
Streaks: 1


Episode = 150
t=84
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.52690796880174
Explore rate: 0.22184874961635637
Learning rate: 0.22184874961635637
Streaks: 1


Episode = 150
t=85
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.55457936751145
Explore rate: 0.22184874961635637
Learning rate: 0.22184874961635637
Streaks: 1


Episode = 150
t=86
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 87.44262144078348
Explore rate: 0.22184874961635637
Learning rate: 0.22184874961635637
Streaks: 1


Episode = 150
t=87
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.57908479262491
Explore rate: 0.22184874961635637
Learning rate: 0.22184874961635637
Streaks: 1


Episode = 150
t=88
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.57908479262491
Explore rate: 0.22184874961635637
Learning rate: 0.2218487496163563


Episode = 150
t=133
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.7905319610754
Explore rate: 0.22184874961635637
Learning rate: 0.22184874961635637
Streaks: 1


Episode = 150
t=134
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 87.58520158269475
Explore rate: 0.22184874961635637
Learning rate: 0.22184874961635637
Streaks: 1


Episode = 150
t=135
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.85234160221576
Explore rate: 0.22184874961635637
Learning rate: 0.22184874961635637
Streaks: 1


Episode = 150
t=136
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 87.67141569026113
Explore rate: 0.22184874961635637
Learning rate: 0.22184874961635637
Streaks: 1


Episode = 150
t=137
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.85234160221576
Explore rate: 0.22184874961635637
Learning rate: 0.22184874961635637
Streaks: 1


Episode = 150
t=138
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.85234160221576
Explore rate: 0.22184874961635637
Learning rate: 0.22184874961


Episode = 150
t=183
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.05561460816705
Explore rate: 0.22184874961635637
Learning rate: 0.22184874961635637
Streaks: 1


Episode = 150
t=184
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 87.84218944163328
Explore rate: 0.22184874961635637
Learning rate: 0.22184874961635637
Streaks: 1


Episode = 150
t=185
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.05585826982383
Explore rate: 0.22184874961635637
Learning rate: 0.22184874961635637
Streaks: 1


Episode = 150
t=186
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 87.91608953308014
Explore rate: 0.22184874961635637
Learning rate: 0.22184874961635637
Streaks: 1


Episode = 150
t=187
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.04310227428448
Explore rate: 0.22184874961635637
Learning rate: 0.22184874961635637
Streaks: 1


Episode = 150
t=188
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.04310227428448
Explore rate: 0.22184874961635637
Learning rate: 0.2218487496


Episode = 151
t=34
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 88.11416031385289
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812
Streaks: 2


Episode = 151
t=35
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.12590972970104
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812
Streaks: 2


Episode = 151
t=36
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.12590972970104
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812
Streaks: 2


Episode = 151
t=37
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.15190960126777
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812
Streaks: 2


Episode = 151
t=38
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 88.22424603586661
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812
Streaks: 2


Episode = 151
t=39
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 87.6947691837608
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812


Episode = 151
t=84
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.33784894079163
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812
Streaks: 2


Episode = 151
t=85
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 88.31491142867448
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812
Streaks: 2


Episode = 151
t=86
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.37835690596484
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812
Streaks: 2


Episode = 151
t=87
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.37835690596484
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812
Streaks: 2


Episode = 151
t=88
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 87.9774289633483
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812
Streaks: 2


Episode = 151
t=89
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.5042314360638
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812



Episode = 151
t=145
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 88.32531296902953
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812
Streaks: 2


Episode = 151
t=146
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.56575773871052
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812
Streaks: 2


Episode = 151
t=147
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.56575773871052
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812
Streaks: 2


Episode = 151
t=148
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.59079450561131
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812
Streaks: 2


Episode = 151
t=149
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 88.43069952808489
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812
Streaks: 2


Episode = 151
t=150
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.60058393604945
Explore rate: 0.21896306137886812
Learning rate: 0.2189630613


Episode = 151
t=194
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.72360830791527
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812
Streaks: 2


Episode = 151
t=195
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.72360830791527
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812
Streaks: 2


Episode = 151
t=196
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.74829944037732
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812
Streaks: 2


Episode = 151
t=197
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 88.63813336887297
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812
Streaks: 2


Episode = 151
t=198
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.84896639964737
Explore rate: 0.21896306137886812
Learning rate: 0.21896306137886812
Streaks: 2


Episode = 151
t=199
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.84896639964737
Explore rate: 0.21896306137886812
Learning rate: 0.2189630613

Best Q: 88.76095977882431
Explore rate: 0.2160964207272651
Learning rate: 0.2160964207272651
Streaks: 3


Episode = 152
t=55
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 88.78524694246637
Explore rate: 0.2160964207272651
Learning rate: 0.2160964207272651
Streaks: 3


Episode = 152
t=56
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 88.80948162241711
Explore rate: 0.2160964207272651
Learning rate: 0.2160964207272651
Streaks: 3


Episode = 152
t=57
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 88.80948162241711
Explore rate: 0.2160964207272651
Learning rate: 0.2160964207272651
Streaks: 3


Episode = 152
t=58
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.99278194595169
Explore rate: 0.2160964207272651
Learning rate: 0.2160964207272651
Streaks: 3


Episode = 152
t=59
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.99278194595169
Explore rate: 0.2160964207272651
Learning rate: 0.2160964207272651
Streaks: 3


Episode = 152
t=60
Action: 0
State: (0, 0, 1, 1)
Reward: 1.

Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 87.6895263673802
Explore rate: 0.2104192878355745
Learning rate: 0.2104192878355745
Streaks: 0


Episode = 154
t=12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 86.95367813510336
Explore rate: 0.2104192878355745
Learning rate: 0.2104192878355745
Streaks: 0


Episode = 154
t=13
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 87.58477872270214
Explore rate: 0.2104192878355745
Learning rate: 0.2104192878355745
Streaks: 0


Episode = 154
t=14
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 87.58477872270214
Explore rate: 0.2104192878355745
Learning rate: 0.2104192878355745
Streaks: 0


Episode = 154
t=15
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 87.61090274289704
Explore rate: 0.2104192878355745
Learning rate: 0.2104192878355745
Streaks: 0


Episode = 154
t=16
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 87.6369717931147
Explore rate: 0.2104192878355745
Learning rate: 0.2104192878355745
Streaks: 0


Episode = 154
t=17


Best Q: 53.98329105106437
Explore rate: 0.2020403562628038
Learning rate: 0.2020403562628038
Streaks: 0

Episode 157 finished after 20.000000 time steps

Episode = 158
t=0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.39506223203205
Explore rate: 0.19928292171761497
Learning rate: 0.19928292171761497
Streaks: 0


Episode = 158
t=1
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 86.3711496778283
Explore rate: 0.19928292171761497
Learning rate: 0.19928292171761497
Streaks: 0


Episode = 158
t=2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.7414709197536
Explore rate: 0.19928292171761497
Learning rate: 0.19928292171761497
Streaks: 0


Episode = 158
t=3
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 86.3711496778283
Explore rate: 0.19928292171761497
Learning rate: 0.19928292171761497
Streaks: 0


Episode = 158
t=4
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 87.49554927008955
Explore rate: 0.19928292171761497
Learning rate: 0.19928292171761497
Streaks: 0


Episode =


Episode = 159
t=43
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 87.94198155019087
Explore rate: 0.19654288435158607
Learning rate: 0.19654288435158607
Streaks: 0


Episode = 159
t=44
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 88.77027763987513
Explore rate: 0.19654288435158607
Learning rate: 0.19654288435158607
Streaks: 0


Episode = 159
t=45
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.01463963631389
Explore rate: 0.19654288435158607
Learning rate: 0.19654288435158607
Streaks: 0


Episode = 159
t=46
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.01463963631389
Explore rate: 0.19654288435158607
Learning rate: 0.19654288435158607
Streaks: 0


Episode = 159
t=47
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.03819600927261
Explore rate: 0.19654288435158607
Learning rate: 0.19654288435158607
Streaks: 0


Episode = 159
t=48
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 88.06826469661122
Explore rate: 0.19654288435158607
Learning rate: 0.1965428843515860


Episode = 159
t=104
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 88.55275317513392
Explore rate: 0.19654288435158607
Learning rate: 0.19654288435158607
Streaks: 0


Episode = 159
t=105
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.50764434515173
Explore rate: 0.19654288435158607
Learning rate: 0.19654288435158607
Streaks: 0


Episode = 159
t=106
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.50764434515173
Explore rate: 0.19654288435158607
Learning rate: 0.19654288435158607
Streaks: 0


Episode = 159
t=107
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.53023175243571
Explore rate: 0.19654288435158607
Learning rate: 0.19654288435158607
Streaks: 0


Episode = 159
t=108
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 88.34325972341101
Explore rate: 0.19654288435158607
Learning rate: 0.19654288435158607
Streaks: 0


Episode = 159
t=109
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.55410615801567
Explore rate: 0.19654288435158607
Learning rate: 0.1965428843


Episode = 159
t=154
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.30269362693289
Explore rate: 0.19654288435158607
Learning rate: 0.19654288435158607
Streaks: 0


Episode = 159
t=155
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 88.37400670079954
Explore rate: 0.19654288435158607
Learning rate: 0.19654288435158607
Streaks: 0


Episode = 159
t=156
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.3580314252579
Explore rate: 0.19654288435158607
Learning rate: 0.19654288435158607
Streaks: 0


Episode = 159
t=157
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 88.3580314252579
Explore rate: 0.19654288435158607
Learning rate: 0.19654288435158607
Streaks: 0


Episode = 159
t=158
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 87.71731742849589
Explore rate: 0.19654288435158607
Learning rate: 0.19654288435158607
Streaks: 0


Episode = 159
t=159
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.68112669853691
Explore rate: 0.19654288435158607
Learning rate: 0.196542884351


Episode = 160
t=6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.79255334007419
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks: 1


Episode = 160
t=7
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.79255334007419
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks: 1


Episode = 160
t=8
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.8142756161062
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks: 1


Episode = 160
t=9
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 88.3883424775143
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks: 1


Episode = 160
t=10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.8142756161062
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks: 1


Episode = 160
t=11
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 88.49257702342825
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks: 1


Epis

Streaks: 1


Episode = 160
t=64
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.91084177659232
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks: 1


Episode = 160
t=65
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.91084177659232
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks: 1


Episode = 160
t=66
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.9323347859459
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks: 1


Episode = 160
t=67
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 88.79760769656967
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks: 1


Episode = 160
t=68
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 88.94522810772744
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks: 1


Episode = 160
t=69
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 88.8476458502584
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128



Episode = 160
t=123
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 88.92945618474
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks: 1


Episode = 160
t=124
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.05047470240743
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks: 1


Episode = 160
t=125
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 88.97413436976316
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks: 1


Episode = 160
t=126
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.05704875278127
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks: 1


Episode = 160
t=127
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 89.01141446859113
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks: 1


Episode = 160
t=128
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.06950199396809
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks


Episode = 160
t=172
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.22956001779755
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks: 1


Episode = 160
t=173
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 89.19425573347628
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks: 1


Episode = 160
t=174
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.26049026112386
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks: 1


Episode = 160
t=175
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 89.227908631918
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks: 1


Episode = 160
t=176
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.27505375919563
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks: 1


Episode = 160
t=177
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 88.9150975361146
Explore rate: 0.1938200260161128
Learning rate: 0.1938200260161128
Streaks


Episode = 161
t=21
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.48127964926569
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784
Streaks: 2


Episode = 161
t=22
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.50138241042885
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784
Streaks: 2


Episode = 161
t=23
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 89.37301383686531
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784
Streaks: 2


Episode = 161
t=24
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 88.98879098911921
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784
Streaks: 2


Episode = 161
t=25
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 88.98879098911921
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784
Streaks: 2


Episode = 161
t=26
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.3443161533872
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784

Episode = 161
t=80
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.55315819553667
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784
Streaks: 2


Episode = 161
t=81
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.57312358663955
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784
Streaks: 2


Episode = 161
t=82
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 89.50226060678698
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784
Streaks: 2


Episode = 161
t=83
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.6400375818542
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784
Streaks: 2


Episode = 161
t=84
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.6400375818542
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784
Streaks: 2


Episode = 161
t=85
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.6598369341715
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784
St


Episode = 161
t=138
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.7878257329873
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784
Streaks: 2


Episode = 161
t=139
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.8073426412614
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784
Streaks: 2


Episode = 161
t=140
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.8073426412614
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784
Streaks: 2


Episode = 161
t=141
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.82682224996555
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784
Streaks: 2


Episode = 161
t=142
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 89.6965875438416
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784
Streaks: 2


Episode = 161
t=143
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.83735050116165
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018


Episode = 161
t=187
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.8364729666393
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784
Streaks: 2


Episode = 161
t=188
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.8364729666393
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784
Streaks: 2


Episode = 161
t=189
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 89.85589690317477
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784
Streaks: 2


Episode = 161
t=190
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 89.81548778802258
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784
Streaks: 2


Episode = 161
t=191
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 89.81548778802258
Explore rate: 0.19111413264018784
Learning rate: 0.19111413264018784
Streaks: 2


Episode = 161
t=192
Action: 0
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 89.81548778802258
Explore rate: 0.19111413264018784
Learning rate: 0.191114132640

Reward: 1.0
Best Q: 90.07452190364066
Explore rate: 0.1884249941294066
Learning rate: 0.1884249941294066
Streaks: 3


Episode = 162
t=47
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 89.99487698739327
Explore rate: 0.1884249941294066
Learning rate: 0.1884249941294066
Streaks: 3


Episode = 162
t=48
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.99919096519378
Explore rate: 0.1884249941294066
Learning rate: 0.1884249941294066
Streaks: 3


Episode = 162
t=49
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 89.99919096519378
Explore rate: 0.1884249941294066
Learning rate: 0.1884249941294066
Streaks: 3


Episode = 162
t=50
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 90.0180349890305
Explore rate: 0.1884249941294066
Learning rate: 0.1884249941294066
Streaks: 3


Episode = 162
t=51
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 90.00449921424091
Explore rate: 0.1884249941294066
Learning rate: 0.1884249941294066
Streaks: 3


Episode = 162
t=52
Action: 1
State: (0, 0, 3, 1)


Episode = 162
t=105
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 90.22997204817854
Explore rate: 0.1884249941294066
Learning rate: 0.1884249941294066
Streaks: 3


Episode = 162
t=106
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 90.2483812227732
Explore rate: 0.1884249941294066
Learning rate: 0.1884249941294066
Streaks: 3


Episode = 162
t=107
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 89.90663718879878
Explore rate: 0.1884249941294066
Learning rate: 0.1884249941294066
Streaks: 3


Episode = 162
t=108
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.26639009271716
Explore rate: 0.1884249941294066
Learning rate: 0.1884249941294066
Streaks: 3


Episode = 162
t=109
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.26639009271716
Explore rate: 0.1884249941294066
Learning rate: 0.1884249941294066
Streaks: 3


Episode = 162
t=110
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.28473064661354
Explore rate: 0.1884249941294066
Learning rate: 0.1884249941294066
Strea


Episode = 162
t=166
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 89.99276418150401
Explore rate: 0.1884249941294066
Learning rate: 0.1884249941294066
Streaks: 3


Episode = 162
t=167
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.39200943409139
Explore rate: 0.1884249941294066
Learning rate: 0.1884249941294066
Streaks: 3


Episode = 162
t=168
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.39200943409139
Explore rate: 0.1884249941294066
Learning rate: 0.1884249941294066
Streaks: 3


Episode = 162
t=169
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.41011328975115
Explore rate: 0.1884249941294066
Learning rate: 0.1884249941294066
Streaks: 3


Episode = 162
t=170
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.41011328975115
Explore rate: 0.1884249941294066
Learning rate: 0.1884249941294066
Streaks: 3


Episode = 162
t=171
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.42818303322196
Explore rate: 0.1884249941294066
Learning rate: 0.1884249941294066
Stre


Episode = 163
t=24
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.51030280974038
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982
Streaks: 4


Episode = 163
t=25
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.51030280974038
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982
Streaks: 4


Episode = 163
t=26
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.52793015042904
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982
Streaks: 4


Episode = 163
t=27
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 90.4983390494106
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982
Streaks: 4


Episode = 163
t=28
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.55440945462662
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982
Streaks: 4


Episode = 163
t=29
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 90.52629967352311
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982


Episode = 163
t=84
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 90.7011062226721
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982
Streaks: 4


Episode = 163
t=85
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 90.65857942259386
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982
Streaks: 4


Episode = 163
t=86
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 90.72462311320007
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982
Streaks: 4


Episode = 163
t=87
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 90.68807643248286
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982
Streaks: 4


Episode = 163
t=88
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 90.7351316012991
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982
Streaks: 4


Episode = 163
t=89
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 90.71402675902658
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982



Episode = 163
t=133
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.791415446394
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982
Streaks: 4


Episode = 163
t=134
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 90.72724616875497
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982
Streaks: 4


Episode = 163
t=135
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 90.88017266972373
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982
Streaks: 4


Episode = 163
t=136
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 90.88017266972373
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982
Streaks: 4


Episode = 163
t=137
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 90.89711296825482
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982
Streaks: 4


Episode = 163
t=138
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 90.64112560451602
Explore rate: 0.18575240426807982
Learning rate: 0.185752404268

State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 91.02461447373102
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982
Streaks: 4


Episode = 163
t=183
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 91.0412864681384
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982
Streaks: 4


Episode = 163
t=184
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 90.92499995802707
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982
Streaks: 4


Episode = 163
t=185
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 91.05009291529593
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982
Streaks: 4


Episode = 163
t=186
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 90.68712299042059
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982
Streaks: 4


Episode = 163
t=187
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 90.96535454260655
Explore rate: 0.18575240426807982
Learning rate: 0.18575240426807982
Streaks: 4


Episode = 1

Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 91.08259080209126
Explore rate: 0.18309616062433975
Learning rate: 0.18309616062433975
Streaks: 5


Episode = 164
t=33
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 91.08259080209126
Explore rate: 0.18309616062433975
Learning rate: 0.18309616062433975
Streaks: 5


Episode = 164
t=34
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 91.0989182359598
Explore rate: 0.18309616062433975
Learning rate: 0.18309616062433975
Streaks: 5


Episode = 164
t=35
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 91.04271105123516
Explore rate: 0.18309616062433975
Learning rate: 0.18309616062433975
Streaks: 5


Episode = 164
t=36
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 91.15969810491805
Explore rate: 0.18309616062433975
Learning rate: 0.18309616062433975
Streaks: 5


Episode = 164
t=37
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 91.15969810491805
Explore rate: 0.18309616062433975
Learning rate: 0.18309616062433975
Streaks: 5


Episod

State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 91.19798245657576
Explore rate: 0.18309616062433975
Learning rate: 0.18309616062433975
Streaks: 5


Episode = 164
t=85
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 91.1958655175484
Explore rate: 0.18309616062433975
Learning rate: 0.18309616062433975
Streaks: 5


Episode = 164
t=86
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 91.1958655175484
Explore rate: 0.18309616062433975
Learning rate: 0.18309616062433975
Streaks: 5


Episode = 164
t=87
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 91.21198554976198
Explore rate: 0.18309616062433975
Learning rate: 0.18309616062433975
Streaks: 5


Episode = 164
t=88
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 91.17469588609126
Explore rate: 0.18309616062433975
Learning rate: 0.18309616062433975
Streaks: 5


Episode = 164
t=89
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 91.23446115366743
Explore rate: 0.18309616062433975
Learning rate: 0.18309616062433975
Streaks: 5


Episode = 164
t=9


Episode = 164
t=142
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 91.39028563818196
Explore rate: 0.18309616062433975
Learning rate: 0.18309616062433975
Streaks: 5


Episode = 164
t=143
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 91.40604969461917
Explore rate: 0.18309616062433975
Learning rate: 0.18309616062433975
Streaks: 5


Episode = 164
t=144
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 91.32242865986618
Explore rate: 0.18309616062433975
Learning rate: 0.18309616062433975
Streaks: 5


Episode = 164
t=145
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 91.41948144378773
Explore rate: 0.18309616062433975
Learning rate: 0.18309616062433975
Streaks: 5


Episode = 164
t=146
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 91.12058653039362
Explore rate: 0.18309616062433975
Learning rate: 0.18309616062433975
Streaks: 5


Episode = 164
t=147
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 91.34036906965586
Explore rate: 0.18309616062433975
Learning rate: 0.1830961606


Episode = 165
t=1
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 91.45952693747448
Explore rate: 0.18045606445813134
Learning rate: 0.18045606445813134
Streaks: 6


Episode = 165
t=2
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 91.50606802054807
Explore rate: 0.18045606445813134
Learning rate: 0.18045606445813134
Streaks: 6


Episode = 165
t=3
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 91.48325337352942
Explore rate: 0.18045606445813134
Learning rate: 0.18045606445813134
Streaks: 6


Episode = 165
t=4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 91.51529931932359
Explore rate: 0.18045606445813134
Learning rate: 0.18045606445813134
Streaks: 6


Episode = 165
t=5
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 91.35182986539579
Explore rate: 0.18045606445813134
Learning rate: 0.18045606445813134
Streaks: 6


Episode = 165
t=6
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 91.35182986539579
Explore rate: 0.18045606445813134
Learning rate: 0.18045606445813134
Stre


Episode = 165
t=59
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 91.53901341470724
Explore rate: 0.18045606445813134
Learning rate: 0.18045606445813134
Streaks: 6


Episode = 165
t=60
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 91.62677199658826
Explore rate: 0.18045606445813134
Learning rate: 0.18045606445813134
Streaks: 6


Episode = 165
t=61
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 91.62677199658826
Explore rate: 0.18045606445813134
Learning rate: 0.18045606445813134
Streaks: 6


Episode = 165
t=62
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 91.56995998073899
Explore rate: 0.18045606445813134
Learning rate: 0.18045606445813134
Streaks: 6


Episode = 165
t=63
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 91.64411574924516
Explore rate: 0.18045606445813134
Learning rate: 0.18045606445813134
Streaks: 6


Episode = 165
t=64
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 91.59842053875006
Explore rate: 0.18045606445813134
Learning rate: 0.1804560644581313


Episode = 165
t=119
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 91.79802428867322
Explore rate: 0.18045606445813134
Learning rate: 0.18045606445813134
Streaks: 6


Episode = 165
t=120
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 91.79802428867322
Explore rate: 0.18045606445813134
Learning rate: 0.18045606445813134
Streaks: 6


Episode = 165
t=121
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 91.81282525124969
Explore rate: 0.18045606445813134
Learning rate: 0.18045606445813134
Streaks: 6


Episode = 165
t=122
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 91.82759950459159
Explore rate: 0.18045606445813134
Learning rate: 0.18045606445813134
Streaks: 6


Episode = 165
t=123
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 91.75975064891726
Explore rate: 0.18045606445813134
Learning rate: 0.18045606445813134
Streaks: 6


Episode = 165
t=124
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 91.84231209663714
Explore rate: 0.18045606445813134
Learning rate: 0.1804560644


Episode = 165
t=178
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 91.95773055585138
Explore rate: 0.18045606445813134
Learning rate: 0.18045606445813134
Streaks: 6


Episode = 165
t=179
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 91.74288662152983
Explore rate: 0.18045606445813134
Learning rate: 0.18045606445813134
Streaks: 6


Episode = 165
t=180
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 91.9142169033235
Explore rate: 0.18045606445813134
Learning rate: 0.18045606445813134
Streaks: 6


Episode = 165
t=181
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 91.9142169033235
Explore rate: 0.18045606445813134
Learning rate: 0.18045606445813134
Streaks: 6


Episode = 165
t=182
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 91.92880818928039
Explore rate: 0.18045606445813134
Learning rate: 0.18045606445813134
Streaks: 6


Episode = 165
t=183
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 91.94337314437688
Explore rate: 0.18045606445813134
Learning rate: 0.180456064458


Episode = 166
t=37
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 92.0476922488254
Explore rate: 0.1778319206319825
Learning rate: 0.1778319206319825
Streaks: 7


Episode = 166
t=38
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 92.06183399043388
Explore rate: 0.1778319206319825
Learning rate: 0.1778319206319825
Streaks: 7


Episode = 166
t=39
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 92.06183399043388
Explore rate: 0.1778319206319825
Learning rate: 0.1778319206319825
Streaks: 7


Episode = 166
t=40
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 92.07595058351166
Explore rate: 0.1778319206319825
Learning rate: 0.1778319206319825
Streaks: 7


Episode = 166
t=41
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 92.09004207278082
Explore rate: 0.1778319206319825
Learning rate: 0.1778319206319825
Streaks: 7


Episode = 166
t=42
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 92.09004207278082
Explore rate: 0.1778319206319825
Learning rate: 0.1778319206319825
Streaks: 7



Episode = 166
t=96
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 92.18689014054415
Explore rate: 0.1778319206319825
Learning rate: 0.1778319206319825
Streaks: 7


Episode = 166
t=97
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 92.18689014054415
Explore rate: 0.1778319206319825
Learning rate: 0.1778319206319825
Streaks: 7


Episode = 166
t=98
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 92.2007843438683
Explore rate: 0.1778319206319825
Learning rate: 0.1778319206319825
Streaks: 7


Episode = 166
t=99
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 92.2007843438683
Explore rate: 0.1778319206319825
Learning rate: 0.1778319206319825
Streaks: 7


Episode = 166
t=100
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 92.21465383886384
Explore rate: 0.1778319206319825
Learning rate: 0.1778319206319825
Streaks: 7


Episode = 166
t=101
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 92.11592136807346
Explore rate: 0.1778319206319825
Learning rate: 0.1778319206319825
Streaks: 7


Episode = 166
t=156
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 92.23706074032542
Explore rate: 0.1778319206319825
Learning rate: 0.1778319206319825
Streaks: 7


Episode = 166
t=157
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.30406133953984
Explore rate: 0.1778319206319825
Learning rate: 0.1778319206319825
Streaks: 7


Episode = 166
t=158
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.30406133953984
Explore rate: 0.1778319206319825
Learning rate: 0.1778319206319825
Streaks: 7


Episode = 166
t=159
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.3177471750704
Explore rate: 0.1778319206319825
Learning rate: 0.1778319206319825
Streaks: 7


Episode = 166
t=160
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 92.31661133051982
Explore rate: 0.1778319206319825
Learning rate: 0.1778319206319825
Streaks: 7


Episode = 166
t=161
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.34244075065642
Explore rate: 0.1778319206319825
Learning rate: 0.1778319206319825
Strea

Reward: 1.0
Best Q: 92.42634098219817
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=15
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.42634098219817
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=16
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.4396118154492
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=17
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 92.42520653354288
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=18
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 92.39909429534075
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=19
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 92.39909429534075
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=20
Action: 0
State: (0, 0, 2, 1)

Streaks: 8


Episode = 167
t=72
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.62197579752898
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=73
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.63490383253595
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=74
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 92.54855307516151
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=75
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.64347389161072
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=76
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 92.57807580169047
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=77
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.64501956508022
Explore rate: 0.1752235375244543
Learning rate: 0.175223537524454


Episode = 167
t=129
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.70769237119568
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=130
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.70769237119568
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=131
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 92.35204379230238
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=132
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 92.6157313284191
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=133
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 92.41118703196851
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=134
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 92.41118703196851
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Strea

State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 92.77700298123183
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=187
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 92.7896593721234
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=188
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 92.63484760362569
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=189
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.81165455521587
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=190
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.81165455521587
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=191
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.8242502283937
Explore rate: 0.1752235375244543
Learning rate: 0.1752235375244543
Streaks: 8


Episode = 167
t=192
Acti


Episode = 168
t=45
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 92.92682565463815
Explore rate: 0.17263072694617476
Learning rate: 0.17263072694617476
Streaks: 9


Episode = 168
t=46
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 92.92682565463815
Explore rate: 0.17263072694617476
Learning rate: 0.17263072694617476
Streaks: 9


Episode = 168
t=47
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 92.67842401632006
Explore rate: 0.17263072694617476
Learning rate: 0.17263072694617476
Streaks: 9


Episode = 168
t=48
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.92856458500485
Explore rate: 0.17263072694617476
Learning rate: 0.17263072694617476
Streaks: 9


Episode = 168
t=49
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 92.73381343489328
Explore rate: 0.17263072694617476
Learning rate: 0.17263072694617476
Streaks: 9


Episode = 168
t=50
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.92856458500485
Explore rate: 0.17263072694617476
Learning rate: 0.1726307269461747


Episode = 168
t=94
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 92.9870904697327
Explore rate: 0.17263072694617476
Learning rate: 0.17263072694617476
Streaks: 9


Episode = 168
t=95
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 92.9870904697327
Explore rate: 0.17263072694617476
Learning rate: 0.17263072694617476
Streaks: 9


Episode = 168
t=96
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 92.99919690643489
Explore rate: 0.17263072694617476
Learning rate: 0.17263072694617476
Streaks: 9


Episode = 168
t=97
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 92.9621010064526
Explore rate: 0.17263072694617476
Learning rate: 0.17263072694617476
Streaks: 9


Episode = 168
t=98
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.01128218608257
Explore rate: 0.17263072694617476
Learning rate: 0.17263072694617476
Streaks: 9


Episode = 168
t=99
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.01128218608257
Explore rate: 0.17263072694617476
Learning rate: 0.17263072694617476
S

Episode = 168
t=152
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 92.90890096255828
Explore rate: 0.17263072694617476
Learning rate: 0.17263072694617476
Streaks: 9


Episode = 168
t=153
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.14232649115979
Explore rate: 0.17263072694617476
Learning rate: 0.17263072694617476
Streaks: 9


Episode = 168
t=154
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.14232649115979
Explore rate: 0.17263072694617476
Learning rate: 0.17263072694617476
Streaks: 9


Episode = 168
t=155
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.15416494278969
Explore rate: 0.17263072694617476
Learning rate: 0.17263072694617476
Streaks: 9


Episode = 168
t=156
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 93.09877203225138
Explore rate: 0.17263072694617476
Learning rate: 0.17263072694617476
Streaks: 9


Episode = 168
t=157
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.16629392671926
Explore rate: 0.17263072694617476
Learning rate: 0.17263072694


Episode = 169
t=9
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 92.87014883762428
Explore rate: 0.17005330405836405
Learning rate: 0.17005330405836405
Streaks: 10


Episode = 169
t=10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.16062173460185
Explore rate: 0.17005330405836405
Learning rate: 0.17005330405836405
Streaks: 10


Episode = 169
t=11
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.16062173460185
Explore rate: 0.17005330405836405
Learning rate: 0.17005330405836405
Streaks: 10


Episode = 169
t=12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.17225232331921
Explore rate: 0.17005330405836405
Learning rate: 0.17005330405836405
Streaks: 10


Episode = 169
t=13
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 93.05129400558779
Explore rate: 0.17005330405836405
Learning rate: 0.17005330405836405
Streaks: 10


Episode = 169
t=14
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.22660069826037
Explore rate: 0.17005330405836405
Learning rate: 0.170053304058


Episode = 169
t=67
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 92.990368897862
Explore rate: 0.17005330405836405
Learning rate: 0.17005330405836405
Streaks: 10


Episode = 169
t=68
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 92.990368897862
Explore rate: 0.17005330405836405
Learning rate: 0.17005330405836405
Streaks: 10


Episode = 169
t=69
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 93.00228900715348
Explore rate: 0.17005330405836405
Learning rate: 0.17005330405836405
Streaks: 10


Episode = 169
t=70
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 93.00228900715348
Explore rate: 0.17005330405836405
Learning rate: 0.17005330405836405
Streaks: 10


Episode = 169
t=71
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 93.00228900715348
Explore rate: 0.17005330405836405
Learning rate: 0.17005330405836405
Streaks: 10


Episode = 169
t=72
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 93.01418884590527
Explore rate: 0.17005330405836405
Learning rate: 0.170053304058364


Episode = 172
t=3
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 93.23162655686497
Explore rate: 0.16241156176448868
Learning rate: 0.16241156176448868
Streaks: 0


Episode = 172
t=4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.21181985251475
Explore rate: 0.16241156176448868
Learning rate: 0.16241156176448868
Streaks: 0


Episode = 172
t=5
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 93.23943450847096
Explore rate: 0.16241156176448868
Learning rate: 0.16241156176448868
Streaks: 0


Episode = 172
t=6
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.22728473191509
Explore rate: 0.16241156176448868
Learning rate: 0.16241156176448868
Streaks: 0


Episode = 172
t=7
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 93.24846091692619
Explore rate: 0.16241156176448868
Learning rate: 0.16241156176448868
Streaks: 0


Episode = 172
t=8
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.24168926926292
Explore rate: 0.16241156176448868
Learning rate: 0.16241156176448868
Stre


Episode = 174
t=9
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 93.23067635120742
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


Episode = 174
t=10
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.6892783942794
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


Episode = 174
t=11
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.6892783942794
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


Episode = 174
t=12
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.7007847946046
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


Episode = 174
t=13
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 93.36023371139551
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


Episode = 174
t=14
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.35178682753349
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


E


Episode = 174
t=66
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.36778208805535
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


Episode = 174
t=67
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 93.13418883868898
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


Episode = 174
t=68
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.34704020453009
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


Episode = 174
t=69
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.34704020453009
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


Episode = 174
t=70
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.35908525614448
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


Episode = 174
t=71
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.35908525614448
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


Episode = 174
t=125
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.45525363321751
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


Episode = 174
t=126
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.45525363321751
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


Episode = 174
t=127
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.46712836689365
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


Episode = 174
t=128
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 93.236833485003
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


Episode = 174
t=129
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.60890745208405
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


Episode = 174
t=130
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 93.14963662597582
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streak


Episode = 174
t=184
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.3924337251843
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


Episode = 174
t=185
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.3924337251843
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


Episode = 174
t=186
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.3924337251843
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


Episode = 174
t=187
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 93.38493596718509
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


Episode = 174
t=188
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.4104280292338
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks: 0


Episode = 174
t=189
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 92.83830714650756
Explore rate: 0.1573907603894379
Learning rate: 0.1573907603894379
Streaks:


Episode = 175
t=44
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.6963265383874
Explore rate: 0.1549019599857432
Learning rate: 0.1549019599857432
Streaks: 1


Episode = 175
t=45
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.6963265383874
Explore rate: 0.1549019599857432
Learning rate: 0.1549019599857432
Streaks: 1


Episode = 175
t=46
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.7076400717304
Explore rate: 0.1549019599857432
Learning rate: 0.1549019599857432
Streaks: 1


Episode = 175
t=47
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.7189360801885
Explore rate: 0.1549019599857432
Learning rate: 0.1549019599857432
Streaks: 1


Episode = 175
t=48
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 92.7189360801885
Explore rate: 0.1549019599857432
Learning rate: 0.1549019599857432
Streaks: 1


Episode = 175
t=49
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 92.93713579709899
Explore rate: 0.1549019599857432
Learning rate: 0.1549019599857432
Streaks: 1


Ep

Reward: 1.0
Best Q: 93.37941484419585
Explore rate: 0.1549019599857432
Learning rate: 0.1549019599857432
Streaks: 1


Episode = 175
t=103
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.38967026036471
Explore rate: 0.1549019599857432
Learning rate: 0.1549019599857432
Streaks: 1


Episode = 175
t=104
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.39990979069293
Explore rate: 0.1549019599857432
Learning rate: 0.1549019599857432
Streaks: 1


Episode = 175
t=105
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 93.1189608653283
Explore rate: 0.1549019599857432
Learning rate: 0.1549019599857432
Streaks: 1


Episode = 175
t=106
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.37568911869903
Explore rate: 0.1549019599857432
Learning rate: 0.1549019599857432
Streaks: 1


Episode = 175
t=107
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 93.16898976234982
Explore rate: 0.1549019599857432
Learning rate: 0.1549019599857432
Streaks: 1


Episode = 175
t=108
Action: 1
State: (0, 0,


Episode = 175
t=160
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 93.22636946196636
Explore rate: 0.1549019599857432
Learning rate: 0.1549019599857432
Streaks: 1


Episode = 175
t=161
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.26467486869704
Explore rate: 0.1549019599857432
Learning rate: 0.1549019599857432
Streaks: 1


Episode = 175
t=162
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 93.2427361951868
Explore rate: 0.1549019599857432
Learning rate: 0.1549019599857432
Streaks: 1


Episode = 175
t=163
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.27174365924589
Explore rate: 0.1549019599857432
Learning rate: 0.1549019599857432
Streaks: 1


Episode = 175
t=164
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 93.25765170916846
Explore rate: 0.1549019599857432
Learning rate: 0.1549019599857432
Streaks: 1


Episode = 175
t=165
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.13344424871087
Explore rate: 0.1549019599857432
Learning rate: 0.1549019599857432
Strea

State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.15127157169162
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Episode = 176
t=19
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 93.23877919514233
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Episode = 176
t=20
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.18376416972988
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Episode = 176
t=21
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 92.8544703048989
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Episode = 176
t=22
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.35649591568358
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Episode = 176
t=23
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.35649591568358
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Episode = 176
t=

t=74
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.39520127532303
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Episode = 176
t=75
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 93.0801585142896
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Episode = 176
t=76
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.3446681587184
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Episode = 176
t=77
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.3446681587184
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Episode = 176
t=78
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.35481270406935
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Episode = 176
t=79
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 93.35376239996793
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Epi


Episode = 176
t=132
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 93.39581895195208
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Episode = 176
t=133
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 93.18548470682012
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Episode = 176
t=134
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 93.18548470682012
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Episode = 176
t=135
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.40875122685604
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Episode = 176
t=136
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.40875122685604
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Episode = 176
t=137
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.41879809209027
Explore rate: 0.15242734085788778
Learning rate: 0.1524273408


Episode = 176
t=190
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 93.23836738755449
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Episode = 176
t=191
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.44406025551899
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Episode = 176
t=192
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.44406025551899
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Episode = 176
t=193
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.45405330013975
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Episode = 176
t=194
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 93.39791135497408
Explore rate: 0.15242734085788778
Learning rate: 0.15242734085788778
Streaks: 2


Episode = 176
t=195
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.46401604195361
Explore rate: 0.15242734085788778
Learning rate: 0.1524273408

Explore rate: 0.149966742310231
Learning rate: 0.149966742310231
Streaks: 3


Episode = 177
t=50
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.62997135310988
Explore rate: 0.149966742310231
Learning rate: 0.149966742310231
Streaks: 3


Episode = 177
t=51
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.62997135310988
Explore rate: 0.149966742310231
Learning rate: 0.149966742310231
Streaks: 3


Episode = 177
t=52
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.63952427755585
Explore rate: 0.149966742310231
Learning rate: 0.149966742310231
Streaks: 3


Episode = 177
t=53
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.64906287579224
Explore rate: 0.149966742310231
Learning rate: 0.149966742310231
Streaks: 3


Episode = 177
t=54
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.64906287579224
Explore rate: 0.149966742310231
Learning rate: 0.149966742310231
Streaks: 3


Episode = 177
t=55
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 93.54445094227349
Explore ra


Episode = 177
t=109
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 93.61673633955074
Explore rate: 0.149966742310231
Learning rate: 0.149966742310231
Streaks: 3


Episode = 177
t=110
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 93.34685753885151
Explore rate: 0.149966742310231
Learning rate: 0.149966742310231
Streaks: 3


Episode = 177
t=111
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 93.34685753885151
Explore rate: 0.149966742310231
Learning rate: 0.149966742310231
Streaks: 3


Episode = 177
t=112
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.69179314856149
Explore rate: 0.149966742310231
Learning rate: 0.149966742310231
Streaks: 3


Episode = 177
t=113
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.69179314856149
Explore rate: 0.149966742310231
Learning rate: 0.149966742310231
Streaks: 3


Episode = 177
t=114
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.70125336087479
Explore rate: 0.149966742310231
Learning rate: 0.149966742310231
Streaks: 3


Epi


Episode = 177
t=169
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.78796409982712
Explore rate: 0.149966742310231
Learning rate: 0.149966742310231
Streaks: 3


Episode = 177
t=170
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 93.65098338803993
Explore rate: 0.149966742310231
Learning rate: 0.149966742310231
Streaks: 3


Episode = 177
t=171
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 93.65098338803993
Explore rate: 0.149966742310231
Learning rate: 0.149966742310231
Streaks: 3


Episode = 177
t=172
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 93.65098338803993
Explore rate: 0.149966742310231
Learning rate: 0.149966742310231
Streaks: 3


Episode = 177
t=173
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.73563055988052
Explore rate: 0.149966742310231
Learning rate: 0.149966742310231
Streaks: 3


Episode = 177
t=174
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.73563055988052
Explore rate: 0.149966742310231
Learning rate: 0.149966742310231
Streaks: 3


Epi

State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 93.72004736954871
Explore rate: 0.14752000636314366
Learning rate: 0.14752000636314366
Streaks: 4


Episode = 178
t=26
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.82329616536752
Explore rate: 0.14752000636314366
Learning rate: 0.14752000636314366
Streaks: 4


Episode = 178
t=27
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.82329616536752
Explore rate: 0.14752000636314366
Learning rate: 0.14752000636314366
Streaks: 4


Episode = 178
t=28
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.8324080392574
Explore rate: 0.14752000636314366
Learning rate: 0.14752000636314366
Streaks: 4


Episode = 178
t=29
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 93.80506777758617
Explore rate: 0.14752000636314366
Learning rate: 0.14752000636314366
Streaks: 4


Episode = 178
t=30
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 93.80506777758617
Explore rate: 0.14752000636314366
Learning rate: 0.14752000636314366
Streaks: 4


Episode = 178
t=


Episode = 178
t=83
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.90780937357792
Explore rate: 0.14752000636314366
Learning rate: 0.14752000636314366
Streaks: 4


Episode = 178
t=84
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 93.90780937357792
Explore rate: 0.14752000636314366
Learning rate: 0.14752000636314366
Streaks: 4


Episode = 178
t=85
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 93.89181387379303
Explore rate: 0.14752000636314366
Learning rate: 0.14752000636314366
Streaks: 4


Episode = 178
t=86
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.93798989213573
Explore rate: 0.14752000636314366
Learning rate: 0.14752000636314366
Streaks: 4


Episode = 178
t=87
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.93798989213573
Explore rate: 0.14752000636314366
Learning rate: 0.14752000636314366
Streaks: 4


Episode = 178
t=88
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.94693256983258
Explore rate: 0.14752000636314366
Learning rate: 0.1475200063631436


Episode = 178
t=141
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.0077225495296
Explore rate: 0.14752000636314366
Learning rate: 0.14752000636314366
Streaks: 4


Episode = 178
t=142
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 93.97013347445298
Explore rate: 0.14752000636314366
Learning rate: 0.14752000636314366
Streaks: 4


Episode = 178
t=143
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.99434741303692
Explore rate: 0.14752000636314366
Learning rate: 0.14752000636314366
Streaks: 4


Episode = 178
t=144
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 93.99434741303692
Explore rate: 0.14752000636314366
Learning rate: 0.14752000636314366
Streaks: 4


Episode = 178
t=145
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 94.00320695211535
Explore rate: 0.14752000636314366
Learning rate: 0.14752000636314366
Streaks: 4


Episode = 178
t=146
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 93.9393755454256
Explore rate: 0.14752000636314366
Learning rate: 0.147520006363


Episode = 178
t=199
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 94.02878468729351
Explore rate: 0.14752000636314366
Learning rate: 0.14752000636314366
Streaks: 4

Episode 178 finished after 199.000000 time steps

Episode = 179
t=0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.06487390998099
Explore rate: 0.1450869776921444
Learning rate: 0.1450869776921444
Streaks: 5


Episode = 179
t=1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 94.02878468729351
Explore rate: 0.1450869776921444
Learning rate: 0.1450869776921444
Streaks: 5


Episode = 179
t=2
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 94.02878468729351
Explore rate: 0.1450869776921444
Learning rate: 0.1450869776921444
Streaks: 5


Episode = 179
t=3
Action: 0
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 94.02878468729351
Explore rate: 0.1450869776921444
Learning rate: 0.1450869776921444
Streaks: 5


Episode = 179
t=4
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 94.06877990619851
Explore rate: 0.14508697769214

Episode = 179
t=57
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 93.95174833487262
Explore rate: 0.1450869776921444
Learning rate: 0.1450869776921444
Streaks: 5


Episode = 179
t=58
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.16885717722177
Explore rate: 0.1450869776921444
Learning rate: 0.1450869776921444
Streaks: 5


Episode = 179
t=59
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.16885717722177
Explore rate: 0.1450869776921444
Learning rate: 0.1450869776921444
Streaks: 5


Episode = 179
t=60
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.17731740610824
Explore rate: 0.1450869776921444
Learning rate: 0.1450869776921444
Streaks: 5


Episode = 179
t=61
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 94.09681598340701
Explore rate: 0.1450869776921444
Learning rate: 0.1450869776921444
Streaks: 5


Episode = 179
t=62
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 94.14179837001343
Explore rate: 0.1450869776921444
Learning rate: 0.1450869776921444
Streaks: 5


Learning rate: 0.1450869776921444
Streaks: 5


Episode = 179
t=117
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 94.18761141186974
Explore rate: 0.1450869776921444
Learning rate: 0.1450869776921444
Streaks: 5


Episode = 179
t=118
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 94.2592578506845
Explore rate: 0.1450869776921444
Learning rate: 0.1450869776921444
Streaks: 5


Episode = 179
t=119
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 94.20633544642132
Explore rate: 0.1450869776921444
Learning rate: 0.1450869776921444
Streaks: 5


Episode = 179
t=120
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 94.25998535179616
Explore rate: 0.1450869776921444
Learning rate: 0.1450869776921444
Streaks: 5


Episode = 179
t=121
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 94.22244736281779
Explore rate: 0.1450869776921444
Learning rate: 0.1450869776921444
Streaks: 5


Episode = 179
t=122
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.2121066161353
Explore rate: 0.14508697769


Episode = 179
t=173
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.2838195764174
Explore rate: 0.1450869776921444
Learning rate: 0.1450869776921444
Streaks: 5


Episode = 179
t=174
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.2838195764174
Explore rate: 0.1450869776921444
Learning rate: 0.1450869776921444
Streaks: 5


Episode = 179
t=175
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.2921130098334
Explore rate: 0.1450869776921444
Learning rate: 0.1450869776921444
Streaks: 5


Episode = 179
t=176
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 94.2586829847134
Explore rate: 0.1450869776921444
Learning rate: 0.1450869776921444
Streaks: 5


Episode = 179
t=177
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.30267252918377
Explore rate: 0.1450869776921444
Learning rate: 0.1450869776921444
Streaks: 5


Episode = 179
t=178
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.30267252918377
Explore rate: 0.1450869776921444
Learning rate: 0.1450869776921444
Streaks:


Episode = 180
t=32
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.38054191730355
Explore rate: 0.14266750356873148
Learning rate: 0.14266750356873148
Streaks: 6


Episode = 180
t=33
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.38054191730355
Explore rate: 0.14266750356873148
Learning rate: 0.14266750356873148
Streaks: 6


Episode = 180
t=34
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.38855905786423
Explore rate: 0.14266750356873148
Learning rate: 0.14266750356873148
Streaks: 6


Episode = 180
t=35
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 94.30313538140166
Explore rate: 0.14266750356873148
Learning rate: 0.14266750356873148
Streaks: 6


Episode = 180
t=36
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.39136299881764
Explore rate: 0.14266750356873148
Learning rate: 0.14266750356873148
Streaks: 6


Episode = 180
t=37
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 94.32372429771803
Explore rate: 0.14266750356873148
Learning rate: 0.1426675035687314

State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 94.4204745325148
Explore rate: 0.14266750356873148
Learning rate: 0.14266750356873148
Streaks: 6


Episode = 180
t=90
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 94.46002560474717
Explore rate: 0.14266750356873148
Learning rate: 0.14266750356873148
Streaks: 6


Episode = 180
t=91
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 94.43402092842172
Explore rate: 0.14266750356873148
Learning rate: 0.14266750356873148
Streaks: 6


Episode = 180
t=92
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.44392253378751
Explore rate: 0.14266750356873148
Learning rate: 0.14266750356873148
Streaks: 6


Episode = 180
t=93
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.44392253378751
Explore rate: 0.14266750356873148
Learning rate: 0.14266750356873148
Streaks: 6


Episode = 180
t=94
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.4518492508049
Explore rate: 0.14266750356873148
Learning rate: 0.14266750356873148
Streaks: 6


Episode = 180
t=9


Episode = 180
t=146
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.51820514395666
Explore rate: 0.14266750356873148
Learning rate: 0.14266750356873148
Streaks: 6


Episode = 180
t=147
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 94.50523475728191
Explore rate: 0.14266750356873148
Learning rate: 0.14266750356873148
Streaks: 6


Episode = 180
t=148
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 94.53254597664322
Explore rate: 0.14266750356873148
Learning rate: 0.14266750356873148
Streaks: 6


Episode = 180
t=149
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 94.53254597664322
Explore rate: 0.14266750356873148
Learning rate: 0.14266750356873148
Streaks: 6


Episode = 180
t=150
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 94.54034625680711
Explore rate: 0.14266750356873148
Learning rate: 0.14266750356873148
Streaks: 6


Episode = 180
t=151
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 94.36346961301012
Explore rate: 0.14266750356873148
Learning rate: 0.1426675035


Episode = 181
t=4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.60265514169143
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305
Streaks: 7


Episode = 181
t=5
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 94.56294378342416
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305
Streaks: 7


Episode = 181
t=6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 94.5818430445176
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305
Streaks: 7


Episode = 181
t=7
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 94.5818430445176
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305
Streaks: 7


Episode = 181
t=8
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 94.58944262914905
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305
Streaks: 7


Episode = 181
t=9
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 94.55756822226647
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305
Streak


Episode = 181
t=63
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 94.60521311301675
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305
Streaks: 7


Episode = 181
t=64
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.64083216630074
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305
Streaks: 7


Episode = 181
t=65
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.64083216630074
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305
Streaks: 7


Episode = 181
t=66
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.64834901194419
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305
Streaks: 7


Episode = 181
t=67
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 94.63341532947967
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305
Streaks: 7


Episode = 181
t=68
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 94.5092360056222
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305


Episode = 181
t=121
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 94.74611281551107
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305
Streaks: 7


Episode = 181
t=122
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 94.7299723920283
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305
Streaks: 7


Episode = 181
t=123
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.71120785798955
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305
Streaks: 7


Episode = 181
t=124
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.71120785798955
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305
Streaks: 7


Episode = 181
t=125
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.71862599367878
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305
Streaks: 7


Episode = 181
t=126
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.71862599367878
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380


Episode = 181
t=181
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 94.82284197960945
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305
Streaks: 7


Episode = 181
t=182
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 94.79384379020867
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305
Streaks: 7


Episode = 181
t=183
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.78045550554813
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305
Streaks: 7


Episode = 181
t=184
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.78045550554813
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305
Streaks: 7


Episode = 181
t=185
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.78777651349402
Explore rate: 0.14026143380285305
Learning rate: 0.14026143380285305
Streaks: 7


Episode = 181
t=186
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 94.74883966496128
Explore rate: 0.14026143380285305
Learning rate: 0.1402614338


Episode = 182
t=39
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 94.86677417198476
Explore rate: 0.13786862068696282
Learning rate: 0.13786862068696282
Streaks: 8


Episode = 182
t=40
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 94.87385127963059
Explore rate: 0.13786862068696282
Learning rate: 0.13786862068696282
Streaks: 8


Episode = 182
t=41
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 94.66353066849396
Explore rate: 0.13786862068696282
Learning rate: 0.13786862068696282
Streaks: 8


Episode = 182
t=42
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.86184578963042
Explore rate: 0.13786862068696282
Learning rate: 0.13786862068696282
Streaks: 8


Episode = 182
t=43
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.86184578963042
Explore rate: 0.13786862068696282
Learning rate: 0.13786862068696282
Streaks: 8


Episode = 182
t=44
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.86892969196903
Explore rate: 0.13786862068696282
Learning rate: 0.1378686206869628


Episode = 182
t=96
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 94.90742296476358
Explore rate: 0.13786862068696282
Learning rate: 0.13786862068696282
Streaks: 8


Episode = 182
t=97
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 94.91444403047949
Explore rate: 0.13786862068696282
Learning rate: 0.13786862068696282
Streaks: 8


Episode = 182
t=98
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 94.87132371224699
Explore rate: 0.13786862068696282
Learning rate: 0.13786862068696282
Streaks: 8


Episode = 182
t=99
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.95405001341044
Explore rate: 0.13786862068696282
Learning rate: 0.13786862068696282
Streaks: 8


Episode = 182
t=100
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.95405001341044
Explore rate: 0.13786862068696282
Learning rate: 0.13786862068696282
Streaks: 8


Episode = 182
t=101
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 94.9610067950575
Explore rate: 0.13786862068696282
Learning rate: 0.137868620686962

Reward: 1.0
Best Q: 95.00106363678925
Explore rate: 0.13786862068696282
Learning rate: 0.13786862068696282
Streaks: 8


Episode = 182
t=154
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 94.87325347237821
Explore rate: 0.13786862068696282
Learning rate: 0.13786862068696282
Streaks: 8


Episode = 182
t=155
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 94.95779195663063
Explore rate: 0.13786862068696282
Learning rate: 0.13786862068696282
Streaks: 8


Episode = 182
t=156
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 94.95779195663063
Explore rate: 0.13786862068696282
Learning rate: 0.13786862068696282
Streaks: 8


Episode = 182
t=157
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 94.96474357931218
Explore rate: 0.13786862068696282
Learning rate: 0.13786862068696282
Streaks: 8


Episode = 182
t=158
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 94.89186029927862
Explore rate: 0.13786862068696282
Learning rate: 0.13786862068696282
Streaks: 8


Episode = 182
t=159
Action: 0



Episode = 183
t=13
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 94.9457526166261
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808
Streaks: 9


Episode = 183
t=14
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.04081522549225
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808
Streaks: 9


Episode = 183
t=15
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.04081522549225
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808
Streaks: 9


Episode = 183
t=16
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.04753437133155
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808
Streaks: 9


Episode = 183
t=17
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 94.99571503296461
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808
Streaks: 9


Episode = 183
t=18
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.05309458258401
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808


Episode = 183
t=72
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 95.0500857602752
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808
Streaks: 9


Episode = 183
t=73
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.13053902765381
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808
Streaks: 9


Episode = 183
t=74
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.13053902765381
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808
Streaks: 9


Episode = 183
t=75
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.13713660768353
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808
Streaks: 9


Episode = 183
t=76
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 94.99517800378509
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808
Streaks: 9


Episode = 183
t=77
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.11213160291103
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808

Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 95.1051404349349
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808
Streaks: 9


Episode = 183
t=129
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.1721942943772
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808
Streaks: 9


Episode = 183
t=130
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 95.12076663162074
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808
Streaks: 9


Episode = 183
t=131
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.18506769896845
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808
Streaks: 9


Episode = 183
t=132
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.18506769896845
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808
Streaks: 9


Episode = 183
t=133
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.19159139869089
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808
Streaks: 9


Ep


Episode = 183
t=187
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.24350258404789
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808
Streaks: 9


Episode = 183
t=188
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.24994711097625
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808
Streaks: 9


Episode = 183
t=189
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 95.19007133124349
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808
Streaks: 9


Episode = 183
t=190
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.25391534302425
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808
Streaks: 9


Episode = 183
t=191
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.25391534302425
Explore rate: 0.13548891894160808
Learning rate: 0.13548891894160808
Streaks: 9


Episode = 183
t=192
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 95.20515190617435
Explore rate: 0.13548891894160808
Learning rate: 0.1354889189


Episode = 184
t=44
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.30978532432019
Explore rate: 0.1331221856625011
Learning rate: 0.1331221856625011
Streaks: 10


Episode = 184
t=45
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 95.28542891942114
Explore rate: 0.1331221856625011
Learning rate: 0.1331221856625011
Streaks: 10


Episode = 184
t=46
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 95.13675233132794
Explore rate: 0.1331221856625011
Learning rate: 0.1331221856625011
Streaks: 10


Episode = 184
t=47
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 95.13675233132794
Explore rate: 0.1331221856625011
Learning rate: 0.1331221856625011
Streaks: 10


Episode = 184
t=48
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.30445703236543
Explore rate: 0.1331221856625011
Learning rate: 0.1331221856625011
Streaks: 10


Episode = 184
t=49
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.30445703236543
Explore rate: 0.1331221856625011
Learning rate: 0.1331221856625011
Strea


Episode = 184
t=102
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 95.3169754010893
Explore rate: 0.1331221856625011
Learning rate: 0.1331221856625011
Streaks: 10


Episode = 184
t=103
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.35287685248666
Explore rate: 0.1331221856625011
Learning rate: 0.1331221856625011
Streaks: 10


Episode = 184
t=104
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.35287685248666
Explore rate: 0.1331221856625011
Learning rate: 0.1331221856625011
Streaks: 10


Episode = 184
t=105
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 95.35153439021418
Explore rate: 0.1331221856625011
Learning rate: 0.1331221856625011
Streaks: 10


Episode = 184
t=106
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.36397432298052
Explore rate: 0.1331221856625011
Learning rate: 0.1331221856625011
Streaks: 10


Episode = 184
t=107
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.36397432298052
Explore rate: 0.1331221856625011
Learning rate: 0.1331221856625011



Episode = 184
t=159
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 95.20590086830718
Explore rate: 0.1331221856625011
Learning rate: 0.1331221856625011
Streaks: 10


Episode = 184
t=160
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.39666070777717
Explore rate: 0.1331221856625011
Learning rate: 0.1331221856625011
Streaks: 10


Episode = 184
t=161
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.39666070777717
Explore rate: 0.1331221856625011
Learning rate: 0.1331221856625011
Streaks: 10


Episode = 184
t=162
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.40278877365644
Explore rate: 0.1331221856625011
Learning rate: 0.1331221856625011
Streaks: 10


Episode = 184
t=163
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.40890868172048
Explore rate: 0.1331221856625011
Learning rate: 0.1331221856625011
Streaks: 10


Episode = 184
t=164
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 95.38047489602886
Explore rate: 0.1331221856625011
Learning rate: 0.1331221856625011


Episode = 185
t=17
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 95.42932365118517
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode = 185
t=18
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.44108346710203
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode = 185
t=19
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 95.43682307883788
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode = 185
t=20
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.46352305287081
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode = 185
t=21
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.46352305287081
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode = 185
t=22
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.46945532575937
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026

State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.49061827952379
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode = 185
t=75
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.49651512045043
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode = 185
t=76
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.49651512045043
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode = 185
t=77
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 95.41343391752591
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode = 185
t=78
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.51562272365919
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode = 185
t=79
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.51562272365919
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode =

Reward: 1.0
Best Q: 95.57428410905254
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode = 185
t=132
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.58007154161272
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode = 185
t=133
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 95.56482102542266
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode = 185
t=134
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.53872483807999
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode = 185
t=135
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.53872483807999
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode = 185
t=136
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.54455877088729
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode = 185
t=137
Acti


Episode = 185
t=180
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.58530753566062
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode = 185
t=181
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 95.55551804497766
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode = 185
t=182
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.61835818829775
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode = 185
t=183
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.61835818829775
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode = 185
t=184
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.62408798594245
Explore rate: 0.13076828026902376
Learning rate: 0.13076828026902376
Streaks: 11


Episode = 185
t=185
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 95.43048200996306
Explore rate: 0.13076828026902376
Learning rate: 0.13076



Episode = 186
t=40
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 95.4856167817366
Explore rate: 0.12842706445412122
Learning rate: 0.12842706445412122
Streaks: 12


Episode = 186
t=41
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.64324194251749
Explore rate: 0.12842706445412122
Learning rate: 0.12842706445412122
Streaks: 12


Episode = 186
t=42
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.64324194251749
Explore rate: 0.12842706445412122
Learning rate: 0.12842706445412122
Streaks: 12


Episode = 186
t=43
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.64883719899609
Explore rate: 0.12842706445412122
Learning rate: 0.12842706445412122
Streaks: 12


Episode = 186
t=44
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.65442526965104
Explore rate: 0.12842706445412122
Learning rate: 0.12842706445412122
Streaks: 12


Episode = 186
t=45
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.65442526965104
Explore rate: 0.12842706445412122
Learning rate: 0.12842706445

State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 95.58266727259422
Explore rate: 0.12842706445412122
Learning rate: 0.12842706445412122
Streaks: 12


Episode = 186
t=99
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.67866773578115
Explore rate: 0.12842706445412122
Learning rate: 0.12842706445412122
Streaks: 12


Episode = 186
t=100
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.67866773578115
Explore rate: 0.12842706445412122
Learning rate: 0.12842706445412122
Streaks: 12


Episode = 186
t=101
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.68421749595339
Explore rate: 0.12842706445412122
Learning rate: 0.12842706445412122
Streaks: 12


Episode = 186
t=102
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 95.6514880328416
Explore rate: 0.12842706445412122
Learning rate: 0.12842706445412122
Streaks: 12


Episode = 186
t=103
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.69042962207277
Explore rate: 0.12842706445412122
Learning rate: 0.12842706445412122
Streaks: 12


Episod


Episode = 186
t=147
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 95.72642625307621
Explore rate: 0.12842706445412122
Learning rate: 0.12842706445412122
Streaks: 12


Episode = 186
t=148
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.74065718915699
Explore rate: 0.12842706445412122
Learning rate: 0.12842706445412122
Streaks: 12


Episode = 186
t=149
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.74065718915699
Explore rate: 0.12842706445412122
Learning rate: 0.12842706445412122
Streaks: 12


Episode = 186
t=150
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.746127338094
Explore rate: 0.12842706445412122
Learning rate: 0.12842706445412122
Streaks: 12


Episode = 186
t=151
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.7515904618793
Explore rate: 0.12842706445412122
Learning rate: 0.12842706445412122
Streaks: 12


Episode = 186
t=152
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.7515904618793
Explore rate: 0.12842706445412122
Learning rate: 0.128427064


Episode = 187
t=6
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.81223871501709
Explore rate: 0.12609840213553858
Learning rate: 0.12609840213553858
Streaks: 13


Episode = 187
t=7
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 95.69725819279213
Explore rate: 0.12609840213553858
Learning rate: 0.12609840213553858
Streaks: 13


Episode = 187
t=8
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 95.69725819279213
Explore rate: 0.12609840213553858
Learning rate: 0.12609840213553858
Streaks: 13


Episode = 187
t=9
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 95.69725819279213
Explore rate: 0.12609840213553858
Learning rate: 0.12609840213553858
Streaks: 13


Episode = 187
t=10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.79177901868636
Explore rate: 0.12609840213553858
Learning rate: 0.12609840213553858
Streaks: 13


Episode = 187
t=11
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.79177901868636
Explore rate: 0.12609840213553858
Learning rate: 0.126098402135538

Streaks: 13


Episode = 187
t=63
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 95.73559954599534
Explore rate: 0.12609840213553858
Learning rate: 0.12609840213553858
Streaks: 13


Episode = 187
t=64
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.84708028437622
Explore rate: 0.12609840213553858
Learning rate: 0.12609840213553858
Streaks: 13


Episode = 187
t=65
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.84708028437622
Explore rate: 0.12609840213553858
Learning rate: 0.12609840213553858
Streaks: 13


Episode = 187
t=66
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.85231704977959
Explore rate: 0.12609840213553858
Learning rate: 0.12609840213553858
Streaks: 13


Episode = 187
t=67
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.85231704977959
Explore rate: 0.12609840213553858
Learning rate: 0.12609840213553858
Streaks: 13


Episode = 187
t=68
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.85754721170547
Explore rate: 0.12609840213553858
Learning rate: 

Episode = 187
t=121
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 95.88710537881106
Explore rate: 0.12609840213553858
Learning rate: 0.12609840213553858
Streaks: 13


Episode = 187
t=122
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.91531039853703
Explore rate: 0.12609840213553858
Learning rate: 0.12609840213553858
Streaks: 13


Episode = 187
t=123
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 95.91531039853703
Explore rate: 0.12609840213553858
Learning rate: 0.12609840213553858
Streaks: 13


Episode = 187
t=124
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 95.77522144566072
Explore rate: 0.12609840213553858
Learning rate: 0.12609840213553858
Streaks: 13


Episode = 187
t=125
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.89462315916313
Explore rate: 0.12609840213553858
Learning rate: 0.12609840213553858
Streaks: 13


Episode = 187
t=126
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.89462315916313
Explore rate: 0.12609840213553858
Learning rate: 0.126098

Episode = 187
t=179
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.93048686839299
Explore rate: 0.12609840213553858
Learning rate: 0.12609840213553858
Streaks: 13


Episode = 187
t=180
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.93048686839299
Explore rate: 0.12609840213553858
Learning rate: 0.12609840213553858
Streaks: 13


Episode = 187
t=181
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.93561845942664
Explore rate: 0.12609840213553858
Learning rate: 0.12609840213553858
Streaks: 13


Episode = 187
t=182
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 95.9136269262084
Explore rate: 0.12609840213553858
Learning rate: 0.12609840213553858
Streaks: 13


Episode = 187
t=183
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 95.94247706409264
Explore rate: 0.12609840213553858
Learning rate: 0.12609840213553858
Streaks: 13


Episode = 187
t=184
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 95.87208541404463
Explore rate: 0.12609840213553858
Learning rate: 0.1260984


Episode = 188
t=38
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.00475427176018
Explore rate: 0.1237821594083578
Learning rate: 0.1237821594083578
Streaks: 14


Episode = 188
t=39
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.00475427176018
Explore rate: 0.1237821594083578
Learning rate: 0.1237821594083578
Streaks: 14


Episode = 188
t=40
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.00969967319627
Explore rate: 0.1237821594083578
Learning rate: 0.1237821594083578
Streaks: 14


Episode = 188
t=41
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 95.97981044675542
Explore rate: 0.1237821594083578
Learning rate: 0.1237821594083578
Streaks: 14


Episode = 188
t=42
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.01530408282373
Explore rate: 0.1237821594083578
Learning rate: 0.1237821594083578
Streaks: 14


Episode = 188
t=43
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 95.89908809597257
Explore rate: 0.1237821594083578
Learning rate: 0.1237821594083578
Strea

Streaks: 14


Episode = 188
t=97
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.05433321074553
Explore rate: 0.1237821594083578
Learning rate: 0.1237821594083578
Streaks: 14


Episode = 188
t=98
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.05433321074553
Explore rate: 0.1237821594083578
Learning rate: 0.1237821594083578
Streaks: 14


Episode = 188
t=99
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.05921724230032
Explore rate: 0.1237821594083578
Learning rate: 0.1237821594083578
Streaks: 14


Episode = 188
t=100
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 95.9306559429197
Explore rate: 0.1237821594083578
Learning rate: 0.1237821594083578
Streaks: 14


Episode = 188
t=101
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.02463911042092
Explore rate: 0.1237821594083578
Learning rate: 0.1237821594083578
Streaks: 14


Episode = 188
t=102
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.02463911042092
Explore rate: 0.1237821594083578
Learning rate: 0.1237821


Episode = 188
t=154
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 96.04798549189348
Explore rate: 0.1237821594083578
Learning rate: 0.1237821594083578
Streaks: 14


Episode = 188
t=155
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.0954321391072
Explore rate: 0.1237821594083578
Learning rate: 0.1237821594083578
Streaks: 14


Episode = 188
t=156
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.0954321391072
Explore rate: 0.1237821594083578
Learning rate: 0.1237821594083578
Streaks: 14


Episode = 188
t=157
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.10026529752098
Explore rate: 0.1237821594083578
Learning rate: 0.1237821594083578
Streaks: 14


Episode = 188
t=158
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 96.07216159380295
Explore rate: 0.1237821594083578
Learning rate: 0.1237821594083578
Streaks: 14


Episode = 188
t=159
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.07882358507122
Explore rate: 0.1237821594083578
Learning rate: 0.1237821594083578
S


Episode = 189
t=12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.12652733717522
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879354
Streaks: 15


Episode = 189
t=13
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 96.12191422384676
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879354
Streaks: 15


Episode = 189
t=14
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.12542036379355
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879354
Streaks: 15


Episode = 189
t=15
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 95.99534400525432
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879354
Streaks: 15


Episode = 189
t=16
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 95.99534400525432
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879354
Streaks: 15


Episode = 189
t=17
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 95.99534400525432
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449


Episode = 189
t=69
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.16100796410454
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879354
Streaks: 15


Episode = 189
t=70
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.1656715027006
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879354
Streaks: 15


Episode = 189
t=71
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 96.133829849279
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879354
Streaks: 15


Episode = 189
t=72
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.17059203321074
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879354
Streaks: 15


Episode = 189
t=73
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.17059203321074
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879354
Streaks: 15


Episode = 189
t=74
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 96.09267809754462
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879

Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 96.19452965970589
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879354
Streaks: 15


Episode = 189
t=126
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.20876199191537
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879354
Streaks: 15


Episode = 189
t=127
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.20876199191537
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879354
Streaks: 15


Episode = 189
t=128
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.21336751977586
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879354
Streaks: 15


Episode = 189
t=129
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 96.1995186668988
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879354
Streaks: 15


Episode = 189
t=130
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.22034308400806
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879354
Streaks: 


Episode = 189
t=183
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 96.2376066207571
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879354
Streaks: 15


Episode = 189
t=184
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.26694935801824
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879354
Streaks: 15


Episode = 189
t=185
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 96.11175770819827
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879354
Streaks: 15


Episode = 189
t=186
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.26296932584131
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879354
Streaks: 15


Episode = 189
t=187
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.26296932584131
Explore rate: 0.12147820449879354
Learning rate: 0.12147820449879354
Streaks: 15


Episode = 189
t=188
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.26750900360585
Explore rate: 0.12147820449879354
Learning rate: 0.121478


Episode = 190
t=40
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 96.19090419410078
Explore rate: 0.11918640771920863
Learning rate: 0.11918640771920863
Streaks: 16


Episode = 190
t=41
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.27621108499768
Explore rate: 0.11918640771920863
Learning rate: 0.11918640771920863
Streaks: 16


Episode = 190
t=42
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.27621108499768
Explore rate: 0.11918640771920863
Learning rate: 0.11918640771920863
Streaks: 16


Episode = 190
t=43
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.28064933523652
Explore rate: 0.11918640771920863
Learning rate: 0.11918640771920863
Streaks: 16


Episode = 190
t=44
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.28508229568433
Explore rate: 0.11918640771920863
Learning rate: 0.11918640771920863
Streaks: 16


Episode = 190
t=45
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 96.26826950620793
Explore rate: 0.11918640771920863
Learning rate: 0.11918640771


Episode = 190
t=100
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 96.32719968954818
Explore rate: 0.11918640771920863
Learning rate: 0.11918640771920863
Streaks: 16


Episode = 190
t=101
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.36952550605659
Explore rate: 0.11918640771920863
Learning rate: 0.11918640771920863
Streaks: 16


Episode = 190
t=102
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 96.20550986621932
Explore rate: 0.11918640771920863
Learning rate: 0.11918640771920863
Streaks: 16


Episode = 190
t=103
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.32661043954988
Explore rate: 0.11918640771920863
Learning rate: 0.11918640771920863
Streaks: 16


Episode = 190
t=104
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.32661043954988
Explore rate: 0.11918640771920863
Learning rate: 0.11918640771920863
Streaks: 16


Episode = 190
t=105
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.33098862060851
Explore rate: 0.11918640771920863
Learning rate: 0.11918


Episode = 190
t=159
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.36523823467658
Explore rate: 0.11918640771920863
Learning rate: 0.11918640771920863
Streaks: 16


Episode = 190
t=160
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.36957037665383
Explore rate: 0.11918640771920863
Learning rate: 0.11918640771920863
Streaks: 16


Episode = 190
t=161
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.37389735530667
Explore rate: 0.11918640771920863
Learning rate: 0.11918640771920863
Streaks: 16


Episode = 190
t=162
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.37389735530667
Explore rate: 0.11918640771920863
Learning rate: 0.11918640771920863
Streaks: 16


Episode = 190
t=163
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 96.33532514286695
Explore rate: 0.11918640771920863
Learning rate: 0.11918640771920863
Streaks: 16


Episode = 190
t=164
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.37543516084672
Explore rate: 0.11918640771920863
Learning rate: 0.11918


Episode = 191
t=7
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.41215946034124
Explore rate: 0.11690664142431006
Learning rate: 0.11690664142431006
Streaks: 17


Episode = 191
t=8
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.41635388421582
Explore rate: 0.11690664142431006
Learning rate: 0.11690664142431006
Streaks: 17


Episode = 191
t=9
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 96.39231120790731
Explore rate: 0.11690664142431006
Learning rate: 0.11690664142431006
Streaks: 17


Episode = 191
t=10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.3920226026227
Explore rate: 0.11690664142431006
Learning rate: 0.11690664142431006
Streaks: 17


Episode = 191
t=11
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.3920226026227
Explore rate: 0.11690664142431006
Learning rate: 0.11690664142431006
Streaks: 17


Episode = 191
t=12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.39624056782132
Explore rate: 0.11690664142431006
Learning rate: 0.1169066414243100


Episode = 191
t=67
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 96.43250371450038
Explore rate: 0.11690664142431006
Learning rate: 0.11690664142431006
Streaks: 17


Episode = 191
t=68
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.44901853125904
Explore rate: 0.11690664142431006
Learning rate: 0.11690664142431006
Streaks: 17


Episode = 191
t=69
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.44901853125904
Explore rate: 0.11690664142431006
Learning rate: 0.11690664142431006
Streaks: 17


Episode = 191
t=70
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 96.34068784178274
Explore rate: 0.11690664142431006
Learning rate: 0.11690664142431006
Streaks: 17


Episode = 191
t=71
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.453331406993
Explore rate: 0.11690664142431006
Learning rate: 0.11690664142431006
Streaks: 17


Episode = 191
t=72
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.453331406993
Explore rate: 0.11690664142431006
Learning rate: 0.116906641424310

State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.49252163475538
Explore rate: 0.11690664142431006
Learning rate: 0.11690664142431006
Streaks: 17


Episode = 191
t=127
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.49252163475538
Explore rate: 0.11690664142431006
Learning rate: 0.11690664142431006
Streaks: 17


Episode = 191
t=128
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 96.47454529145257
Explore rate: 0.11690664142431006
Learning rate: 0.11690664142431006
Streaks: 17


Episode = 191
t=129
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.47329293230045
Explore rate: 0.11690664142431006
Learning rate: 0.11690664142431006
Streaks: 17


Episode = 191
t=130
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.47329293230045
Explore rate: 0.11690664142431006
Learning rate: 0.11690664142431006
Streaks: 17


Episode = 191
t=131
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.47741588708617
Explore rate: 0.11690664142431006
Learning rate: 0.11690664142431006
Streaks: 17


Epis


Episode = 191
t=185
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.53237236651785
Explore rate: 0.11690664142431006
Learning rate: 0.11690664142431006
Streaks: 17


Episode = 191
t=186
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 96.5156676314708
Explore rate: 0.11690664142431006
Learning rate: 0.11690664142431006
Streaks: 17


Episode = 191
t=187
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.53449288799573
Explore rate: 0.11690664142431006
Learning rate: 0.11690664142431006
Streaks: 17


Episode = 191
t=188
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 96.52191983695805
Explore rate: 0.11690664142431006
Learning rate: 0.11690664142431006
Streaks: 17


Episode = 191
t=189
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.53708912153111
Explore rate: 0.11690664142431006
Learning rate: 0.11690664142431006
Streaks: 17


Episode = 191
t=190
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 96.52774159987383
Explore rate: 0.11690664142431006
Learning rate: 0.116906


Episode = 192
t=44
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 96.58093297181418
Explore rate: 0.11463877996848804
Learning rate: 0.11463877996848804
Streaks: 18


Episode = 192
t=45
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.58991684440542
Explore rate: 0.11463877996848804
Learning rate: 0.11463877996848804
Streaks: 18


Episode = 192
t=46
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 96.43576019208103
Explore rate: 0.11463877996848804
Learning rate: 0.11463877996848804
Streaks: 18


Episode = 192
t=47
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.5400975204741
Explore rate: 0.11463877996848804
Learning rate: 0.11463877996848804
Streaks: 18


Episode = 192
t=48
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.5400975204741
Explore rate: 0.11463877996848804
Learning rate: 0.11463877996848804
Streaks: 18


Episode = 192
t=49
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.54406391046473
Explore rate: 0.11463877996848804
Learning rate: 0.1146387799684


Episode = 192
t=103
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.5713522367392
Explore rate: 0.11463877996848804
Learning rate: 0.11463877996848804
Streaks: 18


Episode = 192
t=104
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 96.52420831549023
Explore rate: 0.11463877996848804
Learning rate: 0.11463877996848804
Streaks: 18


Episode = 192
t=105
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.62977031077713
Explore rate: 0.11463877996848804
Learning rate: 0.11463877996848804
Streaks: 18


Episode = 192
t=106
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.62977031077713
Explore rate: 0.11463877996848804
Learning rate: 0.11463877996848804
Streaks: 18


Episode = 192
t=107
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.633633900975
Explore rate: 0.11463877996848804
Learning rate: 0.11463877996848804
Streaks: 18


Episode = 192
t=108
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 96.58972436833969
Explore rate: 0.11463877996848804
Learning rate: 0.11463877


Episode = 192
t=162
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 96.6393776618584
Explore rate: 0.11463877996848804
Learning rate: 0.11463877996848804
Streaks: 18


Episode = 192
t=163
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.6122298438146
Explore rate: 0.11463877996848804
Learning rate: 0.11463877996848804
Streaks: 18


Episode = 192
t=164
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.6122298438146
Explore rate: 0.11463877996848804
Learning rate: 0.11463877996848804
Streaks: 18


Episode = 192
t=165
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.6161135421898
Explore rate: 0.11463877996848804
Learning rate: 0.11463877996848804
Streaks: 18


Episode = 192
t=166
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 96.55703813629066
Explore rate: 0.11463877996848804
Learning rate: 0.11463877996848804
Streaks: 18


Episode = 192
t=167
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.66292581825846
Explore rate: 0.11463877996848804
Learning rate: 0.114638779


Episode = 193
t=10
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.69435312841274
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966426384
Streaks: 19


Episode = 193
t=11
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 96.66243549527499
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966426384
Streaks: 19


Episode = 193
t=12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.64371404265762
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966426384
Streaks: 19


Episode = 193
t=13
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.64371404265762
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966426384
Streaks: 19


Episode = 193
t=14
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.64748592742494
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966426384
Streaks: 19


Episode = 193
t=15
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.64748592742494
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966

t=66
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.67958729109408
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966426384
Streaks: 19


Episode = 193
t=67
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.68331886053635
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966426384
Streaks: 19


Episode = 193
t=68
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.68704623634014
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966426384
Streaks: 19


Episode = 193
t=69
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 96.66794537341951
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966426384
Streaks: 19


Episode = 193
t=70
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.69194904782834
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966426384
Streaks: 19


Episode = 193
t=71
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 96.61220815312421
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966426384
Streaks:


Episode = 193
t=115
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 96.72398692066017
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966426384
Streaks: 19


Episode = 193
t=116
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.71597976420921
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966426384
Streaks: 19


Episode = 193
t=117
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.71597976420921
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966426384
Streaks: 19


Episode = 193
t=118
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.71967043480771
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966426384
Streaks: 19


Episode = 193
t=119
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 96.64160763285686
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966426384
Streaks: 19


Episode = 193
t=120
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.74034419629422
Explore rate: 0.11238269966426384
Learning rate: 0.11238


Episode = 193
t=173
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 96.74080017443957
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966426384
Streaks: 19


Episode = 193
t=174
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.76157402143076
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966426384
Streaks: 19


Episode = 193
t=175
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.76157402143076
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966426384
Streaks: 19


Episode = 193
t=176
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.7652134519721
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966426384
Streaks: 19


Episode = 193
t=177
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.7652134519721
Explore rate: 0.11238269966426384
Learning rate: 0.11238269966426384
Streaks: 19


Episode = 193
t=178
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.76884879242316
Explore rate: 0.11238269966426384
Learning rate: 0.1123826


Episode = 194
t=31
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.80664818324729
Explore rate: 0.11013827874181159
Learning rate: 0.11013827874181159
Streaks: 20


Episode = 194
t=32
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.81016528597243
Explore rate: 0.11013827874181159
Learning rate: 0.11013827874181159
Streaks: 20


Episode = 194
t=33
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 96.77563161978847
Explore rate: 0.11013827874181159
Learning rate: 0.11013827874181159
Streaks: 20


Episode = 194
t=34
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.80740267296352
Explore rate: 0.11013827874181159
Learning rate: 0.11013827874181159
Streaks: 20


Episode = 194
t=35
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.80740267296352
Explore rate: 0.11013827874181159
Learning rate: 0.11013827874181159
Streaks: 20


Episode = 194
t=36
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.81091894470667
Explore rate: 0.11013827874181159
Learning rate: 0.11013827874


Episode = 194
t=89
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.84027841204592
Explore rate: 0.11013827874181159
Learning rate: 0.11013827874181159
Streaks: 20


Episode = 194
t=90
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.84375847501593
Explore rate: 0.11013827874181159
Learning rate: 0.11013827874181159
Streaks: 20


Episode = 194
t=91
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 96.82684740161888
Explore rate: 0.11013827874181159
Learning rate: 0.11013827874181159
Streaks: 20


Episode = 194
t=92
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.84102457194584
Explore rate: 0.11013827874181159
Learning rate: 0.11013827874181159
Streaks: 20


Episode = 194
t=93
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.84102457194584
Explore rate: 0.11013827874181159
Learning rate: 0.11013827874181159
Streaks: 20


Episode = 194
t=94
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.84450381310818
Explore rate: 0.11013827874181159
Learning rate: 0.11013827874


Episode = 194
t=148
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.85876518614039
Explore rate: 0.11013827874181159
Learning rate: 0.11013827874181159
Streaks: 20


Episode = 194
t=149
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.85876518614039
Explore rate: 0.11013827874181159
Learning rate: 0.11013827874181159
Streaks: 20


Episode = 194
t=150
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.86222488809561
Explore rate: 0.11013827874181159
Learning rate: 0.11013827874181159
Streaks: 20


Episode = 194
t=151
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 96.8534548049117
Explore rate: 0.11013827874181159
Learning rate: 0.11013827874181159
Streaks: 20


Episode = 194
t=152
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.86466018167101
Explore rate: 0.11013827874181159
Learning rate: 0.11013827874181159
Streaks: 20


Episode = 194
t=153
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 96.75472425087021
Explore rate: 0.11013827874181159
Learning rate: 0.110138

State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 96.73699831504392
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730951965
Streaks: 21


Episode = 195
t=6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.88363568361714
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730951965
Streaks: 21


Episode = 195
t=7
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.88363568361714
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730951965
Streaks: 21


Episode = 195
t=8
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.88699840891435
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730951965
Streaks: 21


Episode = 195
t=9
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 96.8712576545405
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730951965
Streaks: 21


Episode = 195
t=10
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.89167261048566
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730951965
Streaks: 21


Episode = 195



Episode = 195
t=64
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.91346373266045
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730951965
Streaks: 21


Episode = 195
t=65
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 96.84880067589349
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730951965
Streaks: 21


Episode = 195
t=66
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.95032152649422
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730951965
Streaks: 21


Episode = 195
t=67
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.95032152649422
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730951965
Streaks: 21


Episode = 195
t=68
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.95361229416773
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730951965
Streaks: 21


Episode = 195
t=69
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 96.93129630106729
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730


Episode = 195
t=123
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 96.93482573796737
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730951965
Streaks: 21


Episode = 195
t=124
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.95457356730111
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730951965
Streaks: 21


Episode = 195
t=125
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 96.9402428148296
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730951965
Streaks: 21


Episode = 195
t=126
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.9563288449093
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730951965
Streaks: 21


Episode = 195
t=127
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 96.88992402466715
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730951965
Streaks: 21


Episode = 195
t=128
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 96.97376861328958
Explore rate: 0.10790539730951965
Learning rate: 0.1079053


Episode = 195
t=179
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 96.97967265341808
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730951965
Streaks: 21


Episode = 195
t=180
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.98607657608501
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730951965
Streaks: 21


Episode = 195
t=181
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.98607657608501
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730951965
Streaks: 21


Episode = 195
t=182
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 96.98932876213019
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730951965
Streaks: 21


Episode = 195
t=183
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 96.97180358201433
Explore rate: 0.10790539730951965
Learning rate: 0.10790539730951965
Streaks: 21


Episode = 195
t=184
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.00088833950838
Explore rate: 0.10790539730951965
Learning rate: 0.10790


Episode = 196
t=28
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.02295316745133
Explore rate: 0.10568393731556158
Learning rate: 0.10568393731556158
Streaks: 22


Episode = 196
t=29
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.01152449292172
Explore rate: 0.10568393731556158
Learning rate: 0.10568393731556158
Streaks: 22


Episode = 196
t=30
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.0277174298415
Explore rate: 0.10568393731556158
Learning rate: 0.10568393731556158
Streaks: 22


Episode = 196
t=31
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.0163770515004
Explore rate: 0.10568393731556158
Learning rate: 0.10568393731556158
Streaks: 22


Episode = 196
t=32
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.02967214421439
Explore rate: 0.10568393731556158
Learning rate: 0.10568393731556158
Streaks: 22


Episode = 196
t=33
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 96.87806435168895
Explore rate: 0.10568393731556158
Learning rate: 0.1056839373155


Episode = 196
t=88
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 96.96394746588841
Explore rate: 0.10568393731556158
Learning rate: 0.10568393731556158
Streaks: 22


Episode = 196
t=89
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.04428376227244
Explore rate: 0.10568393731556158
Learning rate: 0.10568393731556158
Streaks: 22


Episode = 196
t=90
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.04428376227244
Explore rate: 0.10568393731556158
Learning rate: 0.10568393731556158
Streaks: 22


Episode = 196
t=91
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.04740747956835
Explore rate: 0.10568393731556158
Learning rate: 0.10568393731556158
Streaks: 22


Episode = 196
t=92
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.04108195076186
Explore rate: 0.10568393731556158
Learning rate: 0.10568393731556158
Streaks: 22


Episode = 196
t=93
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.05265671205137
Explore rate: 0.10568393731556158
Learning rate: 0.10568393731


Episode = 196
t=145
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.06526281567828
Explore rate: 0.10568393731556158
Learning rate: 0.10568393731556158
Streaks: 22


Episode = 196
t=146
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.06526281567828
Explore rate: 0.10568393731556158
Learning rate: 0.10568393731556158
Streaks: 22


Episode = 196
t=147
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.04334541434285
Explore rate: 0.10568393731556158
Learning rate: 0.10568393731556158
Streaks: 22


Episode = 196
t=148
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.06884496962206
Explore rate: 0.10568393731556158
Learning rate: 0.10568393731556158
Streaks: 22


Episode = 196
t=149
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.04913806778949
Explore rate: 0.10568393731556158
Learning rate: 0.10568393731556158
Streaks: 22


Episode = 196
t=150
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.06988085371881
Explore rate: 0.10568393731556158
Learning rate: 0.10568


Episode = 196
t=195
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.0820839835519
Explore rate: 0.10568393731556158
Learning rate: 0.10568393731556158
Streaks: 22


Episode = 196
t=196
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.09796395590287
Explore rate: 0.10568393731556158
Learning rate: 0.10568393731556158
Streaks: 22


Episode = 196
t=197
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 96.98709923632438
Explore rate: 0.10568393731556158
Learning rate: 0.10568393731556158
Streaks: 22


Episode = 196
t=198
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.10157036153241
Explore rate: 0.10568393731556158
Learning rate: 0.10568393731556158
Streaks: 22


Episode = 196
t=199
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.10157036153241
Explore rate: 0.10568393731556158
Learning rate: 0.10568393731556158
Streaks: 22

Episode 196 finished after 199.000000 time steps

Episode = 197
t=0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.08943148798289
Explore ra


Episode = 197
t=54
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.12908980086938
Explore rate: 0.1034737825104447
Learning rate: 0.1034737825104447
Streaks: 23


Episode = 197
t=55
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.12376641829141
Explore rate: 0.1034737825104447
Learning rate: 0.1034737825104447
Streaks: 23


Episode = 197
t=56
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.13009623159577
Explore rate: 0.1034737825104447
Learning rate: 0.1034737825104447
Streaks: 23


Episode = 197
t=57
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.13009623159577
Explore rate: 0.1034737825104447
Learning rate: 0.1034737825104447
Streaks: 23


Episode = 197
t=58
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.13306582957935
Explore rate: 0.1034737825104447
Learning rate: 0.1034737825104447
Streaks: 23


Episode = 197
t=59
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 97.03099425772389
Explore rate: 0.1034737825104447
Learning rate: 0.1034737825104447
Strea


Episode = 197
t=112
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.15598455549974
Explore rate: 0.1034737825104447
Learning rate: 0.1034737825104447
Streaks: 23


Episode = 197
t=113
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 97.04436366116796
Explore rate: 0.1034737825104447
Learning rate: 0.1034737825104447
Streaks: 23


Episode = 197
t=114
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.15615177540654
Explore rate: 0.1034737825104447
Learning rate: 0.1034737825104447
Streaks: 23


Episode = 197
t=115
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.15615177540654
Explore rate: 0.1034737825104447
Learning rate: 0.1034737825104447
Streaks: 23


Episode = 197
t=116
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.15909441273338
Explore rate: 0.1034737825104447
Learning rate: 0.1034737825104447
Streaks: 23


Episode = 197
t=117
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.15065669567959
Explore rate: 0.1034737825104447
Learning rate: 0.1034737825104447


Episode = 197
t=170
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 97.07457595181012
Explore rate: 0.1034737825104447
Learning rate: 0.1034737825104447
Streaks: 23


Episode = 197
t=171
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.15729052912242
Explore rate: 0.1034737825104447
Learning rate: 0.1034737825104447
Streaks: 23


Episode = 197
t=172
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.15729052912242
Explore rate: 0.1034737825104447
Learning rate: 0.1034737825104447
Streaks: 23


Episode = 197
t=173
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.16023198813772
Explore rate: 0.1034737825104447
Learning rate: 0.1034737825104447
Streaks: 23


Episode = 197
t=174
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.16317040351412
Explore rate: 0.1034737825104447
Learning rate: 0.1034737825104447
Streaks: 23


Episode = 197
t=175
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.14129514379268
Explore rate: 0.1034737825104447
Learning rate: 0.1034737825104447

State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.2046604371377
Explore rate: 0.10127481841050645
Learning rate: 0.10127481841050645
Streaks: 24


Episode = 198
t=29
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.22420236250986
Explore rate: 0.10127481841050645
Learning rate: 0.10127481841050645
Streaks: 24


Episode = 198
t=30
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.20945072609797
Explore rate: 0.10127481841050645
Learning rate: 0.10127481841050645
Streaks: 24


Episode = 198
t=31
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.18089208007784
Explore rate: 0.10127481841050645
Learning rate: 0.10127481841050645
Streaks: 24


Episode = 198
t=32
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.18089208007784
Explore rate: 0.10127481841050645
Learning rate: 0.10127481841050645
Streaks: 24


Episode = 198
t=33
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.18374712650454
Explore rate: 0.10127481841050645
Learning rate: 0.10127481841050645
Streaks: 24


Episode = 


Episode = 198
t=86
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.21271353203191
Explore rate: 0.10127481841050645
Learning rate: 0.10127481841050645
Streaks: 24


Episode = 198
t=87
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.21553635134093
Explore rate: 0.10127481841050645
Learning rate: 0.10127481841050645
Streaks: 24


Episode = 198
t=88
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 97.11220450167434
Explore rate: 0.10127481841050645
Learning rate: 0.10127481841050645
Streaks: 24


Episode = 198
t=89
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.21553635134093
Explore rate: 0.10127481841050645
Learning rate: 0.10127481841050645
Streaks: 24


Episode = 198
t=90
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.21553635134093
Explore rate: 0.10127481841050645
Learning rate: 0.10127481841050645
Streaks: 24


Episode = 198
t=91
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.21835631184481
Explore rate: 0.10127481841050645
Learning rate: 0.10127481841

Learning rate: 0.10127481841050645
Streaks: 24


Episode = 198
t=146
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.26293383812742
Explore rate: 0.10127481841050645
Learning rate: 0.10127481841050645
Streaks: 24


Episode = 198
t=147
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.24795226144695
Explore rate: 0.10127481841050645
Learning rate: 0.10127481841050645
Streaks: 24


Episode = 198
t=148
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.266694942182
Explore rate: 0.10127481841050645
Learning rate: 0.10127481841050645
Streaks: 24


Episode = 198
t=149
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.25261857276884
Explore rate: 0.10127481841050645
Learning rate: 0.10127481841050645
Streaks: 24


Episode = 198
t=150
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.24827961987722
Explore rate: 0.10127481841050645
Learning rate: 0.10127481841050645
Streaks: 24


Episode = 198
t=151
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.24827961987722
Explore ra

Best Q: 97.24918869460038
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=4
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.2751860691354
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=5
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.25451324598474
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=6
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.27586427357436
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=7
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 97.23053071036472
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=8
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.3028461389528
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=9
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.3028461389528
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episod

State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.29834532084047
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=64
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.31349993630062
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=65
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 97.25094343460427
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=66
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.31969615792036
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=67
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.31969615792036
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=68
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.32237646176245
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=69
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.29687054923244
Explore rate: 0.09908693226233


Episode = 199
t=123
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.31668840914077
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=124
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.30530273330847
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=125
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.3512366889138
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=126
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.3512366889138
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=127
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.35388545222489
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=128
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 97.26049901077795
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=129
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.3206595192


Episode = 199
t=184
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.37629078448988
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=185
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.35577810255289
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=186
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.35577810255289
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=187
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 97.29334312995037
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=188
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.33593516057222
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=189
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.33593516057222
Explore rate: 0.099086932262331
Learning rate: 0.1
Streaks: 25


Episode = 199
t=190
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.33859922

t=40
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.36740977479036
Explore rate: 0.09691001300805646
Learning rate: 0.1
Streaks: 26


Episode = 200
t=41
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.36740977479036
Explore rate: 0.09691001300805646
Learning rate: 0.1
Streaks: 26


Episode = 200
t=42
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.37004236501556
Explore rate: 0.09691001300805646
Learning rate: 0.1
Streaks: 26


Episode = 200
t=43
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.36317098153773
Explore rate: 0.09691001300805646
Learning rate: 0.1
Streaks: 26


Episode = 200
t=44
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.37435901755772
Explore rate: 0.09691001300805646
Learning rate: 0.1
Streaks: 26


Episode = 200
t=45
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.36691542612218
Explore rate: 0.09691001300805646
Learning rate: 0.1
Streaks: 26


Episode = 200
t=46
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.3968159156683
Expl


Episode = 200
t=99
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.40257421435506
Explore rate: 0.09691001300805646
Learning rate: 0.1
Streaks: 26


Episode = 200
t=100
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.39618336967625
Explore rate: 0.09691001300805646
Learning rate: 0.1
Streaks: 26


Episode = 200
t=101
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.40601770239184
Explore rate: 0.09691001300805646
Learning rate: 0.1
Streaks: 26


Episode = 200
t=102
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.40601770239184
Explore rate: 0.09691001300805646
Learning rate: 0.1
Streaks: 26


Episode = 200
t=103
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.40861168468945
Explore rate: 0.09691001300805646
Learning rate: 0.1
Streaks: 26


Episode = 200
t=104
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.38570317045378
Explore rate: 0.09691001300805646
Learning rate: 0.1
Streaks: 26


Episode = 200
t=105
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 


Episode = 200
t=158
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.44151805101144
Explore rate: 0.09691001300805646
Learning rate: 0.1
Streaks: 26


Episode = 200
t=159
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.44151805101144
Explore rate: 0.09691001300805646
Learning rate: 0.1
Streaks: 26


Episode = 200
t=160
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 97.31677224453765
Explore rate: 0.09691001300805646
Learning rate: 0.1
Streaks: 26


Episode = 200
t=161
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.43277413010948
Explore rate: 0.09691001300805646
Learning rate: 0.1
Streaks: 26


Episode = 200
t=162
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.43277413010948
Explore rate: 0.09691001300805646
Learning rate: 0.1
Streaks: 26


Episode = 200
t=163
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.43534135597938
Explore rate: 0.09691001300805646
Learning rate: 0.1
Streaks: 26


Episode = 200
t=164
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q:


Episode = 201
t=16
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.45172618959032
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=17
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.44287887117167
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=18
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.45339857887728
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=19
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.44647744336336
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=20
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.44765939073146
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=21
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.44765939073146
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=22
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.450


Episode = 201
t=76
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.47525626296016
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=77
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.47525626296016
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=78
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.4777810066972
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=79
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.45316810137463
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=80
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.48013654515755
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=81
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.45838480920776
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=82
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.4805


Episode = 201
t=134
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.5044429788045
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=135
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.489124826535
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=136
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.50715918211128
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=137
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.50715918211128
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=138
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.50965202292917
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=139
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.49324050619681
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=140
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97

Episode = 201
t=192
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.53816976209617
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=193
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.50931362522728
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=194
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.5382949175885
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=195
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.5382949175885
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=196
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.54075662267091
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=197
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.52041772320408
Explore rate: 0.09474395125154877
Learning rate: 0.1
Streaks: 27


Episode = 201
t=198
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97

Best Q: 97.5688331063483
Explore rate: 0.09258863922541383
Learning rate: 0.1
Streaks: 28


Episode = 202
t=50
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.55599418427276
Explore rate: 0.09258863922541383
Learning rate: 0.1
Streaks: 28


Episode = 202
t=51
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.54181895899342
Explore rate: 0.09258863922541383
Learning rate: 0.1
Streaks: 28


Episode = 202
t=52
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.54181895899342
Explore rate: 0.09258863922541383
Learning rate: 0.1
Streaks: 28


Episode = 202
t=53
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.54427714003442
Explore rate: 0.09258863922541383
Learning rate: 0.1
Streaks: 28


Episode = 202
t=54
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.52521330263087
Explore rate: 0.09258863922541383
Learning rate: 0.1
Streaks: 28


Episode = 202
t=55
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.5470556935654
Explore rate: 0.09258863922541383
Learning rate: 0.1


Episode = 202
t=106
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.56858939439877
Explore rate: 0.09258863922541383
Learning rate: 0.1
Streaks: 28


Episode = 202
t=107
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 97.47479549310815
Explore rate: 0.09258863922541383
Learning rate: 0.1
Streaks: 28


Episode = 202
t=108
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.57175966094142
Explore rate: 0.09258863922541383
Learning rate: 0.1
Streaks: 28


Episode = 202
t=109
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.57175966094142
Explore rate: 0.09258863922541383
Learning rate: 0.1
Streaks: 28


Episode = 202
t=110
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.57418790128048
Explore rate: 0.09258863922541383
Learning rate: 0.1
Streaks: 28


Episode = 202
t=111
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.56552925752527
Explore rate: 0.09258863922541383
Learning rate: 0.1
Streaks: 28


Episode = 202
t=112
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q:


Episode = 202
t=164
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.60406049569397
Explore rate: 0.09258863922541383
Learning rate: 0.1
Streaks: 28


Episode = 202
t=165
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.58714209121794
Explore rate: 0.09258863922541383
Learning rate: 0.1
Streaks: 28


Episode = 202
t=166
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.60693785870903
Explore rate: 0.09258863922541383
Learning rate: 0.1
Streaks: 28


Episode = 202
t=167
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.59151473010834
Explore rate: 0.09258863922541383
Learning rate: 0.1
Streaks: 28


Episode = 202
t=168
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.59502338090778
Explore rate: 0.09258863922541383
Learning rate: 0.1
Streaks: 28


Episode = 202
t=169
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.59502338090778
Explore rate: 0.09258863922541383
Learning rate: 0.1
Streaks: 28


Episode = 202
t=170
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q:


Episode = 203
t=23
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.61167291038218
Explore rate: 0.09044397075882471
Learning rate: 0.1
Streaks: 29


Episode = 203
t=24
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.62393433640493
Explore rate: 0.09044397075882471
Learning rate: 0.1
Streaks: 29


Episode = 203
t=25
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.62393433640493
Explore rate: 0.09044397075882471
Learning rate: 0.1
Streaks: 29


Episode = 203
t=26
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.62631040206853
Explore rate: 0.09044397075882471
Learning rate: 0.1
Streaks: 29


Episode = 203
t=27
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.60521564556845
Explore rate: 0.09044397075882471
Learning rate: 0.1
Streaks: 29


Episode = 203
t=28
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 97.5376109269742
Explore rate: 0.09044397075882471
Learning rate: 0.1
Streaks: 29


Episode = 203
t=29
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 97.5376


Episode = 203
t=85
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.66351205977725
Explore rate: 0.09044397075882471
Learning rate: 0.1
Streaks: 29


Episode = 203
t=86
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.66351205977725
Explore rate: 0.09044397075882471
Learning rate: 0.1
Streaks: 29


Episode = 203
t=87
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.66584854771747
Explore rate: 0.09044397075882471
Learning rate: 0.1
Streaks: 29


Episode = 203
t=88
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.63922551648264
Explore rate: 0.09044397075882471
Learning rate: 0.1
Streaks: 29


Episode = 203
t=89
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.64235885161219
Explore rate: 0.09044397075882471
Learning rate: 0.1
Streaks: 29


Episode = 203
t=90
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.64235885161219
Explore rate: 0.09044397075882471
Learning rate: 0.1
Streaks: 29


Episode = 203
t=91
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.644

State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.66831253231793
Explore rate: 0.09044397075882471
Learning rate: 0.1
Streaks: 29


Episode = 203
t=144
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.66097629085539
Explore rate: 0.09044397075882471
Learning rate: 0.1
Streaks: 29


Episode = 203
t=145
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.67520171708037
Explore rate: 0.09044397075882471
Learning rate: 0.1
Streaks: 29


Episode = 203
t=146
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.67520171708037
Explore rate: 0.09044397075882471
Learning rate: 0.1
Streaks: 29


Episode = 203
t=147
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.67752651536328
Explore rate: 0.09044397075882471
Learning rate: 0.1
Streaks: 29


Episode = 203
t=148
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.6600161586926
Explore rate: 0.09044397075882471
Learning rate: 0.1
Streaks: 29


Episode = 203
t=149
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.67201645060173
Explore rate:

State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.67766803023027
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=2
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.69083936371362
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=3
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.68129432421489
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=4
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.69220356543953
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=5
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.68469304477192
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.69921679189758
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=7
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.69921679189758
Explore rate: 0.08830984

Learning rate: 0.1
Streaks: 30


Episode = 204
t=62
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.70068797493371
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=63
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.7160226083328
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=64
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.71665333840227
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=65
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.71665333840227
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=66
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.71893668506387
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=67
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.71167684429508
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=68
Action: 0
State: (0, 0, 3

Episode = 204
t=119
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.73787507597613
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=120
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.73787507597613
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=121
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.74013720090016
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=122
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.7263358056204
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=123
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.74306460208575
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=124
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.74306460208575
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=125
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 9

State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.75828783375314
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=178
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.7605295459194
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=179
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.75273332529412
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=180
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.76401271394025
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=181
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 97.68083605724749
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=182
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.75211238930581
Explore rate: 0.08830984124613883
Learning rate: 0.1
Streaks: 30


Episode = 204
t=183
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.75211238930581
Explore rate:


Episode = 205
t=35
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.75607924448695
Explore rate: 0.08618614761628329
Learning rate: 0.1
Streaks: 31


Episode = 205
t=36
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.75470068398842
Explore rate: 0.08618614761628329
Learning rate: 0.1
Streaks: 31


Episode = 205
t=37
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.75818668775311
Explore rate: 0.08618614761628329
Learning rate: 0.1
Streaks: 31


Episode = 205
t=38
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.75729109767714
Explore rate: 0.08618614761628329
Learning rate: 0.1
Streaks: 31


Episode = 205
t=39
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.76033983764783
Explore rate: 0.08618614761628329
Learning rate: 0.1
Streaks: 31


Episode = 205
t=40
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.77831320763447
Explore rate: 0.08618614761628329
Learning rate: 0.1
Streaks: 31


Episode = 205
t=41
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.778

Episode = 205
t=93
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.7907935416905
Explore rate: 0.08618614761628329
Learning rate: 0.1
Streaks: 31


Episode = 205
t=94
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 97.72398371907167
Explore rate: 0.08618614761628329
Learning rate: 0.1
Streaks: 31


Episode = 205
t=95
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 97.72398371907167
Explore rate: 0.08618614761628329
Learning rate: 0.1
Streaks: 31


Episode = 205
t=96
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.77806827371813
Explore rate: 0.08618614761628329
Learning rate: 0.1
Streaks: 31


Episode = 205
t=97
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 97.73366252091543
Explore rate: 0.08618614761628329
Learning rate: 0.1
Streaks: 31


Episode = 205
t=98
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.77806827371813
Explore rate: 0.08618614761628329
Learning rate: 0.1
Streaks: 31


Episode = 205
t=99
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.77806


Episode = 205
t=151
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.790461977945
Explore rate: 0.08618614761628329
Learning rate: 0.1
Streaks: 31


Episode = 205
t=152
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 97.7471449746601
Explore rate: 0.08618614761628329
Learning rate: 0.1
Streaks: 31


Episode = 205
t=153
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.790461977945
Explore rate: 0.08618614761628329
Learning rate: 0.1
Streaks: 31


Episode = 205
t=154
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.790461977945
Explore rate: 0.08618614761628329
Learning rate: 0.1
Streaks: 31


Episode = 205
t=155
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.79267151596706
Explore rate: 0.08618614761628329
Learning rate: 0.1
Streaks: 31


Episode = 205
t=156
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.79487884445109
Explore rate: 0.08618614761628329
Learning rate: 0.1
Streaks: 31


Episode = 205
t=157
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.790

Episode = 206
t=9
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.81850297355822
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=10
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.82470470791031
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=11
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.82470470791031
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=12
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.8268800032024
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=13
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.82210252567054
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=14
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.83053596092067
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=15
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.841164

State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.84619653876075
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=67
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.84619653876075
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=68
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.84835034222199
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=69
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.85050199187977
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=70
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.8461000901237
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=71
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.85415024982134
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=72
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.84905095584364
Explore rate: 0.084

Episode = 206
t=122
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.86680699036533
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=123
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.86479059710254
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=124
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.87066043415062
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=125
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 97.80292334580176
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=126
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.86279618967743
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=127
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.86279618967743
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=128
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 

State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.88120255536538
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=179
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.87088120924594
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=180
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.8906633513045
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=181
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.8906633513045
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=182
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.8927726879532
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=183
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 97.81940204029559
Explore rate: 0.08407278830288423
Learning rate: 0.1
Streaks: 32


Episode = 206
t=184
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.88229953954419
Explore rate: 0


Episode = 207
t=36
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.90705715681983
Explore rate: 0.08196966321511989
Learning rate: 0.1
Streaks: 33


Episode = 207
t=37
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.9037037833432
Explore rate: 0.08196966321511989
Learning rate: 0.1
Streaks: 33


Episode = 207
t=38
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.91070176424769
Explore rate: 0.08196966321511989
Learning rate: 0.1
Streaks: 33


Episode = 207
t=39
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.9064928796694
Explore rate: 0.08196966321511989
Learning rate: 0.1
Streaks: 33


Episode = 207
t=40
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.91237438291019
Explore rate: 0.08196966321511989
Learning rate: 0.1
Streaks: 33


Episode = 207
t=41
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.90916865561057
Explore rate: 0.08196966321511989
Learning rate: 0.1
Streaks: 33


Episode = 207
t=42
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.90303

Reward: 1.0
Best Q: 97.92232721533856
Explore rate: 0.08196966321511989
Learning rate: 0.1
Streaks: 33


Episode = 207
t=93
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.92232721533856
Explore rate: 0.08196966321511989
Learning rate: 0.1
Streaks: 33


Episode = 207
t=94
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.92440488812322
Explore rate: 0.08196966321511989
Learning rate: 0.1
Streaks: 33


Episode = 207
t=95
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.91030463314735
Explore rate: 0.08196966321511989
Learning rate: 0.1
Streaks: 33


Episode = 207
t=96
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.93080557245685
Explore rate: 0.08196966321511989
Learning rate: 0.1
Streaks: 33


Episode = 207
t=97
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.93080557245685
Explore rate: 0.08196966321511989
Learning rate: 0.1
Streaks: 33


Episode = 207
t=98
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.9328747668844
Explore rate: 0.08196966321511989
Learn


Episode = 207
t=153
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.94546444999611
Explore rate: 0.08196966321511989
Learning rate: 0.1
Streaks: 33


Episode = 207
t=154
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.95804401544392
Explore rate: 0.08196966321511989
Learning rate: 0.1
Streaks: 33


Episode = 207
t=155
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 97.88718670099419
Explore rate: 0.08196966321511989
Learning rate: 0.1
Streaks: 33


Episode = 207
t=156
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.95333406038847
Explore rate: 0.08196966321511989
Learning rate: 0.1
Streaks: 33


Episode = 207
t=157
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 97.89584810287323
Explore rate: 0.08196966321511989
Learning rate: 0.1
Streaks: 33


Episode = 207
t=158
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.95333406038847
Explore rate: 0.08196966321511989
Learning rate: 0.1
Streaks: 33


Episode = 207
t=159
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q:


Episode = 208
t=11
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.94172895562005
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=12
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 97.89626201538218
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=13
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 97.89626201538218
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=14
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.9737229276496
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=15
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.9737229276496
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=16
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.97574920472195
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=17
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 97.97130


Episode = 208
t=70
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.94712380577755
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=71
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.97580787341631
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=72
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.95201640466802
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=73
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.99793790159386
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=74
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 97.93751845233214
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=75
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.99793790159386
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=76
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 97.997


Episode = 208
t=127
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.02193985911038
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=128
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.00138638676239
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=129
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.99347722540264
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=130
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.99347722540264
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=131
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 97.99548374817724
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=132
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 97.98314427513532
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=133
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q:


Episode = 208
t=186
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.04319320614542
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=187
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 97.9450424783191
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=188
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.01430952041491
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=189
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.01430952041491
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=190
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.0162952108945
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=191
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.01827891568361
Explore rate: 0.07987667370927609
Learning rate: 0.1
Streaks: 34


Episode = 208
t=192
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 9


Episode = 209
t=44
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.05255917322289
Explore rate: 0.0777937225609836
Learning rate: 0.1
Streaks: 35


Episode = 209
t=45
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.0426697111244
Explore rate: 0.0777937225609836
Learning rate: 0.1
Streaks: 35


Episode = 209
t=46
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.05528025404602
Explore rate: 0.0777937225609836
Learning rate: 0.1
Streaks: 35


Episode = 209
t=47
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 97.96335444694505
Explore rate: 0.0777937225609836
Learning rate: 0.1
Streaks: 35


Episode = 209
t=48
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.03679092244197
Explore rate: 0.0777937225609836
Learning rate: 0.1
Streaks: 35


Episode = 209
t=49
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.03679092244197
Explore rate: 0.0777937225609836
Learning rate: 0.1
Streaks: 35


Episode = 209
t=50
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.0387541315


Episode = 209
t=102
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.05741069848035
Explore rate: 0.0777937225609836
Learning rate: 0.1
Streaks: 35


Episode = 209
t=103
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.05891946603239
Explore rate: 0.0777937225609836
Learning rate: 0.1
Streaks: 35


Episode = 209
t=104
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.05891946603239
Explore rate: 0.0777937225609836
Learning rate: 0.1
Streaks: 35


Episode = 209
t=105
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.06086054656636
Explore rate: 0.0777937225609836
Learning rate: 0.1
Streaks: 35


Episode = 209
t=106
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.04596595159013
Explore rate: 0.0777937225609836
Learning rate: 0.1
Streaks: 35


Episode = 209
t=107
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.06307034662524
Explore rate: 0.0777937225609836
Learning rate: 0.1
Streaks: 35


Episode = 209
t=108
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.03


Episode = 209
t=159
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.08287516041148
Explore rate: 0.0777937225609836
Learning rate: 0.1
Streaks: 35


Episode = 209
t=160
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.08287516041148
Explore rate: 0.0777937225609836
Learning rate: 0.1
Streaks: 35


Episode = 209
t=161
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.08479228525107
Explore rate: 0.0777937225609836
Learning rate: 0.1
Streaks: 35


Episode = 209
t=162
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.07914846634628
Explore rate: 0.0777937225609836
Learning rate: 0.1
Streaks: 35


Episode = 209
t=163
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.08787244183752
Explore rate: 0.0777937225609836
Learning rate: 0.1
Streaks: 35


Episode = 209
t=164
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.08193299145357
Explore rate: 0.0777937225609836
Learning rate: 0.1
Streaks: 35


Episode = 209
t=165
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.07


Episode = 210
t=17
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.08975745679336
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=18
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.10892303721226
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=19
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.10892303721226
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=20
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.11081411417506
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=21
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.08441190682791
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=22
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.103332532421
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=23
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.10333253242


Episode = 210
t=78
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.12773496193871
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=79
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 98.02191276430851
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=80
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.1224700646566
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=81
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.1224700646566
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=82
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.12434759459194
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=83
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.11013675159103
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=84
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.11913082941

State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.13249104299291
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=137
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.13249104299291
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=138
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.13435855194992
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=139
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.09663777727714
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=140
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.14163172730407
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=141
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.14163172730407
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=142
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.14349009557677
Explore rate: 0.07

Best Q: 98.15165336508113
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=195
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.15165336508113
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=196
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.15350171171605
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=197
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.1418224762715
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=198
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.15774385614888
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36


Episode = 210
t=199
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.15774385614888
Explore rate: 0.0757207139381183
Learning rate: 0.1
Streaks: 36

Episode 210 finished after 199.000000 time steps

Episode = 211
t=0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.15585381415478
Expl


Episode = 211
t=53
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.17546349185778
Explore rate: 0.07365755337434499
Learning rate: 0.1
Streaks: 37


Episode = 211
t=54
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.17546349185778
Explore rate: 0.07365755337434499
Learning rate: 0.1
Streaks: 37


Episode = 211
t=55
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.17728802836592
Explore rate: 0.07365755337434499
Learning rate: 0.1
Streaks: 37


Episode = 211
t=56
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.16673689318596
Explore rate: 0.07365755337434499
Learning rate: 0.1
Streaks: 37


Episode = 211
t=57
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.17093425123632
Explore rate: 0.07365755337434499
Learning rate: 0.1
Streaks: 37


Episode = 211
t=58
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.17093425123632
Explore rate: 0.07365755337434499
Learning rate: 0.1
Streaks: 37


Episode = 211
t=59
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.172


Episode = 211
t=110
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.19454835694876
Explore rate: 0.07365755337434499
Learning rate: 0.1
Streaks: 37


Episode = 211
t=111
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.19454835694876
Explore rate: 0.07365755337434499
Learning rate: 0.1
Streaks: 37


Episode = 211
t=112
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.19635380859181
Explore rate: 0.07365755337434499
Learning rate: 0.1
Streaks: 37


Episode = 211
t=113
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.1870359272828
Explore rate: 0.07365755337434499
Learning rate: 0.1
Streaks: 37


Episode = 211
t=114
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.18124026251492
Explore rate: 0.07365755337434499
Learning rate: 0.1
Streaks: 37


Episode = 211
t=115
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.18124026251492
Explore rate: 0.07365755337434499
Learning rate: 0.1
Streaks: 37


Episode = 211
t=116
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 


Episode = 211
t=170
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.20784844597375
Explore rate: 0.07365755337434499
Learning rate: 0.1
Streaks: 37


Episode = 211
t=171
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.20784844597375
Explore rate: 0.07365755337434499
Learning rate: 0.1
Streaks: 37


Episode = 211
t=172
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.20964059752778
Explore rate: 0.07365755337434499
Learning rate: 0.1
Streaks: 37


Episode = 211
t=173
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.20178554466486
Explore rate: 0.07365755337434499
Learning rate: 0.1
Streaks: 37


Episode = 211
t=174
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.19964982046532
Explore rate: 0.07365755337434499
Learning rate: 0.1
Streaks: 37


Episode = 211
t=175
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.19964982046532
Explore rate: 0.07365755337434499
Learning rate: 0.1
Streaks: 37


Episode = 211
t=176
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q:

State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.22169650851384
Explore rate: 0.07160414774328616
Learning rate: 0.1
Streaks: 38


Episode = 212
t=29
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.21212394129537
Explore rate: 0.07160414774328616
Learning rate: 0.1
Streaks: 38


Episode = 212
t=30
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.22390750577128
Explore rate: 0.07160414774328616
Learning rate: 0.1
Streaks: 38


Episode = 212
t=31
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.22390750577128
Explore rate: 0.07160414774328616
Learning rate: 0.1
Streaks: 38


Episode = 212
t=32
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.2256835982655
Explore rate: 0.07160414774328616
Learning rate: 0.1
Streaks: 38


Episode = 212
t=33
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.21617044896429
Explore rate: 0.07160414774328616
Learning rate: 0.1
Streaks: 38


Episode = 212
t=34
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.22412760099304
Explore rate: 0.071


Episode = 212
t=86
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.24456377981387
Explore rate: 0.07160414774328616
Learning rate: 0.1
Streaks: 38


Episode = 212
t=87
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.24456377981387
Explore rate: 0.07160414774328616
Learning rate: 0.1
Streaks: 38


Episode = 212
t=88
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.24631921603407
Explore rate: 0.07160414774328616
Learning rate: 0.1
Streaks: 38


Episode = 212
t=89
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.1760383207449
Explore rate: 0.07160414774328616
Learning rate: 0.1
Streaks: 38


Episode = 212
t=90
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.24516251319612
Explore rate: 0.07160414774328616
Learning rate: 0.1
Streaks: 38


Episode = 212
t=91
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.24516251319612
Explore rate: 0.07160414774328616
Learning rate: 0.1
Streaks: 38


Episode = 212
t=92
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.2469


Episode = 212
t=146
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.25858270783493
Explore rate: 0.07160414774328616
Learning rate: 0.1
Streaks: 38


Episode = 212
t=147
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.25858270783493
Explore rate: 0.07160414774328616
Learning rate: 0.1
Streaks: 38


Episode = 212
t=148
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.2603241251271
Explore rate: 0.07160414774328616
Learning rate: 0.1
Streaks: 38


Episode = 212
t=149
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.24681525753637
Explore rate: 0.07160414774328616
Learning rate: 0.1
Streaks: 38


Episode = 212
t=150
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.2554665042947
Explore rate: 0.07160414774328616
Learning rate: 0.1
Streaks: 38


Episode = 212
t=151
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.2554665042947
Explore rate: 0.07160414774328616
Learning rate: 0.1
Streaks: 38


Episode = 212
t=152
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98

Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.27620122059233
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=5
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.27620122059233
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=6
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.27792501937174
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=7
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.264123654326
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=8
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.2798306266954
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=9
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 98.21217819901968
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.26406962921419
Explore rate: 0.


Episode = 213
t=64
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.28609874096942
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=65
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.28609874096942
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=66
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.28781264222846
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=67
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.2818235729061
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=68
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.29175826559947
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=69
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.29175826559947
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=70
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.2934


Episode = 213
t=124
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.30882323031496
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=125
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.29571969215044
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=126
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.30526843969106
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=127
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.30526843969106
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=128
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.30696317125137
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=129
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.30161613160797
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=130
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q:

Best Q: 98.31221957651303
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=182
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.32064008332891
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=183
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.26508113585533
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=184
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.31669352303533
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=185
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.31669352303533
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=186
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.31837682951229
Explore rate: 0.06956040523329987
Learning rate: 0.1
Streaks: 39


Episode = 213
t=187
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.31519726089768
Explore rate: 0.06956040523329987
Learning r


Episode = 214
t=40
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.33440555615361
Explore rate: 0.06752623532284674
Learning rate: 0.1
Streaks: 40


Episode = 214
t=41
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.33440555615361
Explore rate: 0.06752623532284674
Learning rate: 0.1
Streaks: 40


Episode = 214
t=42
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.33607115059746
Explore rate: 0.06752623532284674
Learning rate: 0.1
Streaks: 40


Episode = 214
t=43
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.32578821591065
Explore rate: 0.06752623532284674
Learning rate: 0.1
Streaks: 40


Episode = 214
t=44
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.34209496311847
Explore rate: 0.06752623532284674
Learning rate: 0.1
Streaks: 40


Episode = 214
t=45
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.34209496311847
Explore rate: 0.06752623532284674
Learning rate: 0.1
Streaks: 40


Episode = 214
t=46
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.343


Episode = 214
t=101
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.34384678231852
Explore rate: 0.06752623532284674
Learning rate: 0.1
Streaks: 40


Episode = 214
t=102
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.35513400134559
Explore rate: 0.06752623532284674
Learning rate: 0.1
Streaks: 40


Episode = 214
t=103
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.35513400134559
Explore rate: 0.06752623532284674
Learning rate: 0.1
Streaks: 40


Episode = 214
t=104
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.35677886734425
Explore rate: 0.06752623532284674
Learning rate: 0.1
Streaks: 40


Episode = 214
t=105
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.3494251708068
Explore rate: 0.06752623532284674
Learning rate: 0.1
Streaks: 40


Episode = 214
t=106
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.35711031117053
Explore rate: 0.06752623532284674
Learning rate: 0.1
Streaks: 40


Episode = 214
t=107
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 

State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 98.29602416696375
Explore rate: 0.06752623532284674
Learning rate: 0.1
Streaks: 40


Episode = 214
t=161
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.36430113638995
Explore rate: 0.06752623532284674
Learning rate: 0.1
Streaks: 40


Episode = 214
t=162
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.36430113638995
Explore rate: 0.06752623532284674
Learning rate: 0.1
Streaks: 40


Episode = 214
t=163
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.36593683525356
Explore rate: 0.06752623532284674
Learning rate: 0.1
Streaks: 40


Episode = 214
t=164
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.35927992270642
Explore rate: 0.06752623532284674
Learning rate: 0.1
Streaks: 40


Episode = 214
t=165
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.3660776611705
Explore rate: 0.06752623532284674
Learning rate: 0.1
Streaks: 40


Episode = 214
t=166
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.3660776611705
Explore rate: 


Episode = 215
t=19
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.37934146376779
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=20
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.38096212230403
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=21
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.38087724969816
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=22
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.37737019193159
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=23
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.37737019193159
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=24
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.37899282173966
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=25
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.328


Episode = 215
t=79
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.39172172399536
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=80
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.39333000227137
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=81
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.34341428870671
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=82
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.40215842375237
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=83
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.40215842375237
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=84
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.40375626532861
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=85
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.396


Episode = 215
t=137
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.41122685918543
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=138
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.41281563232624
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=139
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.41440281669392
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=140
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.40806428979133
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=141
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.41678793717885
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=142
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.40262982868448
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=143
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q:

Streaks: 41


Episode = 215
t=195
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.43120939790653
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=196
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.43277818850862
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=197
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.41465781780329
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=198
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.43396199325062
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41


Episode = 215
t=199
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.43396199325062
Explore rate: 0.06550154875643233
Learning rate: 0.1
Streaks: 41

Episode 215 finished after 199.000000 time steps

Episode = 216
t=0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.43552803125736
Explore rate: 0.0634862575211067
Learning rate: 0.1
Streaks: 42


Episode = 



Episode = 216
t=53
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.4415084578646
Explore rate: 0.0634862575211067
Learning rate: 0.1
Streaks: 42


Episode = 216
t=54
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.44306694940674
Explore rate: 0.0634862575211067
Learning rate: 0.1
Streaks: 42


Episode = 216
t=55
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.43262850218133
Explore rate: 0.0634862575211067
Learning rate: 0.1
Streaks: 42


Episode = 216
t=56
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.43262850218133
Explore rate: 0.0634862575211067
Learning rate: 0.1
Streaks: 42


Episode = 216
t=57
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.3930873825865
Explore rate: 0.0634862575211067
Learning rate: 0.1
Streaks: 42


Episode = 216
t=58
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.43836422793645
Explore rate: 0.0634862575211067
Learning rate: 0.1
Streaks: 42


Episode = 216
t=59
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.4383642279

Learning rate: 0.1
Streaks: 42


Episode = 216
t=111
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.45372281010096
Explore rate: 0.0634862575211067
Learning rate: 0.1
Streaks: 42


Episode = 216
t=112
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.45372281010096
Explore rate: 0.0634862575211067
Learning rate: 0.1
Streaks: 42


Episode = 216
t=113
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.45526908729086
Explore rate: 0.0634862575211067
Learning rate: 0.1
Streaks: 42


Episode = 216
t=114
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.44818876169907
Explore rate: 0.0634862575211067
Learning rate: 0.1
Streaks: 42


Episode = 216
t=115
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.44480603244195
Explore rate: 0.0634862575211067
Learning rate: 0.1
Streaks: 42


Episode = 216
t=116
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.41002675124216
Explore rate: 0.0634862575211067
Learning rate: 0.1
Streaks: 42


Episode = 216
t=117
Action: 1
State: (0, 0,


Episode = 216
t=170
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.45659495224288
Explore rate: 0.0634862575211067
Learning rate: 0.1
Streaks: 42


Episode = 216
t=171
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.45813835729064
Explore rate: 0.0634862575211067
Learning rate: 0.1
Streaks: 42


Episode = 216
t=172
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.4473904479696
Explore rate: 0.0634862575211067
Learning rate: 0.1
Streaks: 42


Episode = 216
t=173
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.460003851389
Explore rate: 0.0634862575211067
Learning rate: 0.1
Streaks: 42


Episode = 216
t=174
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.45019178446015
Explore rate: 0.0634862575211067
Learning rate: 0.1
Streaks: 42


Episode = 216
t=175
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.47404002081339
Explore rate: 0.0634862575211067
Learning rate: 0.1
Streaks: 42


Episode = 216
t=176
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.47404

State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.47555021742573
Explore rate: 0.06148027482350815
Learning rate: 0.1
Streaks: 43


Episode = 217
t=29
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.47555021742573
Explore rate: 0.06148027482350815
Learning rate: 0.1
Streaks: 43


Episode = 217
t=30
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.47707466720831
Explore rate: 0.06148027482350815
Learning rate: 0.1
Streaks: 43


Episode = 217
t=31
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.46495071252193
Explore rate: 0.06148027482350815
Learning rate: 0.1
Streaks: 43


Episode = 217
t=32
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.48858460464007
Explore rate: 0.06148027482350815
Learning rate: 0.1
Streaks: 43


Episode = 217
t=33
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.48858460464007
Explore rate: 0.06148027482350815
Learning rate: 0.1
Streaks: 43


Episode = 217
t=34
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.49009602003542
Explore rate: 0.06

Episode = 217
t=83
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.44961768343326
Explore rate: 0.06148027482350815
Learning rate: 0.1
Streaks: 43


Episode = 217
t=84
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.49184035291762
Explore rate: 0.06148027482350815
Learning rate: 0.1
Streaks: 43


Episode = 217
t=85
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.49184035291762
Explore rate: 0.06148027482350815
Learning rate: 0.1
Streaks: 43


Episode = 217
t=86
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.49334851256471
Explore rate: 0.06148027482350815
Learning rate: 0.1
Streaks: 43


Episode = 217
t=87
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.49485516405214
Explore rate: 0.06148027482350815
Learning rate: 0.1
Streaks: 43


Episode = 217
t=88
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.4849897567435
Explore rate: 0.06148027482350815
Learning rate: 0.1
Streaks: 43


Episode = 217
t=89
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.48498


Episode = 217
t=145
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.50853905577675
Explore rate: 0.06148027482350815
Learning rate: 0.1
Streaks: 43


Episode = 217
t=146
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.51003051672097
Explore rate: 0.06148027482350815
Learning rate: 0.1
Streaks: 43


Episode = 217
t=147
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.46197414180338
Explore rate: 0.06148027482350815
Learning rate: 0.1
Streaks: 43


Episode = 217
t=148
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.51314610065941
Explore rate: 0.06148027482350815
Learning rate: 0.1
Streaks: 43


Episode = 217
t=149
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.51314610065941
Explore rate: 0.06148027482350815
Learning rate: 0.1
Streaks: 43


Episode = 217
t=150
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.51463295455875
Explore rate: 0.06148027482350815
Learning rate: 0.1
Streaks: 43


Episode = 217
t=151
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q:


Episode = 218
t=3
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.52639089008638
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.52786449919628
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=5
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.52933663469709
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=6
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.52933663469709
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=7
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.51523668453217
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=8
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.5173990536643
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=9
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.51739905366


Episode = 218
t=60
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.53139769459833
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=61
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.53139769459833
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=62
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.53286629690373
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=63
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.52333497135133
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=64
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.53471024970993
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=65
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.52593778893748
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=66
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.535


Episode = 218
t=118
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.54911507066046
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=119
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.54155109954111
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=120
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.5511229188854
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=121
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.54395715855665
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=122
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.54970552321949
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=123
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.54970552321949
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=124
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 

State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.5251268755168
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=175
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.55928909215041
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=176
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.55928909215041
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=177
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.56072980305825
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=178
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.54963429312751
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=179
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.5623659609493
Explore rate: 0.05948351506743277
Learning rate: 0.1
Streaks: 44


Episode = 218
t=180
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.55234509394874
Explore rate: 

Explore rate: 0.05749589383191933
Learning rate: 0.1
Streaks: 45


Episode = 219
t=32
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.57417757481731
Explore rate: 0.05749589383191933
Learning rate: 0.1
Streaks: 45


Episode = 219
t=33
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.57417757481731
Explore rate: 0.05749589383191933
Learning rate: 0.1
Streaks: 45


Episode = 219
t=34
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.5756033972425
Explore rate: 0.05749589383191933
Learning rate: 0.1
Streaks: 45


Episode = 219
t=35
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.56986713339248
Explore rate: 0.05749589383191933
Learning rate: 0.1
Streaks: 45


Episode = 219
t=36
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.57284653172897
Explore rate: 0.05749589383191933
Learning rate: 0.1
Streaks: 45


Episode = 219
t=37
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.57284653172897
Explore rate: 0.05749589383191933
Learning rate: 0.1
Streaks: 45


Episode = 


Episode = 219
t=89
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.58059193939798
Explore rate: 0.05749589383191933
Learning rate: 0.1
Streaks: 45


Episode = 219
t=90
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.58692043360823
Explore rate: 0.05749589383191933
Learning rate: 0.1
Streaks: 45


Episode = 219
t=91
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.58692043360823
Explore rate: 0.05749589383191933
Learning rate: 0.1
Streaks: 45


Episode = 219
t=92
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.58833351317462
Explore rate: 0.05749589383191933
Learning rate: 0.1
Streaks: 45


Episode = 219
t=93
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.58236475901707
Explore rate: 0.05749589383191933
Learning rate: 0.1
Streaks: 45


Episode = 219
t=94
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.58944494527199
Explore rate: 0.05749589383191933
Learning rate: 0.1
Streaks: 45


Episode = 219
t=95
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.589

Best Q: 98.61013025107536
Explore rate: 0.05749589383191933
Learning rate: 0.1
Streaks: 45


Episode = 219
t=149
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.59987968138226
Explore rate: 0.05749589383191933
Learning rate: 0.1
Streaks: 45


Episode = 219
t=150
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.59987968138226
Explore rate: 0.05749589383191933
Learning rate: 0.1
Streaks: 45


Episode = 219
t=151
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.54915426287205
Explore rate: 0.05749589383191933
Learning rate: 0.1
Streaks: 45


Episode = 219
t=152
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.59647590609303
Explore rate: 0.05749589383191933
Learning rate: 0.1
Streaks: 45


Episode = 219
t=153
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.59647590609303
Explore rate: 0.05749589383191933
Learning rate: 0.1
Streaks: 45


Episode = 219
t=154
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.59787943018694
Explore rate: 0.05749589383191933
Learning r

State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.61903290519285
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.62041387228766
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=7
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.60733324211148
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=8
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.6114840570193
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=9
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.6114840570193
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=10
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.61287257296227
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=11
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.60353365793512
Explore rate: 0.05551732

Best Q: 98.63751974295508
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=65
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.63751974295508
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=66
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.63888222321212
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=67
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.6216480062574
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=68
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.63168412285631
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=69
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.63168412285631
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=70
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.63305243873346
Explore rate: 0.05551732784983132
Learning rate: 0.

State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.63728849703217
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=122
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.64771123108477
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=123
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.63968305920635
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=124
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.64568366125638
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=125
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.64568366125638
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=126
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.64703797759513
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=127
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.64098137950361
Explore rate

State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.65577966081442
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=178
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.66376231555111
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=179
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.58063036228074
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=180
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.6595691412347
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=181
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.6595691412347
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=182
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.66090957209346
Explore rate: 0.05551732784983132
Learning rate: 0.1
Streaks: 46


Episode = 220
t=183
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.65120341346852
Explore rate: 


Episode = 221
t=34
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.67291238427069
Explore rate: 0.05354773498692689
Learning rate: 0.1
Streaks: 47


Episode = 221
t=35
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.67156675007399
Explore rate: 0.05354773498692689
Learning rate: 0.1
Streaks: 47


Episode = 221
t=36
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.67410625410095
Explore rate: 0.05354773498692689
Learning rate: 0.1
Streaks: 47


Episode = 221
t=37
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.67314659422259
Explore rate: 0.05354773498692689
Learning rate: 0.1
Streaks: 47


Episode = 221
t=38
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.6753371415189
Explore rate: 0.05354773498692689
Learning rate: 0.1
Streaks: 47


Episode = 221
t=39
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.67469031181069
Explore rate: 0.05354773498692689
Learning rate: 0.1
Streaks: 47


Episode = 221
t=40
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.6682

State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.6876869424803
Explore rate: 0.05354773498692689
Learning rate: 0.1
Streaks: 47


Episode = 221
t=92
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.60829975501161
Explore rate: 0.05354773498692689
Learning rate: 0.1
Streaks: 47


Episode = 221
t=93
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.60829975501161
Explore rate: 0.05354773498692689
Learning rate: 0.1
Streaks: 47


Episode = 221
t=94
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.67651692026645
Explore rate: 0.05354773498692689
Learning rate: 0.1
Streaks: 47


Episode = 221
t=95
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.67651692026645
Explore rate: 0.05354773498692689
Learning rate: 0.1
Streaks: 47


Episode = 221
t=96
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.67784040334618
Explore rate: 0.05354773498692689
Learning rate: 0.1
Streaks: 47


Episode = 221
t=97
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.67633132805045
Explore rate: 0.053


Episode = 221
t=149
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.6872967196356
Explore rate: 0.05354773498692689
Learning rate: 0.1
Streaks: 47


Episode = 221
t=150
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.6850136210386
Explore rate: 0.05354773498692689
Learning rate: 0.1
Streaks: 47


Episode = 221
t=151
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.69713789181282
Explore rate: 0.05354773498692689
Learning rate: 0.1
Streaks: 47


Episode = 221
t=152
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.69713789181282
Explore rate: 0.05354773498692689
Learning rate: 0.1
Streaks: 47


Episode = 221
t=153
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.69333437993365
Explore rate: 0.05354773498692689
Learning rate: 0.1
Streaks: 47


Episode = 221
t=154
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.68838339615486
Explore rate: 0.05354773498692689
Learning rate: 0.1
Streaks: 47


Episode = 221
t=155
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 9


Episode = 222
t=7
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.69997825466929
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=8
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.70127827641463
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=9
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.6999975869664
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=10
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.7040013559581
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=11
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.7040013559581
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=12
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.70529735460214
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=13
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.701


Episode = 222
t=66
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.71462707002809
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=67
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.71462707002809
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=68
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.71591244295806
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=69
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.71316692778429
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=70
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.71670678418904
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=71
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.71670678418904
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=72
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 


Episode = 222
t=127
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.73185771987418
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=128
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.73312586215431
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=129
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.72754730081748
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=130
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.73498064544387
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=131
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.73498064544387
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=132
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.72955565463467
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=133
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
B

State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.73029579080399
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=186
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.73050930366158
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=187
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.73050930366158
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=188
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.73177879435792
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=189
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.73047515887089
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=190
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.74742784725859
Explore rate: 0.051587034221398986
Learning rate: 0.1
Streaks: 48


Episode = 222
t=191
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.74742784725859
Explor


Episode = 223
t=44
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.75638386165153
Explore rate: 0.04963514562387694
Learning rate: 0.1
Streaks: 49


Episode = 223
t=45
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.75638386165153
Explore rate: 0.04963514562387694
Learning rate: 0.1
Streaks: 49


Episode = 223
t=46
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.75762747778988
Explore rate: 0.04963514562387694
Learning rate: 0.1
Streaks: 49


Episode = 223
t=47
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.74120133817561
Explore rate: 0.04963514562387694
Learning rate: 0.1
Streaks: 49


Episode = 223
t=48
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.75836179776027
Explore rate: 0.04963514562387694
Learning rate: 0.1
Streaks: 49


Episode = 223
t=49
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.75836179776027
Explore rate: 0.04963514562387694
Learning rate: 0.1
Streaks: 49


Episode = 223
t=50
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.745

Episode = 223
t=103
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.7576972975098
Explore rate: 0.04963514562387694
Learning rate: 0.1
Streaks: 49


Episode = 223
t=104
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.7576972975098
Explore rate: 0.04963514562387694
Learning rate: 0.1
Streaks: 49


Episode = 223
t=105
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.75893960021229
Explore rate: 0.04963514562387694
Learning rate: 0.1
Streaks: 49


Episode = 223
t=106
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.7558071695978
Explore rate: 0.04963514562387694
Learning rate: 0.1
Streaks: 49


Episode = 223
t=107
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.76098750434105
Explore rate: 0.04963514562387694
Learning rate: 0.1
Streaks: 49


Episode = 223
t=108
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.76325505546484
Explore rate: 0.04963514562387694
Learning rate: 0.1
Streaks: 49


Episode = 223
t=109
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.


Episode = 223
t=163
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.7733808776722
Explore rate: 0.04963514562387694
Learning rate: 0.1
Streaks: 49


Episode = 223
t=164
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.76412471861879
Explore rate: 0.04963514562387694
Learning rate: 0.1
Streaks: 49


Episode = 223
t=165
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.76561464500766
Explore rate: 0.04963514562387694
Learning rate: 0.1
Streaks: 49


Episode = 223
t=166
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.76561464500766
Explore rate: 0.04963514562387694
Learning rate: 0.1
Streaks: 49


Episode = 223
t=167
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.76684903036265
Explore rate: 0.04963514562387694
Learning rate: 0.1
Streaks: 49


Episode = 223
t=168
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.76808218133229
Explore rate: 0.04963514562387694
Learning rate: 0.1
Streaks: 49


Episode = 223
t=169
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 


Episode = 224
t=21
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 98.74412714201898
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=22
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.78371893808458
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=23
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.78371893808458
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=24
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.78493521914649
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=25
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.77915982238697
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=26
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.77915982238697
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=27
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 

Learning rate: 0.1
Streaks: 50


Episode = 224
t=79
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.79330055620976
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=80
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.79450725565354
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=81
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 98.75453269976143
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=82
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.79798492033983
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=83
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.79798492033983
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=84
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.79918693541948
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=85
Action: 0
State: (


Episode = 224
t=137
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.81081110755326
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=138
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.75347755933696
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=139
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.80155339189126
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=140
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.80155339189126
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=141
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.80275183849938
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=142
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.80394908666088
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=143
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
B


Episode = 224
t=195
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 98.78441455770988
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=196
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 98.78563014315218
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=197
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.81298984298441
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=198
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.81298984298441
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50


Episode = 224
t=199
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.81417685314142
Explore rate: 0.047691990337874746
Learning rate: 0.1
Streaks: 50

Episode 224 finished after 199.000000 time steps

Episode = 225
t=0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.81134152920909
Explore rate: 0.04575749056067513
Learning rate: 0.1
Streaks: 51


Episode = 225
t=1


Episode = 225
t=52
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.82708738529908
Explore rate: 0.04575749056067513
Learning rate: 0.1
Streaks: 51


Episode = 225
t=53
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.82510278505288
Explore rate: 0.04575749056067513
Learning rate: 0.1
Streaks: 51


Episode = 225
t=54
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.82510278505288
Explore rate: 0.04575749056067513
Learning rate: 0.1
Streaks: 51


Episode = 225
t=55
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.77748984037378
Explore rate: 0.04575749056067513
Learning rate: 0.1
Streaks: 51


Episode = 225
t=56
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.82708738529908
Explore rate: 0.04575749056067513
Learning rate: 0.1
Streaks: 51


Episode = 225
t=57
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.82708738529908
Explore rate: 0.04575749056067513
Learning rate: 0.1
Streaks: 51


Episode = 225
t=58
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.828


Episode = 225
t=109
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.83823141467155
Explore rate: 0.04575749056067513
Learning rate: 0.1
Streaks: 51


Episode = 225
t=110
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.83939318325689
Explore rate: 0.04575749056067513
Learning rate: 0.1
Streaks: 51


Episode = 225
t=111
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.8328385262908
Explore rate: 0.04575749056067513
Learning rate: 0.1
Streaks: 51


Episode = 225
t=112
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.83549752176629
Explore rate: 0.04575749056067513
Learning rate: 0.1
Streaks: 51


Episode = 225
t=113
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.83549752176629
Explore rate: 0.04575749056067513
Learning rate: 0.1
Streaks: 51


Episode = 225
t=114
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.83666202424452
Explore rate: 0.04575749056067513
Learning rate: 0.1
Streaks: 51


Episode = 225
t=115
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 

Learning rate: 0.1
Streaks: 51


Episode = 225
t=165
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.84096022642524
Explore rate: 0.04575749056067513
Learning rate: 0.1
Streaks: 51


Episode = 225
t=166
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.84096022642524
Explore rate: 0.04575749056067513
Learning rate: 0.1
Streaks: 51


Episode = 225
t=167
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.83132359051218
Explore rate: 0.04575749056067513
Learning rate: 0.1
Streaks: 51


Episode = 225
t=168
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.84220837503963
Explore rate: 0.04575749056067513
Learning rate: 0.1
Streaks: 51


Episode = 225
t=169
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.83356986058989
Explore rate: 0.04575749056067513
Learning rate: 0.1
Streaks: 51


Episode = 225
t=170
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.84251095373406
Explore rate: 0.04575749056067513
Learning rate: 0.1
Streaks: 51


Episode = 225
t=171
Action: 0
State: 


Episode = 226
t=23
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.855242649514
Explore rate: 0.04383156952463674
Learning rate: 0.1
Streaks: 52


Episode = 226
t=24
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.86102276918055
Explore rate: 0.04383156952463674
Learning rate: 0.1
Streaks: 52


Episode = 226
t=25
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.85695963871147
Explore rate: 0.04383156952463674
Learning rate: 0.1
Streaks: 52


Episode = 226
t=26
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.86175949649493
Explore rate: 0.04383156952463674
Learning rate: 0.1
Streaks: 52


Episode = 226
t=27
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.86175949649493
Explore rate: 0.04383156952463674
Learning rate: 0.1
Streaks: 52


Episode = 226
t=28
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.81688770469906
Explore rate: 0.04383156952463674
Learning rate: 0.1
Streaks: 52


Episode = 226
t=29
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.81688


Episode = 226
t=80
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.86385324904462
Explore rate: 0.04383156952463674
Learning rate: 0.1
Streaks: 52


Episode = 226
t=81
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.86498939579558
Explore rate: 0.04383156952463674
Learning rate: 0.1
Streaks: 52


Episode = 226
t=82
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.86612440639978
Explore rate: 0.04383156952463674
Learning rate: 0.1
Streaks: 52


Episode = 226
t=83
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.85899604169131
Explore rate: 0.04383156952463674
Learning rate: 0.1
Streaks: 52


Episode = 226
t=84
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.86899568216084
Explore rate: 0.04383156952463674
Learning rate: 0.1
Streaks: 52


Episode = 226
t=85
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.86899568216084
Explore rate: 0.04383156952463674
Learning rate: 0.1
Streaks: 52


Episode = 226
t=86
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.870


Episode = 226
t=139
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.87709159046574
Explore rate: 0.04383156952463674
Learning rate: 0.1
Streaks: 52


Episode = 226
t=140
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.87821449887528
Explore rate: 0.04383156952463674
Learning rate: 0.1
Streaks: 52


Episode = 226
t=141
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.87821449887528
Explore rate: 0.04383156952463674
Learning rate: 0.1
Streaks: 52


Episode = 226
t=142
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.87933628437641
Explore rate: 0.04383156952463674
Learning rate: 0.1
Streaks: 52


Episode = 226
t=143
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.87165745931587
Explore rate: 0.04383156952463674
Learning rate: 0.1
Streaks: 52


Episode = 226
t=144
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.8807053417551
Explore rate: 0.04383156952463674
Learning rate: 0.1
Streaks: 52


Episode = 226
t=145
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 


Episode = 227
t=0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.89214245500501
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.8849012482936
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=2
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.89353050487608
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=3
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.88687064344697
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=4
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.89397764808973
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=5
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 98.83621092522735
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.89425358737


Episode = 227
t=58
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.90033125085775
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=59
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.86486401965054
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=60
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.9001300303167
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=61
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.9001300303167
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=62
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.90122990028638
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=63
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.89482034990114
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=64
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.90268


Episode = 227
t=115
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.86949049068684
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=116
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.86949049068684
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=117
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.91568340492115
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=118
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.91568340492115
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=119
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.91676772151624
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=120
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.90978476027696
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=121
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q:


Episode = 227
t=174
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.92079524791004
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=175
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.91688467252703
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=176
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.92015412616047
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=177
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.92015412616047
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=178
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.92123397203432
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=179
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.88115392783848
Explore rate: 0.04191415147891486
Learning rate: 0.1
Streaks: 53


Episode = 227
t=180
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q:

Learning rate: 0.1
Streaks: 54


Episode = 228
t=33
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.93505880217113
Explore rate: 0.040005161671583855
Learning rate: 0.1
Streaks: 54


Episode = 228
t=34
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.93612374336895
Explore rate: 0.040005161671583855
Learning rate: 0.1
Streaks: 54


Episode = 228
t=35
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.93291820206517
Explore rate: 0.040005161671583855
Learning rate: 0.1
Streaks: 54


Episode = 228
t=36
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.93782775966748
Explore rate: 0.040005161671583855
Learning rate: 0.1
Streaks: 54


Episode = 228
t=37
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.93447133006573
Explore rate: 0.040005161671583855
Learning rate: 0.1
Streaks: 54


Episode = 228
t=38
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.93855764537724
Explore rate: 0.040005161671583855
Learning rate: 0.1
Streaks: 54


Episode = 228
t=39
Action: 1
State: (


Episode = 228
t=92
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.93839602850804
Explore rate: 0.040005161671583855
Learning rate: 0.1
Streaks: 54


Episode = 228
t=93
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.9338200130631
Explore rate: 0.040005161671583855
Learning rate: 0.1
Streaks: 54


Episode = 228
t=94
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.94467175399872
Explore rate: 0.040005161671583855
Learning rate: 0.1
Streaks: 54


Episode = 228
t=95
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.94467175399872
Explore rate: 0.040005161671583855
Learning rate: 0.1
Streaks: 54


Episode = 228
t=96
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.94572708224472
Explore rate: 0.040005161671583855
Learning rate: 0.1
Streaks: 54


Episode = 228
t=97
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.93828515702857
Explore rate: 0.040005161671583855
Learning rate: 0.1
Streaks: 54


Episode = 228
t=98
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 9

Episode = 228
t=149
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.94723815774633
Explore rate: 0.040005161671583855
Learning rate: 0.1
Streaks: 54


Episode = 228
t=150
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.94723815774633
Explore rate: 0.040005161671583855
Learning rate: 0.1
Streaks: 54


Episode = 228
t=151
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.94829091958857
Explore rate: 0.040005161671583855
Learning rate: 0.1
Streaks: 54


Episode = 228
t=152
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.94934262866899
Explore rate: 0.040005161671583855
Learning rate: 0.1
Streaks: 54


Episode = 228
t=153
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.94296883590575
Explore rate: 0.040005161671583855
Learning rate: 0.1
Streaks: 54


Episode = 228
t=154
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.95070787219096
Explore rate: 0.040005161671583855
Learning rate: 0.1
Streaks: 54


Episode = 228
t=155
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Be


Episode = 229
t=8
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.95509005610944
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=9
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.95015853616435
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=10
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.96008952931543
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=11
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.96008952931543
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=12
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.9611294397861
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=13
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.95910353128932
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=14
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.956587


Episode = 229
t=68
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.96206931487131
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=69
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.9586389224851
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=70
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.9696052709005
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=71
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.9696052709005
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=72
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.9706356656296
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=73
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.9667785620587
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=74
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.96370177


Episode = 229
t=127
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 98.96968455006024
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=128
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.97638987600301
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=129
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.97638987600301
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=130
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.977413486127
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=131
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.97692309670543
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=132
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.97930785195062
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=133
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 9

State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.99071569019402
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=184
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.99071569019402
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=185
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 98.99172497450382
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=186
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 98.95181227852133
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=187
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.9836553993547
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=188
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.9836553993547
Explore rate: 0.03810452633214956
Learning rate: 0.1
Streaks: 55


Episode = 229
t=189
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.98467174395535
Explore rate: 


Episode = 230
t=41
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.99694474326506
Explore rate: 0.03621217265444476
Learning rate: 0.1
Streaks: 56


Episode = 230
t=42
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 98.99794779852179
Explore rate: 0.03621217265444476
Learning rate: 0.1
Streaks: 56


Episode = 230
t=43
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.96377191320853
Explore rate: 0.03621217265444476
Learning rate: 0.1
Streaks: 56


Episode = 230
t=44
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.00150321142209
Explore rate: 0.03621217265444476
Learning rate: 0.1
Streaks: 56


Episode = 230
t=45
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.00150321142209
Explore rate: 0.03621217265444476
Learning rate: 0.1
Streaks: 56


Episode = 230
t=46
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.00250170821067
Explore rate: 0.03621217265444476
Learning rate: 0.1
Streaks: 56


Episode = 230
t=47
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 98.994


Episode = 230
t=100
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.00617999028233
Explore rate: 0.03621217265444476
Learning rate: 0.1
Streaks: 56


Episode = 230
t=101
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.97757980272156
Explore rate: 0.03621217265444476
Learning rate: 0.1
Streaks: 56


Episode = 230
t=102
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.01050305152982
Explore rate: 0.03621217265444476
Learning rate: 0.1
Streaks: 56


Episode = 230
t=103
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.01050305152982
Explore rate: 0.03621217265444476
Learning rate: 0.1
Streaks: 56


Episode = 230
t=104
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.01149254847829
Explore rate: 0.03621217265444476
Learning rate: 0.1
Streaks: 56


Episode = 230
t=105
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.00635662101669
Explore rate: 0.03621217265444476
Learning rate: 0.1
Streaks: 56


Episode = 230
t=106
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q:


Episode = 230
t=159
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.01376731815631
Explore rate: 0.03621217265444476
Learning rate: 0.1
Streaks: 56


Episode = 230
t=160
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.00860616815449
Explore rate: 0.03621217265444476
Learning rate: 0.1
Streaks: 56


Episode = 230
t=161
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.01424259698797
Explore rate: 0.03621217265444476
Learning rate: 0.1
Streaks: 56


Episode = 230
t=162
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.01424259698797
Explore rate: 0.03621217265444476
Learning rate: 0.1
Streaks: 56


Episode = 230
t=163
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 98.98996892097799
Explore rate: 0.03621217265444476
Learning rate: 0.1
Streaks: 56


Episode = 230
t=164
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.01692126016562
Explore rate: 0.03621217265444476
Learning rate: 0.1
Streaks: 56


Episode = 230
t=165
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q:


Episode = 231
t=18
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.02426707216026
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=19
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.02038567198986
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=20
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.02665917748325
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=21
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.02665917748325
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=22
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.02763251830577
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=23
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.02257359344206
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=24
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.028


Episode = 231
t=77
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.03298736683308
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=78
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.03298736683308
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=79
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.00183196684551
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=80
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.00183196684551
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=81
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.03819735288442
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=82
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.03819735288442
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=83
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.039

State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.04674492826975
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=135
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.04227563697273
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=136
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.04315991611988
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=137
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.04315991611988
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=138
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.04411675620375
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=139
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.01160124562493
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=140
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.04725572350307
Explore rate


Episode = 231
t=191
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.0513959578252
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=192
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.05563739356853
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=193
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.05276446400597
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=194
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.05476079090641
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=195
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.05476079090641
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=196
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.0557060301155
Explore rate: 0.03432802877989327
Learning rate: 0.1
Streaks: 57


Episode = 231
t=197
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 9


Episode = 232
t=48
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.06237143972567
Explore rate: 0.032452023781137984
Learning rate: 0.1
Streaks: 58


Episode = 232
t=49
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.05579930397815
Explore rate: 0.032452023781137984
Learning rate: 0.1
Streaks: 58


Episode = 232
t=50
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.06350229255119
Explore rate: 0.032452023781137984
Learning rate: 0.1
Streaks: 58


Episode = 232
t=51
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.0257843789619
Explore rate: 0.032452023781137984
Learning rate: 0.1
Streaks: 58


Episode = 232
t=52
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.0634557244025
Explore rate: 0.032452023781137984
Learning rate: 0.1
Streaks: 58


Episode = 232
t=53
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.0634557244025
Explore rate: 0.032452023781137984
Learning rate: 0.1
Streaks: 58


Episode = 232
t=54
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.


Episode = 232
t=108
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.07064769383115
Explore rate: 0.032452023781137984
Learning rate: 0.1
Streaks: 58


Episode = 232
t=109
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.07064769383115
Explore rate: 0.032452023781137984
Learning rate: 0.1
Streaks: 58


Episode = 232
t=110
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.07157704613732
Explore rate: 0.032452023781137984
Learning rate: 0.1
Streaks: 58


Episode = 232
t=111
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.07250546909118
Explore rate: 0.032452023781137984
Learning rate: 0.1
Streaks: 58


Episode = 232
t=112
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.07130078976878
Explore rate: 0.032452023781137984
Learning rate: 0.1
Streaks: 58


Episode = 232
t=113
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.06741743241972
Explore rate: 0.032452023781137984
Learning rate: 0.1
Streaks: 58


Episode = 232
t=114
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
B

State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.0762948342361
Explore rate: 0.032452023781137984
Learning rate: 0.1
Streaks: 58


Episode = 232
t=168
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.0762948342361
Explore rate: 0.032452023781137984
Learning rate: 0.1
Streaks: 58


Episode = 232
t=169
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.07348147862446
Explore rate: 0.032452023781137984
Learning rate: 0.1
Streaks: 58


Episode = 232
t=170
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.0792323124143
Explore rate: 0.032452023781137984
Learning rate: 0.1
Streaks: 58


Episode = 232
t=171
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.0792323124143
Explore rate: 0.032452023781137984
Learning rate: 0.1
Streaks: 58


Episode = 232
t=172
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.0801530801019
Explore rate: 0.032452023781137984
Learning rate: 0.1
Streaks: 58


Episode = 232
t=173
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.0763281475244
Explore rate


Episode = 233
t=27
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.06037203041718
Explore rate: 0.03058408764601861
Learning rate: 0.1
Streaks: 59


Episode = 233
t=28
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.08468013993642
Explore rate: 0.03058408764601861
Learning rate: 0.1
Streaks: 59


Episode = 233
t=29
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.08468013993642
Explore rate: 0.03058408764601861
Learning rate: 0.1
Streaks: 59


Episode = 233
t=30
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.08559545979648
Explore rate: 0.03058408764601861
Learning rate: 0.1
Streaks: 59


Episode = 233
t=31
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.08236656808269
Explore rate: 0.03058408764601861
Learning rate: 0.1
Streaks: 59


Episode = 233
t=32
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.08664900749488
Explore rate: 0.03058408764601861
Learning rate: 0.1
Streaks: 59


Episode = 233
t=33
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.086


Episode = 233
t=88
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.09847331108688
Explore rate: 0.03058408764601861
Learning rate: 0.1
Streaks: 59


Episode = 233
t=89
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.09326280999035
Explore rate: 0.03058408764601861
Learning rate: 0.1
Streaks: 59


Episode = 233
t=90
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.09967037218726
Explore rate: 0.03058408764601861
Learning rate: 0.1
Streaks: 59


Episode = 233
t=91
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.05863283326157
Explore rate: 0.03058408764601861
Learning rate: 0.1
Streaks: 59


Episode = 233
t=92
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.09870402742791
Explore rate: 0.03058408764601861
Learning rate: 0.1
Streaks: 59


Episode = 233
t=93
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.09870402742791
Explore rate: 0.03058408764601861
Learning rate: 0.1
Streaks: 59


Episode = 233
t=94
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.099


Episode = 233
t=148
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.10519338129534
Explore rate: 0.03058408764601861
Learning rate: 0.1
Streaks: 59


Episode = 233
t=149
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.10608818791404
Explore rate: 0.03058408764601861
Learning rate: 0.1
Streaks: 59


Episode = 233
t=150
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.10320006811905
Explore rate: 0.03058408764601861
Learning rate: 0.1
Streaks: 59


Episode = 233
t=151
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.1075006964973
Explore rate: 0.03058408764601861
Learning rate: 0.1
Streaks: 59


Episode = 233
t=152
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.1075006964973
Explore rate: 0.03058408764601861
Learning rate: 0.1
Streaks: 59


Episode = 233
t=153
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.10452263026038
Explore rate: 0.03058408764601861
Learning rate: 0.1
Streaks: 59


Episode = 233
t=154
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 9


Episode = 234
t=6
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.11331929609166
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=7
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.11331929609166
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=8
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.11420597679557
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=9
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.10906973352391
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=10
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.11538049735577
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=11
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.08398428613555
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.1122888


Episode = 234
t=64
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.11914929833746
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=65
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.11914929833746
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=66
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.12003014903912
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=67
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.12091011889008
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=68
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.11815177307325
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=69
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.12230731342832
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=70
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.119

Best Q: 99.13370981487408
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=124
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.13370981487408
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=125
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.13457610505921
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=126
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.12623158789988
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=127
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.13539430326082
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=128
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.10036277091548
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=129
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.13317875575662
Explore rate: 0.02872415126189476
Learning r


Episode = 234
t=181
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.13808370750485
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=182
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.13808370750485
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=183
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.13894562379734
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=184
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.11648776832943
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=185
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.14547958957074
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=186
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.14547958957074
Explore rate: 0.02872415126189476
Learning rate: 0.1
Streaks: 60


Episode = 234
t=187
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q:


Episode = 235
t=41
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.14876420998853
Explore rate: 0.026872146400301333
Learning rate: 0.1
Streaks: 61


Episode = 235
t=42
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.14961544577854
Explore rate: 0.026872146400301333
Learning rate: 0.1
Streaks: 61


Episode = 235
t=43
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.14678688290576
Explore rate: 0.026872146400301333
Learning rate: 0.1
Streaks: 61


Episode = 235
t=44
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.15095114870715
Explore rate: 0.026872146400301333
Learning rate: 0.1
Streaks: 61


Episode = 235
t=45
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.10967633996044
Explore rate: 0.026872146400301333
Learning rate: 0.1
Streaks: 61


Episode = 235
t=46
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.14661512582026
Explore rate: 0.026872146400301333
Learning rate: 0.1
Streaks: 61


Episode = 235
t=47
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 


Episode = 235
t=101
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.1522836843781
Explore rate: 0.026872146400301333
Learning rate: 0.1
Streaks: 61


Episode = 235
t=102
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.15591660033279
Explore rate: 0.026872146400301333
Learning rate: 0.1
Streaks: 61


Episode = 235
t=103
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.15591660033279
Explore rate: 0.026872146400301333
Learning rate: 0.1
Streaks: 61


Episode = 235
t=104
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.15676068373246
Explore rate: 0.026872146400301333
Learning rate: 0.1
Streaks: 61


Episode = 235
t=105
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.15175000973449
Explore rate: 0.026872146400301333
Learning rate: 0.1
Streaks: 61


Episode = 235
t=106
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.1554219479611
Explore rate: 0.026872146400301333
Learning rate: 0.1
Streaks: 61


Episode = 235
t=107
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Bes


Episode = 235
t=159
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.16476196825255
Explore rate: 0.026872146400301333
Learning rate: 0.1
Streaks: 61


Episode = 235
t=160
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.1655972062843
Explore rate: 0.026872146400301333
Learning rate: 0.1
Streaks: 61


Episode = 235
t=161
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.16060352526135
Explore rate: 0.026872146400301333
Learning rate: 0.1
Streaks: 61


Episode = 235
t=162
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.16668819717108
Explore rate: 0.026872146400301333
Learning rate: 0.1
Streaks: 61


Episode = 235
t=163
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.16204530425516
Explore rate: 0.026872146400301333
Learning rate: 0.1
Streaks: 61


Episode = 235
t=164
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.16220213556933
Explore rate: 0.026872146400301333
Learning rate: 0.1
Streaks: 61


Episode = 235
t=165
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Be

Streaks: 62


Episode = 236
t=17
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.1371184328966
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=18
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.17368529378034
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=19
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.17368529378034
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=20
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.17451160848655
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=21
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.17114958879691
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=22
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.16879968833689
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=23
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Be



Episode = 236
t=74
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.1799147791192
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=75
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.14620691793498
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=76
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.1838306367847
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=77
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.1838306367847
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=78
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.18464680614792
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=79
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.1778187540187
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=80
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.185520

Learning rate: 0.1
Streaks: 62


Episode = 236
t=131
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.18994035695131
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=132
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.19075041659435
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=133
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.18801547041737
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=134
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.1920172311313
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=135
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.18922362925763
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=136
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.18320821059258
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=137
Action: 1
State: (

State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.18548709167813
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=189
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.19089645620753
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=190
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.19089645620753
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=191
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.18683713167486
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=192
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.19438705947566
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=193
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.19438705947566
Explore rate: 0.02502800570193109
Learning rate: 0.1
Streaks: 62


Episode = 236
t=194
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.19519267241618
Explore rate

Explore rate: 0.023191662661933732
Learning rate: 0.1
Streaks: 63


Episode = 237
t=48
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.20148738371614
Explore rate: 0.023191662661933732
Learning rate: 0.1
Streaks: 63


Episode = 237
t=49
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.19746238374246
Explore rate: 0.023191662661933732
Learning rate: 0.1
Streaks: 63


Episode = 237
t=50
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.17706316084892
Explore rate: 0.023191662661933732
Learning rate: 0.1
Streaks: 63


Episode = 237
t=51
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.17706316084892
Explore rate: 0.023191662661933732
Learning rate: 0.1
Streaks: 63


Episode = 237
t=52
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.2045139420577
Explore rate: 0.023191662661933732
Learning rate: 0.1
Streaks: 63


Episode = 237
t=53
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.2045139420577
Explore rate: 0.023191662661933732
Learning rate: 0.1
Streaks: 63


Epis

Explore rate: 0.023191662661933732
Learning rate: 0.1
Streaks: 63


Episode = 237
t=107
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.20868624349511
Explore rate: 0.023191662661933732
Learning rate: 0.1
Streaks: 63


Episode = 237
t=108
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.20947755725162
Explore rate: 0.023191662661933732
Learning rate: 0.1
Streaks: 63


Episode = 237
t=109
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.20837031856618
Explore rate: 0.023191662661933732
Learning rate: 0.1
Streaks: 63


Episode = 237
t=110
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.20837031856618
Explore rate: 0.023191662661933732
Learning rate: 0.1
Streaks: 63


Episode = 237
t=111
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.20816843014975
Explore rate: 0.023191662661933732
Learning rate: 0.1
Streaks: 63


Episode = 237
t=112
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.20816843014975
Explore rate: 0.023191662661933732
Learning rate: 0.1
Streaks: 6


Episode = 237
t=165
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.2158050926018
Explore rate: 0.023191662661933732
Learning rate: 0.1
Streaks: 63


Episode = 237
t=166
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.2158050926018
Explore rate: 0.023191662661933732
Learning rate: 0.1
Streaks: 63


Episode = 237
t=167
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.2165892875092
Explore rate: 0.023191662661933732
Learning rate: 0.1
Streaks: 63


Episode = 237
t=168
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.21737269822168
Explore rate: 0.023191662661933732
Learning rate: 0.1
Streaks: 63


Episode = 237
t=169
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.21127775271812
Explore rate: 0.023191662661933732
Learning rate: 0.1
Streaks: 63


Episode = 237
t=170
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.2182562904902
Explore rate: 0.023191662661933732
Learning rate: 0.1
Streaks: 63


Episode = 237
t=171
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best 


Episode = 238
t=23
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.2236694510627
Explore rate: 0.021363051615525652
Learning rate: 0.1
Streaks: 64


Episode = 238
t=24
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.22444578161165
Explore rate: 0.021363051615525652
Learning rate: 0.1
Streaks: 64


Episode = 238
t=25
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.21831245969628
Explore rate: 0.021363051615525652
Learning rate: 0.1
Streaks: 64


Episode = 238
t=26
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.2238226033114
Explore rate: 0.021363051615525652
Learning rate: 0.1
Streaks: 64


Episode = 238
t=27
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.2238226033114
Explore rate: 0.021363051615525652
Learning rate: 0.1
Streaks: 64


Episode = 238
t=28
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.22459878070809
Explore rate: 0.021363051615525652
Learning rate: 0.1
Streaks: 64


Episode = 238
t=29
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.

Learning rate: 0.1
Streaks: 64


Episode = 238
t=81
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.2276914014895
Explore rate: 0.021363051615525652
Learning rate: 0.1
Streaks: 64


Episode = 238
t=82
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.23513212145141
Explore rate: 0.021363051615525652
Learning rate: 0.1
Streaks: 64


Episode = 238
t=83
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.23513212145141
Explore rate: 0.021363051615525652
Learning rate: 0.1
Streaks: 64


Episode = 238
t=84
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.23589698932996
Explore rate: 0.021363051615525652
Learning rate: 0.1
Streaks: 64


Episode = 238
t=85
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.23004523787719
Explore rate: 0.021363051615525652
Learning rate: 0.1
Streaks: 64


Episode = 238
t=86
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.23321065092489
Explore rate: 0.021363051615525652
Learning rate: 0.1
Streaks: 64


Episode = 238
t=87
Action: 0
State: (0


Episode = 238
t=138
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.2419391086044
Explore rate: 0.021363051615525652
Learning rate: 0.1
Streaks: 64


Episode = 238
t=139
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.2367950490827
Explore rate: 0.021363051615525652
Learning rate: 0.1
Streaks: 64


Episode = 238
t=140
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.24287016240541
Explore rate: 0.021363051615525652
Learning rate: 0.1
Streaks: 64


Episode = 238
t=141
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.24287016240541
Explore rate: 0.021363051615525652
Learning rate: 0.1
Streaks: 64


Episode = 238
t=142
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.23815969025257
Explore rate: 0.021363051615525652
Learning rate: 0.1
Streaks: 64


Episode = 238
t=143
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.23450625390002
Explore rate: 0.021363051615525652
Learning rate: 0.1
Streaks: 64


Episode = 238
t=144
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Bes


Episode = 238
t=199
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.24549592578893
Explore rate: 0.021363051615525652
Learning rate: 0.1
Streaks: 64

Episode 238 finished after 199.000000 time steps

Episode = 239
t=0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.24663600362607
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.24265659300575
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=2
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.24767343356777
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=3
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.24391060362838
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=4
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.24549592578893
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=5
Action: 0
S


Episode = 239
t=61
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.25083528536037
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=62
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.25210920655948
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=63
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.25210920655948
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=64
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.25285709735292
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=65
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.25184726769206
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=66
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.25499900277396
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=67
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.254

State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.25809372032718
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=120
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.26282922266753
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=121
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.26282922266753
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=122
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.26356639344486
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=123
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.26025419458352
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=124
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.26463770961004
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=125
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.26598970215515
Explore rate

Streaks: 65


Episode = 239
t=177
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.268340871088
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=178
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.26907253021692
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=179
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.26870872807451
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=180
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.27133310745091
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=181
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.27133310745091
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=182
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.27206177434346
Explore rate: 0.01954210772389986
Learning rate: 0.1
Streaks: 65


Episode = 239
t=183
Action: 0
State: (0, 0, 3, 2)
Reward: 

State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.27522017397625
Explore rate: 0.017728766960431575
Learning rate: 0.1
Streaks: 66


Episode = 240
t=34
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.28221641128692
Explore rate: 0.017728766960431575
Learning rate: 0.1
Streaks: 66


Episode = 240
t=35
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.28221641128692
Explore rate: 0.017728766960431575
Learning rate: 0.1
Streaks: 66


Episode = 240
t=36
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.28221641128692
Explore rate: 0.017728766960431575
Learning rate: 0.1
Streaks: 66


Episode = 240
t=37
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.27663758129603
Explore rate: 0.017728766960431575
Learning rate: 0.1
Streaks: 66


Episode = 240
t=38
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.26813554085211
Explore rate: 0.017728766960431575
Learning rate: 0.1
Streaks: 66


Episode = 240
t=39
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.26813554085211
Explore rate


Episode = 240
t=92
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.27513742567845
Explore rate: 0.017728766960431575
Learning rate: 0.1
Streaks: 66


Episode = 240
t=93
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.27586228825277
Explore rate: 0.017728766960431575
Learning rate: 0.1
Streaks: 66


Episode = 240
t=94
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.27368923323476
Explore rate: 0.017728766960431575
Learning rate: 0.1
Streaks: 66


Episode = 240
t=95
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.2770230174583
Explore rate: 0.017728766960431575
Learning rate: 0.1
Streaks: 66


Episode = 240
t=96
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.26055749575383
Explore rate: 0.017728766960431575
Learning rate: 0.1
Streaks: 66


Episode = 240
t=97
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.28309354044019
Explore rate: 0.017728766960431575
Learning rate: 0.1
Streaks: 66


Episode = 240
t=98
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 9

Streaks: 66


Episode = 240
t=149
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.29454178222231
Explore rate: 0.017728766960431575
Learning rate: 0.1
Streaks: 66


Episode = 240
t=150
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.29454178222231
Explore rate: 0.017728766960431575
Learning rate: 0.1
Streaks: 66


Episode = 240
t=151
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.28946786876504
Explore rate: 0.017728766960431575
Learning rate: 0.1
Streaks: 66


Episode = 240
t=152
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.28270600857864
Explore rate: 0.017728766960431575
Learning rate: 0.1
Streaks: 66


Episode = 240
t=153
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.28270600857864
Explore rate: 0.017728766960431575
Learning rate: 0.1
Streaks: 66


Episode = 240
t=154
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.28342330257007
Explore rate: 0.017728766960431575
Learning rate: 0.1
Streaks: 66


Episode = 240
t=155
Action: 0
State: (0, 0, 2, 1)



Episode = 241
t=7
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.29216105334741
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=8
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.29463147083328
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=9
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.29463147083328
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=10
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.29533683936245
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=11
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.29254575341518
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=12
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.25610380517708
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=13
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.

State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.30114533147427
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=65
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.30114533147427
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=66
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.3018441861428
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=67
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.30041738275415
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=68
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.30302942865366
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=69
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.30302942865366
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=70
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.30137555791545
Explore rate:


Episode = 241
t=123
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.30627799209333
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=124
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.30705467409872
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=125
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.30705467409872
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=126
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.30774761942462
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=127
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.2741172260414
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=128
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.30888570496518
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=129
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Be

Learning rate: 0.1
Streaks: 67


Episode = 241
t=183
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.31619231297873
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=184
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.31687612066575
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=185
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.3175592445451
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=186
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.31271726303851
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=187
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.3183765258113
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=188
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.28620694853217
Explore rate: 0.015922966097169144
Learning rate: 0.1
Streaks: 67


Episode = 241
t=189
Action: 1
Sta


Episode = 242
t=43
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.31663417652105
Explore rate: 0.014124642691606293
Learning rate: 0.1
Streaks: 68


Episode = 242
t=44
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.31731754234453
Explore rate: 0.014124642691606293
Learning rate: 0.1
Streaks: 68


Episode = 242
t=45
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.31676804820964
Explore rate: 0.014124642691606293
Learning rate: 0.1
Streaks: 68


Episode = 242
t=46
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.31856023909471
Explore rate: 0.014124642691606293
Learning rate: 0.1
Streaks: 68


Episode = 242
t=47
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.29740623359248
Explore rate: 0.014124642691606293
Learning rate: 0.1
Streaks: 68


Episode = 242
t=48
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.3142005909188
Explore rate: 0.014124642691606293
Learning rate: 0.1
Streaks: 68


Episode = 242
t=49
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 9

State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.32090772924242
Explore rate: 0.014124642691606293
Learning rate: 0.1
Streaks: 68


Episode = 242
t=101
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.32090772924242
Explore rate: 0.014124642691606293
Learning rate: 0.1
Streaks: 68


Episode = 242
t=102
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.32158682151318
Explore rate: 0.014124642691606293
Learning rate: 0.1
Streaks: 68


Episode = 242
t=103
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.31881587435517
Explore rate: 0.014124642691606293
Learning rate: 0.1
Streaks: 68


Episode = 242
t=104
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.32424597330746
Explore rate: 0.014124642691606293
Learning rate: 0.1
Streaks: 68


Episode = 242
t=105
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.32424597330746
Explore rate: 0.014124642691606293
Learning rate: 0.1
Streaks: 68


Episode = 242
t=106
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.32492172733414
Explor


Episode = 242
t=159
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.32773074895604
Explore rate: 0.014124642691606293
Learning rate: 0.1
Streaks: 68


Episode = 242
t=160
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.32840301820708
Explore rate: 0.014124642691606293
Learning rate: 0.1
Streaks: 68


Episode = 242
t=161
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.32243570661443
Explore rate: 0.014124642691606293
Learning rate: 0.1
Streaks: 68


Episode = 242
t=162
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.33398596967453
Explore rate: 0.014124642691606293
Learning rate: 0.1
Streaks: 68


Episode = 242
t=163
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.33398596967453
Explore rate: 0.014124642691606293
Learning rate: 0.1
Streaks: 68


Episode = 242
t=164
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.33465198370486
Explore rate: 0.014124642691606293
Learning rate: 0.1
Streaks: 68


Episode = 242
t=165
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
B

Episode = 243
t=15
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.33791761599333
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=16
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.33857969837734
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=17
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.33439856954153
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=18
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.33625156654676
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=19
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.33625156654676
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=20
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.33691531498022
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=21
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 9


Episode = 243
t=75
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.32246555574977
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=76
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.34341582522418
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=77
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.34341582522418
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=78
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.34407240939896
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=79
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.34472833698956
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=80
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.34108389593885
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=81
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 

Episode = 243
t=133
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.349905218496
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=134
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.349905218496
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=135
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.32809470501677
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=136
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.35018082754698
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=137
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.35018082754698
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=138
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.35083064671943
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=139
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q


Episode = 243
t=191
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.35376430205109
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=192
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.35441053774903
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=193
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.35121165522143
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=194
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.35532046835708
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=195
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.35226721606664
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=196
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.35566287591197
Explore rate: 0.012333735073725371
Learning rate: 0.1
Streaks: 69


Episode = 243
t=197
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
B

Episode = 244
t=49
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.35815642383928
Explore rate: 0.010550182333308178
Learning rate: 0.1
Streaks: 70


Episode = 244
t=50
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.36268979198563
Explore rate: 0.010550182333308178
Learning rate: 0.1
Streaks: 70


Episode = 244
t=51
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.36268979198563
Explore rate: 0.010550182333308178
Learning rate: 0.1
Streaks: 70


Episode = 244
t=52
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.36332710219365
Explore rate: 0.010550182333308178
Learning rate: 0.1
Streaks: 70


Episode = 244
t=53
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.35838241979889
Explore rate: 0.010550182333308178
Learning rate: 0.1
Streaks: 70


Episode = 244
t=54
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.3630531386941
Explore rate: 0.010550182333308178
Learning rate: 0.1
Streaks: 70


Episode = 244
t=55
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99


Episode = 244
t=106
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.3684022093069
Explore rate: 0.010550182333308178
Learning rate: 0.1
Streaks: 70


Episode = 244
t=107
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.36553510221809
Explore rate: 0.010550182333308178
Learning rate: 0.1
Streaks: 70


Episode = 244
t=108
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.36894250894684
Explore rate: 0.010550182333308178
Learning rate: 0.1
Streaks: 70


Episode = 244
t=109
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.36894250894684
Explore rate: 0.010550182333308178
Learning rate: 0.1
Streaks: 70


Episode = 244
t=110
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.3695735664379
Explore rate: 0.010550182333308178
Learning rate: 0.1
Streaks: 70


Episode = 244
t=111
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.36509510546948
Explore rate: 0.010550182333308178
Learning rate: 0.1
Streaks: 70


Episode = 244
t=112
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Bes

State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.37047362583769
Explore rate: 0.010550182333308178
Learning rate: 0.1
Streaks: 70


Episode = 244
t=166
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.3750565588963
Explore rate: 0.010550182333308178
Learning rate: 0.1
Streaks: 70


Episode = 244
t=167
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.3750565588963
Explore rate: 0.010550182333308178
Learning rate: 0.1
Streaks: 70


Episode = 244
t=168
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.37568150233741
Explore rate: 0.010550182333308178
Learning rate: 0.1
Streaks: 70


Episode = 244
t=169
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.3736119835145
Explore rate: 0.010550182333308178
Learning rate: 0.1
Streaks: 70


Episode = 244
t=170
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.37559362956553
Explore rate: 0.010550182333308178
Learning rate: 0.1
Streaks: 70


Episode = 244
t=171
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.37559362956553
Explore r

Explore rate: 0.01
Learning rate: 0.1
Streaks: 71


Episode = 245
t=23
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.37946329307441
Explore rate: 0.01
Learning rate: 0.1
Streaks: 71


Episode = 245
t=24
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.38442520235223
Explore rate: 0.01
Learning rate: 0.1
Streaks: 71


Episode = 245
t=25
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.35739190510324
Explore rate: 0.01
Learning rate: 0.1
Streaks: 71


Episode = 245
t=26
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.37511964160757
Explore rate: 0.01
Learning rate: 0.1
Streaks: 71


Episode = 245
t=27
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.37511964160757
Explore rate: 0.01
Learning rate: 0.1
Streaks: 71


Episode = 245
t=28
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.37574452196596
Explore rate: 0.01
Learning rate: 0.1
Streaks: 71


Episode = 245
t=29
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.37518384123598
Explore rate: 0.01
Learning


Episode = 245
t=91
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.386748302468
Explore rate: 0.01
Learning rate: 0.1
Streaks: 71


Episode = 245
t=92
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.38290804455
Explore rate: 0.01
Learning rate: 0.1
Streaks: 71


Episode = 245
t=93
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.38290804455
Explore rate: 0.01
Learning rate: 0.1
Streaks: 71


Episode = 245
t=94
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.38352513650545
Explore rate: 0.01
Learning rate: 0.1
Streaks: 71


Episode = 245
t=95
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.36862025080163
Explore rate: 0.01
Learning rate: 0.1
Streaks: 71


Episode = 245
t=96
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.39212717691237
Explore rate: 0.01
Learning rate: 0.1
Streaks: 71


Episode = 245
t=97
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.39212717691237
Explore rate: 0.01
Learning rate: 0.1
Streaks: 71


Episode = 245
t=98
Action: 1
State

Explore rate: 0.01
Learning rate: 0.1
Streaks: 71


Episode = 245
t=161
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.39559828477012
Explore rate: 0.01
Learning rate: 0.1
Streaks: 71


Episode = 245
t=162
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.39220101380293
Explore rate: 0.01
Learning rate: 0.1
Streaks: 71


Episode = 245
t=163
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.39017307131037
Explore rate: 0.01
Learning rate: 0.1
Streaks: 71


Episode = 245
t=164
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.39017307131037
Explore rate: 0.01
Learning rate: 0.1
Streaks: 71


Episode = 245
t=165
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.39078289823907
Explore rate: 0.01
Learning rate: 0.1
Streaks: 71


Episode = 245
t=166
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.38751092108171
Explore rate: 0.01
Learning rate: 0.1
Streaks: 71


Episode = 245
t=167
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.3964103182033
Explore rate: 0.01
Le


Episode = 246
t=29
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.39369492015386
Explore rate: 0.01
Learning rate: 0.1
Streaks: 72


Episode = 246
t=30
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.39916815433328
Explore rate: 0.01
Learning rate: 0.1
Streaks: 72


Episode = 246
t=31
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.39916815433328
Explore rate: 0.01
Learning rate: 0.1
Streaks: 72


Episode = 246
t=32
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.39976898617894
Explore rate: 0.01
Learning rate: 0.1
Streaks: 72


Episode = 246
t=33
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.39935549086775
Explore rate: 0.01
Learning rate: 0.1
Streaks: 72


Episode = 246
t=34
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.4008684890694
Explore rate: 0.01
Learning rate: 0.1
Streaks: 72


Episode = 246
t=35
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.37782806084758
Explore rate: 0.01
Learning rate: 0.1
Streaks: 72


Episode = 246
t=36
Action: 

t=97
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.40496196508697
Explore rate: 0.01
Learning rate: 0.1
Streaks: 72


Episode = 246
t=98
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.40555700312188
Explore rate: 0.01
Learning rate: 0.1
Streaks: 72


Episode = 246
t=99
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.40062961669992
Explore rate: 0.01
Learning rate: 0.1
Streaks: 72


Episode = 246
t=100
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.40619863356018
Explore rate: 0.01
Learning rate: 0.1
Streaks: 72


Episode = 246
t=101
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.3888486102113
Explore rate: 0.01
Learning rate: 0.1
Streaks: 72


Episode = 246
t=102
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.40633943856409
Explore rate: 0.01
Learning rate: 0.1
Streaks: 72


Episode = 246
t=103
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.40633943856409
Explore rate: 0.01
Learning rate: 0.1
Streaks: 72


Episode = 246
t=104
Action: 1
State: (

State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.39355809681707
Explore rate: 0.01
Learning rate: 0.1
Streaks: 72


Episode = 246
t=158
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.41136178809906
Explore rate: 0.01
Learning rate: 0.1
Streaks: 72


Episode = 246
t=159
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.41136178809906
Explore rate: 0.01
Learning rate: 0.1
Streaks: 72


Episode = 246
t=160
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.41195042631095
Explore rate: 0.01
Learning rate: 0.1
Streaks: 72


Episode = 246
t=161
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.41253847588465
Explore rate: 0.01
Learning rate: 0.1
Streaks: 72


Episode = 246
t=162
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.40936275081643
Explore rate: 0.01
Learning rate: 0.1
Streaks: 72


Episode = 246
t=163
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.41334025599872
Explore rate: 0.01
Learning rate: 0.1
Streaks: 72


Episode = 246
t=164
Action: 1
State: (0, 0, 2, 0)



Episode = 247
t=27
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.4163682859396
Explore rate: 0.01
Learning rate: 0.1
Streaks: 73


Episode = 247
t=28
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.41695191765366
Explore rate: 0.01
Learning rate: 0.1
Streaks: 73


Episode = 247
t=29
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.41519969786604
Explore rate: 0.01
Learning rate: 0.1
Streaks: 73


Episode = 247
t=30
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.41788623925115
Explore rate: 0.01
Learning rate: 0.1
Streaks: 73


Episode = 247
t=31
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.4160504657653
Explore rate: 0.01
Learning rate: 0.1
Streaks: 73


Episode = 247
t=32
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.41655517453134
Explore rate: 0.01
Learning rate: 0.1
Streaks: 73


Episode = 247
t=33
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.41655517453134
Explore rate: 0.01
Learning rate: 0.1
Streaks: 73


Episode = 247
t=34
Action: 0


Episode = 247
t=87
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.42249309939774
Explore rate: 0.01
Learning rate: 0.1
Streaks: 73


Episode = 247
t=88
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.42265520739605
Explore rate: 0.01
Learning rate: 0.1
Streaks: 73


Episode = 247
t=89
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.42265520739605
Explore rate: 0.01
Learning rate: 0.1
Streaks: 73


Episode = 247
t=90
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.42323255218865
Explore rate: 0.01
Learning rate: 0.1
Streaks: 73


Episode = 247
t=91
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.4202068631655
Explore rate: 0.01
Learning rate: 0.1
Streaks: 73


Episode = 247
t=92
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.4256781442487
Explore rate: 0.01
Learning rate: 0.1
Streaks: 73


Episode = 247
t=93
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.4256781442487
Explore rate: 0.01
Learning rate: 0.1
Streaks: 73


Episode = 247
t=94
Action: 1



Episode = 247
t=156
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.42851087490283
Explore rate: 0.01
Learning rate: 0.1
Streaks: 73


Episode = 247
t=157
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.42908236402792
Explore rate: 0.01
Learning rate: 0.1
Streaks: 73


Episode = 247
t=158
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.42593000744789
Explore rate: 0.01
Learning rate: 0.1
Streaks: 73


Episode = 247
t=159
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.42985502423484
Explore rate: 0.01
Learning rate: 0.1
Streaks: 73


Episode = 247
t=160
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.42689265410235
Explore rate: 0.01
Learning rate: 0.1
Streaks: 73


Episode = 247
t=161
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.43242154882438
Explore rate: 0.01
Learning rate: 0.1
Streaks: 73


Episode = 247
t=162
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.43242154882438
Explore rate: 0.01
Learning rate: 0.1
Streaks: 73


Episode = 247
t=163


Episode = 248
t=15
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.4162045029704
Explore rate: 0.01
Learning rate: 0.1
Streaks: 74


Episode = 248
t=16
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.43470993442715
Explore rate: 0.01
Learning rate: 0.1
Streaks: 74


Episode = 248
t=17
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.43470993442715
Explore rate: 0.01
Learning rate: 0.1
Streaks: 74


Episode = 248
t=18
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.43527522449273
Explore rate: 0.01
Learning rate: 0.1
Streaks: 74


Episode = 248
t=19
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.43295341908686
Explore rate: 0.01
Learning rate: 0.1
Streaks: 74


Episode = 248
t=20
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.43611834283101
Explore rate: 0.01
Learning rate: 0.1
Streaks: 74


Episode = 248
t=21
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.41897455532455
Explore rate: 0.01
Learning rate: 0.1
Streaks: 74


Episode = 248
t=22
Action: 


Episode = 248
t=84
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.44361715979318
Explore rate: 0.01
Learning rate: 0.1
Streaks: 74


Episode = 248
t=85
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.44069089175647
Explore rate: 0.01
Learning rate: 0.1
Streaks: 74


Episode = 248
t=86
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.4441642211437
Explore rate: 0.01
Learning rate: 0.1
Streaks: 74


Episode = 248
t=87
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.4441642211437
Explore rate: 0.01
Learning rate: 0.1
Streaks: 74


Episode = 248
t=88
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.44472005692255
Explore rate: 0.01
Learning rate: 0.1
Streaks: 74


Episode = 248
t=89
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.4401346965275
Explore rate: 0.01
Learning rate: 0.1
Streaks: 74


Episode = 248
t=90
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.44438458665394
Explore rate: 0.01
Learning rate: 0.1
Streaks: 74


Episode = 248
t=91
Action: 1



Episode = 248
t=144
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.44906438105505
Explore rate: 0.01
Learning rate: 0.1
Streaks: 74


Episode = 248
t=145
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.44906438105505
Explore rate: 0.01
Learning rate: 0.1
Streaks: 74


Episode = 248
t=146
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.449615316674
Explore rate: 0.01
Learning rate: 0.1
Streaks: 74


Episode = 248
t=147
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.44687806267146
Explore rate: 0.01
Learning rate: 0.1
Streaks: 74


Episode = 248
t=148
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.45039005942607
Explore rate: 0.01
Learning rate: 0.1
Streaks: 74


Episode = 248
t=149
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.42709192501758
Explore rate: 0.01
Learning rate: 0.1
Streaks: 74


Episode = 248
t=150
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.44833908112079
Explore rate: 0.01
Learning rate: 0.1
Streaks: 74


Episode = 248
t=151
A


Episode = 249
t=3
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.43242398401254
Explore rate: 0.01
Learning rate: 0.1
Streaks: 75


Episode = 249
t=4
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.45355366682989
Explore rate: 0.01
Learning rate: 0.1
Streaks: 75


Episode = 249
t=5
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.45355366682989
Explore rate: 0.01
Learning rate: 0.1
Streaks: 75


Episode = 249
t=6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.45410011316307
Explore rate: 0.01
Learning rate: 0.1
Streaks: 75


Episode = 249
t=7
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.44918957638835
Explore rate: 0.01
Learning rate: 0.1
Streaks: 75


Episode = 249
t=8
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.45393959254815
Explore rate: 0.01
Learning rate: 0.1
Streaks: 75


Episode = 249
t=9
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.45393959254815
Explore rate: 0.01
Learning rate: 0.1
Streaks: 75


Episode = 249
t=10
Action: 1
Stat


Episode = 249
t=63
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.45693692324818
Explore rate: 0.01
Learning rate: 0.1
Streaks: 75


Episode = 249
t=64
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.46301640690702
Explore rate: 0.01
Learning rate: 0.1
Streaks: 75


Episode = 249
t=65
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.46301640690702
Explore rate: 0.01
Learning rate: 0.1
Streaks: 75


Episode = 249
t=66
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.46355339050011
Explore rate: 0.01
Learning rate: 0.1
Streaks: 75


Episode = 249
t=67
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.45897691690585
Explore rate: 0.01
Learning rate: 0.1
Streaks: 75


Episode = 249
t=68
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.46411956817234
Explore rate: 0.01
Learning rate: 0.1
Streaks: 75


Episode = 249
t=69
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.43508339862746
Explore rate: 0.01
Learning rate: 0.1
Streaks: 75


Episode = 249
t=70
Action:


Episode = 249
t=132
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.46624434796216
Explore rate: 0.01
Learning rate: 0.1
Streaks: 75


Episode = 249
t=133
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.46523758921586
Explore rate: 0.01
Learning rate: 0.1
Streaks: 75


Episode = 249
t=134
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.46815489346933
Explore rate: 0.01
Learning rate: 0.1
Streaks: 75


Episode = 249
t=135
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.46815489346933
Explore rate: 0.01
Learning rate: 0.1
Streaks: 75


Episode = 249
t=136
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.46868673857587
Explore rate: 0.01
Learning rate: 0.1
Streaks: 75


Episode = 249
t=137
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.46441430496152
Explore rate: 0.01
Learning rate: 0.1
Streaks: 75


Episode = 249
t=138
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.46715881458515
Explore rate: 0.01
Learning rate: 0.1
Streaks: 75


Episode = 249
t=139

Episode = 250
t=0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.47537165646199
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76


Episode = 250
t=1
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.44768692840681
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76


Episode = 250
t=2
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.47034065362712
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76


Episode = 250
t=3
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.47034065362712
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76


Episode = 250
t=4
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.47087031297349
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76


Episode = 250
t=5
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.46956117794585
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76


Episode = 250
t=6
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.4717460550111
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76


Episode = 250
t=7
Action: 0
State: 


Episode = 250
t=70
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.48095059873427
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76


Episode = 250
t=71
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.45290023761407
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76


Episode = 250
t=72
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.47424231355704
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76


Episode = 250
t=73
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.47424231355704
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76


Episode = 250
t=74
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.47476807124349
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76


Episode = 250
t=75
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.47574675537976
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76


Episode = 250
t=76
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.47586290163763
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76


Episode = 250
t=77
Action:

Learning rate: 0.1
Streaks: 76


Episode = 250
t=139
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.484571202046
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76


Episode = 250
t=140
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.48508663084395
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76


Episode = 250
t=141
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.45846541453851
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76


Episode = 250
t=142
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.48266553868795
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76


Episode = 250
t=143
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.48266553868795
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76


Episode = 250
t=144
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.48318287314926
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76


Episode = 250
t=145
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.48239505624629
Explore rate: 0.01
Learning rate: 0.1
Str


Episode = 250
t=198
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.45948297047818
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76


Episode = 250
t=199
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.45948297047818
Explore rate: 0.01
Learning rate: 0.1
Streaks: 76

Episode 250 finished after 199.000000 time steps

Episode = 251
t=0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.48967369977402
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=1
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.48839952020133
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=2
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.48481802002361
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=3
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.48481802002361
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.48533320200359
Explore rate: 0.01
Learning rate


Episode = 251
t=58
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.49214773592566
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=59
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.490999554174
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=60
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.49299898523398
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=61
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.49170649829476
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=62
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.48870325099723
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=63
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.48870325099723
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=64
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.48921454774623
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=65
Action: 1


Episode = 251
t=127
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.47217742119686
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=128
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.49616236625558
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=129
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.49616236625558
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=130
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.49666620388932
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=131
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.49412785290869
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=132
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.49607493676216
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=133
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.49607493676216
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=134


Episode = 251
t=186
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.50094167921459
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=187
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.50081032624067
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=188
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.50376112580825
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=189
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.50376112580825
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=190
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.50425736468245
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=191
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.49936540964123
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=192
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.50187688607966
Explore rate: 0.01
Learning rate: 0.1
Streaks: 77


Episode = 251
t=193


Episode = 252
t=54
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.50496934725194
Explore rate: 0.01
Learning rate: 0.1
Streaks: 78


Episode = 252
t=55
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.50546437790469
Explore rate: 0.01
Learning rate: 0.1
Streaks: 78


Episode = 252
t=56
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.50180488097065
Explore rate: 0.01
Learning rate: 0.1
Streaks: 78


Episode = 252
t=57
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.5060417053902
Explore rate: 0.01
Learning rate: 0.1
Streaks: 78


Episode = 252
t=58
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.5060417053902
Explore rate: 0.01
Learning rate: 0.1
Streaks: 78


Episode = 252
t=59
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.50272252170721
Explore rate: 0.01
Learning rate: 0.1
Streaks: 78


Episode = 252
t=60
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.50845601780652
Explore rate: 0.01
Learning rate: 0.1
Streaks: 78


Episode = 252
t=61
Action: 0


Episode = 252
t=122
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.51088657849841
Explore rate: 0.01
Learning rate: 0.1
Streaks: 78


Episode = 252
t=123
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.51137569191991
Explore rate: 0.01
Learning rate: 0.1
Streaks: 78


Episode = 252
t=124
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.51186431622799
Explore rate: 0.01
Learning rate: 0.1
Streaks: 78


Episode = 252
t=125
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.50919151185066
Explore rate: 0.01
Learning rate: 0.1
Streaks: 78


Episode = 252
t=126
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.5125271663938
Explore rate: 0.01
Learning rate: 0.1
Streaks: 78


Episode = 252
t=127
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.51001255013858
Explore rate: 0.01
Learning rate: 0.1
Streaks: 78


Episode = 252
t=128
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.51248937927755
Explore rate: 0.01
Learning rate: 0.1
Streaks: 78


Episode = 252
t=129



Episode = 252
t=181
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.51405970165307
Explore rate: 0.01
Learning rate: 0.1
Streaks: 78


Episode = 252
t=182
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.51514281735093
Explore rate: 0.01
Learning rate: 0.1
Streaks: 78


Episode = 252
t=183
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.51514281735093
Explore rate: 0.01
Learning rate: 0.1
Streaks: 78


Episode = 252
t=184
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.5146528704055
Explore rate: 0.01
Learning rate: 0.1
Streaks: 78


Episode = 252
t=185
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.51578803658848
Explore rate: 0.01
Learning rate: 0.1
Streaks: 78


Episode = 252
t=186
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.51578803658848
Explore rate: 0.01
Learning rate: 0.1
Streaks: 78


Episode = 252
t=187
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.51627224855189
Explore rate: 0.01
Learning rate: 0.1
Streaks: 78


Episode = 252
t=188



Episode = 253
t=50
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.52077640391148
Explore rate: 0.01
Learning rate: 0.1
Streaks: 79


Episode = 253
t=51
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.51743392708828
Explore rate: 0.01
Learning rate: 0.1
Streaks: 79


Episode = 253
t=52
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.52135602353856
Explore rate: 0.01
Learning rate: 0.1
Streaks: 79


Episode = 253
t=53
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.51830478070977
Explore rate: 0.01
Learning rate: 0.1
Streaks: 79


Episode = 253
t=54
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.52227564006918
Explore rate: 0.01
Learning rate: 0.1
Streaks: 79


Episode = 253
t=55
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.52227564006918
Explore rate: 0.01
Learning rate: 0.1
Streaks: 79


Episode = 253
t=56
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.52275336442911
Explore rate: 0.01
Learning rate: 0.1
Streaks: 79


Episode = 253
t=57
Action:


Episode = 253
t=111
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.52210624710426
Explore rate: 0.01
Learning rate: 0.1
Streaks: 79


Episode = 253
t=112
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.52546596681009
Explore rate: 0.01
Learning rate: 0.1
Streaks: 79


Episode = 253
t=113
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.51040172761752
Explore rate: 0.01
Learning rate: 0.1
Streaks: 79


Episode = 253
t=114
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.52734242514937
Explore rate: 0.01
Learning rate: 0.1
Streaks: 79


Episode = 253
t=115
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.52734242514937
Explore rate: 0.01
Learning rate: 0.1
Streaks: 79


Episode = 253
t=116
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.52781508272422
Explore rate: 0.01
Learning rate: 0.1
Streaks: 79


Episode = 253
t=117
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.52828726764149
Explore rate: 0.01
Learning rate: 0.1
Streaks: 79


Episode = 253
t=118

State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.53090193802372
Explore rate: 0.01
Learning rate: 0.1
Streaks: 79


Episode = 253
t=171
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.53090193802372
Explore rate: 0.01
Learning rate: 0.1
Streaks: 79


Episode = 253
t=172
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.53137103608569
Explore rate: 0.01
Learning rate: 0.1
Streaks: 79


Episode = 253
t=173
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.51256845494555
Explore rate: 0.01
Learning rate: 0.1
Streaks: 79


Episode = 253
t=174
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.53348858931473
Explore rate: 0.01
Learning rate: 0.1
Streaks: 79


Episode = 253
t=175
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.53348858931473
Explore rate: 0.01
Learning rate: 0.1
Streaks: 79


Episode = 253
t=176
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.53395510072542
Explore rate: 0.01
Learning rate: 0.1
Streaks: 79


Episode = 253
t=177
Action: 0
State: (0, 0, 2, 1)



Episode = 254
t=30
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.5365172810966
Explore rate: 0.01
Learning rate: 0.1
Streaks: 80


Episode = 254
t=31
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.5365172810966
Explore rate: 0.01
Learning rate: 0.1
Streaks: 80


Episode = 254
t=32
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.53698076381549
Explore rate: 0.01
Learning rate: 0.1
Streaks: 80


Episode = 254
t=33
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.534756363619
Explore rate: 0.01
Learning rate: 0.1
Streaks: 80


Episode = 254
t=34
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.53683441301273
Explore rate: 0.01
Learning rate: 0.1
Streaks: 80


Episode = 254
t=35
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.53683441301273
Explore rate: 0.01
Learning rate: 0.1
Streaks: 80


Episode = 254
t=36
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.53729757859972
Explore rate: 0.01
Learning rate: 0.1
Streaks: 80


Episode = 254
t=37
Action: 1
S


Episode = 254
t=99
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.54158737599707
Explore rate: 0.01
Learning rate: 0.1
Streaks: 80


Episode = 254
t=100
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.54204578862107
Explore rate: 0.01
Learning rate: 0.1
Streaks: 80


Episode = 254
t=101
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.5409018426834
Explore rate: 0.01
Learning rate: 0.1
Streaks: 80


Episode = 254
t=102
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.54459643858947
Explore rate: 0.01
Learning rate: 0.1
Streaks: 80


Episode = 254
t=103
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.54459643858947
Explore rate: 0.01
Learning rate: 0.1
Streaks: 80


Episode = 254
t=104
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.54505184215088
Explore rate: 0.01
Learning rate: 0.1
Streaks: 80


Episode = 254
t=105
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.54044342380222
Explore rate: 0.01
Learning rate: 0.1
Streaks: 80


Episode = 254
t=106
A

State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.54899033258525
Explore rate: 0.01
Learning rate: 0.1
Streaks: 80


Episode = 254
t=167
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.54464895097256
Explore rate: 0.01
Learning rate: 0.1
Streaks: 80


Episode = 254
t=168
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.54941745417368
Explore rate: 0.01
Learning rate: 0.1
Streaks: 80


Episode = 254
t=169
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.52101584943028
Explore rate: 0.01
Learning rate: 0.1
Streaks: 80


Episode = 254
t=170
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.54607575044295
Explore rate: 0.01
Learning rate: 0.1
Streaks: 80


Episode = 254
t=171
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.54607575044295
Explore rate: 0.01
Learning rate: 0.1
Streaks: 80


Episode = 254
t=172
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.54652967469251
Explore rate: 0.01
Learning rate: 0.1
Streaks: 80


Episode = 254
t=173
Action: 0
State: (0, 0, 2, 2)



Episode = 255
t=25
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.54829164154019
Explore rate: 0.01
Learning rate: 0.1
Streaks: 81


Episode = 255
t=26
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.55180977545987
Explore rate: 0.01
Learning rate: 0.1
Streaks: 81


Episode = 255
t=27
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.55180977545987
Explore rate: 0.01
Learning rate: 0.1
Streaks: 81


Episode = 255
t=28
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.55225796568442
Explore rate: 0.01
Learning rate: 0.1
Streaks: 81


Episode = 255
t=29
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.55076659369614
Explore rate: 0.01
Learning rate: 0.1
Streaks: 81


Episode = 255
t=30
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.55132627799169
Explore rate: 0.01
Learning rate: 0.1
Streaks: 81


Episode = 255
t=31
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.55132627799169
Explore rate: 0.01
Learning rate: 0.1
Streaks: 81


Episode = 255
t=32
Action:

Learning rate: 0.1
Streaks: 81


Episode = 255
t=94
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.55884916354452
Explore rate: 0.01
Learning rate: 0.1
Streaks: 81


Episode = 255
t=95
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.55483104698209
Explore rate: 0.01
Learning rate: 0.1
Streaks: 81


Episode = 255
t=96
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.55928955659411
Explore rate: 0.01
Learning rate: 0.1
Streaks: 81


Episode = 255
t=97
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.53668047368124
Explore rate: 0.01
Learning rate: 0.1
Streaks: 81


Episode = 255
t=98
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.5553875441152
Explore rate: 0.01
Learning rate: 0.1
Streaks: 81


Episode = 255
t=99
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.5553875441152
Explore rate: 0.01
Learning rate: 0.1
Streaks: 81


Episode = 255
t=100
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.55583215657109
Explore rate: 0.01
Learning rate: 0.1
Streaks: 

Learning rate: 0.1
Streaks: 81


Episode = 255
t=163
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.56093177484152
Explore rate: 0.01
Learning rate: 0.1
Streaks: 81


Episode = 255
t=164
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.56137084306668
Explore rate: 0.01
Learning rate: 0.1
Streaks: 81


Episode = 255
t=165
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.55987308571841
Explore rate: 0.01
Learning rate: 0.1
Streaks: 81


Episode = 255
t=166
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.56139855362785
Explore rate: 0.01
Learning rate: 0.1
Streaks: 81


Episode = 255
t=167
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.56139855362785
Explore rate: 0.01
Learning rate: 0.1
Streaks: 81


Episode = 255
t=168
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.56183715507423
Explore rate: 0.01
Learning rate: 0.1
Streaks: 81


Episode = 255
t=169
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.56062172701257
Explore rate: 0.01
Learning rate: 0.1
S

Reward: 1.0
Best Q: 99.54625717763359
Explore rate: 0.01
Learning rate: 0.1
Streaks: 82


Episode = 256
t=34
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.56753310148916
Explore rate: 0.01
Learning rate: 0.1
Streaks: 82


Episode = 256
t=35
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.56753310148916
Explore rate: 0.01
Learning rate: 0.1
Streaks: 82


Episode = 256
t=36
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.56796556838768
Explore rate: 0.01
Learning rate: 0.1
Streaks: 82


Episode = 256
t=37
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.56569782654032
Explore rate: 0.01
Learning rate: 0.1
Streaks: 82


Episode = 256
t=38
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.56856192736485
Explore rate: 0.01
Learning rate: 0.1
Streaks: 82


Episode = 256
t=39
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.53931717371756
Explore rate: 0.01
Learning rate: 0.1
Streaks: 82


Episode = 256
t=40
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.5668

Streaks: 82


Episode = 256
t=103
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.54876961566744
Explore rate: 0.01
Learning rate: 0.1
Streaks: 82


Episode = 256
t=104
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.57342220953687
Explore rate: 0.01
Learning rate: 0.1
Streaks: 82


Episode = 256
t=105
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.57342220953687
Explore rate: 0.01
Learning rate: 0.1
Streaks: 82


Episode = 256
t=106
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.57384878732734
Explore rate: 0.01
Learning rate: 0.1
Streaks: 82


Episode = 256
t=107
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.57086072140298
Explore rate: 0.01
Learning rate: 0.1
Streaks: 82


Episode = 256
t=108
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.57436265610491
Explore rate: 0.01
Learning rate: 0.1
Streaks: 82


Episode = 256
t=109
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.57163655221707
Explore rate: 0.01
Learning rate: 0.1
Streaks: 82


Episod


Episode = 256
t=170
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.57189904911948
Explore rate: 0.01
Learning rate: 0.1
Streaks: 82


Episode = 256
t=171
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.57232715007036
Explore rate: 0.01
Learning rate: 0.1
Streaks: 82


Episode = 256
t=172
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.57275482292029
Explore rate: 0.01
Learning rate: 0.1
Streaks: 82


Episode = 256
t=173
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.57191365993576
Explore rate: 0.01
Learning rate: 0.1
Streaks: 82


Episode = 256
t=174
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.57509953566267
Explore rate: 0.01
Learning rate: 0.1
Streaks: 82


Episode = 256
t=175
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.57509953566267
Explore rate: 0.01
Learning rate: 0.1
Streaks: 82


Episode = 256
t=176
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.57552443612701
Explore rate: 0.01
Learning rate: 0.1
Streaks: 82


Episode = 256
t=177


Episode = 257
t=40
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.5771390213653
Explore rate: 0.01
Learning rate: 0.1
Streaks: 83


Episode = 257
t=41
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.5765752398347
Explore rate: 0.01
Learning rate: 0.1
Streaks: 83


Episode = 257
t=42
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.58206242610619
Explore rate: 0.01
Learning rate: 0.1
Streaks: 83


Episode = 257
t=43
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.58206242610619
Explore rate: 0.01
Learning rate: 0.1
Streaks: 83


Episode = 257
t=44
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.58248036368009
Explore rate: 0.01
Learning rate: 0.1
Streaks: 83


Episode = 257
t=45
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.57946893362794
Explore rate: 0.01
Learning rate: 0.1
Streaks: 83


Episode = 257
t=46
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.58297551941394
Explore rate: 0.01
Learning rate: 0.1
Streaks: 83


Episode = 257
t=47
Action: 0

Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.58079367000579
Explore rate: 0.01
Learning rate: 0.1
Streaks: 83


Episode = 257
t=111
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.58121287633578
Explore rate: 0.01
Learning rate: 0.1
Streaks: 83


Episode = 257
t=112
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.58163166345945
Explore rate: 0.01
Learning rate: 0.1
Streaks: 83


Episode = 257
t=113
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.58065471541136
Explore rate: 0.01
Learning rate: 0.1
Streaks: 83


Episode = 257
t=114
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.58232984544212
Explore rate: 0.01
Learning rate: 0.1
Streaks: 83


Episode = 257
t=115
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.5718232801685
Explore rate: 0.01
Learning rate: 0.1
Streaks: 83


Episode = 257
t=116
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.58380741002692
Explore rate: 0.01
Learning rate: 0.1
Streaks: 83


Episode = 257
t=117
Action: 0
State: (0, 


Episode = 257
t=169
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.58751618584009
Explore rate: 0.01
Learning rate: 0.1
Streaks: 83


Episode = 257
t=170
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.58792866965425
Explore rate: 0.01
Learning rate: 0.1
Streaks: 83


Episode = 257
t=171
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.58834074098459
Explore rate: 0.01
Learning rate: 0.1
Streaks: 83


Episode = 257
t=172
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.58650374464435
Explore rate: 0.01
Learning rate: 0.1
Streaks: 83


Episode = 257
t=173
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.58894103093904
Explore rate: 0.01
Learning rate: 0.1
Streaks: 83


Episode = 257
t=174
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.58715853224288
Explore rate: 0.01
Learning rate: 0.1
Streaks: 83


Episode = 257
t=175
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.58666877174906
Explore rate: 0.01
Learning rate: 0.1
Streaks: 83


Episode = 257
t=176

Reward: 1.0
Best Q: 99.59089353973111
Explore rate: 0.01
Learning rate: 0.1
Streaks: 84


Episode = 258
t=37
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.59088096770196
Explore rate: 0.01
Learning rate: 0.1
Streaks: 84


Episode = 258
t=38
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.59000437684747
Explore rate: 0.01
Learning rate: 0.1
Streaks: 84


Episode = 258
t=39
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.59000437684747
Explore rate: 0.01
Learning rate: 0.1
Streaks: 84


Episode = 258
t=40
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.59041437247063
Explore rate: 0.01
Learning rate: 0.1
Streaks: 84


Episode = 258
t=41
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.58893820203514
Explore rate: 0.01
Learning rate: 0.1
Streaks: 84


Episode = 258
t=42
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.59166959737473
Explore rate: 0.01
Learning rate: 0.1
Streaks: 84


Episode = 258
t=43
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.5916

State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.59589455398003
Explore rate: 0.01
Learning rate: 0.1
Streaks: 84


Episode = 258
t=105
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.59589455398003
Explore rate: 0.01
Learning rate: 0.1
Streaks: 84


Episode = 258
t=106
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.59629865942605
Explore rate: 0.01
Learning rate: 0.1
Streaks: 84


Episode = 258
t=107
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.59362975506168
Explore rate: 0.01
Learning rate: 0.1
Streaks: 84


Episode = 258
t=108
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.59605895368387
Explore rate: 0.01
Learning rate: 0.1
Streaks: 84


Episode = 258
t=109
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.59605895368387
Explore rate: 0.01
Learning rate: 0.1
Streaks: 84


Episode = 258
t=110
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.59646289473018
Explore rate: 0.01
Learning rate: 0.1
Streaks: 84


Episode = 258
t=111
Action: 0
State: (0, 0, 2, 2)



Episode = 258
t=164
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.5845371159874
Explore rate: 0.01
Learning rate: 0.1
Streaks: 84


Episode = 258
t=165
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.59925532131354
Explore rate: 0.01
Learning rate: 0.1
Streaks: 84


Episode = 258
t=166
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.59925532131354
Explore rate: 0.01
Learning rate: 0.1
Streaks: 84


Episode = 258
t=167
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.59965606599222
Explore rate: 0.01
Learning rate: 0.1
Streaks: 84


Episode = 258
t=168
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.60005640992622
Explore rate: 0.01
Learning rate: 0.1
Streaks: 84


Episode = 258
t=169
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.59659075492198
Explore rate: 0.01
Learning rate: 0.1
Streaks: 84


Episode = 258
t=170
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.60047320290194
Explore rate: 0.01
Learning rate: 0.1
Streaks: 84


Episode = 258
t=171


Learning rate: 0.1
Streaks: 85


Episode = 259
t=33
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.60480594797376
Explore rate: 0.01
Learning rate: 0.1
Streaks: 85


Episode = 259
t=34
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.60616679543794
Explore rate: 0.01
Learning rate: 0.1
Streaks: 85


Episode = 259
t=35
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.60533586592474
Explore rate: 0.01
Learning rate: 0.1
Streaks: 85


Episode = 259
t=36
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.6020052374548
Explore rate: 0.01
Learning rate: 0.1
Streaks: 85


Episode = 259
t=37
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.6020052374548
Explore rate: 0.01
Learning rate: 0.1
Streaks: 85


Episode = 259
t=38
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.60007571720291
Explore rate: 0.01
Learning rate: 0.1
Streaks: 85


Episode = 259
t=39
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.60647836662069
Explore rate: 0.01
Learning rate: 0.1
Streaks: 8


Episode = 259
t=92
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.6078407432697
Explore rate: 0.01
Learning rate: 0.1
Streaks: 85


Episode = 259
t=93
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.6078407432697
Explore rate: 0.01
Learning rate: 0.1
Streaks: 85


Episode = 259
t=94
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.60823290252644
Explore rate: 0.01
Learning rate: 0.1
Streaks: 85


Episode = 259
t=95
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.6073391500727
Explore rate: 0.01
Learning rate: 0.1
Streaks: 85


Episode = 259
t=96
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.60888877851872
Explore rate: 0.01
Learning rate: 0.1
Streaks: 85


Episode = 259
t=97
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.60888877851872
Explore rate: 0.01
Learning rate: 0.1
Streaks: 85


Episode = 259
t=98
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.60788522413878
Explore rate: 0.01
Learning rate: 0.1
Streaks: 85


Episode = 259
t=99
Action: 1


State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.61101218714323
Explore rate: 0.01
Learning rate: 0.1
Streaks: 85


Episode = 259
t=153
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.60157577479741
Explore rate: 0.01
Learning rate: 0.1
Streaks: 85


Episode = 259
t=154
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.6119659380124
Explore rate: 0.01
Learning rate: 0.1
Streaks: 85


Episode = 259
t=155
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.6119659380124
Explore rate: 0.01
Learning rate: 0.1
Streaks: 85


Episode = 259
t=156
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.61235397207439
Explore rate: 0.01
Learning rate: 0.1
Streaks: 85


Episode = 259
t=157
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.61274161810232
Explore rate: 0.01
Learning rate: 0.1
Streaks: 85


Episode = 259
t=158
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.61011128656403
Explore rate: 0.01
Learning rate: 0.1
Streaks: 85


Episode = 259
t=159
Action: 1
State: (0, 0, 2, 1)
Re


Episode = 260
t=11
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.61547377626603
Explore rate: 0.01
Learning rate: 0.1
Streaks: 86


Episode = 260
t=12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.61585830248976
Explore rate: 0.01
Learning rate: 0.1
Streaks: 86


Episode = 260
t=13
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.61385157150893
Explore rate: 0.01
Learning rate: 0.1
Streaks: 86


Episode = 260
t=14
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.61462471683689
Explore rate: 0.01
Learning rate: 0.1
Streaks: 86


Episode = 260
t=15
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.61462471683689
Explore rate: 0.01
Learning rate: 0.1
Streaks: 86


Episode = 260
t=16
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.61501009212004
Explore rate: 0.01
Learning rate: 0.1
Streaks: 86


Episode = 260
t=17
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.61273331487146
Explore rate: 0.01
Learning rate: 0.1
Streaks: 86


Episode = 260
t=18
Action:


Episode = 260
t=82
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.62168860601561
Explore rate: 0.01
Learning rate: 0.1
Streaks: 86


Episode = 260
t=83
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.618412388557
Explore rate: 0.01
Learning rate: 0.1
Streaks: 86


Episode = 260
t=84
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.62186802093224
Explore rate: 0.01
Learning rate: 0.1
Streaks: 86


Episode = 260
t=85
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.62186802093224
Explore rate: 0.01
Learning rate: 0.1
Streaks: 86


Episode = 260
t=86
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.6222461529113
Explore rate: 0.01
Learning rate: 0.1
Streaks: 86


Episode = 260
t=87
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.61920413177324
Explore rate: 0.01
Learning rate: 0.1
Streaks: 86


Episode = 260
t=88
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.62208305213578
Explore rate: 0.01
Learning rate: 0.1
Streaks: 86


Episode = 260
t=89
Action: 1



Episode = 260
t=142
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.62511369197259
Explore rate: 0.01
Learning rate: 0.1
Streaks: 86


Episode = 260
t=143
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.62511369197259
Explore rate: 0.01
Learning rate: 0.1
Streaks: 86


Episode = 260
t=144
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.62548857828061
Explore rate: 0.01
Learning rate: 0.1
Streaks: 86


Episode = 260
t=145
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.62307176614603
Explore rate: 0.01
Learning rate: 0.1
Streaks: 86


Episode = 260
t=146
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.62611592918662
Explore rate: 0.01
Learning rate: 0.1
Streaks: 86


Episode = 260
t=147
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.62611592918662
Explore rate: 0.01
Learning rate: 0.1
Streaks: 86


Episode = 260
t=148
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.62648981325744
Explore rate: 0.01
Learning rate: 0.1
Streaks: 86


Episode = 260
t=149


Episode = 261
t=1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.62629601446434
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=2
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.62750145055301
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=3
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.62678905662266
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=4
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.62780342210336
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=5
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.62726268974862
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=6
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.62812208617814
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=7
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.61417405812011
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=8
Action: 0
State

Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=70
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.63128635546191
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=71
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.63165506910644
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=72
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.63036120931416
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=73
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.63102154473448
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=74
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.63102154473448
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=75
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.63139052318975
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=76
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.63033906160587
Explore rate: 0.01
Learning


Episode = 261
t=139
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.63505530783128
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=140
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.63505530783128
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=141
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.63542025252345
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=142
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.63578483227093
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=143
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.63434117714318
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=144
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.63633391923197
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=145
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.63490411743282
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=146

Episode = 261
t=198
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.63677279538047
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87


Episode = 261
t=199
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.63917575013097
Explore rate: 0.01
Learning rate: 0.1
Streaks: 87

Episode 261 finished after 199.000000 time steps

Episode = 262
t=0
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.63917575013097
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=1
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.63737391510539
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=2
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.63968293453819
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=3
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.63796513411413
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=4
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.63987318936167
Explore rate: 0.01
Learning rate:

Episode = 262
t=66
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.6406201056112
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=67
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.6409794855056
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=68
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.63909071046002
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=69
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.64147463575362
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=70
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.63968762835363
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=71
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.64165624738527
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=72
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.63245637155426
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=73
Action: 0


State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.64499877545028
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=127
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.64535377667482
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=128
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.64384526610196
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=129
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.64615756404123
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=130
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.64615756404123
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=131
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.64651140647719
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=132
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.63242685610494
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=133
Action: 1
State: (0, 0, 3, 1)



Episode = 262
t=186
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.65043144685896
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=187
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.64761167647117
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=188
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.65081646984153
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=189
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.64828133933837
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=190
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.64806861544318
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=191
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.64806861544318
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=192
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.64842054682774
Explore rate: 0.01
Learning rate: 0.1
Streaks: 88


Episode = 262
t=193


Episode = 263
t=56
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.6524073527269
Explore rate: 0.01
Learning rate: 0.1
Streaks: 89


Episode = 263
t=57
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.6524073527269
Explore rate: 0.01
Learning rate: 0.1
Streaks: 89


Episode = 263
t=58
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.65275494537417
Explore rate: 0.01
Learning rate: 0.1
Streaks: 89


Episode = 263
t=59
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.6515404713087
Explore rate: 0.01
Learning rate: 0.1
Streaks: 89


Episode = 263
t=60
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.65436040702787
Explore rate: 0.01
Learning rate: 0.1
Streaks: 89


Episode = 263
t=61
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.65436040702787
Explore rate: 0.01
Learning rate: 0.1
Streaks: 89


Episode = 263
t=62
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.65470604662084
Explore rate: 0.01
Learning rate: 0.1
Streaks: 89


Episode = 263
t=63
Action: 1



Episode = 263
t=115
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.65772698430942
Explore rate: 0.01
Learning rate: 0.1
Streaks: 89


Episode = 263
t=116
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.65806925732511
Explore rate: 0.01
Learning rate: 0.1
Streaks: 89


Episode = 263
t=117
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.65526323340582
Explore rate: 0.01
Learning rate: 0.1
Streaks: 89


Episode = 263
t=118
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.65844112936819
Explore rate: 0.01
Learning rate: 0.1
Streaks: 89


Episode = 263
t=119
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.64080383119729
Explore rate: 0.01
Learning rate: 0.1
Streaks: 89


Episode = 263
t=120
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.6572296788834
Explore rate: 0.01
Learning rate: 0.1
Streaks: 89


Episode = 263
t=121
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.6572296788834
Explore rate: 0.01
Learning rate: 0.1
Streaks: 89


Episode = 263
t=122
A


Episode = 263
t=174
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.66023232406842
Explore rate: 0.01
Learning rate: 0.1
Streaks: 89


Episode = 263
t=175
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.65940963125368
Explore rate: 0.01
Learning rate: 0.1
Streaks: 89


Episode = 263
t=176
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.66034852849512
Explore rate: 0.01
Learning rate: 0.1
Streaks: 89


Episode = 263
t=177
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.66034852849512
Explore rate: 0.01
Learning rate: 0.1
Streaks: 89


Episode = 263
t=178
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.66068817996663
Explore rate: 0.01
Learning rate: 0.1
Streaks: 89


Episode = 263
t=179
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.65848306582886
Explore rate: 0.01
Learning rate: 0.1
Streaks: 89


Episode = 263
t=180
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.66079643606402
Explore rate: 0.01
Learning rate: 0.1
Streaks: 89


Episode = 263
t=181


Episode = 264
t=45
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.66560130143652
Explore rate: 0.01
Learning rate: 0.1
Streaks: 90


Episode = 264
t=46
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.6659357001351
Explore rate: 0.01
Learning rate: 0.1
Streaks: 90


Episode = 264
t=47
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.64684960541165
Explore rate: 0.01
Learning rate: 0.1
Streaks: 90


Episode = 264
t=48
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.6652262161361
Explore rate: 0.01
Learning rate: 0.1
Streaks: 90


Episode = 264
t=49
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.6652262161361
Explore rate: 0.01
Learning rate: 0.1
Streaks: 90


Episode = 264
t=50
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.66556098991997
Explore rate: 0.01
Learning rate: 0.1
Streaks: 90


Episode = 264
t=51
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.66324013567939
Explore rate: 0.01
Learning rate: 0.1
Streaks: 90


Episode = 264
t=52
Action: 1


State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.66957066006105
Explore rate: 0.01
Learning rate: 0.1
Streaks: 90


Episode = 264
t=113
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.66668038274351
Explore rate: 0.01
Learning rate: 0.1
Streaks: 90


Episode = 264
t=114
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.66803066332822
Explore rate: 0.01
Learning rate: 0.1
Streaks: 90


Episode = 264
t=115
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.66803066332822
Explore rate: 0.01
Learning rate: 0.1
Streaks: 90


Episode = 264
t=116
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.6683626326649
Explore rate: 0.01
Learning rate: 0.1
Streaks: 90


Episode = 264
t=117
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.66793027039488
Explore rate: 0.01
Learning rate: 0.1
Streaks: 90


Episode = 264
t=118
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.6699123383525
Explore rate: 0.01
Learning rate: 0.1
Streaks: 90


Episode = 264
t=119
Action: 1
State: (0, 0, 3, 1)
Re


Episode = 264
t=181
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.67038159360646
Explore rate: 0.01
Learning rate: 0.1
Streaks: 90


Episode = 264
t=182
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.67038159360646
Explore rate: 0.01
Learning rate: 0.1
Streaks: 90


Episode = 264
t=183
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.67071121201286
Explore rate: 0.01
Learning rate: 0.1
Streaks: 90


Episode = 264
t=184
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.67104050080084
Explore rate: 0.01
Learning rate: 0.1
Streaks: 90


Episode = 264
t=185
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.67002263378033
Explore rate: 0.01
Learning rate: 0.1
Streaks: 90


Episode = 264
t=186
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.67156475501429
Explore rate: 0.01
Learning rate: 0.1
Streaks: 90


Episode = 264
t=187
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.65554951417357
Explore rate: 0.01
Learning rate: 0.1
Streaks: 90


Episode = 264
t=188


Episode = 265
t=50
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.67748501685405
Explore rate: 0.01
Learning rate: 0.1
Streaks: 91


Episode = 265
t=51
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.67748501685405
Explore rate: 0.01
Learning rate: 0.1
Streaks: 91


Episode = 265
t=52
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.6778075318372
Explore rate: 0.01
Learning rate: 0.1
Streaks: 91


Episode = 265
t=53
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.67547206560948
Explore rate: 0.01
Learning rate: 0.1
Streaks: 91


Episode = 265
t=54
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.67818848637016
Explore rate: 0.01
Learning rate: 0.1
Streaks: 91


Episode = 265
t=55
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.67818848637016
Explore rate: 0.01
Learning rate: 0.1
Streaks: 91


Episode = 265
t=56
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.67606551919918
Explore rate: 0.01
Learning rate: 0.1
Streaks: 91


Episode = 265
t=57
Action: 


Episode = 265
t=118
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.6776489570817
Explore rate: 0.01
Learning rate: 0.1
Streaks: 91


Episode = 265
t=119
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.6770437312333
Explore rate: 0.01
Learning rate: 0.1
Streaks: 91


Episode = 265
t=120
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.67820150670426
Explore rate: 0.01
Learning rate: 0.1
Streaks: 91


Episode = 265
t=121
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.66594050324781
Explore rate: 0.01
Learning rate: 0.1
Streaks: 91


Episode = 265
t=122
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.6799863270486
Explore rate: 0.01
Learning rate: 0.1
Streaks: 91


Episode = 265
t=123
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.6799863270486
Explore rate: 0.01
Learning rate: 0.1
Streaks: 91


Episode = 265
t=124
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.68030634072154
Explore rate: 0.01
Learning rate: 0.1
Streaks: 91


Episode = 265
t=125
Act

Episode = 265
t=187
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.68096112940836
Explore rate: 0.01
Learning rate: 0.1
Streaks: 91


Episode = 265
t=188
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.68128016827895
Explore rate: 0.01
Learning rate: 0.1
Streaks: 91


Episode = 265
t=189
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.68054714099223
Explore rate: 0.01
Learning rate: 0.1
Streaks: 91


Episode = 265
t=190
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.68181316625784
Explore rate: 0.01
Learning rate: 0.1
Streaks: 91


Episode = 265
t=191
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.68181316625784
Explore rate: 0.01
Learning rate: 0.1
Streaks: 91


Episode = 265
t=192
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.66901170606359
Explore rate: 0.01
Learning rate: 0.1
Streaks: 91


Episode = 265
t=193
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.68185296572597
Explore rate: 0.01
Learning rate: 0.1
Streaks: 91


Episode = 265
t=194



Episode = 266
t=56
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.6866515984097
Explore rate: 0.01
Learning rate: 0.1
Streaks: 92


Episode = 266
t=57
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.6851255674437
Explore rate: 0.01
Learning rate: 0.1
Streaks: 92


Episode = 266
t=58
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.6839441293227
Explore rate: 0.01
Learning rate: 0.1
Streaks: 92


Episode = 266
t=59
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.6839441293227
Explore rate: 0.01
Learning rate: 0.1
Streaks: 92


Episode = 266
t=60
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.68426018519338
Explore rate: 0.01
Learning rate: 0.1
Streaks: 92


Episode = 266
t=61
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.6830969205532
Explore rate: 0.01
Learning rate: 0.1
Streaks: 92


Episode = 266
t=62
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.68681386974565
Explore rate: 0.01
Learning rate: 0.1
Streaks: 92


Episode = 266
t=63
Action: 1
St

Episode = 266
t=116
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.68648974203202
Explore rate: 0.01
Learning rate: 0.1
Streaks: 92


Episode = 266
t=117
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.68753700441283
Explore rate: 0.01
Learning rate: 0.1
Streaks: 92


Episode = 266
t=118
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.68753700441283
Explore rate: 0.01
Learning rate: 0.1
Streaks: 92


Episode = 266
t=119
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.67685580591923
Explore rate: 0.01
Learning rate: 0.1
Streaks: 92


Episode = 266
t=120
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.68898885307453
Explore rate: 0.01
Learning rate: 0.1
Streaks: 92


Episode = 266
t=121
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.68898885307453
Explore rate: 0.01
Learning rate: 0.1
Streaks: 92


Episode = 266
t=122
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.68929986422145
Explore rate: 0.01
Learning rate: 0.1
Streaks: 92


Episode = 266
t=123


t=176
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.68111559224637
Explore rate: 0.01
Learning rate: 0.1
Streaks: 92


Episode = 266
t=177
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.69130218978464
Explore rate: 0.01
Learning rate: 0.1
Streaks: 92


Episode = 266
t=178
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.69130218978464
Explore rate: 0.01
Learning rate: 0.1
Streaks: 92


Episode = 266
t=179
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.69161088759485
Explore rate: 0.01
Learning rate: 0.1
Streaks: 92


Episode = 266
t=180
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.68963097967999
Explore rate: 0.01
Learning rate: 0.1
Streaks: 92


Episode = 266
t=181
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.69014725486029
Explore rate: 0.01
Learning rate: 0.1
Streaks: 92


Episode = 266
t=182
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.69014725486029
Explore rate: 0.01
Learning rate: 0.1
Streaks: 92


Episode = 266
t=183
Action: 0
Stat


Episode = 267
t=35
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.69336053725407
Explore rate: 0.01
Learning rate: 0.1
Streaks: 93


Episode = 267
t=36
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.6947136387901
Explore rate: 0.01
Learning rate: 0.1
Streaks: 93


Episode = 267
t=37
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.69380113376889
Explore rate: 0.01
Learning rate: 0.1
Streaks: 93


Episode = 267
t=38
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.69199296642479
Explore rate: 0.01
Learning rate: 0.1
Streaks: 93


Episode = 267
t=39
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.69199296642479
Explore rate: 0.01
Learning rate: 0.1
Streaks: 93


Episode = 267
t=40
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.69230097345837
Explore rate: 0.01
Learning rate: 0.1
Streaks: 93


Episode = 267
t=41
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.69103020821466
Explore rate: 0.01
Learning rate: 0.1
Streaks: 93


Episode = 267
t=42
Action: 


Episode = 267
t=105
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.69531584397198
Explore rate: 0.01
Learning rate: 0.1
Streaks: 93


Episode = 267
t=106
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.69531584397198
Explore rate: 0.01
Learning rate: 0.1
Streaks: 93


Episode = 267
t=107
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.69562052812802
Explore rate: 0.01
Learning rate: 0.1
Streaks: 93


Episode = 267
t=108
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.69412381045936
Explore rate: 0.01
Learning rate: 0.1
Streaks: 93


Episode = 267
t=109
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.69605067407538
Explore rate: 0.01
Learning rate: 0.1
Streaks: 93


Episode = 267
t=110
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.69462044614689
Explore rate: 0.01
Learning rate: 0.1
Streaks: 93


Episode = 267
t=111
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.69746525085847
Explore rate: 0.01
Learning rate: 0.1
Streaks: 93


Episode = 267
t=112



Episode = 267
t=173
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.69904849247483
Explore rate: 0.01
Learning rate: 0.1
Streaks: 93


Episode = 267
t=174
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.69834145040078
Explore rate: 0.01
Learning rate: 0.1
Streaks: 93


Episode = 267
t=175
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.6995503031738
Explore rate: 0.01
Learning rate: 0.1
Streaks: 93


Episode = 267
t=176
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.6987627853749
Explore rate: 0.01
Learning rate: 0.1
Streaks: 93


Episode = 267
t=177
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.70043445572394
Explore rate: 0.01
Learning rate: 0.1
Streaks: 93


Episode = 267
t=178
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.70043445572394
Explore rate: 0.01
Learning rate: 0.1
Streaks: 93


Episode = 267
t=179
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.70073402126822
Explore rate: 0.01
Learning rate: 0.1
Streaks: 93


Episode = 267
t=180


State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.70314850236095
Explore rate: 0.01
Learning rate: 0.1
Streaks: 94


Episode = 268
t=41
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.70256000209677
Explore rate: 0.01
Learning rate: 0.1
Streaks: 94


Episode = 268
t=42
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.7036542586803
Explore rate: 0.01
Learning rate: 0.1
Streaks: 94


Episode = 268
t=43
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.70296577349644
Explore rate: 0.01
Learning rate: 0.1
Streaks: 94


Episode = 268
t=44
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.70303749204076
Explore rate: 0.01
Learning rate: 0.1
Streaks: 94


Episode = 268
t=45
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.70303749204076
Explore rate: 0.01
Learning rate: 0.1
Streaks: 94


Episode = 268
t=46
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.70333445454872
Explore rate: 0.01
Learning rate: 0.1
Streaks: 94


Episode = 268
t=47
Action: 0
State: (0, 0, 2, 2)
Reward: 


Episode = 268
t=109
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.70667033990263
Explore rate: 0.01
Learning rate: 0.1
Streaks: 94


Episode = 268
t=110
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.70696366956273
Explore rate: 0.01
Learning rate: 0.1
Streaks: 94


Episode = 268
t=111
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.70465371783337
Explore rate: 0.01
Learning rate: 0.1
Streaks: 94


Episode = 268
t=112
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.70809081966675
Explore rate: 0.01
Learning rate: 0.1
Streaks: 94


Episode = 268
t=113
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.70809081966675
Explore rate: 0.01
Learning rate: 0.1
Streaks: 94


Episode = 268
t=114
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.70838272884708
Explore rate: 0.01
Learning rate: 0.1
Streaks: 94


Episode = 268
t=115
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.7068989939409
Explore rate: 0.01
Learning rate: 0.1
Streaks: 94


Episode = 268
t=116


Learning rate: 0.1
Streaks: 94


Episode = 268
t=169
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.71256146529647
Explore rate: 0.01
Learning rate: 0.1
Streaks: 94


Episode = 268
t=170
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.69660740894899
Explore rate: 0.01
Learning rate: 0.1
Streaks: 94


Episode = 268
t=171
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.70984499288025
Explore rate: 0.01
Learning rate: 0.1
Streaks: 94


Episode = 268
t=172
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.70984499288025
Explore rate: 0.01
Learning rate: 0.1
Streaks: 94


Episode = 268
t=173
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.71013514788737
Explore rate: 0.01
Learning rate: 0.1
Streaks: 94


Episode = 268
t=174
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.71042501273948
Explore rate: 0.01
Learning rate: 0.1
Streaks: 94


Episode = 268
t=175
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.70902618275161
Explore rate: 0.01
Learning rate: 0.1
S


Episode = 269
t=38
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.71426312471995
Explore rate: 0.01
Learning rate: 0.1
Streaks: 95


Episode = 269
t=39
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.71279455804111
Explore rate: 0.01
Learning rate: 0.1
Streaks: 95


Episode = 269
t=40
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.71481636678428
Explore rate: 0.01
Learning rate: 0.1
Streaks: 95


Episode = 269
t=41
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.71481636678428
Explore rate: 0.01
Learning rate: 0.1
Streaks: 95


Episode = 269
t=42
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.71510155041749
Explore rate: 0.01
Learning rate: 0.1
Streaks: 95


Episode = 269
t=43
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.70160482570824
Explore rate: 0.01
Learning rate: 0.1
Streaks: 95


Episode = 269
t=44
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.71466063668178
Explore rate: 0.01
Learning rate: 0.1
Streaks: 95


Episode = 269
t=45
Action:


Episode = 269
t=107
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.71433337447569
Explore rate: 0.01
Learning rate: 0.1
Streaks: 95


Episode = 269
t=108
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.71928699474188
Explore rate: 0.01
Learning rate: 0.1
Streaks: 95


Episode = 269
t=109
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.71928699474188
Explore rate: 0.01
Learning rate: 0.1
Streaks: 95


Episode = 269
t=110
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.71956770774713
Explore rate: 0.01
Learning rate: 0.1
Streaks: 95


Episode = 269
t=111
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.71722196770523
Explore rate: 0.01
Learning rate: 0.1
Streaks: 95


Episode = 269
t=112
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.71601521085655
Explore rate: 0.01
Learning rate: 0.1
Streaks: 95


Episode = 269
t=113
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.71601521085655
Explore rate: 0.01
Learning rate: 0.1
Streaks: 95


Episode = 269
t=114


Episode = 269
t=178
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.71872970824985
Explore rate: 0.01
Learning rate: 0.1
Streaks: 95


Episode = 269
t=179
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.72183889260167
Explore rate: 0.01
Learning rate: 0.1
Streaks: 95


Episode = 269
t=180
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.72183889260167
Explore rate: 0.01
Learning rate: 0.1
Streaks: 95


Episode = 269
t=181
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.72211705370907
Explore rate: 0.01
Learning rate: 0.1
Streaks: 95


Episode = 269
t=182
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.70605139503678
Explore rate: 0.01
Learning rate: 0.1
Streaks: 95


Episode = 269
t=183
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.71971748485079
Explore rate: 0.01
Learning rate: 0.1
Streaks: 95


Episode = 269
t=184
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.71971748485079
Explore rate: 0.01
Learning rate: 0.1
Streaks: 95


Episode = 269
t=185


Episode = 270
t=48
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.72297299750838
Explore rate: 0.01
Learning rate: 0.1
Streaks: 96


Episode = 270
t=49
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.72297299750838
Explore rate: 0.01
Learning rate: 0.1
Streaks: 96


Episode = 270
t=50
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.72325002451088
Explore rate: 0.01
Learning rate: 0.1
Streaks: 96


Episode = 270
t=51
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.7218668147598
Explore rate: 0.01
Learning rate: 0.1
Streaks: 96


Episode = 270
t=52
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.72363891169894
Explore rate: 0.01
Learning rate: 0.1
Streaks: 96


Episode = 270
t=53
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.72232038554202
Explore rate: 0.01
Learning rate: 0.1
Streaks: 96


Episode = 270
t=54
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.72300238044028
Explore rate: 0.01
Learning rate: 0.1
Streaks: 96


Episode = 270
t=55
Action: 


Episode = 270
t=107
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.72561898973134
Explore rate: 0.01
Learning rate: 0.1
Streaks: 96


Episode = 270
t=108
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.72513030269162
Explore rate: 0.01
Learning rate: 0.1
Streaks: 96


Episode = 270
t=109
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.72513030269162
Explore rate: 0.01
Learning rate: 0.1
Streaks: 96


Episode = 270
t=110
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.72540517238893
Explore rate: 0.01
Learning rate: 0.1
Streaks: 96


Episode = 270
t=111
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.72453228976941
Explore rate: 0.01
Learning rate: 0.1
Streaks: 96


Episode = 270
t=112
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.727049279378
Explore rate: 0.01
Learning rate: 0.1
Streaks: 96


Episode = 270
t=113
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.727049279378
Explore rate: 0.01
Learning rate: 0.1
Streaks: 96


Episode = 270
t=114
Act

Best Q: 99.73045743815614
Explore rate: 0.01
Learning rate: 0.1
Streaks: 96


Episode = 270
t=178
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.73072698071798
Explore rate: 0.01
Learning rate: 0.1
Streaks: 96


Episode = 270
t=179
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.73099625373726
Explore rate: 0.01
Learning rate: 0.1
Streaks: 96


Episode = 270
t=180
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.7287775195327
Explore rate: 0.01
Learning rate: 0.1
Streaks: 96


Episode = 270
t=181
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.72827064484734
Explore rate: 0.01
Learning rate: 0.1
Streaks: 96


Episode = 270
t=182
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.72827064484734
Explore rate: 0.01
Learning rate: 0.1
Streaks: 96


Episode = 270
t=183
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.72854237420249
Explore rate: 0.01
Learning rate: 0.1
Streaks: 96


Episode = 270
t=184
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.7276636866


Episode = 271
t=37
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.73117772136663
Explore rate: 0.01
Learning rate: 0.1
Streaks: 97


Episode = 271
t=38
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.73115661507121
Explore rate: 0.01
Learning rate: 0.1
Streaks: 97


Episode = 271
t=39
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.73115661507121
Explore rate: 0.01
Learning rate: 0.1
Streaks: 97


Episode = 271
t=40
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.73142545845614
Explore rate: 0.01
Learning rate: 0.1
Streaks: 97


Episode = 271
t=41
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.73027494849612
Explore rate: 0.01
Learning rate: 0.1
Streaks: 97


Episode = 271
t=42
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.73236033447662
Explore rate: 0.01
Learning rate: 0.1
Streaks: 97


Episode = 271
t=43
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.73236033447662
Explore rate: 0.01
Learning rate: 0.1
Streaks: 97


Episode = 271
t=44
Action:


Episode = 271
t=96
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.7340310870377
Explore rate: 0.01
Learning rate: 0.1
Streaks: 97


Episode = 271
t=97
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.7340310870377
Explore rate: 0.01
Learning rate: 0.1
Streaks: 97


Episode = 271
t=98
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.73429705595066
Explore rate: 0.01
Learning rate: 0.1
Streaks: 97


Episode = 271
t=99
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.73371131839845
Explore rate: 0.01
Learning rate: 0.1
Streaks: 97


Episode = 271
t=100
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.73474390352669
Explore rate: 0.01
Learning rate: 0.1
Streaks: 97


Episode = 271
t=101
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.73474390352669
Explore rate: 0.01
Learning rate: 0.1
Streaks: 97


Episode = 271
t=102
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.73407983300774
Explore rate: 0.01
Learning rate: 0.1
Streaks: 97


Episode = 271
t=103
Actio


Episode = 271
t=157
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.73654842007561
Explore rate: 0.01
Learning rate: 0.1
Streaks: 97


Episode = 271
t=158
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.73681187165553
Explore rate: 0.01
Learning rate: 0.1
Streaks: 97


Episode = 271
t=159
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.73555884477751
Explore rate: 0.01
Learning rate: 0.1
Streaks: 97


Episode = 271
t=160
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.73718787943847
Explore rate: 0.01
Learning rate: 0.1
Streaks: 97


Episode = 271
t=161
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.73718787943847
Explore rate: 0.01
Learning rate: 0.1
Streaks: 97


Episode = 271
t=162
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.73598456036417
Explore rate: 0.01
Learning rate: 0.1
Streaks: 97


Episode = 271
t=163
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.73487479352784
Explore rate: 0.01
Learning rate: 0.1
Streaks: 97


Episode = 271
t=164


Episode = 272
t=26
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.73844073116773
Explore rate: 0.01
Learning rate: 0.1
Streaks: 98


Episode = 272
t=27
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.73844073116773
Explore rate: 0.01
Learning rate: 0.1
Streaks: 98


Episode = 272
t=28
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.73870229043656
Explore rate: 0.01
Learning rate: 0.1
Streaks: 98


Episode = 272
t=29
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.732001505675
Explore rate: 0.01
Learning rate: 0.1
Streaks: 98


Episode = 272
t=30
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.73954392322484
Explore rate: 0.01
Learning rate: 0.1
Streaks: 98


Episode = 272
t=31
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.73954392322484
Explore rate: 0.01
Learning rate: 0.1
Streaks: 98


Episode = 272
t=32
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.73980437930162
Explore rate: 0.01
Learning rate: 0.1
Streaks: 98


Episode = 272
t=33
Action: 0

Learning rate: 0.1
Streaks: 98


Episode = 272
t=97
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.74237926677307
Explore rate: 0.01
Learning rate: 0.1
Streaks: 98


Episode = 272
t=98
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.74237926677307
Explore rate: 0.01
Learning rate: 0.1
Streaks: 98


Episode = 272
t=99
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.7426368875063
Explore rate: 0.01
Learning rate: 0.1
Streaks: 98


Episode = 272
t=100
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.7409066481712
Explore rate: 0.01
Learning rate: 0.1
Streaks: 98


Episode = 272
t=101
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.74221358260839
Explore rate: 0.01
Learning rate: 0.1
Streaks: 98


Episode = 272
t=102
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.74221358260839
Explore rate: 0.01
Learning rate: 0.1
Streaks: 98


Episode = 272
t=103
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.74247136902578
Explore rate: 0.01
Learning rate: 0.1
Streak

State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.7448019407955
Explore rate: 0.01
Learning rate: 0.1
Streaks: 98


Episode = 272
t=157
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.74372636570865
Explore rate: 0.01
Learning rate: 0.1
Streaks: 98


Episode = 272
t=158
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.74393949801079
Explore rate: 0.01
Learning rate: 0.1
Streaks: 98


Episode = 272
t=159
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.74393949801079
Explore rate: 0.01
Learning rate: 0.1
Streaks: 98


Episode = 272
t=160
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.74419555851279
Explore rate: 0.01
Learning rate: 0.1
Streaks: 98


Episode = 272
t=161
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.74269244257309
Explore rate: 0.01
Learning rate: 0.1
Streaks: 98


Episode = 272
t=162
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.74453277847357
Explore rate: 0.01
Learning rate: 0.1
Streaks: 98


Episode = 272
t=163
Action: 0
State: (0, 0, 2, 1)
R

State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.74458427392008
Explore rate: 0.01
Learning rate: 0.1
Streaks: 99


Episode = 273
t=26
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.74762551656961
Explore rate: 0.01
Learning rate: 0.1
Streaks: 99


Episode = 273
t=27
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.74762551656961
Explore rate: 0.01
Learning rate: 0.1
Streaks: 99


Episode = 273
t=28
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.74787789105304
Explore rate: 0.01
Learning rate: 0.1
Streaks: 99


Episode = 273
t=29
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.74653733977141
Explore rate: 0.01
Learning rate: 0.1
Streaks: 99


Episode = 273
t=30
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.74822420848315
Explore rate: 0.01
Learning rate: 0.1
Streaks: 99


Episode = 273
t=31
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.73930646924693
Explore rate: 0.01
Learning rate: 0.1
Streaks: 99


Episode = 273
t=32
Action: 0
State: (0, 0, 2, 1)
Reward:

Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.75120226009015
Explore rate: 0.01
Learning rate: 0.1
Streaks: 99


Episode = 273
t=96
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.75120226009015
Explore rate: 0.01
Learning rate: 0.1
Streaks: 99


Episode = 273
t=97
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.74971452200684
Explore rate: 0.01
Learning rate: 0.1
Streaks: 99


Episode = 273
t=98
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.7491733317549
Explore rate: 0.01
Learning rate: 0.1
Streaks: 99


Episode = 273
t=99
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.7491733317549
Explore rate: 0.01
Learning rate: 0.1
Streaks: 99


Episode = 273
t=100
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.74787514587193
Explore rate: 0.01
Learning rate: 0.1
Streaks: 99


Episode = 273
t=101
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.75152768972573
Explore rate: 0.01
Learning rate: 0.1
Streaks: 99


Episode = 273
t=102
Action: 1
State: (0, 0, 3,


Episode = 273
t=154
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.7517672732696
Explore rate: 0.01
Learning rate: 0.1
Streaks: 99


Episode = 273
t=155
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.7517672732696
Explore rate: 0.01
Learning rate: 0.1
Streaks: 99


Episode = 273
t=156
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.75201550599634
Explore rate: 0.01
Learning rate: 0.1
Streaks: 99


Episode = 273
t=157
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.74681630230263
Explore rate: 0.01
Learning rate: 0.1
Streaks: 99


Episode = 273
t=158
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.75322216800227
Explore rate: 0.01
Learning rate: 0.1
Streaks: 99


Episode = 273
t=159
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.75322216800227
Explore rate: 0.01
Learning rate: 0.1
Streaks: 99


Episode = 273
t=160
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.75346894583427
Explore rate: 0.01
Learning rate: 0.1
Streaks: 99


Episode = 273
t=161
A


Episode = 274
t=24
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.75526757385262
Explore rate: 0.01
Learning rate: 0.1
Streaks: 100


Episode = 274
t=25
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.7537005317509
Explore rate: 0.01
Learning rate: 0.1
Streaks: 100


Episode = 274
t=26
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.7557787597363
Explore rate: 0.01
Learning rate: 0.1
Streaks: 100


Episode = 274
t=27
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.7557787597363
Explore rate: 0.01
Learning rate: 0.1
Streaks: 100


Episode = 274
t=28
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.75602298097657
Explore rate: 0.01
Learning rate: 0.1
Streaks: 100


Episode = 274
t=29
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.75390961100089
Explore rate: 0.01
Learning rate: 0.1
Streaks: 100


Episode = 274
t=30
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.75627731368512
Explore rate: 0.01
Learning rate: 0.1
Streaks: 100


Episode = 274
t=31
Act

Best Q: 99.74899100973683
Explore rate: 0.01
Learning rate: 0.1
Streaks: 100


Episode = 274
t=92
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.75796536496696
Explore rate: 0.01
Learning rate: 0.1
Streaks: 100


Episode = 274
t=93
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.75796536496696
Explore rate: 0.01
Learning rate: 0.1
Streaks: 100


Episode = 274
t=94
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.758207399602
Explore rate: 0.01
Learning rate: 0.1
Streaks: 100


Episode = 274
t=95
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.75642427620842
Explore rate: 0.01
Learning rate: 0.1
Streaks: 100


Episode = 274
t=96
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.75777495312876
Explore rate: 0.01
Learning rate: 0.1
Streaks: 100


Episode = 274
t=97
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.75777495312876
Explore rate: 0.01
Learning rate: 0.1
Streaks: 100


Episode = 274
t=98
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.75801717817


Episode = 274
t=160
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.76106745426374
Explore rate: 0.01
Learning rate: 0.1
Streaks: 100


Episode = 274
t=161
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.76106745426374
Explore rate: 0.01
Learning rate: 0.1
Streaks: 100


Episode = 274
t=162
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.75411190582729
Explore rate: 0.01
Learning rate: 0.1
Streaks: 100


Episode = 274
t=163
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.76004016662006
Explore rate: 0.01
Learning rate: 0.1
Streaks: 100


Episode = 274
t=164
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.76004016662006
Explore rate: 0.01
Learning rate: 0.1
Streaks: 100


Episode = 274
t=165
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.76028012645344
Explore rate: 0.01
Learning rate: 0.1
Streaks: 100


Episode = 274
t=166
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.76051984632699
Explore rate: 0.01
Learning rate: 0.1
Streaks: 100


Episode = 27

Best Q: 99.7613613110343
Explore rate: 0.01
Learning rate: 0.1
Streaks: 101


Episode = 275
t=20
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.76159994972328
Explore rate: 0.01
Learning rate: 0.1
Streaks: 101


Episode = 275
t=21
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.76004731222184
Explore rate: 0.01
Learning rate: 0.1
Streaks: 101


Episode = 275
t=22
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.76189919870616
Explore rate: 0.01
Learning rate: 0.1
Streaks: 101


Episode = 275
t=23
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.75619155319484
Explore rate: 0.01
Learning rate: 0.1
Streaks: 101


Episode = 275
t=24
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.7628618367429
Explore rate: 0.01
Learning rate: 0.1
Streaks: 101


Episode = 275
t=25
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.7628618367429
Explore rate: 0.01
Learning rate: 0.1
Streaks: 101


Episode = 275
t=26
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.763098974906

State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.76315360922312
Explore rate: 0.01
Learning rate: 0.1
Streaks: 101


Episode = 275
t=88
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.76479950731668
Explore rate: 0.01
Learning rate: 0.1
Streaks: 101


Episode = 275
t=89
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.75887850383056
Explore rate: 0.01
Learning rate: 0.1
Streaks: 101


Episode = 275
t=90
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.76582037908327
Explore rate: 0.01
Learning rate: 0.1
Streaks: 101


Episode = 275
t=91
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.76582037908327
Explore rate: 0.01
Learning rate: 0.1
Streaks: 101


Episode = 275
t=92
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.76605455870418
Explore rate: 0.01
Learning rate: 0.1
Streaks: 101


Episode = 275
t=93
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.76467581327232
Explore rate: 0.01
Learning rate: 0.1
Streaks: 101


Episode = 275
t=94
Action: 0
State: (0, 0, 3, 1)



Episode = 275
t=146
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.76612155849125
Explore rate: 0.01
Learning rate: 0.1
Streaks: 101


Episode = 275
t=147
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.76635543693276
Explore rate: 0.01
Learning rate: 0.1
Streaks: 101


Episode = 275
t=148
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.76466063735349
Explore rate: 0.01
Learning rate: 0.1
Streaks: 101


Episode = 275
t=149
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.76663157644424
Explore rate: 0.01
Learning rate: 0.1
Streaks: 101


Episode = 275
t=150
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.76663157644424
Explore rate: 0.01
Learning rate: 0.1
Streaks: 101


Episode = 275
t=151
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.76509109968612
Explore rate: 0.01
Learning rate: 0.1
Streaks: 101


Episode = 275
t=152
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.7686128894717
Explore rate: 0.01
Learning rate: 0.1
Streaks: 101


Episode = 275


Episode = 276
t=5
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.7686695234735
Explore rate: 0.01
Learning rate: 0.1
Streaks: 102


Episode = 276
t=6
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.76943213231789
Explore rate: 0.01
Learning rate: 0.1
Streaks: 102


Episode = 276
t=7
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.76355440588003
Explore rate: 0.01
Learning rate: 0.1
Streaks: 102


Episode = 276
t=8
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.77024131423394
Explore rate: 0.01
Learning rate: 0.1
Streaks: 102


Episode = 276
t=9
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.77024131423394
Explore rate: 0.01
Learning rate: 0.1
Streaks: 102


Episode = 276
t=10
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.7704710729197
Explore rate: 0.01
Learning rate: 0.1
Streaks: 102


Episode = 276
t=11
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.76937049583731
Explore rate: 0.01
Learning rate: 0.1
Streaks: 102


Episode = 276
t=12
Action:


Episode = 276
t=65
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.77247833348983
Explore rate: 0.01
Learning rate: 0.1
Streaks: 102


Episode = 276
t=66
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.77318620559751
Explore rate: 0.01
Learning rate: 0.1
Streaks: 102


Episode = 276
t=67
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.77318620559751
Explore rate: 0.01
Learning rate: 0.1
Streaks: 102


Episode = 276
t=68
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.77341301939191
Explore rate: 0.01
Learning rate: 0.1
Streaks: 102


Episode = 276
t=69
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.76198188795524
Explore rate: 0.01
Learning rate: 0.1
Streaks: 102


Episode = 276
t=70
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.77346588970704
Explore rate: 0.01
Learning rate: 0.1
Streaks: 102


Episode = 276
t=71
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.77346588970704
Explore rate: 0.01
Learning rate: 0.1
Streaks: 102


Episode = 276
t=72



Episode = 276
t=135
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.76702616662051
Explore rate: 0.01
Learning rate: 0.1
Streaks: 102


Episode = 276
t=136
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.77583666004699
Explore rate: 0.01
Learning rate: 0.1
Streaks: 102


Episode = 276
t=137
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.77583666004699
Explore rate: 0.01
Learning rate: 0.1
Streaks: 102


Episode = 276
t=138
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.77606082338694
Explore rate: 0.01
Learning rate: 0.1
Streaks: 102


Episode = 276
t=139
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.77628476256355
Explore rate: 0.01
Learning rate: 0.1
Streaks: 102


Episode = 276
t=140
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.77502475625587
Explore rate: 0.01
Learning rate: 0.1
Streaks: 102


Episode = 276
t=141
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.77658508089021
Explore rate: 0.01
Learning rate: 0.1
Streaks: 102


Episode = 27

State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.77809802804788
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=5
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.77809802804788
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.77809802804788
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=7
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.76936689375067
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=8
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.77584682774514
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=9
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.77584682774514
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=10
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.7760709809174
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=11
Action: 1
State: (0, 0, 3, 0)
Reward

State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.77795480554961
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=74
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.78041456366893
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=75
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.78041456366893
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=76
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.78063414910525
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=77
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.77956258657021
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=78
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.78094485953099
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=79
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.77991986900676
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=80
Action: 1
State: (0, 0, 3, 1)


Best Q: 99.78386557136179
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=133
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.78224971254109
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=134
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.7814924588047
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=135
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.7814924588047
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=136
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.7817109663459
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=137
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.77353802158667
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=138
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.78392173576718
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=139
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.78392

State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.78452599642236
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=192
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.78398732990095
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=193
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.78361749810814
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=194
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.78361749810814
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=195
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.78383388061003
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=196
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.78292968625394
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=197
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.78488206904355
Explore rate: 0.01
Learning rate: 0.1
Streaks: 103


Episode = 277
t=198
Action: 0
State: (0, 0,


Episode = 278
t=50
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.77935136452905
Explore rate: 0.01
Learning rate: 0.1
Streaks: 104


Episode = 278
t=51
Action: 1
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.77935136452905
Explore rate: 0.01
Learning rate: 0.1
Streaks: 104


Episode = 278
t=52
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.78616565353477
Explore rate: 0.01
Learning rate: 0.1
Streaks: 104


Episode = 278
t=53
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.78616565353477
Explore rate: 0.01
Learning rate: 0.1
Streaks: 104


Episode = 278
t=54
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.78637948788123
Explore rate: 0.01
Learning rate: 0.1
Streaks: 104


Episode = 278
t=55
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.78499614182499
Explore rate: 0.01
Learning rate: 0.1
Streaks: 104


Episode = 278
t=56
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.78664841559468
Explore rate: 0.01
Learning rate: 0.1
Streaks: 104


Episode = 278
t=57


Explore rate: 0.01
Learning rate: 0.1
Streaks: 104


Episode = 278
t=109
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.78779131944943
Explore rate: 0.01
Learning rate: 0.1
Streaks: 104


Episode = 278
t=110
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.78925538441663
Explore rate: 0.01
Learning rate: 0.1
Streaks: 104


Episode = 278
t=111
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.78925538441663
Explore rate: 0.01
Learning rate: 0.1
Streaks: 104


Episode = 278
t=112
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.78946612903222
Explore rate: 0.01
Learning rate: 0.1
Streaks: 104


Episode = 278
t=113
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.78822453346616
Explore rate: 0.01
Learning rate: 0.1
Streaks: 104


Episode = 278
t=114
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.78971876002635
Explore rate: 0.01
Learning rate: 0.1
Streaks: 104


Episode = 278
t=115
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.78971876002635
Explore rate:


Episode = 278
t=177
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.79086224344773
Explore rate: 0.01
Learning rate: 0.1
Streaks: 104


Episode = 278
t=178
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.79167539578135
Explore rate: 0.01
Learning rate: 0.1
Streaks: 104


Episode = 278
t=179
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.79167539578135
Explore rate: 0.01
Learning rate: 0.1
Streaks: 104


Episode = 278
t=180
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.79188372038557
Explore rate: 0.01
Learning rate: 0.1
Streaks: 104


Episode = 278
t=181
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.7907020783197
Explore rate: 0.01
Learning rate: 0.1
Streaks: 104


Episode = 278
t=182
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.79183399307914
Explore rate: 0.01
Learning rate: 0.1
Streaks: 104


Episode = 278
t=183
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.79183399307914
Explore rate: 0.01
Learning rate: 0.1
Streaks: 104


Episode = 278


Episode = 279
t=45
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.79215189110207
Explore rate: 0.01
Learning rate: 0.1
Streaks: 105


Episode = 279
t=46
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.79510249035597
Explore rate: 0.01
Learning rate: 0.1
Streaks: 105


Episode = 279
t=47
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.79510249035597
Explore rate: 0.01
Learning rate: 0.1
Streaks: 105


Episode = 279
t=48
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.79530738786562
Explore rate: 0.01
Learning rate: 0.1
Streaks: 105


Episode = 279
t=49
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.79551208047775
Explore rate: 0.01
Learning rate: 0.1
Streaks: 105


Episode = 279
t=50
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.7941824480847
Explore rate: 0.01
Learning rate: 0.1
Streaks: 105


Episode = 279
t=51
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.7926518485371
Explore rate: 0.01
Learning rate: 0.1
Streaks: 105


Episode = 279
t=52
Ac


Episode = 279
t=105
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.79563655421519
Explore rate: 0.01
Learning rate: 0.1
Streaks: 105


Episode = 279
t=106
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.79563655421519
Explore rate: 0.01
Learning rate: 0.1
Streaks: 105


Episode = 279
t=107
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.79584091766098
Explore rate: 0.01
Learning rate: 0.1
Streaks: 105


Episode = 279
t=108
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.79528650833493
Explore rate: 0.01
Learning rate: 0.1
Streaks: 105


Episode = 279
t=109
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.7950373026525
Explore rate: 0.01
Learning rate: 0.1
Streaks: 105


Episode = 279
t=110
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.7950373026525
Explore rate: 0.01
Learning rate: 0.1
Streaks: 105


Episode = 279
t=111
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.79524226534986
Explore rate: 0.01
Learning rate: 0.1
Streaks: 105


Episode = 279



Episode = 279
t=173
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.79711663210648
Explore rate: 0.01
Learning rate: 0.1
Streaks: 105


Episode = 279
t=174
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.79718173438074
Explore rate: 0.01
Learning rate: 0.1
Streaks: 105


Episode = 279
t=175
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.78675388955469
Explore rate: 0.01
Learning rate: 0.1
Streaks: 105


Episode = 279
t=176
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.78675388955469
Explore rate: 0.01
Learning rate: 0.1
Streaks: 105


Episode = 279
t=177
Action: 0
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.78675388955469
Explore rate: 0.01
Learning rate: 0.1
Streaks: 105


Episode = 279
t=178
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.79672883894293
Explore rate: 0.01
Learning rate: 0.1
Streaks: 105


Episode = 279
t=179
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.79672883894293
Explore rate: 0.01
Learning rate: 0.1
Streaks: 105


Episode = 27

State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.79955599761114
Explore rate: 0.01
Learning rate: 0.1
Streaks: 106


Episode = 280
t=33
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.79132791136131
Explore rate: 0.01
Learning rate: 0.1
Streaks: 106


Episode = 280
t=34
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.79827416136887
Explore rate: 0.01
Learning rate: 0.1
Streaks: 106


Episode = 280
t=35
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.79827416136887
Explore rate: 0.01
Learning rate: 0.1
Streaks: 106


Episode = 280
t=36
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.7984758872075
Explore rate: 0.01
Learning rate: 0.1
Streaks: 106


Episode = 280
t=37
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.79870152833863
Explore rate: 0.01
Learning rate: 0.1
Streaks: 106


Episode = 280
t=38
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.79888112149379
Explore rate: 0.01
Learning rate: 0.1
Streaks: 106


Episode = 280
t=39
Action: 1
State: (0, 0, 3, 0)
R


Episode = 280
t=102
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.80100014707489
Explore rate: 0.01
Learning rate: 0.1
Streaks: 106


Episode = 280
t=103
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.80119914692781
Explore rate: 0.01
Learning rate: 0.1
Streaks: 106


Episode = 280
t=104
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.79364062852882
Explore rate: 0.01
Learning rate: 0.1
Streaks: 106


Episode = 280
t=105
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.80134992471902
Explore rate: 0.01
Learning rate: 0.1
Streaks: 106


Episode = 280
t=106
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.80134992471902
Explore rate: 0.01
Learning rate: 0.1
Streaks: 106


Episode = 280
t=107
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.8015485747943
Explore rate: 0.01
Learning rate: 0.1
Streaks: 106


Episode = 280
t=108
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.8017470262195
Explore rate: 0.01
Learning rate: 0.1
Streaks: 106


Episode = 280



Episode = 280
t=161
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.80139593507117
Explore rate: 0.01
Learning rate: 0.1
Streaks: 106


Episode = 280
t=162
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.802723344653
Explore rate: 0.01
Learning rate: 0.1
Streaks: 106


Episode = 280
t=163
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.8017259526847
Explore rate: 0.01
Learning rate: 0.1
Streaks: 106


Episode = 280
t=164
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.80319299695255
Explore rate: 0.01
Learning rate: 0.1
Streaks: 106


Episode = 280
t=165
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.80319299695255
Explore rate: 0.01
Learning rate: 0.1
Streaks: 106


Episode = 280
t=166
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.8033898039556
Explore rate: 0.01
Learning rate: 0.1
Streaks: 106


Episode = 280
t=167
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.80158096751423
Explore rate: 0.01
Learning rate: 0.1
Streaks: 106


Episode = 280
t=


Episode = 281
t=21
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.80478198470641
Explore rate: 0.01
Learning rate: 0.1
Streaks: 107


Episode = 281
t=22
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.8049772027217
Explore rate: 0.01
Learning rate: 0.1
Streaks: 107


Episode = 281
t=23
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.80431169791684
Explore rate: 0.01
Learning rate: 0.1
Streaks: 107


Episode = 281
t=24
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.80446320521305
Explore rate: 0.01
Learning rate: 0.1
Streaks: 107


Episode = 281
t=25
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.80446320521305
Explore rate: 0.01
Learning rate: 0.1
Streaks: 107


Episode = 281
t=26
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.80465874200785
Explore rate: 0.01
Learning rate: 0.1
Streaks: 107


Episode = 281
t=27
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.80317256999143
Explore rate: 0.01
Learning rate: 0.1
Streaks: 107


Episode = 281
t=28
A


Episode = 281
t=81
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.8062006733816
Explore rate: 0.01
Learning rate: 0.1
Streaks: 107


Episode = 281
t=82
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.80639447270822
Explore rate: 0.01
Learning rate: 0.1
Streaks: 107


Episode = 281
t=83
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.80503896994827
Explore rate: 0.01
Learning rate: 0.1
Streaks: 107


Episode = 281
t=84
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.80731028929563
Explore rate: 0.01
Learning rate: 0.1
Streaks: 107


Episode = 281
t=85
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.80731028929563
Explore rate: 0.01
Learning rate: 0.1
Streaks: 107


Episode = 281
t=86
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.80750297900633
Explore rate: 0.01
Learning rate: 0.1
Streaks: 107


Episode = 281
t=87
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.80663430870315
Explore rate: 0.01
Learning rate: 0.1
Streaks: 107


Episode = 281
t=88
A


Episode = 281
t=140
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.80724554502565
Explore rate: 0.01
Learning rate: 0.1
Streaks: 107


Episode = 281
t=141
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.80636868667682
Explore rate: 0.01
Learning rate: 0.1
Streaks: 107


Episode = 281
t=142
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.80752496951357
Explore rate: 0.01
Learning rate: 0.1
Streaks: 107


Episode = 281
t=143
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.80285293187585
Explore rate: 0.01
Learning rate: 0.1
Streaks: 107


Episode = 281
t=144
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.80876989752912
Explore rate: 0.01
Learning rate: 0.1
Streaks: 107


Episode = 281
t=145
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.80876989752912
Explore rate: 0.01
Learning rate: 0.1
Streaks: 107


Episode = 281
t=146
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.80896112763159
Explore rate: 0.01
Learning rate: 0.1
Streaks: 107


Episode = 28


Episode = 282
t=9
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.8092690902338
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.80945982114356
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=11
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.80877136436618
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=12
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.81090708643114
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=13
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.81090708643114
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=14
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.81109617934472
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=15
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.80314939985061
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=16
Ac

State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.81247843803285
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=77
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.8116815719149
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=78
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.81275583925492
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=79
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.80737047267367
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=80
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.81312462982058
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=81
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.81312462982058
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=82
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.81331150519075
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=83
Action: 1
State: (0, 0, 3, 0)
R


Episode = 282
t=136
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.81479326688638
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=137
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.81497847361949
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=138
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.81516349514587
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=139
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.81304825645334
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=140
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.81530527587454
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=141
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.80531158493321
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=142
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.81412129615686
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 28


Episode = 282
t=196
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.81605706944136
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=197
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.81605706944136
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=198
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.81624101237192
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108


Episode = 282
t=199
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.81540619792709
Explore rate: 0.01
Learning rate: 0.1
Streaks: 108

Episode 282 finished after 199.000000 time steps

Episode = 283
t=0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.81602732631697
Explore rate: 0.01
Learning rate: 0.1
Streaks: 109


Episode = 283
t=1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.8151310528793
Explore rate: 0.01
Learning rate: 0.1
Streaks: 109


Episode = 283
t=2
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.81628814332663
Explore rate: 0.01
Lear


Episode = 283
t=65
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.81675107008554
Explore rate: 0.01
Learning rate: 0.1
Streaks: 109


Episode = 283
t=66
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.81749571538352
Explore rate: 0.01
Learning rate: 0.1
Streaks: 109


Episode = 283
t=67
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.81749571538352
Explore rate: 0.01
Learning rate: 0.1
Streaks: 109


Episode = 283
t=68
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.81767821966812
Explore rate: 0.01
Learning rate: 0.1
Streaks: 109


Episode = 283
t=69
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.81762729135158
Explore rate: 0.01
Learning rate: 0.1
Streaks: 109


Episode = 283
t=70
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.81801958914741
Explore rate: 0.01
Learning rate: 0.1
Streaks: 109


Episode = 283
t=71
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.81070523604251
Explore rate: 0.01
Learning rate: 0.1
Streaks: 109


Episode = 283
t=72



Episode = 283
t=134
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.82000255233596
Explore rate: 0.01
Learning rate: 0.1
Streaks: 109


Episode = 283
t=135
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.82018254978362
Explore rate: 0.01
Learning rate: 0.1
Streaks: 109


Episode = 283
t=136
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.81976934253076
Explore rate: 0.01
Learning rate: 0.1
Streaks: 109


Episode = 283
t=137
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.8195647522816
Explore rate: 0.01
Learning rate: 0.1
Streaks: 109


Episode = 283
t=138
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.8195647522816
Explore rate: 0.01
Learning rate: 0.1
Streaks: 109


Episode = 283
t=139
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.8197451875293
Explore rate: 0.01
Learning rate: 0.1
Streaks: 109


Episode = 283
t=140
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.81992544234177
Explore rate: 0.01
Learning rate: 0.1
Streaks: 109


Episode = 283
t


Episode = 284
t=4
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.82124380736563
Explore rate: 0.01
Learning rate: 0.1
Streaks: 110


Episode = 284
t=5
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.82124380736563
Explore rate: 0.01
Learning rate: 0.1
Streaks: 110


Episode = 284
t=6
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.82142256355827
Explore rate: 0.01
Learning rate: 0.1
Streaks: 110


Episode = 284
t=7
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.82112255434487
Explore rate: 0.01
Learning rate: 0.1
Streaks: 110


Episode = 284
t=8
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.8217321597754
Explore rate: 0.01
Learning rate: 0.1
Streaks: 110


Episode = 284
t=9
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.81608452465801
Explore rate: 0.01
Learning rate: 0.1
Streaks: 110


Episode = 284
t=10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.82180576869641
Explore rate: 0.01
Learning rate: 0.1
Streaks: 110


Episode = 284
t=11
Action:


Episode = 284
t=71
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.82340177542288
Explore rate: 0.01
Learning rate: 0.1
Streaks: 110


Episode = 284
t=72
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.82357837364745
Explore rate: 0.01
Learning rate: 0.1
Streaks: 110


Episode = 284
t=73
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.8228241447327
Explore rate: 0.01
Learning rate: 0.1
Streaks: 110


Episode = 284
t=74
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.82407482549668
Explore rate: 0.01
Learning rate: 0.1
Streaks: 110


Episode = 284
t=75
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.82407482549668
Explore rate: 0.01
Learning rate: 0.1
Streaks: 110


Episode = 284
t=76
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.82425075067118
Explore rate: 0.01
Learning rate: 0.1
Streaks: 110


Episode = 284
t=77
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.82290283114685
Explore rate: 0.01
Learning rate: 0.1
Streaks: 110


Episode = 284
t=78
A

State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.82686214258887
Explore rate: 0.01
Learning rate: 0.1
Streaks: 110


Episode = 284
t=141
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.82570547815087
Explore rate: 0.01
Learning rate: 0.1
Streaks: 110


Episode = 284
t=142
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.82512863263557
Explore rate: 0.01
Learning rate: 0.1
Streaks: 110


Episode = 284
t=143
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.82512863263557
Explore rate: 0.01
Learning rate: 0.1
Streaks: 110


Episode = 284
t=144
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.82530350400293
Explore rate: 0.01
Learning rate: 0.1
Streaks: 110


Episode = 284
t=145
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.82077505364268
Explore rate: 0.01
Learning rate: 0.1
Streaks: 110


Episode = 284
t=146
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.82692077066692
Explore rate: 0.01
Learning rate: 0.1
Streaks: 110


Episode = 284
t=147
Action: 0
State: (0, 0,


Episode = 285
t=1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.82625393242878
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=2
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.82711584563187
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=3
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.82651300790346
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=4
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.82722904885112
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=5
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.82675738294937
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=6
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.82773940443577
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=7
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.82773940443577
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=8
Action: 

Best Q: 99.82872477743321
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=61
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.82872477743321
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=62
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.82889605265578
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=63
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.82906715660312
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=64
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.82847316229925
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=65
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.82892367271678
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=66
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.82892367271678
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=67
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.829094749



Episode = 285
t=119
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.8296485532455
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=120
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.83102717186928
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=121
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.82995538793601
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=122
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.83041473404353
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=123
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.83041473404353
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=124
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.83058431930948
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=125
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.82617385554431
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 28


Episode = 285
t=188
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.82743813959367
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=189
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.83233114647356
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=190
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.83233114647356
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=191
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.83249881532709
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=192
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.83266631651176
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=193
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.83140850354535
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 285
t=194
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.83285972702672
Explore rate: 0.01
Learning rate: 0.1
Streaks: 111


Episode = 28


Episode = 286
t=47
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.83358342807637
Explore rate: 0.01
Learning rate: 0.1
Streaks: 112


Episode = 286
t=48
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.83461064882684
Explore rate: 0.01
Learning rate: 0.1
Streaks: 112


Episode = 286
t=49
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.83385153950259
Explore rate: 0.01
Learning rate: 0.1
Streaks: 112


Episode = 286
t=50
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.83406649908059
Explore rate: 0.01
Learning rate: 0.1
Streaks: 112


Episode = 286
t=51
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.83406649908059
Explore rate: 0.01
Learning rate: 0.1
Streaks: 112


Episode = 286
t=52
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.83423243258152
Explore rate: 0.01
Learning rate: 0.1
Streaks: 112


Episode = 286
t=53
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.83295598167383
Explore rate: 0.01
Learning rate: 0.1
Streaks: 112


Episode = 286
t=54



Episode = 286
t=116
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.83628397997308
Explore rate: 0.01
Learning rate: 0.1
Streaks: 112


Episode = 286
t=117
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.83628397997308
Explore rate: 0.01
Learning rate: 0.1
Streaks: 112


Episode = 286
t=118
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.8364476959931
Explore rate: 0.01
Learning rate: 0.1
Streaks: 112


Episode = 286
t=119
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.83589112479704
Explore rate: 0.01
Learning rate: 0.1
Streaks: 112


Episode = 286
t=120
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.8367033448223
Explore rate: 0.01
Learning rate: 0.1
Streaks: 112


Episode = 286
t=121
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.8367033448223
Explore rate: 0.01
Learning rate: 0.1
Streaks: 112


Episode = 286
t=122
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.83613564345475
Explore rate: 0.01
Learning rate: 0.1
Streaks: 112


Episode = 286
t


Episode = 286
t=175
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.83833996036097
Explore rate: 0.01
Learning rate: 0.1
Streaks: 112


Episode = 286
t=176
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.83150921278438
Explore rate: 0.01
Learning rate: 0.1
Streaks: 112


Episode = 286
t=177
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.83756929449092
Explore rate: 0.01
Learning rate: 0.1
Streaks: 112


Episode = 286
t=178
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.83756929449092
Explore rate: 0.01
Learning rate: 0.1
Streaks: 112


Episode = 286
t=179
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.83773172519643
Explore rate: 0.01
Learning rate: 0.1
Streaks: 112


Episode = 286
t=180
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.83789399347124
Explore rate: 0.01
Learning rate: 0.1
Streaks: 112


Episode = 286
t=181
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.83690885278008
Explore rate: 0.01
Learning rate: 0.1
Streaks: 112


Episode = 28

Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.83445795267767
Explore rate: 0.01
Learning rate: 0.1
Streaks: 113


Episode = 287
t=34
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.8391850484363
Explore rate: 0.01
Learning rate: 0.1
Streaks: 113


Episode = 287
t=35
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.8391850484363
Explore rate: 0.01
Learning rate: 0.1
Streaks: 113


Episode = 287
t=36
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.83934586338788
Explore rate: 0.01
Learning rate: 0.1
Streaks: 113


Episode = 287
t=37
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.8387057165924
Explore rate: 0.01
Learning rate: 0.1
Streaks: 113


Episode = 287
t=38
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.83958773171469
Explore rate: 0.01
Learning rate: 0.1
Streaks: 113


Episode = 287
t=39
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.83895433037291
Explore rate: 0.01
Learning rate: 0.1
Streaks: 113


Episode = 287
t=40
Action: 1
State: (0, 0,

State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.84037257030106
Explore rate: 0.01
Learning rate: 0.1
Streaks: 113


Episode = 287
t=94
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.84053219773075
Explore rate: 0.01
Learning rate: 0.1
Streaks: 113


Episode = 287
t=95
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.84022661724764
Explore rate: 0.01
Learning rate: 0.1
Streaks: 113


Episode = 287
t=96
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.84080493408725
Explore rate: 0.01
Learning rate: 0.1
Streaks: 113


Episode = 287
t=97
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.84044364399752
Explore rate: 0.01
Learning rate: 0.1
Streaks: 113


Episode = 287
t=98
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.84057761589469
Explore rate: 0.01
Learning rate: 0.1
Streaks: 113


Episode = 287
t=99
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.84057761589469
Explore rate: 0.01
Learning rate: 0.1
Streaks: 113


Episode = 287
t=100
Action: 0
State: (0, 0, 3, 1)

State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.84210103103787
Explore rate: 0.01
Learning rate: 0.1
Streaks: 113


Episode = 287
t=154
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.84225893000684
Explore rate: 0.01
Learning rate: 0.1
Streaks: 113


Episode = 287
t=155
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.84241667107683
Explore rate: 0.01
Learning rate: 0.1
Streaks: 113


Episode = 287
t=156
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.84161506484205
Explore rate: 0.01
Learning rate: 0.1
Streaks: 113


Episode = 287
t=157
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.84263672038455
Explore rate: 0.01
Learning rate: 0.1
Streaks: 113


Episode = 287
t=158
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.83758015299497
Explore rate: 0.01
Learning rate: 0.1
Streaks: 113


Episode = 287
t=159
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.84155794218587
Explore rate: 0.01
Learning rate: 0.1
Streaks: 113


Episode = 287
t=160
Action: 1
State: (0, 0,


Episode = 288
t=14
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.84374062930348
Explore rate: 0.01
Learning rate: 0.1
Streaks: 114


Episode = 288
t=15
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.84374062930348
Explore rate: 0.01
Learning rate: 0.1
Streaks: 114


Episode = 288
t=16
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.84389688867418
Explore rate: 0.01
Learning rate: 0.1
Streaks: 114


Episode = 288
t=17
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.84286630857645
Explore rate: 0.01
Learning rate: 0.1
Streaks: 114


Episode = 288
t=18
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.84375222917839
Explore rate: 0.01
Learning rate: 0.1
Streaks: 114


Episode = 288
t=19
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.84375222917839
Explore rate: 0.01
Learning rate: 0.1
Streaks: 114


Episode = 288
t=20
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.84390847694921
Explore rate: 0.01
Learning rate: 0.1
Streaks: 114


Episode = 288
t=21



Episode = 288
t=73
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.844494743071
Explore rate: 0.01
Learning rate: 0.1
Streaks: 114


Episode = 288
t=74
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.844494743071
Explore rate: 0.01
Learning rate: 0.1
Streaks: 114


Episode = 288
t=75
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.84465024832792
Explore rate: 0.01
Learning rate: 0.1
Streaks: 114


Episode = 288
t=76
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.8448055980796
Explore rate: 0.01
Learning rate: 0.1
Streaks: 114


Episode = 288
t=77
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.84342160135586
Explore rate: 0.01
Learning rate: 0.1
Streaks: 114


Episode = 288
t=78
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.84578915702984
Explore rate: 0.01
Learning rate: 0.1
Streaks: 114


Episode = 288
t=79
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.84578915702984
Explore rate: 0.01
Learning rate: 0.1
Streaks: 114


Episode = 288
t=80
Actio


Episode = 288
t=142
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.84693148188178
Explore rate: 0.01
Learning rate: 0.1
Streaks: 114


Episode = 288
t=143
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.84693148188178
Explore rate: 0.01
Learning rate: 0.1
Streaks: 114


Episode = 288
t=144
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.8470845503999
Explore rate: 0.01
Learning rate: 0.1
Streaks: 114


Episode = 288
t=145
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.84641554868634
Explore rate: 0.01
Learning rate: 0.1
Streaks: 114


Episode = 288
t=146
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.8473088585845
Explore rate: 0.01
Learning rate: 0.1
Streaks: 114


Episode = 288
t=147
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.8473088585845
Explore rate: 0.01
Learning rate: 0.1
Streaks: 114


Episode = 288
t=148
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.84238394320627
Explore rate: 0.01
Learning rate: 0.1
Streaks: 114


Episode = 288
t


Episode = 289
t=11
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.84800013049403
Explore rate: 0.01
Learning rate: 0.1
Streaks: 115


Episode = 289
t=12
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.84815213036353
Explore rate: 0.01
Learning rate: 0.1
Streaks: 115


Episode = 289
t=13
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.84755914688515
Explore rate: 0.01
Learning rate: 0.1
Streaks: 115


Episode = 289
t=14
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.84838193595148
Explore rate: 0.01
Learning rate: 0.1
Streaks: 115


Episode = 289
t=15
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.84779304385583
Explore rate: 0.01
Learning rate: 0.1
Streaks: 115


Episode = 289
t=16
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.84849306344466
Explore rate: 0.01
Learning rate: 0.1
Streaks: 115


Episode = 289
t=17
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.84849306344466
Explore rate: 0.01
Learning rate: 0.1
Streaks: 115


Episode = 289
t=18


State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.84960584229263
Explore rate: 0.01
Learning rate: 0.1
Streaks: 115


Episode = 289
t=81
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.84973943923751
Explore rate: 0.01
Learning rate: 0.1
Streaks: 115


Episode = 289
t=82
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.84973943923751
Explore rate: 0.01
Learning rate: 0.1
Streaks: 115


Episode = 289
t=83
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.84988969979827
Explore rate: 0.01
Learning rate: 0.1
Streaks: 115


Episode = 289
t=84
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.8488371913687
Explore rate: 0.01
Learning rate: 0.1
Streaks: 115


Episode = 289
t=85
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.85032163987273
Explore rate: 0.01
Learning rate: 0.1
Streaks: 115


Episode = 289
t=86
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.85032163987273
Explore rate: 0.01
Learning rate: 0.1
Streaks: 115


Episode = 289
t=87
Action: 1
State: (0, 0, 3, 1)
R


Episode = 289
t=140
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.85191791929348
Explore rate: 0.01
Learning rate: 0.1
Streaks: 115


Episode = 289
t=141
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.8520660013742
Explore rate: 0.01
Learning rate: 0.1
Streaks: 115


Episode = 289
t=142
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.84701615836752
Explore rate: 0.01
Learning rate: 0.1
Streaks: 115


Episode = 289
t=143
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.85172251044659
Explore rate: 0.01
Learning rate: 0.1
Streaks: 115


Episode = 289
t=144
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.85172251044659
Explore rate: 0.01
Learning rate: 0.1
Streaks: 115


Episode = 289
t=145
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.85187078793615
Explore rate: 0.01
Learning rate: 0.1
Streaks: 115


Episode = 289
t=146
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.85106377588714
Explore rate: 0.01
Learning rate: 0.1
Streaks: 115


Episode = 289


Episode = 290
t=11
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.85344550792605
Explore rate: 0.01
Learning rate: 0.1
Streaks: 116


Episode = 290
t=12
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.85359206241813
Explore rate: 0.01
Learning rate: 0.1
Streaks: 116


Episode = 290
t=13
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.85213560013287
Explore rate: 0.01
Learning rate: 0.1
Streaks: 116


Episode = 290
t=14
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.85428241499966
Explore rate: 0.01
Learning rate: 0.1
Streaks: 116


Episode = 290
t=15
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.85428241499966
Explore rate: 0.01
Learning rate: 0.1
Streaks: 116


Episode = 290
t=16
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.85442813258466
Explore rate: 0.01
Learning rate: 0.1
Streaks: 116


Episode = 290
t=17
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.85319327967306
Explore rate: 0.01
Learning rate: 0.1
Streaks: 116


Episode = 290
t=18


Learning rate: 0.1
Streaks: 116


Episode = 290
t=70
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.85561028877476
Explore rate: 0.01
Learning rate: 0.1
Streaks: 116


Episode = 290
t=71
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.85450445193578
Explore rate: 0.01
Learning rate: 0.1
Streaks: 116


Episode = 290
t=72
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.85480191519501
Explore rate: 0.01
Learning rate: 0.1
Streaks: 116


Episode = 290
t=73
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.85480191519501
Explore rate: 0.01
Learning rate: 0.1
Streaks: 116


Episode = 290
t=74
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.85494711327982
Explore rate: 0.01
Learning rate: 0.1
Streaks: 116


Episode = 290
t=75
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.85434733381027
Explore rate: 0.01
Learning rate: 0.1
Streaks: 116


Episode = 290
t=76
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.85577515137902
Explore rate: 0.01
Learning rate: 0.1
S


Episode = 290
t=139
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.8561057940438
Explore rate: 0.01
Learning rate: 0.1
Streaks: 116


Episode = 290
t=140
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.85706277195385
Explore rate: 0.01
Learning rate: 0.1
Streaks: 116


Episode = 290
t=141
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.85706277195385
Explore rate: 0.01
Learning rate: 0.1
Streaks: 116


Episode = 290
t=142
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.8572057091819
Explore rate: 0.01
Learning rate: 0.1
Streaks: 116


Episode = 290
t=143
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.85639804598497
Explore rate: 0.01
Learning rate: 0.1
Streaks: 116


Episode = 290
t=144
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.85720519657545
Explore rate: 0.01
Learning rate: 0.1
Streaks: 116


Episode = 290
t=145
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.85720519657545
Explore rate: 0.01
Learning rate: 0.1
Streaks: 116


Episode = 290



Episode = 290
t=199
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.8588242251281
Explore rate: 0.01
Learning rate: 0.1
Streaks: 116

Episode 290 finished after 199.000000 time steps

Episode = 291
t=0
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.85818098752993
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=1
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.85764324811566
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=2
Action: 0
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.85839720745162
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=3
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.85255098780031
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=4
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.85896540090297
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=5
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.85896540090297
Explore rate: 0.01
Learning r


Episode = 291
t=59
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.85167748051984
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=60
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.85892281433148
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=61
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.85892281433148
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=62
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.85906389151715
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=63
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.85879386909686
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=64
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86019230448883
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=65
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86019230448883
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=66



Episode = 291
t=118
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86178306892181
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=119
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86192128585289
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=120
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.85338813482555
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=121
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.8606076758518
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=122
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.8606076758518
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=123
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.86074706817594
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=124
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.86021727827615
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291



Episode = 291
t=177
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.86263978303742
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=178
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.86277714325439
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=179
Action: 1
State: (0, 0, 2, 0)
Reward: 1.0
Best Q: 99.85769279569506
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=180
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86234497264118
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=181
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86234497264118
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=182
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86248262766854
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 291
t=183
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86262014504088
Explore rate: 0.01
Learning rate: 0.1
Streaks: 117


Episode = 29


Episode = 292
t=38
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86394999892322
Explore rate: 0.01
Learning rate: 0.1
Streaks: 118


Episode = 292
t=39
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.8630041739038
Explore rate: 0.01
Learning rate: 0.1
Streaks: 118


Episode = 292
t=40
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86411485724834
Explore rate: 0.01
Learning rate: 0.1
Streaks: 118


Episode = 292
t=41
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.863251127381
Explore rate: 0.01
Learning rate: 0.1
Streaks: 118


Episode = 292
t=42
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86416523313423
Explore rate: 0.01
Learning rate: 0.1
Streaks: 118


Episode = 292
t=43
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.85604818394745
Explore rate: 0.01
Learning rate: 0.1
Streaks: 118


Episode = 292
t=44
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.86416676756099
Explore rate: 0.01
Learning rate: 0.1
Streaks: 118


Episode = 292
t=45
Act


Episode = 292
t=105
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86594103063638
Explore rate: 0.01
Learning rate: 0.1
Streaks: 118


Episode = 292
t=106
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86607508960574
Explore rate: 0.01
Learning rate: 0.1
Streaks: 118


Episode = 292
t=107
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86620901451613
Explore rate: 0.01
Learning rate: 0.1
Streaks: 118


Episode = 292
t=108
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.8653967636994
Explore rate: 0.01
Learning rate: 0.1
Streaks: 118


Episode = 292
t=109
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.8663828045577
Explore rate: 0.01
Learning rate: 0.1
Streaks: 118


Episode = 292
t=110
Action: 0
State: (0, 0, 3, 2)
Reward: 1.0
Best Q: 99.85699587554124
Explore rate: 0.01
Learning rate: 0.1
Streaks: 118


Episode = 292
t=111
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.86498861488158
Explore rate: 0.01
Learning rate: 0.1
Streaks: 118


Episode = 292



Episode = 292
t=174
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86631563578747
Explore rate: 0.01
Learning rate: 0.1
Streaks: 118


Episode = 292
t=175
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.86635725441683
Explore rate: 0.01
Learning rate: 0.1
Streaks: 118


Episode = 292
t=176
Action: 1
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86657375632379
Explore rate: 0.01
Learning rate: 0.1
Streaks: 118


Episode = 292
t=177
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86657375632379
Explore rate: 0.01
Learning rate: 0.1
Streaks: 118


Episode = 292
t=178
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.8665123308512
Explore rate: 0.01
Learning rate: 0.1
Streaks: 118


Episode = 292
t=179
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.86615360432394
Explore rate: 0.01
Learning rate: 0.1
Streaks: 118


Episode = 292
t=180
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.86615360432394
Explore rate: 0.01
Learning rate: 0.1
Streaks: 118


Episode = 292


Episode = 293
t=33
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.86732899698815
Explore rate: 0.01
Learning rate: 0.1
Streaks: 119


Episode = 293
t=34
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.86746166799117
Explore rate: 0.01
Learning rate: 0.1
Streaks: 119


Episode = 293
t=35
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.8666176211832
Explore rate: 0.01
Learning rate: 0.1
Streaks: 119


Episode = 293
t=36
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86731139919583
Explore rate: 0.01
Learning rate: 0.1
Streaks: 119


Episode = 293
t=37
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86731139919583
Explore rate: 0.01
Learning rate: 0.1
Streaks: 119


Episode = 293
t=38
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86744408779663
Explore rate: 0.01
Learning rate: 0.1
Streaks: 119


Episode = 293
t=39
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.86734716194124
Explore rate: 0.01
Learning rate: 0.1
Streaks: 119


Episode = 293
t=40
A


Episode = 293
t=92
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.8684445167969
Explore rate: 0.01
Learning rate: 0.1
Streaks: 119


Episode = 293
t=93
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.86846379828017
Explore rate: 0.01
Learning rate: 0.1
Streaks: 119


Episode = 293
t=94
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.86904500945305
Explore rate: 0.01
Learning rate: 0.1
Streaks: 119


Episode = 293
t=95
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.86904500945305
Explore rate: 0.01
Learning rate: 0.1
Streaks: 119


Episode = 293
t=96
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.8691759644436
Explore rate: 0.01
Learning rate: 0.1
Streaks: 119


Episode = 293
t=97
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.86817972051387
Explore rate: 0.01
Learning rate: 0.1
Streaks: 119


Episode = 293
t=98
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86869638108183
Explore rate: 0.01
Learning rate: 0.1
Streaks: 119


Episode = 293
t=99
Ac

Learning rate: 0.1
Streaks: 119


Episode = 293
t=152
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.87043140858195
Explore rate: 0.01
Learning rate: 0.1
Streaks: 119


Episode = 293
t=153
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.86947311909219
Explore rate: 0.01
Learning rate: 0.1
Streaks: 119


Episode = 293
t=154
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86987062676513
Explore rate: 0.01
Learning rate: 0.1
Streaks: 119


Episode = 293
t=155
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.86987062676513
Explore rate: 0.01
Learning rate: 0.1
Streaks: 119


Episode = 293
t=156
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.87000075613837
Explore rate: 0.01
Learning rate: 0.1
Streaks: 119


Episode = 293
t=157
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.8699958805599
Explore rate: 0.01
Learning rate: 0.1
Streaks: 119


Episode = 293
t=158
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.87058271824615
Explore rate: 0.01
Learning rate:


Episode = 294
t=10
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.87159208865482
Explore rate: 0.01
Learning rate: 0.1
Streaks: 120


Episode = 294
t=11
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.87159208865482
Explore rate: 0.01
Learning rate: 0.1
Streaks: 120


Episode = 294
t=12
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.87172049656617
Explore rate: 0.01
Learning rate: 0.1
Streaks: 120


Episode = 294
t=13
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.87129328880054
Explore rate: 0.01
Learning rate: 0.1
Streaks: 120


Episode = 294
t=14
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.87168639152398
Explore rate: 0.01
Learning rate: 0.1
Streaks: 120


Episode = 294
t=15
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.87168639152398
Explore rate: 0.01
Learning rate: 0.1
Streaks: 120


Episode = 294
t=16
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.87181470513245
Explore rate: 0.01
Learning rate: 0.1
Streaks: 120


Episode = 294
t=17



Episode = 294
t=77
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.87258747290055
Explore rate: 0.01
Learning rate: 0.1
Streaks: 120


Episode = 294
t=78
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.87271488542765
Explore rate: 0.01
Learning rate: 0.1
Streaks: 120


Episode = 294
t=79
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.87212151150153
Explore rate: 0.01
Learning rate: 0.1
Streaks: 120


Episode = 294
t=80
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.87271879627387
Explore rate: 0.01
Learning rate: 0.1
Streaks: 120


Episode = 294
t=81
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.87271879627387
Explore rate: 0.01
Learning rate: 0.1
Streaks: 120


Episode = 294
t=82
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.8728460774776
Explore rate: 0.01
Learning rate: 0.1
Streaks: 120


Episode = 294
t=83
Action: 0
State: (0, 0, 2, 2)
Reward: 1.0
Best Q: 99.87245668260769
Explore rate: 0.01
Learning rate: 0.1
Streaks: 120


Episode = 294
t=84
A

Explore rate: 0.01
Learning rate: 0.1
Streaks: 120


Episode = 294
t=136
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.87387749073183
Explore rate: 0.01
Learning rate: 0.1
Streaks: 120


Episode = 294
t=137
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.87387749073183
Explore rate: 0.01
Learning rate: 0.1
Streaks: 120


Episode = 294
t=138
Action: 1
State: (0, 0, 3, 1)
Reward: 1.0
Best Q: 99.8740036132411
Explore rate: 0.01
Learning rate: 0.1
Streaks: 120


Episode = 294
t=139
Action: 1
State: (0, 0, 3, 0)
Reward: 1.0
Best Q: 99.87313735754779
Explore rate: 0.01
Learning rate: 0.1
Streaks: 120


Episode = 294
t=140
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.87451600914031
Explore rate: 0.01
Learning rate: 0.1
Streaks: 120


Episode = 294
t=141
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.87451600914031
Explore rate: 0.01
Learning rate: 0.1
Streaks: 120


Episode = 294
t=142
Action: 0
State: (0, 0, 2, 1)
Reward: 1.0
Best Q: 99.87464149313116
Explore rate: 

In [19]:
env.close()

######  State action table 

In [20]:
q_table

array([[[[[ 0.        ,  0.        ],
          [ 0.        ,  0.        ],
          [ 0.        ,  0.        ]],

         [[72.32116405, 73.9784133 ],
          [52.18025505, 73.9363331 ],
          [40.42120602, 33.20650437]],

         [[99.87191113, 98.6750309 ],
          [99.87573952, 99.70213473],
          [98.89585788, 99.87545661]],

         [[99.87415673, 99.32989218],
          [99.5272281 , 99.87504384],
          [98.09661538, 99.86919502]],

         [[ 3.940399  ,  0.        ],
          [54.12191014, 51.23599749],
          [51.98114376, 54.41309887]],

         [[ 0.        ,  0.        ],
          [ 0.        ,  0.        ],
          [ 0.        ,  0.        ]]]]])