In [1]:
import gym
import numpy as np
import gym_hanoi

In [3]:
env = gym.make("Hanoi-v0")
env.set_env_parameters(num_disks=7, verbose=True)

Hanoi Environment Parameters have been set to:
	 Number of Disks: 7
	 Transition Failure Probability: 0


In [4]:
action_size = env.action_space.n
print("Action size ", action_size)

state_size = 3 ** env.num_disks
print("State size ", state_size)

Action size  6
State size  2187


In [5]:
qtable = np.zeros((state_size, action_size))

In [6]:
total_episodes = 50000        # Total episodes
total_test_episodes = 100     # Total test episodes
max_steps = 400                # Max steps per episode

learning_rate = 0.7           # Learning rate
gamma = 0.618                 # Discounting rate

# Exploration parameters
epsilon = 1.0                 # Exploration rate
max_epsilon = 1.0             # Exploration probability at start
min_epsilon = 0.01            # Minimum exploration probability 
decay_rate = 0.01             # Exponential decay rate for exploration prob

In [7]:
def state_to_q_state(state):
    q_state = 0
    exp = len(state) - 1
    for disk in state:
        q_state += disk * 3 ** exp
        exp -= 1
    return q_state

In [8]:
import random

for episode in range(total_episodes):
    if episode > 0 and episode % 1000 == 0:
        print("### EPISODE %s ###" % episode)
    
    # reset environment each episode
    state = env.reset()
    step = 0
    done = False
    
    for step in range(max_steps):
        q_state = state_to_q_state(state)
        
        # decide if explore or exploit
        tradeoff = random.uniform(0, 1)
        
        if tradeoff > epsilon:
            action = np.argmax(qtable[q_state, :])
        else:
            action = env.action_space.sample()
            
        new_state, reward, done, info = env.step(action)
        
        q_new_state = state_to_q_state(new_state)
    
        if episode > 0 and episode % 1000 == 0 and step == max_steps - 1:
            print(qtable)
        
        qtable[q_state, action] = qtable[q_state, action] + learning_rate * (reward + gamma * np.max(qtable[q_new_state, :]) - qtable[q_state, action])
        
        state = new_state
        
        if done:
            break
        
    epsilon = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay_rate * episode)

### EPISODE 1000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.6177596  -2.61775963 -2.61776131 -2.61776531 -2.61776198 -2.61776777]
 [-2.61774956 -2.61774791 -2.61775219 -2.61775226 -2.61773999 -2.61774044]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 2000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778586 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777766 -2.6177778 ]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 3000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778586 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777766 -2.6177778 ]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 4000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778586 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777766 -2.6177778 ]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 5000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778637 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777857 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 6000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778637 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777857 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 7000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778637 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777857 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 8000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778637 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777857 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 9000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778637 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777857 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 10000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778637 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777857 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 11000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778637 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777857 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 12000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778637 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777857 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 13000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778637 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777857 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 14000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778637 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777857 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 15000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778637 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777857 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 16000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778637 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777857 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 17000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 18000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 19000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 20000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 21000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 22000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 23000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 24000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 25000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 26000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 27000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 28000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 29000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 30000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 31000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 32000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 33000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 34000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 35000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 36000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 37000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 38000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 39000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 40000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 41000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 42000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 43000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 44000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 45000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 46000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 47000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 48000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


### EPISODE 49000 ###
[[-2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105 -2.61780105]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 ...
 [-2.61778692 -2.61778912 -2.6177896  -2.61778699 -2.6177876  -2.61779008]
 [-2.6177808  -2.61778015 -2.61778519 -2.61778187 -2.61777993 -2.61777861]
 [ 0.          0.          0.          0.          0.          0.        ]]


In [9]:
rewards = []

for episode in range(total_test_episodes):
    state = env.reset()
    step = 0
    done = False
    total_rewards = 0
    
    for step in range(max_steps):
        q_state = state_to_q_state(state)
        
        action = np.argmax(qtable[q_state, :])
        
        new_state, reward, done, info = env.step(action)
        
        total_rewards += reward
        
        if done:
            rewards.append(total_rewards)
            break
            
        state = new_state

env.close()
print("Score over time: %s" % (sum(rewards) / total_test_episodes))




Score over time: 0.0


In [9]:
print(qtable)

[[  3.4428597    5.57097039   3.4428597    3.4428597    3.4428597
    3.4428597 ]
 [  0.           0.           0.           8.61692363   0.
    0.        ]
 [  0.          22.0388408    8.83332758  10.59784962   8.83112224
    0.        ]
 [ 14.58659418   9.01451018   9.01396972   5.57095266   9.01451381
    9.0145056 ]
 [  0.           0.           0.           0.           0.
    0.        ]
 [ 23.6029032   23.6029032   38.1924      61.8         38.1924
   38.1924    ]
 [  3.44261585   3.38596633   5.5707362    5.57096391   5.56703283
    9.0145152 ]
 [  0.           0.           0.           0.           0.
    0.        ]
 [ 61.8        100.          61.8         61.8         61.8
   38.1924    ]
 [  3.44273708   3.44285766   3.44263553   5.57097039   3.28076278
    3.44285266]
 [  0.           0.           3.47486901   0.           0.
    0.        ]
 [ 30.04907501  61.795769    11.08055084  19.24185167  31.50888964
   33.2937128 ]
 [ 14.58659418  23.6029032    9.0145152    9.014