In [7]:
import gym

env = gym.make("Taxi-v3").env

env.render()
env.reset() # reset environment to a new, random state
env.render()

print("Action Space {}".format(env.action_space))
print("State Space {}".format(env.observation_space))

+---------+
|R: | : :[35mG[0m|
| : | : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |[43mB[0m: |
+---------+

+---------+
|[34;1mR[0m: | : :G|
| : |[43m [0m: : |
| : : : : |
| | : | : |
|Y| : |[35mB[0m: |
+---------+

Action Space Discrete(6)
State Space Discrete(500)


In [8]:
state = env.encode(3, 1, 2, 0) # (taxi row, taxi column, passenger index, destination index)
print("State:", state)

env.s = state
env.render()

State: 328
+---------+
|[35mR[0m: | : :G|
| : | : : |
| : : : : |
| |[43m [0m: | : |
|[34;1mY[0m| : |B: |
+---------+



In [29]:
import numpy as np
q_table = np.zeros([env.observation_space.n, env.action_space.n])

"""Training the agent"""
print("How many generations ?")
gen = int(input())

import random
from IPython.display import clear_output

# Hyperparameters
alpha = 0.1
gamma = 0.6
epsilon = 0.1
for j in range(gen):
    print("Gen : ", j+1)
    time_taken = 0
    total_penalty = 0
    # For plotting metrics
    all_epochs = []
    all_penalties = []
    frames = []
    for i in range(1, 101):
        state = env.reset()

        epochs, penalties, reward, = 0, 0, 0
        done = False

        while not done:
            if random.uniform(0, 1) < epsilon:
                action = env.action_space.sample() # Explore action space
            else:
                action = np.argmax(q_table[state]) # Exploit learned values

            next_state, reward, done, info = env.step(action) 

            old_value = q_table[state, action]
            next_max = np.max(q_table[next_state])

            new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
            q_table[state, action] = new_value

            if reward == -10:
                penalties += 1

            frames.append({
            'frame': env.render(mode='ansi'),
            'state': state,
            'action': action,
            'reward': reward
            }
        )

            state = next_state

            epochs += 1

        if i % 100 == 0:
            print(f"Episode: {i}")
        time_taken += epochs
        total_penalty += penalties
    print(f"Total time in this generation is : {time_taken}, average time taken per episode of generation is : {time_taken/i}")
    print(f"Total penalty : {total_penalty} and average penalty : {total_penalty/100}")
    print("Training finished.\n")

from IPython.display import clear_output
from time import sleep


How many generations ?
1000
Gen :  1
Episode: 100
Total time in this generation is : 46788, average time taken per episode of generation is : 467.88
Total penalty : 2526 and average penalty : 25.26
Training finished.

Gen :  2
Episode: 100
Total time in this generation is : 18828, average time taken per episode of generation is : 188.28
Total penalty : 664 and average penalty : 6.64
Training finished.

Gen :  3
Episode: 100
Total time in this generation is : 13288, average time taken per episode of generation is : 132.88
Total penalty : 470 and average penalty : 4.7
Training finished.

Gen :  4
Episode: 100
Total time in this generation is : 9302, average time taken per episode of generation is : 93.02
Total penalty : 286 and average penalty : 2.86
Training finished.

Gen :  5
Episode: 100
Total time in this generation is : 7765, average time taken per episode of generation is : 77.65
Total penalty : 251 and average penalty : 2.51
Training finished.

Gen :  6
Episode: 100
Total time in

Episode: 100
Total time in this generation is : 1536, average time taken per episode of generation is : 15.36
Total penalty : 53 and average penalty : 0.53
Training finished.

Gen :  49
Episode: 100
Total time in this generation is : 1545, average time taken per episode of generation is : 15.45
Total penalty : 50 and average penalty : 0.5
Training finished.

Gen :  50
Episode: 100
Total time in this generation is : 1617, average time taken per episode of generation is : 16.17
Total penalty : 46 and average penalty : 0.46
Training finished.

Gen :  51
Episode: 100
Total time in this generation is : 1558, average time taken per episode of generation is : 15.58
Total penalty : 45 and average penalty : 0.45
Training finished.

Gen :  52
Episode: 100
Total time in this generation is : 1509, average time taken per episode of generation is : 15.09
Total penalty : 46 and average penalty : 0.46
Training finished.

Gen :  53
Episode: 100
Total time in this generation is : 1529, average time take

Episode: 100
Total time in this generation is : 1555, average time taken per episode of generation is : 15.55
Total penalty : 49 and average penalty : 0.49
Training finished.

Gen :  94
Episode: 100
Total time in this generation is : 1536, average time taken per episode of generation is : 15.36
Total penalty : 57 and average penalty : 0.57
Training finished.

Gen :  95
Episode: 100
Total time in this generation is : 1474, average time taken per episode of generation is : 14.74
Total penalty : 47 and average penalty : 0.47
Training finished.

Gen :  96
Episode: 100
Total time in this generation is : 1491, average time taken per episode of generation is : 14.91
Total penalty : 52 and average penalty : 0.52
Training finished.

Gen :  97
Episode: 100
Total time in this generation is : 1479, average time taken per episode of generation is : 14.79
Total penalty : 52 and average penalty : 0.52
Training finished.

Gen :  98
Episode: 100
Total time in this generation is : 1484, average time tak

Episode: 100
Total time in this generation is : 1496, average time taken per episode of generation is : 14.96
Total penalty : 39 and average penalty : 0.39
Training finished.

Gen :  139
Episode: 100
Total time in this generation is : 1521, average time taken per episode of generation is : 15.21
Total penalty : 60 and average penalty : 0.6
Training finished.

Gen :  140
Episode: 100
Total time in this generation is : 1469, average time taken per episode of generation is : 14.69
Total penalty : 41 and average penalty : 0.41
Training finished.

Gen :  141
Episode: 100
Total time in this generation is : 1471, average time taken per episode of generation is : 14.71
Total penalty : 47 and average penalty : 0.47
Training finished.

Gen :  142
Episode: 100
Total time in this generation is : 1474, average time taken per episode of generation is : 14.74
Total penalty : 37 and average penalty : 0.37
Training finished.

Gen :  143
Episode: 100
Total time in this generation is : 1475, average time

Episode: 100
Total time in this generation is : 1468, average time taken per episode of generation is : 14.68
Total penalty : 50 and average penalty : 0.5
Training finished.

Gen :  184
Episode: 100
Total time in this generation is : 1471, average time taken per episode of generation is : 14.71
Total penalty : 45 and average penalty : 0.45
Training finished.

Gen :  185
Episode: 100
Total time in this generation is : 1457, average time taken per episode of generation is : 14.57
Total penalty : 35 and average penalty : 0.35
Training finished.

Gen :  186
Episode: 100
Total time in this generation is : 1462, average time taken per episode of generation is : 14.62
Total penalty : 50 and average penalty : 0.5
Training finished.

Gen :  187
Episode: 100
Total time in this generation is : 1481, average time taken per episode of generation is : 14.81
Total penalty : 39 and average penalty : 0.39
Training finished.

Gen :  188
Episode: 100
Total time in this generation is : 1468, average time 

Episode: 100
Total time in this generation is : 1524, average time taken per episode of generation is : 15.24
Total penalty : 44 and average penalty : 0.44
Training finished.

Gen :  228
Episode: 100
Total time in this generation is : 1485, average time taken per episode of generation is : 14.85
Total penalty : 41 and average penalty : 0.41
Training finished.

Gen :  229
Episode: 100
Total time in this generation is : 1474, average time taken per episode of generation is : 14.74
Total penalty : 57 and average penalty : 0.57
Training finished.

Gen :  230
Episode: 100
Total time in this generation is : 1480, average time taken per episode of generation is : 14.8
Total penalty : 59 and average penalty : 0.59
Training finished.

Gen :  231
Episode: 100
Total time in this generation is : 1456, average time taken per episode of generation is : 14.56
Total penalty : 31 and average penalty : 0.31
Training finished.

Gen :  232
Episode: 100
Total time in this generation is : 1479, average time

Episode: 100
Total time in this generation is : 1482, average time taken per episode of generation is : 14.82
Total penalty : 40 and average penalty : 0.4
Training finished.

Gen :  273
Episode: 100
Total time in this generation is : 1468, average time taken per episode of generation is : 14.68
Total penalty : 39 and average penalty : 0.39
Training finished.

Gen :  274
Episode: 100
Total time in this generation is : 1450, average time taken per episode of generation is : 14.5
Total penalty : 37 and average penalty : 0.37
Training finished.

Gen :  275
Episode: 100
Total time in this generation is : 1456, average time taken per episode of generation is : 14.56
Total penalty : 48 and average penalty : 0.48
Training finished.

Gen :  276
Episode: 100
Total time in this generation is : 1471, average time taken per episode of generation is : 14.71
Total penalty : 54 and average penalty : 0.54
Training finished.

Gen :  277
Episode: 100
Total time in this generation is : 1495, average time 

Episode: 100
Total time in this generation is : 1450, average time taken per episode of generation is : 14.5
Total penalty : 36 and average penalty : 0.36
Training finished.

Gen :  318
Episode: 100
Total time in this generation is : 1460, average time taken per episode of generation is : 14.6
Total penalty : 41 and average penalty : 0.41
Training finished.

Gen :  319
Episode: 100
Total time in this generation is : 1443, average time taken per episode of generation is : 14.43
Total penalty : 32 and average penalty : 0.32
Training finished.

Gen :  320
Episode: 100
Total time in this generation is : 1474, average time taken per episode of generation is : 14.74
Total penalty : 48 and average penalty : 0.48
Training finished.

Gen :  321
Episode: 100
Total time in this generation is : 1422, average time taken per episode of generation is : 14.22
Total penalty : 42 and average penalty : 0.42
Training finished.

Gen :  322
Episode: 100
Total time in this generation is : 1519, average time 

Episode: 100
Total time in this generation is : 1485, average time taken per episode of generation is : 14.85
Total penalty : 44 and average penalty : 0.44
Training finished.

Gen :  363
Episode: 100
Total time in this generation is : 1481, average time taken per episode of generation is : 14.81
Total penalty : 45 and average penalty : 0.45
Training finished.

Gen :  364
Episode: 100
Total time in this generation is : 1465, average time taken per episode of generation is : 14.65
Total penalty : 44 and average penalty : 0.44
Training finished.

Gen :  365
Episode: 100
Total time in this generation is : 1507, average time taken per episode of generation is : 15.07
Total penalty : 39 and average penalty : 0.39
Training finished.

Gen :  366
Episode: 100
Total time in this generation is : 1538, average time taken per episode of generation is : 15.38
Total penalty : 44 and average penalty : 0.44
Training finished.

Gen :  367
Episode: 100
Total time in this generation is : 1474, average tim

Episode: 100
Total time in this generation is : 1437, average time taken per episode of generation is : 14.37
Total penalty : 45 and average penalty : 0.45
Training finished.

Gen :  408
Episode: 100
Total time in this generation is : 1458, average time taken per episode of generation is : 14.58
Total penalty : 47 and average penalty : 0.47
Training finished.

Gen :  409
Episode: 100
Total time in this generation is : 1412, average time taken per episode of generation is : 14.12
Total penalty : 35 and average penalty : 0.35
Training finished.

Gen :  410
Episode: 100
Total time in this generation is : 1513, average time taken per episode of generation is : 15.13
Total penalty : 45 and average penalty : 0.45
Training finished.

Gen :  411
Episode: 100
Total time in this generation is : 1442, average time taken per episode of generation is : 14.42
Total penalty : 50 and average penalty : 0.5
Training finished.

Gen :  412
Episode: 100
Total time in this generation is : 1435, average time

Episode: 100
Total time in this generation is : 1444, average time taken per episode of generation is : 14.44
Total penalty : 45 and average penalty : 0.45
Training finished.

Gen :  452
Episode: 100
Total time in this generation is : 1501, average time taken per episode of generation is : 15.01
Total penalty : 52 and average penalty : 0.52
Training finished.

Gen :  453
Episode: 100
Total time in this generation is : 1461, average time taken per episode of generation is : 14.61
Total penalty : 46 and average penalty : 0.46
Training finished.

Gen :  454
Episode: 100
Total time in this generation is : 1470, average time taken per episode of generation is : 14.7
Total penalty : 42 and average penalty : 0.42
Training finished.

Gen :  455
Episode: 100
Total time in this generation is : 1456, average time taken per episode of generation is : 14.56
Total penalty : 43 and average penalty : 0.43
Training finished.

Gen :  456
Episode: 100
Total time in this generation is : 1445, average time

Episode: 100
Total time in this generation is : 1475, average time taken per episode of generation is : 14.75
Total penalty : 50 and average penalty : 0.5
Training finished.

Gen :  497
Episode: 100
Total time in this generation is : 1506, average time taken per episode of generation is : 15.06
Total penalty : 52 and average penalty : 0.52
Training finished.

Gen :  498
Episode: 100
Total time in this generation is : 1463, average time taken per episode of generation is : 14.63
Total penalty : 52 and average penalty : 0.52
Training finished.

Gen :  499
Episode: 100
Total time in this generation is : 1473, average time taken per episode of generation is : 14.73
Total penalty : 42 and average penalty : 0.42
Training finished.

Gen :  500
Episode: 100
Total time in this generation is : 1470, average time taken per episode of generation is : 14.7
Total penalty : 47 and average penalty : 0.47
Training finished.

Gen :  501
Episode: 100
Total time in this generation is : 1411, average time 

Episode: 100
Total time in this generation is : 1382, average time taken per episode of generation is : 13.82
Total penalty : 32 and average penalty : 0.32
Training finished.

Gen :  542
Episode: 100
Total time in this generation is : 1453, average time taken per episode of generation is : 14.53
Total penalty : 33 and average penalty : 0.33
Training finished.

Gen :  543
Episode: 100
Total time in this generation is : 1454, average time taken per episode of generation is : 14.54
Total penalty : 47 and average penalty : 0.47
Training finished.

Gen :  544
Episode: 100
Total time in this generation is : 1484, average time taken per episode of generation is : 14.84
Total penalty : 51 and average penalty : 0.51
Training finished.

Gen :  545
Episode: 100
Total time in this generation is : 1485, average time taken per episode of generation is : 14.85
Total penalty : 49 and average penalty : 0.49
Training finished.

Gen :  546
Episode: 100
Total time in this generation is : 1487, average tim

Episode: 100
Total time in this generation is : 1497, average time taken per episode of generation is : 14.97
Total penalty : 45 and average penalty : 0.45
Training finished.

Gen :  586
Episode: 100
Total time in this generation is : 1446, average time taken per episode of generation is : 14.46
Total penalty : 54 and average penalty : 0.54
Training finished.

Gen :  587
Episode: 100
Total time in this generation is : 1508, average time taken per episode of generation is : 15.08
Total penalty : 52 and average penalty : 0.52
Training finished.

Gen :  588
Episode: 100
Total time in this generation is : 1449, average time taken per episode of generation is : 14.49
Total penalty : 36 and average penalty : 0.36
Training finished.

Gen :  589
Episode: 100
Total time in this generation is : 1527, average time taken per episode of generation is : 15.27
Total penalty : 45 and average penalty : 0.45
Training finished.

Gen :  590
Episode: 100
Total time in this generation is : 1473, average tim

Episode: 100
Total time in this generation is : 1455, average time taken per episode of generation is : 14.55
Total penalty : 38 and average penalty : 0.38
Training finished.

Gen :  631
Episode: 100
Total time in this generation is : 1512, average time taken per episode of generation is : 15.12
Total penalty : 46 and average penalty : 0.46
Training finished.

Gen :  632
Episode: 100
Total time in this generation is : 1438, average time taken per episode of generation is : 14.38
Total penalty : 32 and average penalty : 0.32
Training finished.

Gen :  633
Episode: 100
Total time in this generation is : 1473, average time taken per episode of generation is : 14.73
Total penalty : 49 and average penalty : 0.49
Training finished.

Gen :  634
Episode: 100
Total time in this generation is : 1491, average time taken per episode of generation is : 14.91
Total penalty : 39 and average penalty : 0.39
Training finished.

Gen :  635
Episode: 100
Total time in this generation is : 1489, average tim

Episode: 100
Total time in this generation is : 1452, average time taken per episode of generation is : 14.52
Total penalty : 53 and average penalty : 0.53
Training finished.

Gen :  677
Episode: 100
Total time in this generation is : 1521, average time taken per episode of generation is : 15.21
Total penalty : 38 and average penalty : 0.38
Training finished.

Gen :  678
Episode: 100
Total time in this generation is : 1523, average time taken per episode of generation is : 15.23
Total penalty : 36 and average penalty : 0.36
Training finished.

Gen :  679
Episode: 100
Total time in this generation is : 1448, average time taken per episode of generation is : 14.48
Total penalty : 35 and average penalty : 0.35
Training finished.

Gen :  680
Episode: 100
Total time in this generation is : 1521, average time taken per episode of generation is : 15.21
Total penalty : 54 and average penalty : 0.54
Training finished.

Gen :  681
Episode: 100
Total time in this generation is : 1488, average tim

Episode: 100
Total time in this generation is : 1461, average time taken per episode of generation is : 14.61
Total penalty : 44 and average penalty : 0.44
Training finished.

Gen :  722
Episode: 100
Total time in this generation is : 1503, average time taken per episode of generation is : 15.03
Total penalty : 41 and average penalty : 0.41
Training finished.

Gen :  723
Episode: 100
Total time in this generation is : 1479, average time taken per episode of generation is : 14.79
Total penalty : 31 and average penalty : 0.31
Training finished.

Gen :  724
Episode: 100
Total time in this generation is : 1474, average time taken per episode of generation is : 14.74
Total penalty : 43 and average penalty : 0.43
Training finished.

Gen :  725
Episode: 100
Total time in this generation is : 1493, average time taken per episode of generation is : 14.93
Total penalty : 39 and average penalty : 0.39
Training finished.

Gen :  726
Episode: 100
Total time in this generation is : 1460, average tim

Episode: 100
Total time in this generation is : 1471, average time taken per episode of generation is : 14.71
Total penalty : 40 and average penalty : 0.4
Training finished.

Gen :  766
Episode: 100
Total time in this generation is : 1483, average time taken per episode of generation is : 14.83
Total penalty : 42 and average penalty : 0.42
Training finished.

Gen :  767
Episode: 100
Total time in this generation is : 1408, average time taken per episode of generation is : 14.08
Total penalty : 40 and average penalty : 0.4
Training finished.

Gen :  768
Episode: 100
Total time in this generation is : 1487, average time taken per episode of generation is : 14.87
Total penalty : 41 and average penalty : 0.41
Training finished.

Gen :  769
Episode: 100
Total time in this generation is : 1479, average time taken per episode of generation is : 14.79
Total penalty : 53 and average penalty : 0.53
Training finished.

Gen :  770
Episode: 100
Total time in this generation is : 1488, average time 

Episode: 100
Total time in this generation is : 1533, average time taken per episode of generation is : 15.33
Total penalty : 48 and average penalty : 0.48
Training finished.

Gen :  810
Episode: 100
Total time in this generation is : 1434, average time taken per episode of generation is : 14.34
Total penalty : 35 and average penalty : 0.35
Training finished.

Gen :  811
Episode: 100
Total time in this generation is : 1445, average time taken per episode of generation is : 14.45
Total penalty : 36 and average penalty : 0.36
Training finished.

Gen :  812
Episode: 100
Total time in this generation is : 1499, average time taken per episode of generation is : 14.99
Total penalty : 51 and average penalty : 0.51
Training finished.

Gen :  813
Episode: 100
Total time in this generation is : 1445, average time taken per episode of generation is : 14.45
Total penalty : 33 and average penalty : 0.33
Training finished.

Gen :  814
Episode: 100
Total time in this generation is : 1507, average tim

Episode: 100
Total time in this generation is : 1470, average time taken per episode of generation is : 14.7
Total penalty : 46 and average penalty : 0.46
Training finished.

Gen :  854
Episode: 100
Total time in this generation is : 1450, average time taken per episode of generation is : 14.5
Total penalty : 44 and average penalty : 0.44
Training finished.

Gen :  855
Episode: 100
Total time in this generation is : 1517, average time taken per episode of generation is : 15.17
Total penalty : 55 and average penalty : 0.55
Training finished.

Gen :  856
Episode: 100
Total time in this generation is : 1505, average time taken per episode of generation is : 15.05
Total penalty : 44 and average penalty : 0.44
Training finished.

Gen :  857
Episode: 100
Total time in this generation is : 1482, average time taken per episode of generation is : 14.82
Total penalty : 41 and average penalty : 0.41
Training finished.

Gen :  858
Episode: 100
Total time in this generation is : 1486, average time 

Episode: 100
Total time in this generation is : 1482, average time taken per episode of generation is : 14.82
Total penalty : 51 and average penalty : 0.51
Training finished.

Gen :  900
Episode: 100
Total time in this generation is : 1485, average time taken per episode of generation is : 14.85
Total penalty : 52 and average penalty : 0.52
Training finished.

Gen :  901
Episode: 100
Total time in this generation is : 1460, average time taken per episode of generation is : 14.6
Total penalty : 39 and average penalty : 0.39
Training finished.

Gen :  902
Episode: 100
Total time in this generation is : 1468, average time taken per episode of generation is : 14.68
Total penalty : 40 and average penalty : 0.4
Training finished.

Gen :  903
Episode: 100
Total time in this generation is : 1502, average time taken per episode of generation is : 15.02
Total penalty : 52 and average penalty : 0.52
Training finished.

Gen :  904
Episode: 100
Total time in this generation is : 1493, average time 

Episode: 100
Total time in this generation is : 1419, average time taken per episode of generation is : 14.19
Total penalty : 54 and average penalty : 0.54
Training finished.

Gen :  946
Episode: 100
Total time in this generation is : 1468, average time taken per episode of generation is : 14.68
Total penalty : 46 and average penalty : 0.46
Training finished.

Gen :  947
Episode: 100
Total time in this generation is : 1428, average time taken per episode of generation is : 14.28
Total penalty : 47 and average penalty : 0.47
Training finished.

Gen :  948
Episode: 100
Total time in this generation is : 1487, average time taken per episode of generation is : 14.87
Total penalty : 43 and average penalty : 0.43
Training finished.

Gen :  949
Episode: 100
Total time in this generation is : 1435, average time taken per episode of generation is : 14.35
Total penalty : 37 and average penalty : 0.37
Training finished.

Gen :  950
Episode: 100
Total time in this generation is : 1454, average tim

Episode: 100
Total time in this generation is : 1558, average time taken per episode of generation is : 15.58
Total penalty : 49 and average penalty : 0.49
Training finished.

Gen :  990
Episode: 100
Total time in this generation is : 1529, average time taken per episode of generation is : 15.29
Total penalty : 46 and average penalty : 0.46
Training finished.

Gen :  991
Episode: 100
Total time in this generation is : 1471, average time taken per episode of generation is : 14.71
Total penalty : 47 and average penalty : 0.47
Training finished.

Gen :  992
Episode: 100
Total time in this generation is : 1461, average time taken per episode of generation is : 14.61
Total penalty : 48 and average penalty : 0.48
Training finished.

Gen :  993
Episode: 100
Total time in this generation is : 1498, average time taken per episode of generation is : 14.98
Total penalty : 53 and average penalty : 0.53
Training finished.

Gen :  994
Episode: 100
Total time in this generation is : 1480, average tim

In [30]:
"""Evaluate agent's performance after Q-learning"""

total_epochs, total_penalties = 0, 0
episodes = 100

for _ in range(episodes):
    state = env.reset()
    epochs, penalties, reward = 0, 0, 0
    
    done = False
    
    while not done:
        action = np.argmax(q_table[state])
        state, reward, done, info = env.step(action)

        if reward == -10:
            penalties += 1

        epochs += 1

    total_penalties += penalties
    total_epochs += epochs

print(f"Results after {episodes} episodes:")
print(f"Average timesteps per episode: {total_epochs / episodes}")
print(f"Average penalties per episode: {total_penalties / episodes}")


Results after 100 episodes:
Average timesteps per episode: 13.08
Average penalties per episode: 0.0
