In [1]:
import gym
import numpy as np
import random
import matplotlib.pyplot as plt

In [5]:
env = gym.make("Taxi-v3").env



In [13]:
# Q Table
# 500 states and 6 actions
q_table = np.zeros([env.observation_space.n, env.action_space.n])

In [14]:
q_table

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       ...,
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [15]:
# Hyper parameter
alpha = 0.1
gamma = 0.9
epsilon = 0.1

In [16]:
# Plotting Matrix
reward_list = []
dropouts_list = []

In [17]:
# Episode
episode_number = 10000
for i in range(1, episode_number):
    
    # initialize environment
    state = env.reset()
    
    reward_count = 0
    dropouts = 0
    
    while True:
        # exploit vs explore to find action
        # %10 = explore, %90 exploit
        if random.uniform(0, 1) < epsilon:
            action = env.action_space.sample()
        else:
            action = np.argmax(q_table[state])
        
        # action process and take reward/observation
        next_state, reward, done, _ = env.step(action)
        
        # Q learning function
        old_value = q_table[state, action]
        next_max = np.max(q_table[next_state])
        
        next_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
        
        # update Q Table
        q_table[state, action] = next_value
        
        # update state
        state = next_state
        
        # find wrong dropouts
        if reward == -10:
            dropouts += 1
        
        if done:
            break
            
        reward_count += reward
    
    if i%10 == 0:
        dropouts_list.append(dropouts)
        reward_list.append(reward_count)
        print("Episode: {}, reward: {}, wrong dropout: {}".format(i, reward_count, dropouts))

Episode: 10, reward: -966, wrong dropout: 41
Episode: 20, reward: -990, wrong dropout: 27
Episode: 30, reward: -924, wrong dropout: 37
Episode: 40, reward: -790, wrong dropout: 28
Episode: 50, reward: -585, wrong dropout: 19
Episode: 60, reward: -376, wrong dropout: 13
Episode: 70, reward: -112, wrong dropout: 2
Episode: 80, reward: -629, wrong dropout: 26
Episode: 90, reward: -176, wrong dropout: 6
Episode: 100, reward: -202, wrong dropout: 4
Episode: 110, reward: -282, wrong dropout: 8
Episode: 120, reward: -123, wrong dropout: 4
Episode: 130, reward: -279, wrong dropout: 9
Episode: 140, reward: -88, wrong dropout: 1
Episode: 150, reward: -134, wrong dropout: 3
Episode: 160, reward: -32, wrong dropout: 0
Episode: 170, reward: -342, wrong dropout: 8
Episode: 180, reward: -180, wrong dropout: 8
Episode: 190, reward: -191, wrong dropout: 7
Episode: 200, reward: -178, wrong dropout: 4
Episode: 210, reward: -131, wrong dropout: 4
Episode: 220, reward: -129, wrong dropout: 3
Episode: 230, 

Episode: 1980, reward: -20, wrong dropout: 0
Episode: 1990, reward: -8, wrong dropout: 0
Episode: 2000, reward: -26, wrong dropout: 1
Episode: 2010, reward: -11, wrong dropout: 0
Episode: 2020, reward: -10, wrong dropout: 0
Episode: 2030, reward: -13, wrong dropout: 0
Episode: 2040, reward: -10, wrong dropout: 0
Episode: 2050, reward: -11, wrong dropout: 0
Episode: 2060, reward: -25, wrong dropout: 1
Episode: 2070, reward: -11, wrong dropout: 0
Episode: 2080, reward: -19, wrong dropout: 1
Episode: 2090, reward: -15, wrong dropout: 0
Episode: 2100, reward: -22, wrong dropout: 1
Episode: 2110, reward: -16, wrong dropout: 0
Episode: 2120, reward: -10, wrong dropout: 0
Episode: 2130, reward: -24, wrong dropout: 1
Episode: 2140, reward: -8, wrong dropout: 0
Episode: 2150, reward: -14, wrong dropout: 0
Episode: 2160, reward: -19, wrong dropout: 0
Episode: 2170, reward: -14, wrong dropout: 0
Episode: 2180, reward: -28, wrong dropout: 1
Episode: 2190, reward: -31, wrong dropout: 2
Episode: 220

Episode: 3960, reward: -11, wrong dropout: 0
Episode: 3970, reward: -22, wrong dropout: 1
Episode: 3980, reward: -23, wrong dropout: 1
Episode: 3990, reward: -11, wrong dropout: 0
Episode: 4000, reward: -13, wrong dropout: 0
Episode: 4010, reward: -19, wrong dropout: 1
Episode: 4020, reward: -32, wrong dropout: 2
Episode: 4030, reward: -12, wrong dropout: 0
Episode: 4040, reward: -13, wrong dropout: 0
Episode: 4050, reward: -17, wrong dropout: 0
Episode: 4060, reward: -13, wrong dropout: 0
Episode: 4070, reward: -13, wrong dropout: 0
Episode: 4080, reward: -24, wrong dropout: 1
Episode: 4090, reward: -13, wrong dropout: 0
Episode: 4100, reward: -13, wrong dropout: 0
Episode: 4110, reward: -11, wrong dropout: 0
Episode: 4120, reward: -28, wrong dropout: 1
Episode: 4130, reward: -20, wrong dropout: 1
Episode: 4140, reward: -14, wrong dropout: 0
Episode: 4150, reward: -18, wrong dropout: 1
Episode: 4160, reward: -14, wrong dropout: 0
Episode: 4170, reward: -22, wrong dropout: 1
Episode: 4

Episode: 6160, reward: -23, wrong dropout: 1
Episode: 6170, reward: -8, wrong dropout: 0
Episode: 6180, reward: -11, wrong dropout: 0
Episode: 6190, reward: -39, wrong dropout: 2
Episode: 6200, reward: -22, wrong dropout: 1
Episode: 6210, reward: -15, wrong dropout: 0
Episode: 6220, reward: -33, wrong dropout: 2
Episode: 6230, reward: -10, wrong dropout: 0
Episode: 6240, reward: -11, wrong dropout: 0
Episode: 6250, reward: -12, wrong dropout: 0
Episode: 6260, reward: -13, wrong dropout: 0
Episode: 6270, reward: -12, wrong dropout: 0
Episode: 6280, reward: -15, wrong dropout: 0
Episode: 6290, reward: -26, wrong dropout: 1
Episode: 6300, reward: -35, wrong dropout: 2
Episode: 6310, reward: -17, wrong dropout: 0
Episode: 6320, reward: -15, wrong dropout: 0
Episode: 6330, reward: -8, wrong dropout: 0
Episode: 6340, reward: -13, wrong dropout: 0
Episode: 6350, reward: -13, wrong dropout: 0
Episode: 6360, reward: -12, wrong dropout: 0
Episode: 6370, reward: -10, wrong dropout: 0
Episode: 638

Episode: 8180, reward: -13, wrong dropout: 0
Episode: 8190, reward: -20, wrong dropout: 1
Episode: 8200, reward: -25, wrong dropout: 1
Episode: 8210, reward: -23, wrong dropout: 1
Episode: 8220, reward: -13, wrong dropout: 0
Episode: 8230, reward: -12, wrong dropout: 0
Episode: 8240, reward: -17, wrong dropout: 0
Episode: 8250, reward: -21, wrong dropout: 1
Episode: 8260, reward: -17, wrong dropout: 0
Episode: 8270, reward: -16, wrong dropout: 0
Episode: 8280, reward: -26, wrong dropout: 1
Episode: 8290, reward: -10, wrong dropout: 0
Episode: 8300, reward: -10, wrong dropout: 0
Episode: 8310, reward: -16, wrong dropout: 0
Episode: 8320, reward: -15, wrong dropout: 0
Episode: 8330, reward: -11, wrong dropout: 0
Episode: 8340, reward: -9, wrong dropout: 0
Episode: 8350, reward: -12, wrong dropout: 0
Episode: 8360, reward: -13, wrong dropout: 0
Episode: 8370, reward: -12, wrong dropout: 0
Episode: 8380, reward: -8, wrong dropout: 0
Episode: 8390, reward: -46, wrong dropout: 3
Episode: 840