# Reinforcement Learning
## Q-Learning with Q-table

**The code follows the Q-Table implementation of Reinforcement Learning studying to play Taxi-v2 from [this article](https://medium.freecodecamp.org/an-introduction-to-reinforcement-learning-4339519de419)**

In [24]:
import numpy as np
import gym
import random

In [21]:
env = gym.make("Taxi-v2")
env.render()

+---------+
|R: | : :G|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|[34;1mY[0m| : |[35mB[0m: |
+---------+



In [22]:
action_size = env.action_space.n
print("Action Size", action_size)

state_size = env.observation_space.n
print("State Size", state_size)

Action Size 6
State Size 500


In [29]:
qtable = np.zeros((state_size, action_size))
print(qtable.shape)

(500, 6)


In [30]:
total_episodes=50000
total_test_episodes=100
max_steps=99

learning_rate=0.7
gamma=0.618

epsilon=1.0
max_epsilon=1.0
min_epsilon=0.01
decay_rate=0.01

In [38]:
for episode in range(total_episodes):
    
    done = False
    state = env.reset()
    steps = 0
    
    for steps in range(max_steps):
        
        exp_exp_tradeoff = random.uniform(0,1)
        
        if exp_exp_tradeoff > epsilon:
            action = np.argmax(qtable[state,:])
        else:
            action = env.action_space.sample()
        
        new_state, reward, done, info = env.step(action)
        
        qtable[state, action] += learning_rate * (reward + gamma * np.max(qtable[new_state,:]) - qtable[state, action])
        
        state = new_state
        
        if done:
            break
    
    epsilon = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay_rate * episode)

In [39]:
np.max(qtable)

52.35602094240837

In [41]:
env.reset()
rewards = []

for episode in range(total_test_episodes):
    state = env.reset()
    step = 0
    done = False
    total_reward = 0
    
    for step in range(max_steps):
        env.render()
        
        action = np.argmax(qtable[state,:])
        
        new_state, reward, done, info = env.step(action)
        
        total_reward += reward
        
        if done:
            rewards.append(total_reward)
            print("Score:", total_reward)
            break
        state = new_state
        
env.close()
print("Score over time:", sum(rewards)/total_test_episodes)

+---------+
|[35mR[0m: | : :G|
| : : : :[43m [0m|
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+

+---------+
|[35mR[0m: | : :G|
| : : :[43m [0m: |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (West)
+---------+
|[35mR[0m: | : :G|
| : : : : |
| : : :[43m [0m: |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
+---------+
|[35mR[0m: | : :G|
| : : : : |
| : : : : |
| | : |[43m [0m: |
|Y| : |[34;1mB[0m: |
+---------+
  (South)
+---------+
|[35mR[0m: | : :G|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[34;1m[43mB[0m[0m: |
+---------+
  (South)
+---------+
|[35mR[0m: | : :G|
| : : : : |
| : : : : |
| | : | : |
|Y| : |[42mB[0m: |
+---------+
  (Pickup)
+---------+
|[35mR[0m: | : :G|
| : : : : |
| : : : : |
| | : |[42m_[0m: |
|Y| : |B: |
+---------+
  (North)
+---------+
|[35mR[0m: | : :G|
| : : : : |
| : : :[42m_[0m: |
| | : | : |
|Y| : |B: |
+---------+
  (North)
+---------+
|[35mR[0m: | : :G|
| : : : : |
| : :[42m_[

  (North)
+---------+
|[35mR[0m: | : :G|
| : : : : |
| : :[42m_[0m: : |
| | : | : |
|Y| : |B: |
+---------+
  (West)
+---------+
|[35mR[0m: | : :G|
| : : : : |
| :[42m_[0m: : : |
| | : | : |
|Y| : |B: |
+---------+
  (West)
+---------+
|[35mR[0m: | : :G|
| :[42m_[0m: : : |
| : : : : |
| | : | : |
|Y| : |B: |
+---------+
  (North)
+---------+
|[35mR[0m: | : :G|
|[42m_[0m: : : : |
| : : : : |
| | : | : |
|Y| : |B: |
+---------+
  (West)
+---------+
|[35m[42mR[0m[0m: | : :G|
| : : : : |
| : : : : |
| | : | : |
|Y| : |B: |
+---------+
  (North)
Score: 5
+---------+
|[35m[43mR[0m[0m: | : :[34;1mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |B: |
+---------+

+---------+
|[35mR[0m:[43m [0m| : :[34;1mG[0m|
| : : : : |
| : : : : |
| | : | : |
|Y| : |B: |
+---------+
  (East)
+---------+
|[35mR[0m: | : :[34;1mG[0m|
| :[43m [0m: : : |
| : : : : |
| | : | : |
|Y| : |B: |
+---------+
  (South)
+---------+
|[35mR[0m: | : :[34;1mG[0m|
| : :[43m [0m: : |
