Reinforcement learning maze example.  

Red rectangle:          explorer.  
Black rectangles:       hells       (reward = -1).  
Yellow circle:          paradise    (reward = +1).  
All other states:       ground      (reward = 0).  

<img src="grid.png">

In [5]:
'''
First we import all the libraries and the Grid environment
'''
import numpy as np
import pandas as pd
from grid_env import Maze #Maze environment in grid_env.py

## Q-Learning Algorithm  
<img src="algorithm.png">

In [6]:
'''
We will start by building the Q learning algorithm, which is the brain of the agent.
We will define the decision functions.
'''
class QLearningTable:
    """Tabular Q-learning agent backed by a pandas DataFrame.

    The Q-table has one row per visited state (rows are added lazily the
    first time a state is seen) and one column per action id.

    Args:
        actions: List of action ids; used as the Q-table column labels.
        learning_rate: Step size applied to the temporal-difference error.
        reward_decay: Discount factor (gamma) applied to the best
            next-state value.
        e_greedy: Probability of picking a greedy (highest-valued) action
            in ``choose_action``; otherwise a uniformly random action is
            taken.
    """

    def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
        self.actions = actions  # a list
        self.lr = learning_rate
        self.gamma = reward_decay
        self.epsilon = e_greedy
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)
        print('Initial Q-table')
        print(self.q_table)
        print("Each column id [0, 1, 2, 3] repesents an action ['up', 'down', 'left', 'right']")

    def choose_action(self, observation):
        """Return an action id for ``observation`` using an epsilon-greedy rule.

        The state is registered in the Q-table first if it is new. With
        probability ``self.epsilon`` one of the highest-valued actions is
        returned (ties are broken uniformly at random); otherwise a
        uniformly random action from ``self.actions`` is returned.
        """
        self.check_state_exist(observation)
        if np.random.uniform() < self.epsilon:
            # Exploit: several actions may share the maximum value, so pick
            # randomly among them instead of always taking the first one.
            state_action = self.q_table.loc[observation, :]
            best_actions = state_action[state_action == state_action.max()].index
            return np.random.choice(best_actions)
        # Explore: ignore the table entirely.
        return np.random.choice(self.actions)

    def learn(self, s, a, r, s_):
        """Apply one Q-learning update for the transition ``(s, a, r, s_)``.

        Args:
            s: State in which the action was taken.
            a: Action id that was taken.
            r: Reward received.
            s_: Resulting state; the literal string ``'terminal'`` marks the
                end of an episode and disables bootstrapping.

        Returns:
            The Q-table (the same DataFrame object stored on ``self``).
        """
        # Register both endpoints so an unseen ``s`` cannot raise KeyError.
        self.check_state_exist(s)
        self.check_state_exist(s_)
        q_predict = self.q_table.loc[s, a]
        if s_ != 'terminal':
            # Bootstrap from the best value available in the next state.
            q_target = r + self.gamma * self.q_table.loc[s_, :].max()
        else:
            # No future value after a terminal transition.
            q_target = r
        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)
        return self.q_table

    def check_state_exist(self, state):
        """Add a zero-initialised row for ``state`` if it is not in the table."""
        if state in self.q_table.index:
            return
        # DataFrame.append was removed in pandas 2.0, so build the new row as
        # a one-row frame and concatenate instead.
        new_row = pd.DataFrame(
            [[0.0] * len(self.actions)],
            index=[state],
            columns=self.q_table.columns,
            dtype=np.float64,
        )
        if self.q_table.empty:
            # Skip concat with an empty frame: it is deprecated in recent
            # pandas releases and can change the resulting dtypes.
            self.q_table = new_row
        else:
            self.q_table = pd.concat([self.q_table, new_row])


In [7]:
'''
This function runs the training loop and updates the Q-table after every step
'''

def update(episodes=100):
    """Train the global ``RL`` agent on the global ``env`` and print the result.

    For each episode the environment is reset, then the agent repeatedly
    chooses an action, steps the environment and learns from the observed
    transition until ``env.step`` reports the episode is done. Afterwards the
    final Q-table is printed and the environment is destroyed.

    Args:
        episodes: Number of episodes to run. Defaults to 100.
    """
    for episode in range(episodes):
        print('Starting episode', episode)
        # initial observation
        observation = env.reset()

        done = False
        while not done:
            # refresh the environment display
            env.render()

            # RL chooses an action based on the current observation
            action = RL.choose_action(str(observation))

            # take the action and get the next observation and reward
            observation_, reward, done = env.step(action)

            # RL learns from this transition
            RL.learn(str(observation), action, reward, str(observation_))

            # the next observation becomes the current one
            observation = observation_

    # end of game
    print('game over')
    print('final Q-table')
    print("Each column id [0, 1, 2, 3] repesents an action ['up', 'down', 'left', 'right']")
    # Read the table directly: calling RL.learn() again just to obtain it
    # would apply the last transition a second time and alter the values.
    print(RL.q_table)

    env.destroy()


In [8]:
if __name__ == "__main__":
    # Create the environment and an agent with one action id per
    # environment action.
    env = Maze()
    action_ids = list(range(env.n_actions))
    RL = QLearningTable(actions=action_ids)

    # Schedule the training routine to start 100 ms after the event loop
    # begins, then hand control to that loop.
    env.after(100, update)
    env.mainloop()

Initial Q-table
Empty DataFrame
Columns: [0, 1, 2, 3]
Index: []
Each column id [0, 1, 2, 3] repesents an action ['up', 'down', 'left', 'right']
Starting episode 0
Starting episode 1
Starting episode 2
Starting episode 3
Starting episode 4
Starting episode 5
Starting episode 6
Starting episode 7
Starting episode 8
Starting episode 9
Starting episode 10
Starting episode 11
Starting episode 12
Starting episode 13
Starting episode 14
Starting episode 15
Starting episode 16
Starting episode 17
Starting episode 18
Starting episode 19
Starting episode 20
Starting episode 21
Starting episode 22
Starting episode 23
Starting episode 24
Starting episode 25
Starting episode 26
Starting episode 27
Starting episode 28
Starting episode 29
Starting episode 30
Starting episode 31
Starting episode 32
Starting episode 33
Starting episode 34
Starting episode 35
Starting episode 36
Starting episode 37
Starting episode 38
Starting episode 39
Starting episode 40
Starting episode 41
Starting episode 42
Starti