# In [4]:
"""
Reinforcement learning maze example.
Red rectangle:          explorer.
Black rectangles:       hells       [reward = -1].
Yellow bin circle:      paradise    [reward = +1].
All other states:       ground      [reward = 0].
This script is the environment part of this example. The RL is in RL_brain.py.
View more on my tutorial page: https://morvanzhou.github.io/tutorials/
"""

import numpy as np
import time
import sys
if sys.version_info.major == 2:
    import Tkinter as tk
else:
    import tkinter as tk


# Grid geometry: the window and canvas are MAZE_W * UNIT wide and
# MAZE_H * UNIT high, in pixels.
UNIT = 40   # side length of one grid cell, in pixels
MAZE_H = 15  # grid height, in cells
MAZE_W = 25  # grid width, in cells


class Maze(tk.Tk, object):
    """Tkinter window that draws the maze grid and the AGVs moving on it.

    Canvas item ids are kept in dynamically named instance attributes so that
    existing callers keep working:

    * ``rect<idx>`` / ``txt<idx>``: rectangle and text label of the AGV whose
      ``idx`` attribute is ``<idx>``;
    * ``req<stocker>``: text label showing the number of pending requests of
      a stocker.

    These attributes are read and written with ``getattr``/``setattr`` rather
    than ``eval``/``exec``, so no source code is ever built from runtime data.
    """

    def __init__(self):
        super(Maze, self).__init__()
        # NOTE(review): the labels are ordered u, d, l, r, but step() treats
        # action 2 as "right" and action 3 as "left". Left unchanged because
        # callers may rely on either; confirm which ordering is intended.
        self.action_space = ['u', 'd', 'l', 'r']
        self.n_actions = len(self.action_space)
        self.title('maze')
        self.geometry('{0}x{1}'.format(MAZE_W * UNIT, MAZE_H * UNIT))
        self._build_maze()

    def _build_maze(self):
        """Create the canvas, draw the grid lines and the black blocks."""
        self.canvas = tk.Canvas(self, bg='gray',
                                height=MAZE_H * UNIT,
                                width=MAZE_W * UNIT)

        # create grids: one vertical line per column, one horizontal per row
        for c in range(0, MAZE_W * UNIT, UNIT):
            self.canvas.create_line(c, 0, c, MAZE_H * UNIT)
        for r in range(0, MAZE_H * UNIT, UNIT):
            self.canvas.create_line(0, r, MAZE_W * UNIT, r)

        # Pixel centres of the two reference cells; every entry of track_set
        # is a (column, row) offset, in cells, relative to each of them.
        origin = np.array([[140, 140], [500, 140]])
        track_set = [[0, 0], [1, 0], [2, -1], [3, 0], [4, 0], [5, -1], [6, 0], [7, 0], [0, 1],
                     [2, 1], [5, 1], [7, 1], [0, 3], [2, 3], [5, 3], [7, 3], [0, 4], [1, 4],
                     [2, 5], [3, 4], [4, 4], [5, 5], [6, 4], [7, 4]]

        # hell: a 30x30 black rectangle centred on each listed cell
        for block_origin in origin:
            for cor in track_set:
                center = block_origin + np.array([UNIT * cor[0], UNIT * cor[1]])
                self.canvas.create_rectangle(
                    center[0] - 15, center[1] - 15,
                    center[0] + 15, center[1] + 15,
                    fill='black')

        # pack all
        self.canvas.pack()

    def new_step(self, agv, base_action, trans_fini, ind):
        """Move an AGV by a pixel offset and refresh its label.

        Args:
            agv: object with ``idx`` (and, when ``trans_fini`` is true,
                ``state``); ``rect<idx>`` and ``txt<idx>`` must already exist.
            base_action: ``(dx, dy)`` offset in pixels applied to the rectangle.
            trans_fini: when true, the rectangle and label are deleted and the
                AGV is redrawn through ``generate_agv`` at the new position;
                otherwise only the label is recreated at the new position.
            ind: value shown (via ``str``) in the label.

        Raises:
            AttributeError: if the AGV has not been drawn yet.
        """
        rect_name = 'rect{}'.format(agv.idx)
        txt_name = 'txt{}'.format(agv.idx)
        rect_item = getattr(self, rect_name)
        txt_item = getattr(self, txt_name)

        self.canvas.move(rect_item, base_action[0], base_action[1])  # move agent
        pos = self.canvas.coords(rect_item)
        # centre of the rectangle after the move
        nor_pos = [int((pos[0] + pos[2]) / 2), int((pos[1] + pos[3]) / 2)]

        if trans_fini:
            self.canvas.delete(rect_item)
            self.canvas.delete(txt_item)
            self.generate_agv(agv, nor_pos, ind)
        else:
            self.canvas.delete(txt_item)
            setattr(self, txt_name,
                    self.canvas.create_text(nor_pos[0], nor_pos[1],
                                            text=str(ind), font=("Arial", 15)))
            self.canvas.pack()

        # Author notes (translated):
        # 8 and 16: delete action 2 (right)
        # 9 and 17: action 3 (left) still has to be added later
        # 13: later decide, depending on conditions, whether to delete
        #     action 0 (up) or keep it as an additional shortcut

    def step(self, agv, action):
        """Move an AGV one cell in the given direction and return the result.

        Args:
            agv: object with ``idx``; ``rect<idx>`` must already exist.
            action: 0 = up, 1 = down, 2 = right, 3 = left. Any other value,
                or a move blocked by the border checks below, leaves the
                rectangle where it is.

        Returns:
            ``(s_, -1, False)`` where ``s_`` is the rectangle's coordinates as
            reported by ``canvas.coords`` after the move. Reward and done flag
            are constants; no goal/terminal logic is implemented here.

        Raises:
            AttributeError: if the AGV has not been drawn yet.
        """
        rect_item = getattr(self, 'rect{}'.format(agv.idx))
        s = self.canvas.coords(rect_item)  # [x0, y0, x1, y1] of the rectangle
        base_action = np.array([0, 0])
        if action == 0:   # up
            if s[1] > UNIT:
                base_action[1] -= UNIT
        elif action == 1:   # down
            if s[1] < (MAZE_H - 1) * UNIT:
                base_action[1] += UNIT
        elif action == 2:   # right
            if s[0] < (MAZE_W - 1) * UNIT:
                base_action[0] += UNIT
        elif action == 3:   # left
            if s[0] > UNIT:
                base_action[0] -= UNIT

        self.canvas.move(rect_item, base_action[0], base_action[1])  # move agent
        time.sleep(0.1)  # blocks the caller to slow the animation down

        s_ = self.canvas.coords(rect_item)  # next state

        return s_, -1, False

    def render(self):
        """Pause briefly, then let Tk process pending events and redraw."""
        time.sleep(0.05)
        self.update()

    def reset_agv(self, agv, ind=None):
        """Delete an AGV's canvas items and redraw it at ``agv.cur``.

        Raises:
            AttributeError: if the AGV has not been drawn yet.
        """
        self.canvas.delete(getattr(self, 'rect{}'.format(agv.idx)))
        self.canvas.delete(getattr(self, 'txt{}'.format(agv.idx)))
        self.generate_agv(agv, agv.cur, ind)

    def generate_agv(self, agv, begin, ind=None):
        """Draw an AGV as a coloured 30x30 rectangle with a text label.

        Args:
            agv: object with ``idx`` and ``state``. State 1 is drawn blue,
                2 yellow and 0 orange.
            begin: ``(x, y)`` pixel centre of the rectangle.
            ind: value shown (via ``str``) in the label when ``agv.state`` is
                not 0; state 0 is always labelled ``"N"``.

        Raises:
            ValueError: if ``agv.state`` is not 0, 1 or 2. Checked before
                anything is drawn, so no half-created AGV is left behind.
        """
        x_center = begin[0]
        y_center = begin[1]

        if agv.state == 1:
            color = 'blue'
        elif agv.state == 2:
            color = 'yellow'
        elif agv.state == 0:
            color = 'orange'
        else:
            raise ValueError(
                'unsupported AGV state {!r}; expected 0, 1 or 2'.format(agv.state))

        setattr(self, 'rect{}'.format(agv.idx),
                self.canvas.create_rectangle(
                    x_center - 15, y_center - 15,
                    x_center + 15, y_center + 15,
                    fill=color))

        label = str(ind) if agv.state != 0 else "N"
        setattr(self, 'txt{}'.format(agv.idx),
                self.canvas.create_text(x_center, y_center,
                                        text=label, font=("Arial", 15)))
        self.canvas.pack()

    def update_req(self, req_set, req_cor, stocker, init=False):
        """Show the number of requests of a stocker as a text label.

        Args:
            req_set: indexable by ``stocker``; the length of
                ``req_set[stocker]`` is the number displayed.
            req_cor: ``(x, y)`` pixel position of the label.
            stocker: key into ``req_set``; also the suffix of the
                ``req<stocker>`` attribute holding the label's item id.
            init: pass ``True`` on the first call for a stocker, when there is
                no previous label to delete.

        Raises:
            AttributeError: if ``init`` is false and no label exists yet.
        """
        attr_name = 'req{}'.format(stocker)
        if not init:
            self.canvas.delete(getattr(self, attr_name))

        setattr(self, attr_name,
                self.canvas.create_text(req_cor[0], req_cor[1],
                                        text=str(len(req_set[stocker])),
                                        font=("Arial", 15)))
        self.canvas.pack()
        self.update()

if __name__ == '__main__':
    # Run standalone: open the maze window and hand control to Tk's event loop.
    env = Maze()
    # NOTE(review): the disabled line below would schedule an `update`
    # callback, which is not defined in the visible part of this file.
    #env.after(100, update)
    env.mainloop()