In [1]:
import tkinter as tk

import numpy as np

In [2]:
class Env:
    """Grid world with one rewarded target cell and a set of punished fail cells.

    States are ``(row, col)`` pairs indexing into ``REWARD_MAP``, an integer
    array of shape ``map_size``. Any cell whose entry in ``REWARD_MAP`` is
    non-zero is treated as terminal.
    """

    # (row offset, col offset) applied by each recognised action name.
    _MOVES = {
        'up': (-1, 0),
        'down': (1, 0),
        'left': (0, -1),
        'right': (0, 1),
    }

    def __init__(self, map_size=(5,5), target=(3,3), target_reward=10,
                 fail_list=[(3,2),(2,3)], fail_punishment_list=[-5, -5],
                 wall_punishment = -1):
        """Build the reward map for the grid.

        Args:
            map_size: ``(rows, cols)`` of the grid.
            target: coordinate of the goal cell.
            target_reward: value written into the goal cell.
            fail_list: coordinates of the fail cells.
            fail_punishment_list: values written into the fail cells, paired
                positionally with ``fail_list``.
            wall_punishment: reward returned when a move would leave the grid.
        """
        self._MAP_SIZE = map_size
        self._WALL_PUNISHMENT = wall_punishment

        # Normalise coordinates to tuples and copy the list: the default
        # ``fail_list`` is a single shared object, so storing it directly
        # would let a mutation on one instance leak into every other one.
        self.TARGET = tuple(target)
        self.FAIL_LIST = [tuple(coordinate) for coordinate in fail_list]
        self.REWARD_MAP = self._assign_reward_to_map(self.TARGET, target_reward,
                                                     self.FAIL_LIST, fail_punishment_list)

    def _assign_reward_to_map(self, target, target_reward, fail_list, fail_punishment_list):
        """Return an integer array of shape ``_MAP_SIZE`` holding per-cell rewards.

        All cells are 0 except ``target`` (set to ``target_reward``) and each
        coordinate in ``fail_list`` (set to the matching entry of
        ``fail_punishment_list``). Fail cells are written after the target, so
        a fail coordinate equal to the target overrides it.
        """
        reward_map = np.zeros(self._MAP_SIZE, dtype='int')

        # tuple(...) forces single-cell indexing; a list index would be
        # interpreted by NumPy as fancy indexing over whole rows.
        reward_map[tuple(target)] = target_reward

        # zip stops at the shorter of the two sequences.
        for coordinate, punishment in zip(fail_list, fail_punishment_list):
            reward_map[tuple(coordinate)] = punishment

        return reward_map

    def take_action(self, state, action):
        """Apply ``action`` from ``state`` and return ``(next_state, reward, terminal)``.

        Args:
            state: current ``(row, col)`` position.
            action: one of ``'up'``, ``'down'``, ``'left'``, ``'right'``. Any
                other value leaves the agent in place with reward 0.

        Returns:
            next_state: the new position; ``state`` itself if the move was
                blocked by the grid border or the action was not recognised.
            reward: ``_WALL_PUNISHMENT`` for a blocked move, otherwise the
                ``REWARD_MAP`` value of the cell that was entered.
            terminal: True when ``REWARD_MAP`` is non-zero at ``next_state``.
        """
        reward = 0
        next_state = state

        move = self._MOVES.get(action)
        if move is not None:
            row = state[0] + move[0]
            col = state[1] + move[1]
            if 0 <= row < self._MAP_SIZE[0] and 0 <= col < self._MAP_SIZE[1]:
                next_state = (row, col)
                # Pay the reward of the cell being entered. Reading it from
                # the cell being left would always give 0, because every
                # non-terminal cell holds 0 in REWARD_MAP.
                reward = self.REWARD_MAP[next_state]
            else:
                # Move would leave the grid: stay in place and pay the wall cost.
                reward = self._WALL_PUNISHMENT

        # check if terminal
        terminal = bool(self.REWARD_MAP[tuple(next_state)] != 0)

        return next_state, reward, terminal


    def showEnvInfo(self, next_state, reward, terminal):
        """Print one step of a trajectory.

        Writes ``-->{next_state}`` without a trailing newline so successive
        steps chain on one line; on a terminal step it then prints a win/fail
        marker chosen by the sign of ``REWARD_MAP`` at ``next_state``.
        ``reward`` is accepted but not used.
        """
        print("-->{}".format(next_state),end='')
        if terminal:
            if self.REWARD_MAP[next_state]>0:
                print("  >>>win<<<")
            elif self.REWARD_MAP[next_state]<0:
                print("  >>>fail<<<")
            else:
                # terminal was reported but the cell carries no reward
                print("(*&)(*({)(something wrong~")
                

In [None]:
class Env_GUI(Env):
    """Subclass of ``Env`` that takes the same constructor arguments.

    NOTE(review): the name suggests a GUI front end, but no GUI set-up is
    visible in this part of the notebook — confirm against the rest of the class.
    """

    def __init__(self, map_size=(5,5), target=(3,3), target_reward=10,
                 fail_list=[(3,2),(2,3)], fail_punishment_list=[-5, -5],
                 wall_punishment = -1):
        """Forward every argument unchanged to ``Env.__init__``."""
        # All arguments are passed by keyword: a positional argument placed
        # after keyword arguments is a SyntaxError.
        super().__init__(map_size=map_size, target=target, target_reward=target_reward,
                         fail_list=fail_list, fail_punishment_list=fail_punishment_list,
                         wall_punishment=wall_punishment)
        