In [None]:
import math


class Robot:
    """Agent that occupies one cell of a shared, row-major grid (``game[y][x]``)."""

    def __init__(self, x, y, game):
        """Store the starting column ``x``, row ``y`` and the grid reference.

        The robot is not written into the grid here; ``set_state`` does that.
        """
        self.game = game
        self.x = x
        self.y = y

    def __repr__(self):
        """Return the one-character marker shown when the grid is printed."""
        return 'R'

    @property
    def coords(self):
        """Current position as an ``(x, y)`` tuple."""
        return (self.x, self.y)

    def set_state(self, x, y):
        """Move the robot to column ``x``, row ``y``.

        The cell currently recorded as the robot's position is reset to 0
        first, then the robot object itself is stored in the target cell.
        """
        grid = self.game
        grid[self.y][self.x] = 0
        self.x = x
        self.y = y
        grid[y][x] = self
        

class Environment:
    """Square grid world in which a robot greedily walks toward a goal cell.

    The grid lives in ``state`` and is indexed row-major (``state[y][x]``).
    Empty cells hold ``0``, the robot's cell holds the ``Robot`` object and,
    once ``set_goal`` has been called, the goal cell holds ``1 << 16``.
    """

    def __repr__(self):
        """Render the grid as text: one line per row, cells separated by spaces."""
        return '\n'.join(' '.join(repr(cell) for cell in row) for row in self.state)

    def __init__(self, size):
        """Build an empty ``size`` x ``size`` grid with the robot at ``(0, 0)``.

        The goal coordinates are recorded but the goal marker is not written
        into the grid until ``set_goal`` (or ``reset``) is called.
        """
        self.size = size
        self.state = [[0] * size for _ in range(size)]

        self.robot = Robot(0, 0, self.state)
        self.robot.set_state(0, 0)

        # The goal is the far corner. Valid indices stop at size - 1, so
        # (size, size) would lie outside the grid and break set_goal().
        self.goal = size - 1, size - 1

    def set_goal(self):
        """Write the goal marker (``1 << 16``) into the goal cell."""
        goal_x, goal_y = self.goal
        self.state[goal_y][goal_x] = 1 << 16

    def reset(self):
        """Clear the grid, put the robot back at ``(0, 0)`` and re-mark the goal."""
        self.state = [[0] * self.size for _ in range(self.size)]
        # The robot keeps its own reference to the grid; re-point it at the
        # fresh one so set_state() does not write into the discarded grid.
        self.robot.game = self.state
        self.robot.set_state(0, 0)
        self.set_goal()

    def get_pos(self, x, y):
        """Return the content of cell ``(x, y)``, or ``None`` if it is off-grid."""
        if 0 <= x < self.size and 0 <= y < self.size:
            return self.state[y][x]
        return None

    def step(self):
        """Move the robot one cell to the neighbour with the highest score.

        Does nothing when the robot has no in-bounds neighbour (1x1 grid).
        """
        action = max(self.get_actions(), key=self.get_score, default=None)
        if action is None:
            return

        self.robot.set_state(*action)

    def get_score(self, action):
        """Score a candidate ``(x, y)`` position; closer to the goal is higher.

        The 0.01 offset keeps the score finite when the position is the goal.
        """
        return 1 / (0.01 + math.dist(action, self.goal))

    def get_actions(self):
        """Return the in-bounds 4-neighbour positions of the robot as ``(x, y)`` tuples."""
        x, y = self.robot.x, self.robot.y
        candidates = [
            (x - 1, y),
            (x, y - 1),
            (x + 1, y),
            (x, y + 1),
        ]
        return [pos for pos in candidates if self.get_pos(*pos) is not None]

In [None]:
# Create a 3x3 environment; the constructor places the robot at (0, 0).
env = Environment(3)

In [None]:
# Print the candidate (x, y) moves reported by get_actions() for the robot's current cell.
print(env.get_actions())

In [None]:
# Robot.set_state requires explicit x and y; calling it with no arguments
# raises TypeError. Place the robot at the origin.
# NOTE(review): intended target cell is not evident from the notebook - confirm.
env.robot.set_state(0, 0)