In [5]:
import gymnasium as gym

# Smoke test: drive LunarLander with uniformly random actions for 1000 steps,
# rendering to a window, and restart the episode whenever it ends.
env = gym.make("LunarLander-v2", render_mode="human")
try:
    observation, info = env.reset()

    for _ in range(1000):
        # Random action; a real agent would choose it from observation/info.
        action = env.action_space.sample()
        observation, reward, terminated, truncated, info = env.step(action)

        if terminated or truncated:
            observation, info = env.reset()
finally:
    # Always release the environment (and its render window), even if the
    # loop raises or the cell is interrupted.
    env.close()

In [None]:
import gymnasium as gym
import numpy as np
import pygame
import torch
from gymnasium import spaces

class SnakeEnv(gym.Env):

    def __init__(self, Lx, Ly, start = None, end = None):
        """
        Set up the world size, the action table and the initial snake state.

        Lx, Ly : world extents. They are stored crossed: ``self.Ly`` receives
                 ``Lx`` and ``self.Lx`` receives ``Ly``.
        start  : initial head position; ``[0, 0]`` is used when omitted.
        end    : stored as ``self.end``; no method visible here reads it.
        """
        # World shape.
        # NOTE(review): the names are crossed on purpose or by accident --
        # confirm before relying on either attribute for a non-square world.
        self.Ly = Lx
        self.Lx = Ly
        # NOTE(review): the observation shape is fixed at 150x150 and does not
        # depend on Lx/Ly -- confirm it matches what is fed to the model.
        self.observation_space = spaces.Box(low=0, high=3, shape=[150, 150])

        # Action table; one displacement per discrete action.
        # Row order:           [Down,  Up,  Right,Left]
        self.action_space = spaces.Discrete(4)
        self.actions = np.array([[1,0],[-1,0],[0,1],[0,-1]])
        self.num_actions = len(self.actions)

        # Start / end positions and the current head position.
        if start is None:
            start = [0,0]
        self.start = start
        self.end = end
        self.current_state = self.start

        # Episode bookkeeping: no body segments yet, episode not finished.
        self.body = []
        self.done = False
        
    def reset(self):
        """
        Put the snake back in its initial configuration.

        Clears the ``done`` flag, drops every body segment and moves the head
        back to ``self.start``. Returns nothing.

        NOTE(review): Gymnasium's ``Env.reset`` is documented to return
        ``(observation, info)``; this method does not -- confirm callers do
        not expect that.
        """
        self.done = False
        self.body = list()
        self.current_state = self.start
        
    def single_step(self, state, action):
        """
        Advance the snake by one move and return the resulting transition.

        Parameters
        ----------
        state : array-like of numbers
            The first two entries are the head position, the remaining
            entries the food position (compared against the new head).
        action : int
            Row index into ``self.actions`` (an integer in [0, 1, 2, 3]).

        Returns
        -------
        tuple
            ``(new_state, reward, done)``. ``new_state`` is a fresh NumPy
            array; the ``state`` argument is not modified. ``reward`` is -1
            for a plain move, -1000 when the head lands on a body segment and
            100 when it lands on the food. ``done`` mirrors ``self.done``.

        Side effects: updates ``self.body``, ``self.current_state`` and
        ``self.done``.
        """
        # Work on a private NumPy copy so the caller's state stays untouched.
        S_new = np.array(state)
        S_new[:2] += self.actions[action]

        # If we go out of the world, we enter from the other side. Wrapping is
        # done first so every later comparison uses in-world coordinates.
        # Coordinate 0 wraps at self.Ly and coordinate 1 at self.Lx.
        S_new[0] %= self.Ly
        S_new[1] %= self.Lx
        head = S_new[:2]

        # every move costs something
        reward = -1

        # If the snake eats itself, add penalty. Segments are compared one by
        # one with array_equal: ``head in self.body`` would raise on NumPy
        # arrays because an element-wise comparison has no single truth value.
        if any(np.array_equal(head, segment) for segment in self.body):
            self.done = True
            reward = -1000

        # Each segment takes the place of the one in front of it; the first
        # segment takes the previous head position. Updated in place so that
        # existing references to the list remain valid.
        if self.body:
            self.body[:] = [self.current_state, *self.body[:-1]]

        # Food check is independent of wrapping: a move that crosses the
        # border and lands on the food still counts.
        if np.array_equal(head, S_new[2:]):
            self.done = True
            reward = 100  # if we reach the reward we get a reward of 100
            # add an element to the body
            new_segment = self.body[-1] if self.body else head.copy()
            self.body.append(new_segment)

        # Store a copy so later in-place edits of the returned state by the
        # caller cannot silently move the recorded head position.
        self.current_state = head.copy()
        return S_new, reward, self.done
    


    def get_image(self,state):
        """
        Represent the game as an image.

        Parameters
        ----------
        state : indexable with at least 4 numbers
            ``(x, y, x_food, y_food)``: head position followed by food
            position.

        Returns
        -------
        numpy.ndarray
            Array of shape ``(self.Lx, self.Ly)`` filled with zeros, with a 1
            at the food, the head and every body segment that lies inside the
            array. Food, head and body all use the same value.

        If the head lies outside the array, the food is not drawn either.
        """
        n_rows, n_cols = self.Lx, self.Ly
        image = np.zeros((n_rows, n_cols))

        def inside(r, c):
            # True when (r, c) addresses a cell of the image.
            return 0 <= r < n_rows and 0 <= c < n_cols

        # NOTE(review): the bounds here are (self.Lx, self.Ly) for coordinates
        # (0, 1), while single_step wraps coordinate 0 at self.Ly and
        # coordinate 1 at self.Lx -- these only agree for square worlds.
        if inside(state[0], state[1]):
            if inside(state[2], state[3]):
                image[int(state[2]), int(state[3])] = 1
            image[int(state[0]), int(state[1])] = 1
        # else: the agent is out of the world, it is dead and so the food is
        # left out as well. This is only a safety net: if the snake is allowed
        # to go through the walls it should never happen. Nothing is written
        # in this case, so an out-of-range food position cannot raise.

        for segment in self.body:
            if inside(segment[0], segment[1]):
                image[int(segment[0]), int(segment[1])] = 1

        return image

    def select_epsilon_greedy_action(self, model, state, epsilon):
        """
        Take a random action with probability epsilon, else the best action.

        Parameters
        ----------
        model : callable
            Called with a float32 tensor of shape ``(1, 1, H, W)`` built from
            ``self.get_image``; must expose a ``device`` attribute and return
            a tensor of action values.
        state : indexable
            ``state[0]`` is the single environment state passed to
            ``self.get_image``.
        epsilon : float
            Exploration probability; a uniform draw below it triggers a
            random action.

        Returns
        -------
        integer
            Index of the chosen action.
        """
        if np.random.uniform() < epsilon:
            # explore: uniform choice among the available actions
            return np.random.choice(np.arange(self.num_actions))

        # exploit: render the state and add batch and channel dimensions
        image = self.get_image(state[0])
        batch = torch.as_tensor(image, dtype=torch.float32).unsqueeze(0).unsqueeze(0).to(model.device)

        # Pure inference: no autograd graph is needed to pick an action.
        with torch.no_grad():
            q_values = model(batch).cpu().numpy()
        return np.argmax(q_values)