In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import pygame
import time
import gymnasium as gym
from gymnasium.spaces import Dict, Discrete, Box
from collections import OrderedDict


pygame 2.6.1 (SDL 2.28.4, Python 3.11.10)
Hello from the pygame community. https://www.pygame.org/contribute.html


# env tests

In [2]:
import numpy as np

def is_point_in_triangle(point, triangle):
    """
    Check if a point is inside a triangle using barycentric coordinates.
    
    Args:
        point (tuple): (x, y) coordinates of the point to check
        triangle (list): List of 3 tuples, each representing vertex coordinates [(x1,y1), (x2,y2), (x3,y3)]
    
    Returns:
        bool: True if point is inside the triangle, False otherwise
    """
    def compute_barycentric_coordinates(pt, v1, v2, v3):
        """
        Compute barycentric coordinates of a point with respect to a triangle.
        
        Args:
            pt (tuple): Point coordinates
            v1, v2, v3 (tuple): Vertex coordinates of the triangle
        
        Returns:
            tuple: Barycentric coordinates (u, v, w)
        """
        pt = np.array(pt)
        v1, v2, v3 = np.array(v1), np.array(v2), np.array(v3)
        
        # Vectorized area computation
        triangle_area = np.abs(np.cross(v2 - v1, v3 - v1)) / 2
        
        # Areas of sub-triangles
        area1 = np.abs(np.cross(pt - v2, v3 - v2)) / 2
        area2 = np.abs(np.cross(v1 - pt, v3 - v1)) / 2
        area3 = np.abs(np.cross(v1 - v2, pt - v2)) / 2
        
        # Compute barycentric coordinates
        u = area1 / triangle_area
        v = area2 / triangle_area
        w = area3 / triangle_area
        
        return u, v, w
    
    # Compute barycentric coordinates
    u, v, w = compute_barycentric_coordinates(point, triangle[0], triangle[1], triangle[2])
    
    # Point is inside if all barycentric coordinates are between 0 and 1 (inclusive)
    return 0 <= u <= 1 and 0 <= v <= 1 and 0 <= w <= 1 and np.abs(u + v + w - 1) < 1e-10


def is_point_in_parallelogram(point, box):
    """
    Check if a point is inside a parallelogram.
    
    Args:
        point (tuple): (x, y) coordinates of the point to check
        box (list): List of 4 tuples, each representing vertex coordinates [(x1,y1), (x2,y2), (x3,y3), (x4,y4)]
    
    Returns:
        bool: True if point is inside the parallelogram, False otherwise
    """
    # Check if the point is in one of the two triangles of the parallelogram
    return is_point_in_triangle(point, [box[0], box[1], box[2]]) or is_point_in_triangle(point, [box[0], box[2], box[3]])


In [None]:
class ObservableDeformedGridworld(gym.Env):

    def __init__(self, grid_size=(1.0, 1.0), step_size=0.02, goal=(0.9, 0.9), 
                 obstacles=None, stretch=(1.0, 1.0), shear=(0.0, 0.0), observation_radius=0.05, render_mode=None):
        """
        Initialize the observable deformed continuous gridworld.
        :param grid_size: Size of the grid (width, height).
        :param step_size: Step size for the agent's movement.
        :param goal: Coordinates of the goal position.
        :param obstacles: List of obstacles as rectangles [(x_min, y_min), (x_max, y_max)].
        :param stretch: Tuple (s_x, s_y) for stretching the grid in x and y directions.
        :param shear: Tuple (sh_x, sh_y) for shearing the grid.
        :param observation_radius: Radius within which the agent can observe its surroundings.
        """
        self.grid_size = np.array(grid_size)
        self.step_size = step_size
        self.goal = np.array(goal)
        self.state = np.array([0.1, 0.1])  # Start at the origin
        self.obstacles = obstacles if obstacles else []
        self.observation_radius = observation_radius

        # Transformation matrix
        self.transformation_matrix = np.array([
            [stretch[0], shear[0]],
            [shear[1], stretch[1]]
        ])
        self.inverse_transformation_matrix = np.linalg.inv(self.transformation_matrix)

        # Rendering mode
        self.render_mode = render_mode

        # gymnasium compatibility
        self.action_space = gym.spaces.Discrete(4)
        self.observation_space =  Dict({
            "pos": gym.spaces.Box(low=.0, high=1.0, shape=(2,),dtype=float),
            "theta": gym.spaces.Box(low=.0, high=1.0, shape=(4,),dtype=float), # deformation is a 2x2 tensor
            "obs": gym.spaces.Box(low=0, high=1, shape=(4,),dtype=int),
        })

        self.stretch_range = np.array([0.4, 1])
        self.shear_range = np.array([-0.2, 0.2])

        self.timestep = 0

        self.corners = [
            np.array([0, 0]),
            np.array([self.grid_size[0], 0]),
            self.grid_size,
            np.array([0, self.grid_size[1]]),
        ]
        self.transformed_corners = [self.transform(corner) for corner in self.corners]
        
    def reset(self,seed=None):
        """
        Reset the environment to the initial state.
        :return: Initial state and observation.
        """
        np.random.seed(seed)
        self.set_deformation(self.sample(2,self.stretch_range), self.sample(2,self.shear_range))  # Reset deformation to random
        self.state = np.array([0.1, 0.1])  # Start at the origin
        #self.state = np.random.rand(2) * self.transform(self.grid_size) # Random start position in the deformable grid
        
        self.transformed_corners = [self.transform(corner) for corner in self.corners]

        state = OrderedDict({
            "pos": self.state,
            "theta": self.transformation_matrix.flatten(),
            "obs": self.observe_obstacle()
        }) 
        
        self.timestep = 0

        # print(f"Initial agent position: {self.state}",
        #       f"Initial goal position: {self.goal}",
        #       f"Initial deformation: {self.transformation_matrix}",
        #       f"Initial observation: {self.observe_obstacle()}",
        #       sep="\n")
        
        return state, {}
    
    def set_deformation(self, stretch, shear):
        """
        Set the deformation transformation matrix based on stretch and shear parameters.
        
        This function creates a transformation matrix to apply grid deformations, including 
        stretching and shearing, to the grid coordinates. It also computes the inverse of 
        this transformation for reversing the deformation.

        :param stretch: A tuple (s_x, s_y) for stretching the grid in the x and y directions.
        :param shear: A tuple (sh_x, sh_y) for shearing the grid in the x and y directions.
        """
        # Create the transformation matrix based on stretch and shear
        self.transformation_matrix = np.array([
            [stretch[0], shear[0]],  # First row: stretch in x and shear in x direction
            [shear[1], stretch[1]]   # Second row: shear in y and stretch in y direction
        ])

        # Calculate the inverse transformation matrix for reversing the deformation
        self.inverse_transformation_matrix = np.linalg.inv(self.transformation_matrix)

        # Optionally, print the transformation matrices for debugging
        # print(f"Transformation Matrix:\n{self.transformation_matrix}")
        # print(f"Inverse Transformation Matrix:\n{self.inverse_transformation_matrix}")

    def set_pos(self, pos):
        """
        Set the agent's state to a new position.
        
        This function directly updates the agent's position (state) to the provided coordinates.

        :param pos: A tuple or array representing the new position of the agent in the grid.
        """
        # Update the state (agent's position)
        self.state = np.array(pos)

        # Optionally, print the new state for debugging
        # print(f"New agent position: {self.state}")

    def transform(self, position):
        """
        Apply the grid deformation to a given position.
        :param position: (x, y) in original space.
        :return: Transformed position in the deformed grid.
        """
        return np.dot(self.transformation_matrix, position)

    def inverse_transform(self, position):
        """
        Map a position from the deformed grid back to the original space.
        :param position: (x, y) in the deformed grid.
        :return: Original position.
        """
        return np.dot(self.inverse_transformation_matrix, position)

    def is_in_obstacle(self, position):
        """
        Check if a given position is inside any obstacle.
        :param position: The (x, y) coordinates to check in the original space.
        :return: True if the position is inside an obstacle, False otherwise.
        """
        for obs in self.obstacles:
            (x_min, y_min), (x_max, y_max) = obs
            x_min_transformed, y_min_transformed = self.transform([x_min, y_min])
            x_max_transformed, y_max_transformed = self.transform([x_max, y_max])
            if x_min_transformed <= position[0] <= x_max_transformed and y_min_transformed <= position[1] <= y_max_transformed:
                return True
        return False

    def observe_single_obstacle(self):
        """
        Check if any part of an obstacle is within the observation radius of the agent.
        :return: True if any part of an obstacle is within the observation radius, False otherwise.
        """
        for obs in self.obstacles:
            (x_min, y_min), (x_max, y_max) = obs
            
            # Clamp the agent's position to the obstacle's boundaries to find the closest point
            closest_x = np.clip(self.state[0], x_min, x_max)
            closest_y = np.clip(self.state[1], y_min, y_max)
            
            # Compute the distance from the agent to this closest point
            closest_point = np.array([closest_x, closest_y])
            distance_to_obstacle = np.linalg.norm(self.state - closest_point)
            
            # Check if this distance is within the observation radius
            if distance_to_obstacle <= self.observation_radius:
                return 1
        
        return 0
    
    def observe_obstacle(self):
        """
        Efficiently and precisely check for obstacles in the four cardinal directions (N, E, S, W).
        Each direction checks for obstacles in a quarter-circle arc within the observation radius.
        :return: A numpy array of shape (4,), where each entry indicates the presence of obstacles 
                in the respective direction (North, East, South, West).
        """
        directions = ["N", "E", "S", "W"]
        obstacle_presence = np.zeros(4)  # Default: no obstacles in any direction

        # Precompute direction boundaries in radians
        direction_ranges = [
            (315, 45),   # North: [-45°, +45°]
            (45, 135),   # East: [+45°, +135°]
            (135, 225),  # South: [+135°, +225°]
            (225, 315)   # West: [+225°, +315°]
        ]
        direction_ranges_rad = [(np.deg2rad(a1), np.deg2rad(a2)) for a1, a2 in direction_ranges]

        for obs in self.obstacles:
            (x_min, y_min), (x_max, y_max) = obs
            x_min, y_min = self.transform([x_min, y_min])
            x_max, y_max = self.transform([x_max, y_max])

            # Generate sampled points along the edges of the obstacle
            num_samples = 5  # Increase for more precision
            edge_points = np.concatenate([
                np.linspace([x_min, y_min], [x_max, y_min], num_samples),  # Bottom edge
                np.linspace([x_max, y_min], [x_max, y_max], num_samples),  # Right edge
                np.linspace([x_max, y_max], [x_min, y_max], num_samples),  # Top edge
                np.linspace([x_min, y_max], [x_min, y_min], num_samples)   # Left edge
            ])

            # Compute vectors from agent to sampled points
            vectors = edge_points - self.state
            distances = np.linalg.norm(vectors, axis=1)

            # Filter points that are outside the observation radius
            within_radius = distances <= self.observation_radius
            if not np.any(within_radius):
                continue  # Skip obstacles entirely outside the radius

            # Compute angles relative to positive Y-axis
            angles = np.arctan2(vectors[:, 1], vectors[:, 0])  # Radians
            angles = (angles + 2 * np.pi) % (2 * np.pi)  # Normalize to [0, 2π)

            # Check which direction each point falls into
            for i, (angle_min, angle_max) in enumerate(direction_ranges_rad):
                if obstacle_presence[i] == 1:
                    continue  # Early exit if the direction is already flagged
                for angle in angles[within_radius]:
                    if (angle_min <= angle < angle_max) or (
                        angle_max < angle_min and (angle >= angle_min or angle < angle_max)
                    ):
                        obstacle_presence[i] = 1
                        break  # No need to check further points for this direction

        return obstacle_presence
        
    def step(self, action):
        """
        Take a step in the environment, interpreting the action in the deformed space.
        :param action: One of ['N', 'S', 'E', 'W'].
        :return: Tuple (next_state, observation, reward, done, info).
        """
        # Map actions to movements in the deformed space
        moves = [np.array([0, self.step_size]),   # Move up in deformed space
            np.array([0, -self.step_size]),  # Move down in deformed space
            np.array([self.step_size, 0]),   # Move right in deformed space
            np.array([-self.step_size, 0])   # Move left in deformed space
        ]

        # Get the movement vector in the deformed space
        move = moves[action]

        # Map the movement to the original space using the inverse transformation
        # move_original = np.dot(self.inverse_transformation_matrix, move)

        # Update state in the original grid space
        next_state = self.state + move

        num_samples = 50  # Number of points to sample along the path
        path = np.linspace(self.state, next_state, num_samples)

        # Check for collisions along the path
        collision = any(self.is_in_obstacle(point) for point in path)

        # Check if the new state is in an obstacle
        if collision:
            reward = -1.0  # Penalty for hitting an obstacle
            info = {"collision": True}
            terminated = False
        # Check if the is inside the deformed grid boundaries
        elif not is_point_in_parallelogram(next_state, self.transformed_corners):
            reward = -2.0
            info = {"out": True}
            next_state = self.state
            terminated = True
        else:   
            transformed_goal = self.transform(self.goal)
            transformed_state = self.transform(self.state)
            terminated = np.linalg.norm(transformed_state - transformed_goal) < self.step_size
            reward = 1.0 if terminated else -0.01
            info = {"collision": False, "out": False}
    
        self.state = next_state
        self.timestep += 1
        truncated = self.timestep > 200 

        if self.render_mode == "human":
            self.render()

        state = OrderedDict({
                    "pos": self.state,
                    "theta": self.transformation_matrix.flatten(),
                    "obs": self.observe_obstacle()
                })

        # Return the transformed state, reward, and terminated truncated flag
        return state, reward, terminated, truncated, info

    def render_old(self):
        """
        Render the deformed gridworld environment using Pygame, ensuring the entire deformed grid fits within the screen.
        """
        import pygame  # Ensure Pygame is imported

        # Define colors
        WHITE = (255, 255, 255)
        BLUE = (0, 0, 255)
        GREEN = (0, 255, 0)
        RED = (255, 0, 0)
        YELLOW = (255, 255, 0)
        BLACK = (0, 0, 0)

        # Initialize the screen
        if not hasattr(self, "screen"):
            self.screen_width = 800
            self.screen_height = 600
            pygame.init()
            self.screen = pygame.display.set_mode((self.screen_width, self.screen_height))
            pygame.display.set_caption("Deformed Gridworld Environment")

        # Fill background with white
        self.screen.fill(WHITE)

        # Compute the bounding box of the deformed grid
        corners = [
            self.transform(np.array([0, 0])),
            self.transform(np.array([self.grid_size[0], 0])),
            self.transform(self.grid_size),
            self.transform(np.array([0, self.grid_size[1]])),
        ]
        x_coords, y_coords = zip(*corners)
        min_x, max_x = min(x_coords), max(x_coords)
        min_y, max_y = min(y_coords), max(y_coords)

        # Define scaling factors to fit the deformed grid within the screen
        scale_x = self.screen_width / (max_x - min_x)
        scale_y = self.screen_height / (max_y - min_y)
        scale = min(scale_x, scale_y)  # Uniform scaling to maintain aspect ratio

        # Transform helper for rendering
        def to_screen_coords(pos):
            """
            Map transformed coordinates to screen coordinates, scaled and shifted to fit the screen.
            """
            x, y = pos
            x_screen = int((x - min_x) * scale)
            y_screen = int((max_y - y) * scale)  # Flip y-axis for screen rendering
            return x_screen, y_screen

        # Draw the deformed grid boundaries
        pygame.draw.polygon(self.screen, BLACK, [to_screen_coords(corner) for corner in corners], width=3)

        # Draw the obstacles
        for obs in self.obstacles:
            (x_min, y_min), (x_max, y_max) = obs
            bottom_left = self.transform(np.array([x_min, y_min]))
            bottom_right = self.transform(np.array([x_max, y_min]))
            top_left = self.transform(np.array([x_min, y_max]))
            top_right = self.transform(np.array([x_max, y_max]))
            
            # Draw each obstacle as a polygon in the deformed space
            pygame.draw.polygon(self.screen, RED, [
                to_screen_coords(bottom_left),
                to_screen_coords(bottom_right),
                to_screen_coords(top_right),
                to_screen_coords(top_left)
            ])

        # Draw the agent
        agent_position = self.transform(self.state)
        pygame.draw.circle(self
        .screen, BLUE, to_screen_coords(agent_position), 10)

        # Draw the goal
        goal_position = self.transform(self.goal)
        pygame.draw.circle(self.screen, GREEN, to_screen_coords(goal_position), 12)

        # Draw observation radius as a dashed circle around the agent
        observation_radius = self.observation_radius * max(self.transformation_matrix.diagonal())
        pygame.draw.circle(self.screen, YELLOW, to_screen_coords(agent_position), 
                        int(observation_radius * scale), 1)

        # Update the display
        pygame.display.flip()

        # Handle key events
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                return
            elif event.type == pygame.KEYDOWN:
                # Press 'r' to reset environment
                if event.key == pygame.K_r:
                    self.reset()
                # Press 'w' to quit
                elif event.key == pygame.K_w:
                    pygame.quit()
                    return
                # Press 's' to save current state
                elif event.key == pygame.K_s:
                    self.save_state()
                # Press space to pause/resume
                elif event.key == pygame.K_SPACE:
                    self.pause()
                # Press arrow keys for manual control
                elif event.key == pygame.K_LEFT:
                    return self.step(3)  # Left action
                elif event.key == pygame.K_RIGHT:
                    return self.step(2)  # Right action
                elif event.key == pygame.K_UP:
                    return self.step(0)  # Up action
                elif event.key == pygame.K_DOWN:
                    return self.step(1)  # Down action
        return None, None, None, None, None
    
    def close(self):
        self.render_mode = None
        pygame.quit()
    
    def sample(self,num,limit):
        low,high = limit
        return low + np.random.rand(num)*(high-low)
    
    def render(self):
        """
        Render the deformed gridworld environment along with the original gridworld.
        The original gridworld serves as a reference background.
        """
        import pygame  # Ensure Pygame is imported

        # Define colors
        WHITE = (255, 255, 255)
        LIGHT_GRAY = (200, 200, 200)
        BLUE = (0, 0, 255)
        GREEN = (0, 255, 0)
        RED = (255, 0, 0)
        PINK = (255, 105, 180)  
        YELLOW = (255, 255, 0)
        BLACK = (0, 0, 0)

        # Initialize the screen
        if not hasattr(self, "screen"):
            self.screen_width = 1000
            self.screen_height = 1000
            pygame.init()
            self.screen = pygame.display.set_mode((self.screen_width, self.screen_height))
            pygame.display.set_caption("Deformed and Original Gridworld")

        # Fill background with white
        self.screen.fill(WHITE)

    # Compute the bounding box of the deformed grid
        corners = [
            np.array([0, 0]),
            np.array([self.grid_size[0], 0]),
            self.grid_size,
            np.array([0, self.grid_size[1]]),
        ]
        transformed_corners = [self.transform(corner) for corner in corners]
        x_coords, y_coords = zip(*transformed_corners)
        min_x, max_x = min(x_coords), max(x_coords)
        min_y, max_y = min(y_coords), max(y_coords)

        # Define scaling factors to fit the deformed grid within the screen
        scale_x = self.screen_width / (max_x - min_x)
        scale_y = self.screen_height / (max_y - min_y)
        scale = min(scale_x, scale_y)  # Uniform scaling to maintain aspect ratio

        # Add upward translation offset
        y_translation = max(0, -min_y * scale)

        # Transform helper for rendering
        def to_screen_coords(pos):
            """
            Map transformed coordinates to screen coordinates, scaled and shifted to fit the screen.
            """
            x, y = pos
            x_screen = int((x - min_x) * scale)
            y_screen = int((max_y - y) * scale + y_translation)  # Flip y-axis and add upward translation
            return x_screen, y_screen
        
        # Draw the un-deformed grid (background)
        for i in range(int(self.grid_size[0]) + 1):
            pygame.draw.line(self.screen, LIGHT_GRAY,
                            to_screen_coords((i, 0)),
                            to_screen_coords((i, self.grid_size[1])), width=1)
        for j in range(int(self.grid_size[1]) + 1):
            pygame.draw.line(self.screen, LIGHT_GRAY,
                            to_screen_coords((0, j)),
                            to_screen_coords((self.grid_size[0], j)), width=1)

        # Draw the deformed grid boundaries
        corners = [
            np.array([0, 0]),
            np.array([self.grid_size[0], 0]),
            self.grid_size,
            np.array([0, self.grid_size[1]]),
        ]
        transformed_corners = [self.transform(corner) for corner in corners]
        pygame.draw.polygon(self.screen, BLACK, [to_screen_coords(corner) for corner in transformed_corners], width=3)

        # Draw the obstacles in both grids
        for obs in self.obstacles:
            (x_min, y_min), (x_max, y_max) = obs
            # Original obstacle
            pygame.draw.rect(self.screen, PINK,
                            (*to_screen_coords((x_min, y_max)),  # Top-left corner
                            int((x_max - x_min) * scale),      # Width
                            int((y_max - y_min) * scale)),    # Height
                            width=0)

            # Transformed obstacle
            bottom_left = self.transform(np.array([x_min, y_min]))
            bottom_right = self.transform(np.array([x_max, y_min]))
            top_left = self.transform(np.array([x_min, y_max]))
            top_right = self.transform(np.array([x_max, y_max]))
            pygame.draw.polygon(self.screen, RED, [
                to_screen_coords(bottom_left),
                to_screen_coords(bottom_right),
                to_screen_coords(top_right),
                to_screen_coords(top_left)
            ])

        # Draw the agent in both grids
        agent_position = self.state
        transformed_agent_position = agent_position
        pygame.draw.circle(self.screen, BLUE, to_screen_coords(agent_position), 10)  # Original
        pygame.draw.circle(self.screen, GREEN, to_screen_coords(transformed_agent_position), 10)  # Transformed

        # Draw the goal in both grids
        goal_position = self.goal
        transformed_goal_position = self.transform(goal_position)
        pygame.draw.circle(self.screen, GREEN, to_screen_coords(goal_position), 12)  # Original
        pygame.draw.circle(self.screen, YELLOW, to_screen_coords(transformed_goal_position), 12)  # Transformed

        # Draw observation radius as a dashed circle around the agent
        observation_radius = self.observation_radius # stays the same in both grids
        pygame.draw.circle(self.screen, YELLOW, to_screen_coords(agent_position), 
                        int(self.observation_radius * scale), 1)  # Original
        pygame.draw.circle(self.screen, YELLOW, to_screen_coords(transformed_agent_position), 
                        int(observation_radius * scale), 1)  # Transformed

        # Update the display
        pygame.display.flip()

        # Handle key events
        # Handle key events
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                return
            elif event.type == pygame.KEYDOWN:
                # Press 'r' to reset environment
                if event.key == pygame.K_r:
                    self.reset()
                # Press 'w' to quit
                elif event.key == pygame.K_w:
                    pygame.quit()
                    return
                # Press 's' to save current state
                elif event.key == pygame.K_s:
                    self.save_state()
                # Press space to pause/resume
                elif event.key == pygame.K_SPACE:
                    self.pause()
                # Press arrow keys for manual control
                elif event.key == pygame.K_LEFT:
                    return self.step(3)  # Left action
                elif event.key == pygame.K_RIGHT:
                    return self.step(2)  # Right action
                elif event.key == pygame.K_UP:
                    return self.step(0)  # Up action
                elif event.key == pygame.K_DOWN:
                    return self.step(1)  # Down action
        return None, None, None, None, None


In [6]:
obstacles = [((0.14625, 0.3325), (0.565, 0.55625)), 
             ((0.52875, 0.5375), (0.7375, 0.84125)), ((0.0, 0.00125), (0.01625, 0.99125)), ((0.0075, 0.00125), (0.99875, 0.04)), ((0.98875, 0.0075), (0.99875, 1.0)), ((0.00125, 0.9825), (0.99875, 1.0))]

# Example Usage
obstacles = [
    [(0.2, 0.2), (0.6, 0.6)],  # Obstacle 1
    [(0.6, 0.6), (0.8, 0.8)]   # Obstacle 2
]

env = ObservableDeformedGridworld(
    obstacles=obstacles, 
    stretch=(.5, .5),
    shear=(.0, .0),
    render_mode="human"
)

# state, observation = env.reset(np.random.randint(100))
env.reset(seed=np.random.randint(100))
done = False
while not done:
    try:
        _, reward, terminated, truncated, _ = env.render()
        if terminated or truncated:
            print(reward)
            env.close()
            break
    except:
        break



# train

In [4]:
from stable_baselines3 import DQN
import numpy as np

obstacles = [((0.14625, 0.3325), (0.565, 0.55625)), 
             ((0.52875, 0.5375), (0.7375, 0.84125)), 
             ((0.0, 0.00125), (0.01625, 0.99125)), 
             ((0.0075, 0.00125), (0.99875, 0.04)), 
             ((0.98875, 0.0075), (0.99875, 1.0)), 
             ((0.00125, 0.9825), (0.99875, 1.0))]


def train_dqn(args):
    from stable_baselines3.common.callbacks import CheckpointCallback
    from wandb.integration.sb3 import WandbCallback
    import wandb

    total_timesteps = args.total_timesteps
    batch_size = args.batch_size
    lr = args.learning_rate
    target_update = args.target_update
    gamma = args.gamma

    config = {
        "policy_type": "MultiInputPolicy",
        "env_name": "ObservableDeformedGridworld",
        "total_timesteps": total_timesteps,
        "Batch_Size": batch_size,
        'grid_size': (1.0,1.0),
        'step_size': 0.1,
        'obstacles':obstacles,
        'observation_radius':0.2,
    }
    run = wandb.init(
        project="DQNsb3 - MDP - ObservableDeformedGridworld",
        config=config,
        sync_tensorboard=True,  # auto-upload sb3's tensorboard metrics
        monitor_gym=True,  # auto-upload the videos of agents playing the game
        save_code=True,  # optional
    )


    # Save a checkpoint every 10000 steps
    checkpoint_callback = CheckpointCallback(
                            save_freq=10000,
                            save_path=f"DQNsb3_{run.id}",
                            name_prefix="rl_model",
                            save_replay_buffer=False,
                            save_vecnormalize=True,
                        )

    callbacks = [ WandbCallback(
                                verbose=2,
                                log="parameters",
                                ),
                checkpoint_callback,
                ]


    from stable_baselines3.common.monitor import Monitor
    from stable_baselines3.common.vec_env import DummyVecEnv, VecVideoRecorder

    def make_env():
        env = ObservableDeformedGridworld(
            grid_size=(1.0, 1.0),
            obstacles=obstacles,
            render_mode='human'
        )

        env = Monitor(env)  # record stats such as returns
        return env

    env = DummyVecEnv([make_env])


    net_arch=[128, 128, 128]
    model = DQN("MultiInputPolicy",env,batch_size=batch_size,gamma=gamma, 
                target_update_interval=target_update, policy_kwargs=dict(net_arch=net_arch), verbose=1,
                tensorboard_log=f"runs/{run.id}", device="cpu", learning_rate=lr,
                train_freq=(1,"episode"), gradient_steps=1)
    model.learn(total_timesteps,progress_bar=True, callback=callbacks, log_interval=1)
    model.save(f"agents/pretrained/MDP/DQNsb3_{run.id}")
    env.close()
    run.finish()

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    
    parser.add_argument("--learning_rate", type=float, default=0.001)
    parser.add_argument("--batch_size", type=int, default=64)
    parser.add_argument("--total_timesteps", type=int, default=1000000) # env steps
    parser.add_argument("--target_update", type=int, default=555) # in env steps
    parser.add_argument("--gamma", type=float, default=0.99)
    parser.add_argument("--f", type=str, default=None)

    args = parser.parse_args()

    train_dqn(args)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mmatteo-nunziante[0m. Use [1m`wandb login --relogin`[0m to force relogin


Using cpu device
Logging to runs/c7riss7l/DQN_1


Output()

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 129      |
|    ep_rew_mean      | -28      |
|    exploration_rate | 0.999    |
| time/               |          |
|    episodes         | 1        |
|    fps              | 120      |
|    time_elapsed     | 1        |
|    total_timesteps  | 129      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 165      |
|    ep_rew_mean      | -15      |
|    exploration_rate | 0.997    |
| time/               |          |
|    episodes         | 2        |
|    fps              | 140      |
|    time_elapsed     | 2        |
|    total_timesteps  | 330      |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.12     |
|    n_updates        | 1        |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 154      |
|    ep_rew_mean      | -11.1    |
|    exploration_rate | 0.996    |
| time/               |          |
|    episodes         | 3        |
|    fps              | 150      |
|    time_elapsed     | 3        |
|    total_timesteps  | 462      |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0171   |
|    n_updates        | 2        |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 127      |
|    ep_rew_mean      | -9.94    |
|    exploration_rate | 0.995    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 154      |
|    time_elapsed     | 3        |
|    total_timesteps  | 509      |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0381   |
|    n_updates        | 3        |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 118      |
|    ep_rew_mean      | -10.3    |
|    exploration_rate | 0.994    |
| time/               |          |
|    episodes         | 5        |
|    fps              | 157      |
|    time_elapsed     | 3        |
|    total_timesteps  | 590      |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0369   |
|    n_updates        | 4        |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 132      |
|    ep_rew_mean      | -8.92    |
|    exploration_rate | 0.992    |
| time/               |          |
|    episodes         | 6        |
|    fps              | 165      |
|    time_elapsed     | 4        |
|    total_timesteps  | 791      |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0513   |
|    n_updates        | 5        |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 140      |
|    ep_rew_mean      | -15.1    |
|    exploration_rate | 0.991    |
| time/               |          |
|    episodes         | 7        |
|    fps              | 172      |
|    time_elapsed     | 5        |
|    total_timesteps  | 979      |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0445   |
|    n_updates        | 6        |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 144      |
|    ep_rew_mean      | -14.7    |
|    exploration_rate | 0.989    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 174      |
|    time_elapsed     | 6        |
|    total_timesteps  | 1153     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0492   |
|    n_updates        | 7        |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 150      |
|    ep_rew_mean      | -20      |
|    exploration_rate | 0.987    |
| time/               |          |
|    episodes         | 9        |
|    fps              | 178      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1353     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0213   |
|    n_updates        | 8        |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 140      |
|    ep_rew_mean      | -19.6    |
|    exploration_rate | 0.987    |
| time/               |          |
|    episodes         | 10       |
|    fps              | 178      |
|    time_elapsed     | 7        |
|    total_timesteps  | 1396     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0584   |
|    n_updates        | 9        |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 131      |
|    ep_rew_mean      | -18.1    |
|    exploration_rate | 0.986    |
| time/               |          |
|    episodes         | 11       |
|    fps              | 179      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1439     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0573   |
|    n_updates        | 10       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 124      |
|    ep_rew_mean      | -16.8    |
|    exploration_rate | 0.986    |
| time/               |          |
|    episodes         | 12       |
|    fps              | 179      |
|    time_elapsed     | 8        |
|    total_timesteps  | 1492     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0559   |
|    n_updates        | 11       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 130      |
|    ep_rew_mean      | -19.1    |
|    exploration_rate | 0.984    |
| time/               |          |
|    episodes         | 13       |
|    fps              | 180      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1693     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0542   |
|    n_updates        | 12       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 123      |
|    ep_rew_mean      | -17.9    |
|    exploration_rate | 0.984    |
| time/               |          |
|    episodes         | 14       |
|    fps              | 180      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1727     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0546   |
|    n_updates        | 13       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 120      |
|    ep_rew_mean      | -16.8    |
|    exploration_rate | 0.983    |
| time/               |          |
|    episodes         | 15       |
|    fps              | 180      |
|    time_elapsed     | 9        |
|    total_timesteps  | 1799     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0527   |
|    n_updates        | 14       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 125      |
|    ep_rew_mean      | -15.9    |
|    exploration_rate | 0.981    |
| time/               |          |
|    episodes         | 16       |
|    fps              | 179      |
|    time_elapsed     | 11       |
|    total_timesteps  | 2000     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.045    |
|    n_updates        | 15       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 121      |
|    ep_rew_mean      | -16.4    |
|    exploration_rate | 0.98     |
| time/               |          |
|    episodes         | 17       |
|    fps              | 180      |
|    time_elapsed     | 11       |
|    total_timesteps  | 2062     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0335   |
|    n_updates        | 16       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 119      |
|    ep_rew_mean      | -16.4    |
|    exploration_rate | 0.98     |
| time/               |          |
|    episodes         | 18       |
|    fps              | 180      |
|    time_elapsed     | 11       |
|    total_timesteps  | 2150     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.019    |
|    n_updates        | 17       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 114      |
|    ep_rew_mean      | -15.6    |
|    exploration_rate | 0.979    |
| time/               |          |
|    episodes         | 19       |
|    fps              | 180      |
|    time_elapsed     | 11       |
|    total_timesteps  | 2165     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0246   |
|    n_updates        | 18       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 112      |
|    ep_rew_mean      | -15      |
|    exploration_rate | 0.979    |
| time/               |          |
|    episodes         | 20       |
|    fps              | 180      |
|    time_elapsed     | 12       |
|    total_timesteps  | 2245     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0243   |
|    n_updates        | 19       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 114      |
|    ep_rew_mean      | -14.4    |
|    exploration_rate | 0.977    |
| time/               |          |
|    episodes         | 21       |
|    fps              | 180      |
|    time_elapsed     | 13       |
|    total_timesteps  | 2395     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0243   |
|    n_updates        | 20       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 112      |
|    ep_rew_mean      | -13.9    |
|    exploration_rate | 0.977    |
| time/               |          |
|    episodes         | 22       |
|    fps              | 181      |
|    time_elapsed     | 13       |
|    total_timesteps  | 2468     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0239   |
|    n_updates        | 21       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 109      |
|    ep_rew_mean      | -13.4    |
|    exploration_rate | 0.976    |
| time/               |          |
|    episodes         | 23       |
|    fps              | 181      |
|    time_elapsed     | 13       |
|    total_timesteps  | 2508     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0241   |
|    n_updates        | 22       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 107      |
|    ep_rew_mean      | -13      |
|    exploration_rate | 0.976    |
| time/               |          |
|    episodes         | 24       |
|    fps              | 182      |
|    time_elapsed     | 14       |
|    total_timesteps  | 2571     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0236   |
|    n_updates        | 23       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 111      |
|    ep_rew_mean      | -17      |
|    exploration_rate | 0.974    |
| time/               |          |
|    episodes         | 25       |
|    fps              | 184      |
|    time_elapsed     | 14       |
|    total_timesteps  | 2772     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0231   |
|    n_updates        | 24       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 108      |
|    ep_rew_mean      | -16.8    |
|    exploration_rate | 0.973    |
| time/               |          |
|    episodes         | 26       |
|    fps              | 185      |
|    time_elapsed     | 15       |
|    total_timesteps  | 2798     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.045    |
|    n_updates        | 25       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 109      |
|    ep_rew_mean      | -16.8    |
|    exploration_rate | 0.972    |
| time/               |          |
|    episodes         | 27       |
|    fps              | 186      |
|    time_elapsed     | 15       |
|    total_timesteps  | 2941     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0446   |
|    n_updates        | 26       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 110      |
|    ep_rew_mean      | -16.3    |
|    exploration_rate | 0.971    |
| time/               |          |
|    episodes         | 28       |
|    fps              | 186      |
|    time_elapsed     | 16       |
|    total_timesteps  | 3075     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0432   |
|    n_updates        | 27       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 107      |
|    ep_rew_mean      | -15.8    |
|    exploration_rate | 0.971    |
| time/               |          |
|    episodes         | 29       |
|    fps              | 185      |
|    time_elapsed     | 16       |
|    total_timesteps  | 3090     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0346   |
|    n_updates        | 28       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 110      |
|    ep_rew_mean      | -17.1    |
|    exploration_rate | 0.969    |
| time/               |          |
|    episodes         | 30       |
|    fps              | 187      |
|    time_elapsed     | 17       |
|    total_timesteps  | 3291     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0337   |
|    n_updates        | 29       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 109      |
|    ep_rew_mean      | -17      |
|    exploration_rate | 0.968    |
| time/               |          |
|    episodes         | 31       |
|    fps              | 187      |
|    time_elapsed     | 18       |
|    total_timesteps  | 3385     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0329   |
|    n_updates        | 30       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 107      |
|    ep_rew_mean      | -16.6    |
|    exploration_rate | 0.968    |
| time/               |          |
|    episodes         | 32       |
|    fps              | 187      |
|    time_elapsed     | 18       |
|    total_timesteps  | 3417     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0391   |
|    n_updates        | 31       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 104      |
|    ep_rew_mean      | -16.3    |
|    exploration_rate | 0.967    |
| time/               |          |
|    episodes         | 33       |
|    fps              | 187      |
|    time_elapsed     | 18       |
|    total_timesteps  | 3447     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0389   |
|    n_updates        | 32       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 103      |
|    ep_rew_mean      | -16.1    |
|    exploration_rate | 0.967    |
| time/               |          |
|    episodes         | 34       |
|    fps              | 187      |
|    time_elapsed     | 18       |
|    total_timesteps  | 3500     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0341   |
|    n_updates        | 33       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 106      |
|    ep_rew_mean      | -15.7    |
|    exploration_rate | 0.965    |
| time/               |          |
|    episodes         | 35       |
|    fps              | 187      |
|    time_elapsed     | 19       |
|    total_timesteps  | 3701     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0342   |
|    n_updates        | 34       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 106      |
|    ep_rew_mean      | -15.6    |
|    exploration_rate | 0.964    |
| time/               |          |
|    episodes         | 36       |
|    fps              | 186      |
|    time_elapsed     | 20       |
|    total_timesteps  | 3825     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0347   |
|    n_updates        | 35       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 105      |
|    ep_rew_mean      | -15.2    |
|    exploration_rate | 0.963    |
| time/               |          |
|    episodes         | 37       |
|    fps              | 186      |
|    time_elapsed     | 20       |
|    total_timesteps  | 3867     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0501   |
|    n_updates        | 36       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 107      |
|    ep_rew_mean      | -14.9    |
|    exploration_rate | 0.961    |
| time/               |          |
|    episodes         | 38       |
|    fps              | 186      |
|    time_elapsed     | 21       |
|    total_timesteps  | 4068     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0434   |
|    n_updates        | 37       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 109      |
|    ep_rew_mean      | -14.6    |
|    exploration_rate | 0.96     |
| time/               |          |
|    episodes         | 39       |
|    fps              | 186      |
|    time_elapsed     | 22       |
|    total_timesteps  | 4234     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0409   |
|    n_updates        | 38       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 109      |
|    ep_rew_mean      | -14.3    |
|    exploration_rate | 0.958    |
| time/               |          |
|    episodes         | 40       |
|    fps              | 186      |
|    time_elapsed     | 23       |
|    total_timesteps  | 4370     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.053    |
|    n_updates        | 39       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 108      |
|    ep_rew_mean      | -14      |
|    exploration_rate | 0.958    |
| time/               |          |
|    episodes         | 41       |
|    fps              | 186      |
|    time_elapsed     | 23       |
|    total_timesteps  | 4441     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0507   |
|    n_updates        | 40       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 110      |
|    ep_rew_mean      | -13.8    |
|    exploration_rate | 0.956    |
| time/               |          |
|    episodes         | 42       |
|    fps              | 186      |
|    time_elapsed     | 24       |
|    total_timesteps  | 4608     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0484   |
|    n_updates        | 41       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 111      |
|    ep_rew_mean      | -13.6    |
|    exploration_rate | 0.954    |
| time/               |          |
|    episodes         | 43       |
|    fps              | 185      |
|    time_elapsed     | 25       |
|    total_timesteps  | 4790     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0478   |
|    n_updates        | 42       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 113      |
|    ep_rew_mean      | -13.3    |
|    exploration_rate | 0.953    |
| time/               |          |
|    episodes         | 44       |
|    fps              | 185      |
|    time_elapsed     | 26       |
|    total_timesteps  | 4991     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0463   |
|    n_updates        | 43       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 114      |
|    ep_rew_mean      | -13.1    |
|    exploration_rate | 0.951    |
| time/               |          |
|    episodes         | 45       |
|    fps              | 185      |
|    time_elapsed     | 27       |
|    total_timesteps  | 5135     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.045    |
|    n_updates        | 44       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 113      |
|    ep_rew_mean      | -12.8    |
|    exploration_rate | 0.95     |
| time/               |          |
|    episodes         | 46       |
|    fps              | 185      |
|    time_elapsed     | 28       |
|    total_timesteps  | 5216     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0399   |
|    n_updates        | 45       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 115      |
|    ep_rew_mean      | -12.6    |
|    exploration_rate | 0.949    |
| time/               |          |
|    episodes         | 47       |
|    fps              | 185      |
|    time_elapsed     | 29       |
|    total_timesteps  | 5417     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0397   |
|    n_updates        | 46       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 113      |
|    ep_rew_mean      | -12.4    |
|    exploration_rate | 0.948    |
| time/               |          |
|    episodes         | 48       |
|    fps              | 185      |
|    time_elapsed     | 29       |
|    total_timesteps  | 5447     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0396   |
|    n_updates        | 47       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 112      |
|    ep_rew_mean      | -12.4    |
|    exploration_rate | 0.948    |
| time/               |          |
|    episodes         | 49       |
|    fps              | 185      |
|    time_elapsed     | 29       |
|    total_timesteps  | 5510     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0397   |
|    n_updates        | 48       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 113      |
|    ep_rew_mean      | -12.3    |
|    exploration_rate | 0.946    |
| time/               |          |
|    episodes         | 50       |
|    fps              | 185      |
|    time_elapsed     | 30       |
|    total_timesteps  | 5674     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0397   |
|    n_updates        | 49       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 115      |
|    ep_rew_mean      | -13.2    |
|    exploration_rate | 0.944    |
| time/               |          |
|    episodes         | 51       |
|    fps              | 186      |
|    time_elapsed     | 31       |
|    total_timesteps  | 5875     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0401   |
|    n_updates        | 50       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 115      |
|    ep_rew_mean      | -13.6    |
|    exploration_rate | 0.943    |
| time/               |          |
|    episodes         | 52       |
|    fps              | 186      |
|    time_elapsed     | 32       |
|    total_timesteps  | 5984     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0434   |
|    n_updates        | 51       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 113      |
|    ep_rew_mean      | -13.5    |
|    exploration_rate | 0.943    |
| time/               |          |
|    episodes         | 53       |
|    fps              | 186      |
|    time_elapsed     | 32       |
|    total_timesteps  | 6011     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0459   |
|    n_updates        | 52       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 115      |
|    ep_rew_mean      | -13.5    |
|    exploration_rate | 0.941    |
| time/               |          |
|    episodes         | 54       |
|    fps              | 186      |
|    time_elapsed     | 33       |
|    total_timesteps  | 6212     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0443   |
|    n_updates        | 53       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 116      |
|    ep_rew_mean      | -13.9    |
|    exploration_rate | 0.939    |
| time/               |          |
|    episodes         | 55       |
|    fps              | 186      |
|    time_elapsed     | 34       |
|    total_timesteps  | 6373     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0433   |
|    n_updates        | 54       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 115      |
|    ep_rew_mean      | -13.8    |
|    exploration_rate | 0.939    |
| time/               |          |
|    episodes         | 56       |
|    fps              | 186      |
|    time_elapsed     | 34       |
|    total_timesteps  | 6425     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0423   |
|    n_updates        | 55       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 113      |
|    ep_rew_mean      | -13.6    |
|    exploration_rate | 0.939    |
| time/               |          |
|    episodes         | 57       |
|    fps              | 186      |
|    time_elapsed     | 34       |
|    total_timesteps  | 6450     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0415   |
|    n_updates        | 56       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 115      |
|    ep_rew_mean      | -13.4    |
|    exploration_rate | 0.937    |
| time/               |          |
|    episodes         | 58       |
|    fps              | 187      |
|    time_elapsed     | 35       |
|    total_timesteps  | 6651     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0408   |
|    n_updates        | 57       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 115      |
|    ep_rew_mean      | -13.2    |
|    exploration_rate | 0.935    |
| time/               |          |
|    episodes         | 59       |
|    fps              | 187      |
|    time_elapsed     | 36       |
|    total_timesteps  | 6794     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.035    |
|    n_updates        | 58       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 114      |
|    ep_rew_mean      | -13.1    |
|    exploration_rate | 0.935    |
| time/               |          |
|    episodes         | 60       |
|    fps              | 187      |
|    time_elapsed     | 36       |
|    total_timesteps  | 6811     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.037    |
|    n_updates        | 59       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 115      |
|    ep_rew_mean      | -12.9    |
|    exploration_rate | 0.933    |
| time/               |          |
|    episodes         | 61       |
|    fps              | 187      |
|    time_elapsed     | 37       |
|    total_timesteps  | 7012     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0363   |
|    n_updates        | 60       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 115      |
|    ep_rew_mean      | -12.9    |
|    exploration_rate | 0.932    |
| time/               |          |
|    episodes         | 62       |
|    fps              | 187      |
|    time_elapsed     | 37       |
|    total_timesteps  | 7116     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0355   |
|    n_updates        | 61       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 113      |
|    ep_rew_mean      | -12.7    |
|    exploration_rate | 0.932    |
| time/               |          |
|    episodes         | 63       |
|    fps              | 187      |
|    time_elapsed     | 38       |
|    total_timesteps  | 7145     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0346   |
|    n_updates        | 62       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 114      |
|    ep_rew_mean      | -12.6    |
|    exploration_rate | 0.93     |
| time/               |          |
|    episodes         | 64       |
|    fps              | 187      |
|    time_elapsed     | 39       |
|    total_timesteps  | 7324     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0336   |
|    n_updates        | 63       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 115      |
|    ep_rew_mean      | -12.4    |
|    exploration_rate | 0.929    |
| time/               |          |
|    episodes         | 65       |
|    fps              | 187      |
|    time_elapsed     | 39       |
|    total_timesteps  | 7503     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0357   |
|    n_updates        | 64       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 117      |
|    ep_rew_mean      | -13.1    |
|    exploration_rate | 0.927    |
| time/               |          |
|    episodes         | 66       |
|    fps              | 188      |
|    time_elapsed     | 40       |
|    total_timesteps  | 7704     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0343   |
|    n_updates        | 65       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 117      |
|    ep_rew_mean      | -13      |
|    exploration_rate | 0.926    |
| time/               |          |
|    episodes         | 67       |
|    fps              | 188      |
|    time_elapsed     | 41       |
|    total_timesteps  | 7808     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.033    |
|    n_updates        | 66       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 116      |
|    ep_rew_mean      | -12.8    |
|    exploration_rate | 0.925    |
| time/               |          |
|    episodes         | 68       |
|    fps              | 188      |
|    time_elapsed     | 41       |
|    total_timesteps  | 7855     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0338   |
|    n_updates        | 67       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 114      |
|    ep_rew_mean      | -12.7    |
|    exploration_rate | 0.925    |
| time/               |          |
|    episodes         | 69       |
|    fps              | 187      |
|    time_elapsed     | 41       |
|    total_timesteps  | 7876     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0503   |
|    n_updates        | 68       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 113      |
|    ep_rew_mean      | -12.5    |
|    exploration_rate | 0.925    |
| time/               |          |
|    episodes         | 70       |
|    fps              | 187      |
|    time_elapsed     | 42       |
|    total_timesteps  | 7934     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0483   |
|    n_updates        | 69       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 114      |
|    ep_rew_mean      | -12.4    |
|    exploration_rate | 0.923    |
| time/               |          |
|    episodes         | 71       |
|    fps              | 187      |
|    time_elapsed     | 43       |
|    total_timesteps  | 8090     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0463   |
|    n_updates        | 70       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 114      |
|    ep_rew_mean      | -12.3    |
|    exploration_rate | 0.922    |
| time/               |          |
|    episodes         | 72       |
|    fps              | 187      |
|    time_elapsed     | 43       |
|    total_timesteps  | 8202     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0445   |
|    n_updates        | 71       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 115      |
|    ep_rew_mean      | -12.1    |
|    exploration_rate | 0.92     |
| time/               |          |
|    episodes         | 73       |
|    fps              | 187      |
|    time_elapsed     | 44       |
|    total_timesteps  | 8403     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.066    |
|    n_updates        | 72       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 115      |
|    ep_rew_mean      | -12      |
|    exploration_rate | 0.919    |
| time/               |          |
|    episodes         | 74       |
|    fps              | 187      |
|    time_elapsed     | 45       |
|    total_timesteps  | 8544     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0677   |
|    n_updates        | 73       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 116      |
|    ep_rew_mean      | -11.9    |
|    exploration_rate | 0.918    |
| time/               |          |
|    episodes         | 75       |
|    fps              | 187      |
|    time_elapsed     | 46       |
|    total_timesteps  | 8680     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0653   |
|    n_updates        | 74       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 117      |
|    ep_rew_mean      | -12.2    |
|    exploration_rate | 0.916    |
| time/               |          |
|    episodes         | 76       |
|    fps              | 187      |
|    time_elapsed     | 47       |
|    total_timesteps  | 8881     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0627   |
|    n_updates        | 75       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 118      |
|    ep_rew_mean      | -12.1    |
|    exploration_rate | 0.914    |
| time/               |          |
|    episodes         | 77       |
|    fps              | 187      |
|    time_elapsed     | 48       |
|    total_timesteps  | 9082     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0613   |
|    n_updates        | 76       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 119      |
|    ep_rew_mean      | -12.7    |
|    exploration_rate | 0.912    |
| time/               |          |
|    episodes         | 78       |
|    fps              | 188      |
|    time_elapsed     | 49       |
|    total_timesteps  | 9283     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.058    |
|    n_updates        | 77       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 118      |
|    ep_rew_mean      | -12.6    |
|    exploration_rate | 0.912    |
| time/               |          |
|    episodes         | 79       |
|    fps              | 188      |
|    time_elapsed     | 49       |
|    total_timesteps  | 9313     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0658   |
|    n_updates        | 78       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 119      |
|    ep_rew_mean      | -12.6    |
|    exploration_rate | 0.91     |
| time/               |          |
|    episodes         | 80       |
|    fps              | 188      |
|    time_elapsed     | 50       |
|    total_timesteps  | 9514     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0655   |
|    n_updates        | 79       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 120      |
|    ep_rew_mean      | -12.6    |
|    exploration_rate | 0.908    |
| time/               |          |
|    episodes         | 81       |
|    fps              | 188      |
|    time_elapsed     | 51       |
|    total_timesteps  | 9715     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0637   |
|    n_updates        | 80       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 120      |
|    ep_rew_mean      | -13      |
|    exploration_rate | 0.906    |
| time/               |          |
|    episodes         | 82       |
|    fps              | 188      |
|    time_elapsed     | 52       |
|    total_timesteps  | 9851     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0614   |
|    n_updates        | 81       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 121      |
|    ep_rew_mean      | -13.9    |
|    exploration_rate | 0.905    |
| time/               |          |
|    episodes         | 83       |
|    fps              | 188      |
|    time_elapsed     | 53       |
|    total_timesteps  | 10052    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.059    |
|    n_updates        | 82       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 122      |
|    ep_rew_mean      | -14.5    |
|    exploration_rate | 0.903    |
| time/               |          |
|    episodes         | 84       |
|    fps              | 189      |
|    time_elapsed     | 54       |
|    total_timesteps  | 10253    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0589   |
|    n_updates        | 83       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 123      |
|    ep_rew_mean      | -14.3    |
|    exploration_rate | 0.901    |
| time/               |          |
|    episodes         | 85       |
|    fps              | 188      |
|    time_elapsed     | 55       |
|    total_timesteps  | 10454    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0626   |
|    n_updates        | 84       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 122      |
|    ep_rew_mean      | -14.2    |
|    exploration_rate | 0.9      |
| time/               |          |
|    episodes         | 86       |
|    fps              | 188      |
|    time_elapsed     | 55       |
|    total_timesteps  | 10509    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0618   |
|    n_updates        | 85       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 122      |
|    ep_rew_mean      | -14.1    |
|    exploration_rate | 0.899    |
| time/               |          |
|    episodes         | 87       |
|    fps              | 188      |
|    time_elapsed     | 56       |
|    total_timesteps  | 10592    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0604   |
|    n_updates        | 86       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 121      |
|    ep_rew_mean      | -14.1    |
|    exploration_rate | 0.899    |
| time/               |          |
|    episodes         | 88       |
|    fps              | 188      |
|    time_elapsed     | 56       |
|    total_timesteps  | 10679    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0565   |
|    n_updates        | 87       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 122      |
|    ep_rew_mean      | -13.9    |
|    exploration_rate | 0.897    |
| time/               |          |
|    episodes         | 89       |
|    fps              | 188      |
|    time_elapsed     | 57       |
|    total_timesteps  | 10880    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.054    |
|    n_updates        | 88       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 122      |
|    ep_rew_mean      | -14.1    |
|    exploration_rate | 0.896    |
| time/               |          |
|    episodes         | 90       |
|    fps              | 189      |
|    time_elapsed     | 58       |
|    total_timesteps  | 10998    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0516   |
|    n_updates        | 89       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 121      |
|    ep_rew_mean      | -14      |
|    exploration_rate | 0.895    |
| time/               |          |
|    episodes         | 91       |
|    fps              | 189      |
|    time_elapsed     | 58       |
|    total_timesteps  | 11026    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0493   |
|    n_updates        | 90       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 120      |
|    ep_rew_mean      | -13.9    |
|    exploration_rate | 0.895    |
| time/               |          |
|    episodes         | 92       |
|    fps              | 189      |
|    time_elapsed     | 58       |
|    total_timesteps  | 11055    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0468   |
|    n_updates        | 91       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 119      |
|    ep_rew_mean      | -13.8    |
|    exploration_rate | 0.895    |
| time/               |          |
|    episodes         | 93       |
|    fps              | 189      |
|    time_elapsed     | 58       |
|    total_timesteps  | 11100    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0443   |
|    n_updates        | 92       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 118      |
|    ep_rew_mean      | -13.7    |
|    exploration_rate | 0.894    |
| time/               |          |
|    episodes         | 94       |
|    fps              | 189      |
|    time_elapsed     | 58       |
|    total_timesteps  | 11113    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0388   |
|    n_updates        | 93       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 117      |
|    ep_rew_mean      | -13.6    |
|    exploration_rate | 0.894    |
| time/               |          |
|    episodes         | 95       |
|    fps              | 189      |
|    time_elapsed     | 58       |
|    total_timesteps  | 11139    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0368   |
|    n_updates        | 94       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 118      |
|    ep_rew_mean      | -13.8    |
|    exploration_rate | 0.892    |
| time/               |          |
|    episodes         | 96       |
|    fps              | 189      |
|    time_elapsed     | 59       |
|    total_timesteps  | 11340    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0349   |
|    n_updates        | 95       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 119      |
|    ep_rew_mean      | -13.6    |
|    exploration_rate | 0.89     |
| time/               |          |
|    episodes         | 97       |
|    fps              | 189      |
|    time_elapsed     | 61       |
|    total_timesteps  | 11541    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0328   |
|    n_updates        | 96       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 118      |
|    ep_rew_mean      | -13.5    |
|    exploration_rate | 0.89     |
| time/               |          |
|    episodes         | 98       |
|    fps              | 189      |
|    time_elapsed     | 61       |
|    total_timesteps  | 11586    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0306   |
|    n_updates        | 97       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 119      |
|    ep_rew_mean      | -13.6    |
|    exploration_rate | 0.888    |
| time/               |          |
|    episodes         | 99       |
|    fps              | 189      |
|    time_elapsed     | 62       |
|    total_timesteps  | 11787    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0317   |
|    n_updates        | 98       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 120      |
|    ep_rew_mean      | -14      |
|    exploration_rate | 0.886    |
| time/               |          |
|    episodes         | 100      |
|    fps              | 189      |
|    time_elapsed     | 63       |
|    total_timesteps  | 11988    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0322   |
|    n_updates        | 99       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 121      |
|    ep_rew_mean      | -14.5    |
|    exploration_rate | 0.884    |
| time/               |          |
|    episodes         | 101      |
|    fps              | 189      |
|    time_elapsed     | 64       |
|    total_timesteps  | 12189    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0365   |
|    n_updates        | 100      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 119      |
|    ep_rew_mean      | -14.5    |
|    exploration_rate | 0.884    |
| time/               |          |
|    episodes         | 102      |
|    fps              | 189      |
|    time_elapsed     | 64       |
|    total_timesteps  | 12214    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0347   |
|    n_updates        | 101      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 118      |
|    ep_rew_mean      | -14.5    |
|    exploration_rate | 0.883    |
| time/               |          |
|    episodes         | 103      |
|    fps              | 189      |
|    time_elapsed     | 64       |
|    total_timesteps  | 12279    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0367   |
|    n_updates        | 102      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 118      |
|    ep_rew_mean      | -14.5    |
|    exploration_rate | 0.883    |
| time/               |          |
|    episodes         | 104      |
|    fps              | 189      |
|    time_elapsed     | 64       |
|    total_timesteps  | 12319    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.034    |
|    n_updates        | 103      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 118      |
|    ep_rew_mean      | -14.5    |
|    exploration_rate | 0.883    |
| time/               |          |
|    episodes         | 105      |
|    fps              | 189      |
|    time_elapsed     | 65       |
|    total_timesteps  | 12357    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0324   |
|    n_updates        | 104      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 116      |
|    ep_rew_mean      | -14.5    |
|    exploration_rate | 0.882    |
| time/               |          |
|    episodes         | 106      |
|    fps              | 189      |
|    time_elapsed     | 65       |
|    total_timesteps  | 12379    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0307   |
|    n_updates        | 105      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 115      |
|    ep_rew_mean      | -14      |
|    exploration_rate | 0.881    |
| time/               |          |
|    episodes         | 107      |
|    fps              | 189      |
|    time_elapsed     | 65       |
|    total_timesteps  | 12479    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0284   |
|    n_updates        | 106      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 114      |
|    ep_rew_mean      | -13.9    |
|    exploration_rate | 0.881    |
| time/               |          |
|    episodes         | 108      |
|    fps              | 189      |
|    time_elapsed     | 65       |
|    total_timesteps  | 12503    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0259   |
|    n_updates        | 107      |
----------------------------------


error: display Surface quit

# eval

In [None]:
from stable_baselines3 import DQN

obstacles = [((0.14625, 0.3325), (0.565, 0.55625)), 
             ((0.52875, 0.5375), (0.7375, 0.84125)), 
             ((0.0, 0.00125), (0.01625, 0.99125)), 
             ((0.0075, 0.00125), (0.99875, 0.04)), 
             ((0.98875, 0.0075), (0.99875, 1.0)), 
             ((0.00125, 0.9825), (0.99875, 1.0))]

env = ObservableDeformedGridworld(
    obstacles=obstacles, 
    stretch=(.5, .5),
    shear=(.0, .0),
    render_mode="human"
)

net_arch=[128, 128, 128]
model = DQN.load("DQNsb3_qyr2tu0w.zip", env=env, policy_kwargs=dict(net_arch=net_arch), print_system_info=True)

env = ObservableDeformedGridworld(
    obstacles=obstacles, 
    stretch=(.5, .5),
    shear=(.0, .0),
    render_mode="human"
)

state, _ = env.reset(seed=np.random.randint(100))

while True:
    action, _ = model.predict(state, deterministic=True)
    state, reward, terminated,truncated, info = env.step(action)
    if terminated or truncated:
        env.close()
        break
    time.sleep(0.1)


SyntaxError: invalid syntax (108027871.py, line 33)