# Installing scikit-decide

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# deps installation
try:
  from skdecide import DeterministicPlanningDomain
except ImportError:
  !pip install scikit-decide[all]
  print('Stopping RUNTIME! Please run again.')
  import os
  os.kill(os.getpid(), 9)

In [5]:
!git clone https://github.com/galleon/scikit-maze.git

Cloning into 'scikit-maze'...
remote: Enumerating objects: 28, done.[K
remote: Counting objects: 100% (28/28), done.[K
remote: Compressing objects: 100% (20/20), done.[K
remote: Total 28 (delta 6), reused 26 (delta 4), pack-reused 0[K
Receiving objects: 100% (28/28), 11.62 KiB | 2.32 MiB/s, done.
Resolving deltas: 100% (6/6), done.


In [4]:
import sys
sys.path.insert(0,'./scikit-maze')

# Solving a Maze

In [3]:
from copy import deepcopy
from enum import Enum
from io import BytesIO
import matplotlib.pyplot as plt
from typing import NamedTuple, Optional, Any, List

from skdecide import DeterministicPlanningDomain, Space, Value
from skdecide.builders.domain import UnrestrictedActions, Renderable
from skdecide.utils import rollout, match_solvers, load_registered_solver
from skdecide.hub.space.gym import ListSpace, EnumSpace, MultiDiscreteSpace
from skdecide.hub.solver.lazy_astar import LazyAstar

from utils import Maze

## Create a Maze

In [4]:
maze = Maze(25, 25)

In [6]:
image = maze.get_image(1, 1)

NameError: name 'maze' is not defined

## Domain definition
Define your state space (agent positions) & action space (agent movements).

In [5]:
class State(NamedTuple):
    x: int
    y: int


class Action(Enum):
    up = 0
    down = 1
    left = 2
    right = 3

Define your domain type from a base template (DeterministicPlanningDomain here) with optional refinements (UnrestrictedActions & Renderable here).



In [8]:
class D(DeterministicPlanningDomain, UnrestrictedActions, Renderable):
    T_state = State  # Type of states
    T_observation = T_state  # Type of observations
    T_event = Action  # Type of events
    T_value = float  # Type of transition values (rewards or costs)
    T_predicate = bool  # Type of logical checks
    T_info = None  # Type of additional information in environment outcome

Implement the maze domain by:
* filling all non-implemented methods
* adding a constructor to define the maze & start/end positions.

And also define (to help solvers that need it)
* an heuristic for search algorithms
* state features for width-based algorithms

In [9]:
empty_cell = " "

class MazeDomain(D):
    def __init__(self, start, end, maze_str, image_widget=None):
        self.start = start
        self.end = end
        self.maze_str = maze_str.strip()
        self.maze = self.maze_str.splitlines()
        self.image_widget = image_widget

        # for rendering
        rendered_maze = []
        for line in self.maze:
            row = []
            for c in line:
                if c == empty_cell:
                    row.append(1)
                else:
                    row.append(0)
            if len(row) > 0:
                rendered_maze.append(row)
        rendered_maze[self.end.x][self.end.y] = 0.7
        self.rendered_maze = rendered_maze

        self._image = None

    def _get_next_state(self, memory: D.T_state, action: D.T_event) -> D.T_state:
        # Move agent according to action (except if bumping into a wall)
        next_x, next_y = memory.x, memory.y
        if action == Action.up:
            next_x -= 1
        if action == Action.down:
            next_x += 1
        if action == Action.left:
            next_y -= 1
        if action == Action.right:
            next_y += 1
        return State(next_x, next_y) if self.maze[next_x][next_y] == empty_cell else memory

    def _get_transition_value(self, memory: D.T_state, action: D.T_event, next_state: Optional[D.T_state] = None) -> \
            Value[D.T_value]:
        # Set cost to 1 when moving (energy cost) and to 2 when bumping into a wall (damage cost)
        return Value(cost=1 if next_state != memory else 2)

    def _get_initial_state_(self) -> D.T_state:
        # Set the start position as initial state
        return self.start

    def _get_goals_(self) -> Space[D.T_observation]:
        # Set the end position as goal
        return ListSpace([self.end])

    def _is_terminal(self, state: D.T_state) -> D.T_agent[D.T_predicate]:
        # Stop an episode only when goal reached
        return self._is_goal(state)

    def _get_action_space_(self) -> Space[D.T_event]:
        # Define action space
        return EnumSpace(Action)

    def _get_observation_space_(self) -> Space[D.T_observation]:
        # Define observation space
        num_rows = len(self.maze)
        num_cols = max([len(row) for row in self.maze])
        return MultiDiscreteSpace([num_rows, num_cols])

    def _render_from(self, memory: D.T_state, **kwargs: Any) -> Any:
        #  display maze in an image widget, via a matplotlib figure
        maze = deepcopy(self.rendered_maze)
        maze[memory.x][memory.y] = 0.3
        if self._image is None:
            plt.ioff()
            fig, ax = plt.subplots(1)
            ax.set_aspect('equal')  # set the x and y axes to the same scale
            plt.xticks([])  # remove the tick marks by setting to an empty list
            plt.yticks([])  # remove the tick marks by setting to an empty list
            ax.invert_yaxis()  # invert the y-axis so the first row of data is at the top
            plt.ion()
            fig.canvas.header_visible = False
            fig.canvas.footer_visible = False
            fig.canvas.resizable = False
            fig.set_dpi(1)
            fig.set_figwidth(600)
            fig.set_figheight(600)
            self._image = ax.imshow(maze)
        else:
            self._image.set_data(maze)
        with BytesIO() as f:
            self._image.figure.savefig(f, format="png")
            self.image_widget.value = f.getvalue()
        
    def heuristic(self, s: D.T_state) -> Value:
        return Value(cost=sqrt((self.end.x - s.x)**2 + (self.end.y - s.y)**2))
    
    def state_features(self, s: D.T_state) -> List[float]:
        return [s.x, s.y]

Now that the domain is defined. Let's look at the solvers that can solve it. *scikit-decide* provide a method to list them

In [11]:
default_domain = MazeDomain(start=State(1,1), end=State(x=23, y=23), maze_str=maze.get_str())
compatible_solver_classes = match_solvers(default_domain)

[Discrete(4)]
[MultiDiscrete([25 25])]


In [12]:
compatible_solver_classes

[skdecide.hub.solver.aostar.aostar.AOstar,
 skdecide.hub.solver.astar.astar.Astar,
 skdecide.hub.solver.ars.ars.AugmentedRandomSearch,
 skdecide.hub.solver.bfws.bfws.BFWS,
 skdecide.hub.solver.ilaostar.ilaostar.ILAOstar,
 skdecide.hub.solver.iw.iw.IW,
 skdecide.hub.solver.lrtastar.lrtastar.LRTAstar,
 skdecide.hub.solver.lrtdp.lrtdp.LRTDP,
 skdecide.hub.solver.lazy_astar.lazy_astar.LazyAstar,
 skdecide.hub.solver.mahd.mahd.MAHD,
 skdecide.hub.solver.martdp.martdp.MARTDP,
 skdecide.hub.solver.mcts.mcts.MCTS,
 skdecide.hub.solver.maxent_irl.maxent_irl.MaxentIRL,
 skdecide.hub.solver.pomcp.pomcp.POMCP,
 skdecide.hub.solver.riw.riw.RIW,
 skdecide.hub.solver.ray_rllib.ray_rllib.RayRLlib,
 skdecide.hub.solver.simple_greedy.simple_greedy.SimpleGreedy,
 skdecide.hub.solver.stable_baselines.stable_baselines.StableBaseline,
 skdecide.hub.solver.mcts.mcts.UCT]

Let's try to use a first solver named A\*. A\* (pronounced "A-star") is a graph traversal and path search algorithm, which is often used in many fields of computer science due to its completeness, optimality, and optimal efficiency. 

One major practical drawback is its $O(b^d)$ space complexity, as it
stores all generated nodes in memory.

In [13]:
from skdecide.hub.solver.astar.astar import Astar

❗ We need to explain what is a domain factory and an heuristic [we cannot assume it is know from users]

In [14]:
domain_factory = lambda: MazeDomain(start=State(1,1), end=State(x=23, y=23), maze_str=maze.get_str())

In [15]:
solver = Astar()

In [16]:
#solver._initialize()
MazeDomain.solve_with(solver, domain_factory)

<skdecide.hub.solver.astar.astar.Astar at 0x7f6d562c4910>

In [17]:
rollout(default_domain, solver, max_steps=1000, max_framerate=80, verbose=False)

AttributeError: ignored