# Installing scikit-decide

In [1]:
# Load IPython's autoreload extension and select mode 2, so that edits to
# imported modules are picked up live without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# deps installation
# Install scikit-decide (with all extras) only when it cannot be imported yet.
try:
  from skdecide import DeterministicPlanningDomain
except ImportError:
  !pip install scikit-decide\[all\]
  print('Stopping RUNTIME! Please run again.')
  import os
  # Send signal 9 (SIGKILL) to the current kernel process on purpose --
  # presumably so the next run starts from a fresh interpreter that can see
  # the newly installed package. All cells must then be run again.
  os.kill(os.getpid(), 9)

In [3]:
import sys

# Only when the `google.colab` module is already loaded (i.e. the notebook
# runs inside Google Colab): turn on Colab's custom widget manager.
if "google.colab" in sys.modules:
  from google.colab import output

  output.enable_custom_widget_manager()

In [4]:
# Start from a clean checkout: delete any previous copy of the repository,
# then clone it again into ./scikit-maze.
!rm -rf scikit-maze; git clone https://github.com/galleon/scikit-maze.git

Cloning into 'scikit-maze'...
remote: Enumerating objects: 53, done.[K
remote: Counting objects: 100% (53/53), done.[K
remote: Compressing objects: 100% (37/37), done.[K
remote: Total 53 (delta 17), reused 46 (delta 10), pack-reused 0[K
Unpacking objects: 100% (53/53), done.


In [5]:
import sys
# Put the cloned repository first on the module search path
# (presumably this is where the `utils` module providing `Maze` lives).
sys.path.insert(0,'./scikit-maze')

# Solving a Maze

In [6]:
from enum import Enum
from io import BytesIO
from math import sqrt
from typing import NamedTuple, Optional, Any, List

import matplotlib.pyplot as plt

from skdecide import DeterministicPlanningDomain, Space, Value
from skdecide.builders.domain import UnrestrictedActions, Renderable
from skdecide.utils import rollout, match_solvers, load_registered_solver
from skdecide.hub.space.gym import ListSpace, EnumSpace, MultiDiscreteSpace
from skdecide.hub.solver.lazy_astar import LazyAstar

from utils import Maze

## Create a Maze

In [7]:
# Maze instance shared by the cells below
# (constructed with arguments 25, 25 -- presumably its dimensions).
maze = Maze(25, 25)

In [22]:
# Scratch handle on the value returned by maze.get_image().
# NOTE(review): `wtf` is not referenced by any other cell shown here and this
# cell carries an out-of-order execution count -- consider renaming or removing it.
wtf = maze.get_image()

In [9]:
# Draw the maze first, then display the resulting figure.
# plt.show() does not take an image: its only parameter is `block`, so the
# image must not be passed to it.
# NOTE(review): assumes maze.get_image() draws onto the current pyplot figure
# -- confirm against the Maze implementation.
maze.get_image()
plt.show()

In [10]:
# Print the raw grid stored in maze.maze
# (the recorded output is a nested list of 0/1 values).
print(maze.maze)

[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1], [1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1], [1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1], [1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1], [1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1], [1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1], [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1], [1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1], [1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1], [1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1], [1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1]

## Domain definition
Define your state space (agent positions) & action space (agent movements).

In [11]:
# Agent position in the maze: an immutable pair of integer fields `x` and `y`.
State = NamedTuple("State", [("x", int), ("y", int)])


class Action(Enum):
    """The four moves available to the agent, numbered 0 to 3 in declaration order."""

    up, down, left, right = range(4)

Define your domain type from a base template (DeterministicPlanningDomain here) with optional refinements (UnrestrictedActions & Renderable here).



In [12]:
class D(DeterministicPlanningDomain, UnrestrictedActions, Renderable):
    """Domain type for the maze problem.

    Combines the DeterministicPlanningDomain template with the
    UnrestrictedActions and Renderable refinements, and fixes the concrete
    types used for states, observations, events, values, predicates and info.
    """

    T_state = State  # Type of states
    T_observation = T_state  # Type of observations
    T_event = Action  # Type of events
    T_value = float  # Type of transition values (rewards or costs)
    T_predicate = bool  # Type of logical checks
    T_info = None  # Type of additional information in environment outcome

Implement the maze domain by:
* filling all non-implemented methods
* adding a constructor to define the maze & start/end positions.

Also define (to help the solvers that need them):
* a heuristic for search algorithms
* state features for width-based algorithms

In [13]:
class MazeDomain(D):
    """Deterministic maze-navigation domain.

    States are `State(x, y)` positions and events are `Action` moves. Every
    transition has a cost, and an episode ends only when the goal is reached.
    """

    def __init__(self, start, end, maze):
        """Store the problem definition.

        Args:
            start: initial state, returned by `_get_initial_state_`.
            end: goal state, the single element of the goal space.
            maze: maze object; this class uses its `is_an_empty_cell`,
                `get_image`, `height` and `width` members.
        """
        self.start = start
        self.end = end
        self.maze = maze
        # Rendering into an image widget is currently disabled:
        # self.image_widget = image_widget

    def _get_next_state(self, memory: D.T_state, action: D.T_event) -> D.T_state:
        """Return the state reached by applying `action` in state `memory`.

        `up`/`down` decrement/increment `x`; `left`/`right` decrement/increment
        `y`. When the move is rejected, the agent stays in `memory`.
        """
        next_x, next_y = memory.x, memory.y
        # The four actions are mutually exclusive, hence a single elif chain.
        if action == Action.up:
            next_x -= 1
        elif action == Action.down:
            next_x += 1
        elif action == Action.left:
            next_y -= 1
        elif action == Action.right:
            next_y += 1
        # NOTE(review): the move is accepted when is_an_empty_cell(...) compares
        # equal to 0. If that helper returns True for free cells, this test is
        # inverted -- confirm against the Maze implementation.
        return State(next_x, next_y) if self.maze.is_an_empty_cell(next_x, next_y) == 0 else memory

    def _get_transition_value(self, memory: D.T_state, action: D.T_event, next_state: Optional[D.T_state] = None) -> \
            Value[D.T_value]:
        """Return the cost of a transition.

        Cost is 1 when the agent actually moved (energy cost) and 2 when it
        stayed in place after bumping into a wall (damage cost).
        """
        return Value(cost=1 if next_state != memory else 2)

    def _get_initial_state_(self) -> D.T_state:
        """Return the start position as the initial state."""
        return self.start

    def _get_goals_(self) -> Space[D.T_observation]:
        """Return the goal space, which contains only the end position."""
        return ListSpace([self.end])

    def _is_terminal(self, state: D.T_state) -> D.T_agent[D.T_predicate]:
        """Return whether `state` is terminal, i.e. whether it is a goal."""
        return self._is_goal(state)

    def _get_action_space_(self) -> Space[D.T_event]:
        """Return the action space built from the `Action` enum."""
        return EnumSpace(Action)

    def _get_observation_space_(self) -> Space[D.T_observation]:
        """Return the observation space sized from this domain's maze."""
        # Read the dimensions from the maze owned by this instance rather than
        # from a notebook-level global, so the domain works with any maze.
        num_rows = self.maze.height
        num_cols = self.maze.width
        return MultiDiscreteSpace([num_rows, num_cols])

    def _render_from(self, memory: D.T_state, **kwargs: Any) -> Any:
        """Render the maze for the agent position given by `memory`.

        The figure is saved as PNG into an in-memory buffer. Because the widget
        assignment is disabled, the PNG bytes are discarded and None is returned.
        """
        image = self.maze.get_image(memory.x, memory.y)
        with BytesIO() as f:
            image.figure.savefig(f, format="png")
            # self.image_widget.value = f.getvalue()

    def heuristic(self, s: D.T_state) -> Value:
        """Return the straight-line (Euclidean) distance from `s` to the goal as a cost."""
        return Value(cost=sqrt((self.end.x - s.x)**2 + (self.end.y - s.y)**2))

    def state_features(self, s: D.T_state) -> List[float]:
        """Return the state's coordinates `[x, y]` as its feature vector."""
        return [s.x, s.y]

Now that the domain is defined, let's look at the solvers that can solve it. *scikit-decide* provides a method to list them.

In [14]:
# Domain instance going from State(1, 1) to State(23, 23); it is reused for
# the Renderable check and the rollout further down.
default_domain = MazeDomain(start=State(1,1), end=State(x=23, y=23), maze=maze)
# Collect the solver classes that match_solvers reports for this domain.
compatible_solver_classes = match_solvers(default_domain)

[Discrete(4)]
[MultiDiscrete([25 25])]


In [15]:
# Display the list of matched solver classes.
compatible_solver_classes

[skdecide.hub.solver.aostar.aostar.AOstar,
 skdecide.hub.solver.astar.astar.Astar,
 skdecide.hub.solver.ars.ars.AugmentedRandomSearch,
 skdecide.hub.solver.bfws.bfws.BFWS,
 skdecide.hub.solver.ilaostar.ilaostar.ILAOstar,
 skdecide.hub.solver.iw.iw.IW,
 skdecide.hub.solver.lrtastar.lrtastar.LRTAstar,
 skdecide.hub.solver.lrtdp.lrtdp.LRTDP,
 skdecide.hub.solver.lazy_astar.lazy_astar.LazyAstar,
 skdecide.hub.solver.mahd.mahd.MAHD,
 skdecide.hub.solver.martdp.martdp.MARTDP,
 skdecide.hub.solver.mcts.mcts.MCTS,
 skdecide.hub.solver.maxent_irl.maxent_irl.MaxentIRL,
 skdecide.hub.solver.pomcp.pomcp.POMCP,
 skdecide.hub.solver.riw.riw.RIW,
 skdecide.hub.solver.ray_rllib.ray_rllib.RayRLlib,
 skdecide.hub.solver.simple_greedy.simple_greedy.SimpleGreedy,
 skdecide.hub.solver.stable_baselines.stable_baselines.StableBaseline,
 skdecide.hub.solver.mcts.mcts.UCT]

Let's try to use a first solver named A\*. A\* (pronounced "A-star") is a graph traversal and path search algorithm, which is often used in many fields of computer science due to its completeness, optimality, and optimal efficiency. 

One major practical drawback is its $O(b^d)$ space complexity, as it
stores all generated nodes in memory.

In [16]:
from skdecide.hub.solver.astar.astar import Astar

❗ We need to explain what a domain factory and a heuristic are [we cannot assume users already know them]

In [17]:
# Domain factory: a zero-argument callable that builds a new MazeDomain
# (same start, goal and maze) each time it is called. It is passed to
# solve_with below.
domain_factory = lambda: MazeDomain(start=State(1,1), end=State(x=23, y=23), maze=maze)

In [18]:
# A* solver created with its default arguments; note that
# MazeDomain.heuristic is not passed to it here.
solver = Astar()

In [19]:
# Hand the solver and the domain factory to solve_with
# (the recorded output shows the call returning the Astar instance).
#solver._initialize()
MazeDomain.solve_with(solver, domain_factory)

<skdecide.hub.solver.astar.astar.Astar at 0x7fe1c9c22250>

In [20]:
# Check that the domain instance implements the Renderable refinement.
has_render = isinstance(default_domain, Renderable)
print(has_render)

True


In [21]:
# Roll out `solver` on `default_domain` for at most 1000 steps.
# NOTE(review): the output recorded for this cell is an IndexError, so the
# call did not complete in the saved run -- investigate before publishing.
rollout(default_domain, solver, max_steps=1000, max_framerate=80, verbose=False)

IndexError: ignored

# GYM ENV

In [5]:
import sys

# Install scikit-decide (with all extras) only when it cannot be imported yet.
try:
  from skdecide import DeterministicPlanningDomain
except ImportError:
  !pip install scikit-decide\[all\]

# On Google Colab only: refresh the apt index, install the xvfb and x11-utils
# system packages, then install the pinned gym / pyvirtualdisplay / PyOpenGL
# Python packages.
if 'google.colab'in sys.modules:
  !sudo apt-get update
  !apt-get install -y xvfb x11-utils 
  !pip install gym==0.17.* pyvirtualdisplay==0.2.* PyOpenGL==3.1.* PyOpenGL-accelerate==3.1.*

0% [Working]            Get:1 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]
0% [Connecting to archive.ubuntu.com (91.189.88.152)] [1 InRelease 14.2 kB/88.7                                                                               Ign:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  InRelease
0% [Connecting to archive.ubuntu.com (91.189.88.152)] [1 InRelease 43.1 kB/88.7                                                                               Get:3 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease [3,626 B]
Ign:4 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  InRelease
Get:5 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  Release [696 B]
Hit:6 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  Release
Get:7 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  Release.gpg

In [15]:
# List the apt packages whose name contains "glut" (diagnostic only).
!apt list | grep glut



freeglut3/bionic,now 2.8.1-3 amd64 [installed,automatic]
freeglut3-dev/bionic,now 2.8.1-3 amd64 [installed,automatic]
libghc-glut-dev/bionic 2.7.0.12-1build5 amd64
libghc-glut-doc/bionic 2.7.0.12-1build5 all
libghc-glut-prof/bionic 2.7.0.12-1build5 amd64
libhugs-glut-bundled/bionic 98.200609.21-5.4build1 amd64
libkwinglutils11/bionic-updates 4:5.12.9.1-0ubuntu0.1 amd64
libmgl-glut7.5.0/bionic 2.4.1-2build2 amd64
libtaoframework-freeglut-cil-dev/bionic 2.1.svn20090801-14 all
libtaoframework-freeglut2.4-cil/bionic 2.1.svn20090801-14 all
mgltools-mglutil/bionic 1.5.7-3 all


In [6]:
# Recreate an empty ./video directory for the recordings.
!rm -rf video
!mkdir video

In [8]:
import glob
import io
from base64 import b64encode

from IPython.display import HTML
from IPython import display as ipythondisplay

from pyvirtualdisplay import Display

from gym import make
from gym.wrappers import Monitor

from skdecide.hub.solver.stable_baselines import StableBaseline
from skdecide.hub.domain.gym import GymDomain
from skdecide.utils import rollout, match_solvers, load_registered_solver

from stable_baselines3 import PPO

# Create a Display
# (pyvirtualdisplay Display with visible=0 and size 1368x768.)
# NOTE(review): the display is started here but never stopped in the cells shown.
display = Display(visible=0, size=(1368, 768))
display.start()

# Create a Domain factory made of a GYM environment which is itself embedded in
# a monitoring wrapper
# (Monitor is given the "video" directory with force=True.)
domain_factory = lambda: GymDomain(Monitor(make('MountainCarContinuous-v0'), "video", force=True))

# Create a Solver factory
# (StableBaseline wrapping PPO with 'MlpPolicy' and 50000 total timesteps.)
solver_factory = lambda: StableBaseline(PPO, 'MlpPolicy', learn_config={'total_timesteps': 50000})

with solver_factory() as solver:
    # The solver is launched for calculating a policy
    GymDomain.solve_with(solver, domain_factory)
    # create a domain wrapped in a monitor for recording during rollout
    domain = domain_factory()
    # The rollout function will execute the found policy and render iterations
    try:
        rollout(domain, solver, num_episodes=1, max_steps=1000, max_framerate=None, outcome_formatter=None)
    finally:
        # Always close the domain, even when the rollout raises.
        domain.close()

2021-09-13 16:31:45,797 | skdecide.utils | DEBUG | Logger is in verbose mode: all debug messages will be there for you to enjoy （〜^∇^ )〜
2021-09-13 16:31:46,093 | skdecide.utils | DEBUG | Episode 1 started with following observation:
2021-09-13 16:31:46,095 | skdecide.utils | DEBUG | [-0.40964569  0.        ]
2021-09-13 16:31:46,107 | skdecide.utils | DEBUG | Action: [0.24960338]
2021-09-13 16:31:46,128 | skdecide.utils | DEBUG | Action: [0.22594573]
2021-09-13 16:31:46,144 | skdecide.utils | DEBUG | Action: [-0.32444334]
2021-09-13 16:31:46,160 | skdecide.utils | DEBUG | Action: [0.04729161]
2021-09-13 16:31:46,175 | skdecide.utils | DEBUG | Action: [0.28719]
2021-09-13 16:31:46,191 | skdecide.utils | DEBUG | Action: [0.2731321]
2021-09-13 16:31:46,205 | skdecide.utils | DEBUG | Action: [0.41581008]
2021-09-13 16:31:46,221 | skdecide.utils | DEBUG | Action: [-0.19412215]
2021-09-13 16:31:46,237 | skdecide.utils | DEBUG | Action: [0.13552327]
2021-09-13 16:31:46,252 | skdecide.utils | 

In [9]:
# Locate the first-episode recording written into the "video" directory.
video_paths = glob.glob("video/openaigym.video.*.video000000.mp4")
if not video_paths:
  # Fail with an explicit message instead of a bare IndexError on [0].
  raise FileNotFoundError("No recorded video found in 'video/'; run the rollout cell first.")
videofilename = video_paths[0]

# Inline the MP4 as a base64 data URL so the notebook needs no file server.
with open(videofilename, 'rb') as mp4:
  data_url = "data:video/mp4;base64," + b64encode(mp4.read()).decode()

# The original markup had a stray quote (preload'), which made the <video> tag
# malformed; preload now carries a proper quoted value.
ipythondisplay.display(HTML(f"<video alt='solution movie' controls autoplay preload='auto'><source src='{data_url}' type='video/mp4'></video>"))