In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# GYM ENV

In [None]:
%%timeit -r 1 -n 1
# Environment bootstrap cell. The %%timeit flags (-r 1 -n 1) make the body run
# exactly once while still reporting how long the setup took.
import sys

try:
  # Probe import: used here only to detect whether scikit-decide is available.
  from skdecide import DeterministicPlanningDomain
except ImportError:
  # The brackets are backslash-escaped so the shell passes "[all]" through
  # literally instead of interpreting it as a glob pattern.
  !pip install scikit-decide\[all\]

# When running on Google Colab, additionally install the X virtual framebuffer
# (xvfb) with its utilities, and pinned versions of the Python packages listed
# on the pip line below.
if 'google.colab'in sys.modules:
  !sudo apt-get update
  !apt-get install -y xvfb x11-utils 
  !pip install gym==0.17.* pyvirtualdisplay==0.2.* PyOpenGL==3.1.* PyOpenGL-accelerate==3.1.*

In [None]:
# Start from an empty "video" directory: remove any previous one (and its
# contents), then recreate it.
!rm -rf video
!mkdir video

## Proximal Policy Optimization solver
This solver is part of [Stable Baselines3](https://stable-baselines3.readthedocs.io/en/master/). It alternates between sampling data through interaction with the environment, and optimizing a "surrogate" objective function using stochastic gradient ascent.

In [None]:
import glob
import io
from base64 import b64encode

from IPython.display import HTML
from IPython import display as ipythondisplay

from pyvirtualdisplay import Display

from gym import make
from gym.wrappers import Monitor

from skdecide.hub.solver.stable_baselines import StableBaseline
from skdecide.hub.domain.gym import GymDomain
from skdecide.utils import rollout, match_solvers, load_registered_solver

from stable_baselines3 import PPO

# Start a virtual display (visible=0) sized 1368x768.
# NOTE(review): the display is started here but not stopped in this cell.
display = Display(visible=0, size=(1368, 768))
display.start()


def domain_factory():
    """Return a fresh GymDomain wrapping a monitored MountainCarContinuous-v0 env.

    The Gym environment is embedded in a Monitor wrapper that targets the
    "video" directory.
    """
    monitored_env = Monitor(make('MountainCarContinuous-v0'), "video", force=True)
    return GymDomain(monitored_env)


def solver_factory():
    """Return a StableBaseline solver running PPO with an 'MlpPolicy' policy."""
    return StableBaseline(PPO, 'MlpPolicy', learn_config={'total_timesteps': 50000})


with solver_factory() as solver:
    # Compute a policy; the solver builds its own domain from the factory.
    GymDomain.solve_with(solver, domain_factory)
    # Separate monitored domain instance used for the recorded rollout.
    domain = domain_factory()
    try:
        # Execute the computed policy for a single episode.
        rollout(
            domain,
            solver,
            num_episodes=1,
            max_steps=1000,
            max_framerate=None,
            outcome_formatter=None,
        )
    finally:
        # Always close the rollout domain, even if the rollout fails.
        domain.close()

## Visualize the result

In [None]:
# Locate the recording of the first episode written into the "video" directory.
video_paths = glob.glob("video/openaigym.video.*.video000000.mp4")
if not video_paths:
    # Fail with an explicit message instead of a bare IndexError on [0].
    raise FileNotFoundError(
        "No recorded episode matching 'video/openaigym.video.*.video000000.mp4' "
        "was found; run the solver/rollout cell first."
    )
videofilename = video_paths[0]

# Embed the MP4 as a base64 data URL so the player works without serving files.
with open(videofilename, 'rb') as mp4:
    data_url = "data:video/mp4;base64," + b64encode(mp4.read()).decode()

# The original markup had a stray apostrophe ("preload'"), which yields a
# malformed attribute; use a well-formed preload attribute instead.
ipythondisplay.display(HTML(f"<video alt='solution movie' controls autoplay preload='auto'><source src='{data_url}' type='video/mp4'></video>"))