# Creating a Custom Gym Environment

This notebook is a very basic test framework for the custom environments created in the 
Towards Data Science article:

_[<b>Creating a Custom Gym Environment for Jupyter Notebooks</b> - <i>Part 1: Creating the framework</i>
](https://towardsdatascience.com/creating-a-custom-gym-environment-for-jupyter-notebooks-e17024474617)_


In [None]:
# install the notebook's dependencies quietly: ipycanvas (pinned to 0.11) and stable-baselines3
# NOTE(review): -q is given twice on the ipycanvas line; pip treats -q as additive,
# so that line is quieter than the one below - confirm this is intended
!pip install -q ipycanvas==0.11 -q
!pip install stable-baselines3 -q

In [None]:
# install (or upgrade) the babyrobot package quietly, using the TestPyPI index
# comment out the line below if running locally with source from github
!pip install --upgrade -i https://test.pypi.org/simple/ babyrobot -q

In [6]:
import gym
from stable_baselines3.common.env_checker import check_env

import babyrobot
from babyrobot.envs.lib import Actions

## Select the environment version:

In [None]:
# Id of the environment to build. To try a different version, swap the
# string for another id, e.g. "BabyRobotEnv-v0" or "BabyRobotEnv-v1".
env_name = "BabyRobotEnv-v2"

In [7]:
# display options for the graphical environments (versions > v5)
# NOTE(review): the render loop treats every version above v4 as graphical,
# so this may really mean >= v5 - confirm
setup = {
    'grid': {'theme': 'black_orange'},
    'side_panel': {'width':200},
}

# build the environment selected by env_name, forwarding the display options
env = gym.make(env_name, **setup)

# ids used here look like "BabyRobotEnv-v2": keep the part after the dash
version = env.unwrapped.spec.id.split('-')[1]
print(f"Baby Robot Version = {version}")


Baby Robot Version = v6


In [8]:
# use StableBaselines (check_env from stable_baselines3.common.env_checker)
# to check the environment created above
# - returns nothing if the environment is verified as ok
check_env(env)

In [9]:
# initialize the environment and show its initial state
env.reset()
# render() is the last expression in the cell, so whatever it returns is
# displayed as the cell output (a MultiCanvas in the saved run)
env.render()

MultiCanvas(height=198, sync_image_data=True, width=398)

In [10]:
# Run one episode with randomly sampled actions, reporting every step in the
# way the selected environment version supports.

# Compare versions by number, not by string: as strings 'v10' <= 'v4' is True,
# so any version from v10 upwards would be sent down the wrong render branch.
# assumes `version` looks like 'v<number>' (e.g. 'v2') - TODO confirm for new ids
version_number = int(version[1:])

env.reset()
done = False
while not done:

  # choose a random action
  action = env.action_space.sample()

  # take the action and get the information from the environment
  new_state, reward, done, info = env.step(action)

  if version_number <= 1:
    # v0 and v1: just print the position, reward and done flag
    print(new_state, reward, done)

  elif version_number <= 4:
    # v2 to v4: hand the action and reward to render
    env.render(action=action, reward=reward)

  else:
    # later versions: supply an information string via 'side_info'
    info_str = f"{Actions(action): <5}: ({new_state}) reward = {reward}"
    env.render(info = {'side_info': [((10,10),info_str)]})
    print(info_str)

East : ([1 0]) reward = -1
North: ([1 0]) reward = -1
North: ([1 0]) reward = -1
Stay : ([1 0]) reward = -1
West : ([0 0]) reward = -1
Stay : ([0 0]) reward = -1
East : ([1 0]) reward = -1
East : ([2 0]) reward = -1
South: ([2 1]) reward = -1
South: ([2 2]) reward = -1
