# Creating a Custom Gym Environment

This notebook is a very basic test framework for the custom environments created in the 
Towards Data Science article:

_[<b>Creating a Custom Gym Environment for Jupyter Notebooks</b> - <i>Part 1: Creating the framework</i>
](https://towardsdatascience.com/creating-a-custom-gym-environment-for-jupyter-notebooks-e17024474617)_


In [7]:
# Install (or upgrade to) the latest published babyrobot package, with pip's output quietened (-q).
# comment out the line below if running locally with source from github
# NOTE(review): the install is unpinned; the recorded output of this cell shows pip
# reporting a gym version conflict with stable-baselines3 - consider pinning versions.
%pip install --upgrade babyrobot -q

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
stable-baselines3 1.6.2 requires gym==0.21, but you have gym 0.25.2 which is incompatible.[0m
Note: you may need to restart the kernel to use updated packages.


In [8]:
import gym
import babyrobot
from babyrobot.envs.lib import Actions

## Select the environment version:

In [9]:
# Name of the Baby Robot environment version to test.
# To try a different version, replace the string below with one of:
#   "BabyRobotEnv-v0"
#   "BabyRobotEnv-v1"
env_name = "BabyRobotEnv-v2"

In [10]:
# setup the graphical display for environments > v5
setup = {
    'grid': {'theme': 'black_orange'},
    'side_panel': {'width': 200},
}

# create the specified environment, passing the display options as keyword arguments
env = babyrobot.make(env_name, **setup)

# get the version from the current environment: the environment id ends in
# '-<version>' (e.g. 'BabyRobotEnv-v2'), so take the text after the LAST hyphen.
# Splitting from the right keeps this correct even if the name part of the id
# itself contains a hyphen.
version = env.unwrapped.spec.id.rsplit('-', 1)[-1]
print(f"Baby Robot Version = {version}")


Baby Robot Version = v2


In [5]:
# initialize the environment and show its initial state
# (reset() is called before render() so that the rendered state is the freshly reset one)
env.reset()
env.render()

Stay : (0,0) reward = 0


In [6]:
# Run a single episode, choosing a random action at every step, and show the
# result of each step in the way appropriate to the selected environment version.
env.reset()

# Numeric part of the version tag (e.g. 'v2' -> 2). The versions must be compared
# as numbers: comparing the strings is lexicographic, so 'v10' <= 'v4' would be True.
version_number = int(version.lstrip('v'))

done = False
while not done:

  # choose a random action
  action = env.action_space.sample()

  # take the action and get the information from the environment
  new_state, reward, terminated, truncated, info = env.step(action)

  # the episode is over when the environment reports either termination or
  # truncation; ignoring 'truncated' would leave the loop running forever
  # once an episode has been cut short
  done = terminated or truncated

  # show the current position and reward
  if version_number <= 1:
    # v0 and v1: just print the raw values returned by the step
    print(new_state, reward, terminated)

  elif version_number <= 4:
    # v2 to v4: render is given the action taken and the reward received
    env.render(action=action, reward=reward)

  else:
    # v5 onwards: supply an information string
    info_str = f"{Actions(action): <5}: ({new_state}) reward = {reward}"
    env.render(info = {'side_info': [((10,10),info_str)]})
    print(info_str)

South: (0,1) reward = -1
North: (0,0) reward = -1
Stay : (0,0) reward = -1
East : (1,0) reward = -1
South: (1,1) reward = -1
East : (2,1) reward = -1
West : (1,1) reward = -1
Stay : (1,1) reward = -1
West : (0,1) reward = -1
Stay : (0,1) reward = -1
North: (0,0) reward = -1
North: (0,0) reward = -1
South: (0,1) reward = -1
East : (1,1) reward = -1
East : (2,1) reward = -1
West : (1,1) reward = -1
North: (1,0) reward = -1
West : (0,0) reward = -1
South: (0,1) reward = -1
East : (1,1) reward = -1
West : (0,1) reward = -1
Stay : (0,1) reward = -1
North: (0,0) reward = -1
West : (0,0) reward = -1
Stay : (0,0) reward = -1
East : (1,0) reward = -1
North: (1,0) reward = -1
South: (1,1) reward = -1
North: (1,0) reward = -1
Stay : (1,0) reward = -1
Stay : (1,0) reward = -1
East : (2,0) reward = -1
West : (1,0) reward = -1
Stay : (1,0) reward = -1
North: (1,0) reward = -1
East : (2,0) reward = -1
West : (1,0) reward = -1
South: (1,1) reward = -1
West : (0,1) reward = -1
South: (0,2) reward = -1
