I'm getting a value error when training `Knapsack-v1` that's preventing the model from training. This appears to be due to the observation state raising an error when passed to the `Preprocessor`. The first two cells below will recreate the error and traceback.

In [1]:
import gym
import numpy as np
import or_gym
from or_gym.algos import rl_utils
from datetime import datetime
import os
import time
import ray
from ray.rllib.agents import ppo
from ray.rllib.models import preprocessors

In [2]:
# Start Ray; per the flag name, re-initialisation errors are ignored so the
# cell can be executed again in a live kernel.
ray.init(ignore_reinit_error=True)

env_name = 'Knapsack-v1'
# Build a PPO trainer on whatever rl_utils.create_env returns for env_name.
trainer = ppo.PPOTrainer(env=rl_utils.create_env(env_name), 
    config={
    # NOTE(review): the Ray log below suggests `reuse_actors` as a trainable-level
    # setting; confirm it is meant to be passed to the environment via "env_config".
    "env_config": {"reuse_actors":True},
    "vf_clip_param": 5000,
    "model": {
        "fcnet_activation": "elu",
        "fcnet_hiddens": [128, 128, 128]}
    })

# Run x number of episodes
N_EPS = 1000
# Per-iteration history: mean episode reward, episodes in that iteration, and
# the cumulative episode count reported by the trainer.
rewards = []
eps, eps_total = [], []
training = True
batch = 0
t_start = time.time()
while training:
    t_batch = time.time()
    results = trainer.train()
    rewards.append(results['episode_reward_mean'])
    eps.append(results['episodes_this_iter'])
    eps_total.append(results['episodes_total'])
    batch += 1
    t_end = time.time()
    # Stop once the episodes collected across all iterations reach N_EPS.
    if sum(eps) >= N_EPS:
        training = False
        break
    # Report progress every 10th iteration; "\r" with end="" rewrites the same line.
    if batch % 10 == 0:
        t = t_end - t_batch      # duration of the latest training iteration
        t_tot = t_end - t_start  # elapsed time since the loop started
        # NOTE(review): the "Episodes/sec" field is followed by an "s" unit in the
        # format string, which looks unintended.
        print("\rEpisode: {}\tMean Rewards: {:.1f}\tEpisodes/sec: {:.2f}s\tTotal Time: {:.1f}s".format(
            eps_total[-1], rewards[-1], eps[-1]/t, t_tot), end="")
        
print("Total Training Time: {:.1f}s\t".format(t_end - t_start))

2020-03-26 15:27:10,923	INFO resource_spec.py:216 -- Starting Ray with 2.98 GiB memory available for workers and up to 1.51 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-03-26 15:27:11,412	INFO trainer.py:371 -- Tip: set 'eager': true or the --eager flag to enable TensorFlow eager execution
2020-03-26 15:27:11,446	INFO trainer.py:512 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Environment	Knapsack-v1


2020-03-26 15:28:07,486	INFO trainable.py:102 -- _setup took 56.042 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Total Training Time: 34.0s	


The environment's state is a $3 \times 201$ NumPy array. The error message prints the offending state array. I've copied that exact array below and passed it through the preprocessor used by the model to try to reproduce the error.

You can find the [preprocessor code here](https://github.com/ray-project/ray/blob/c1b05b720ded1338d82f50f5c2df050930ccd350/rllib/models/preprocessors.py#L17).


I have attempted to recreate this error below by extracting the preprocessor and running 1,000 episodes with random actions through it, to see if I can isolate the observation that triggers the error.

In [5]:
# Get the preprocessor that the trainer's local worker holds for the
# 'default_policy' entry, so observations can be validated outside training.
preprocessor = trainer.workers.local_worker().preprocessors['default_policy']
sample_obs = np.array([[  4,  13,  15,   1,   5,   4,  14,  12,  14,  14,  12,  17,  15,
         17,   2,   9,   1,   5,   7,  14,   8,  16,  10,  19,   9,  16,
         12,   7,  16,   2,  13,   4,  19,  16,   4,  11,  13,   7,   4,
          6,  12,   1,  12,   9,  11,  12,   6,  16,   9,   3,  15,   4,
          4,   8,  10,  10,  10,   4,  15,  19,  13,   4,  10,  11,  14,
          8,   8,   2,  13,   3,   3,   2,   6,   9,   5,   1,  12,   3,
          6,  17,   9,   2,  18,  17,   4,   9,   5,  17,   4,   8,   4,
         19,  18,   2,  19,  18,   3,   6,   6,  13,  13,   3,  16,   8,
          8,  12,  18,   7,   8,  19,  12,  18,  10,  15,  10,  19,   1,
         10,  12,  18,  10,   1,  17,  11,   5,   4,   9,  15,   1,   9,
          8,  14,  18,   1,  12,   5,   1,  11,  15,  19,   5,   4,   8,
          9,  14,   6,   1,   9,  16,  16,  12,   5,   8,  14,  11,  15,
         18,   4,  10,  19,   3,  12,  19,  14,   2,   7,  11,  17,  17,
          4,  18,  11,  10,  13,  14,   7,   7,   8,   9,  14,   8,  17,
         16,   9,   4,   7,  18,   8,   3,  17,  19,  14,   5,   5,  13,
         18,   2,   6,   2,   8, 200],
       [ 12,  15,  21,   0,   3,  27,   3,   7,   9,  19,  21,  18,   4,
         23,   6,  24,  24,  12,  26,   1,   6,   7,  23,  14,  24,  17,
          5,  25,  13,   8,   9,  20,  19,  16,  19,   5,  15,  15,   0,
         18,   3,  24,  17,  19,  29,  19,  19,  14,   7,   0,   1,   9,
         25,   0,  10,  20,  23,   3,  11,  18,  23,  28,   2,   0,   0,
          4,  21,   5,   6,   8,  20,  17,  15,   4,   9,  10,  26,  24,
          1,   1,   7,   9,  25,   3,   6,  23,  11,  14,  18,  27,   0,
         14,   3,  21,  12,  25,  10,  20,  11,   4,   6,   4,  15,  20,
          3,  12,   4,  20,   8,  14,  15,  20,   3,  26,  23,  15,  13,
         21,  21,  16,  17,   5,   9,   3,   0,   5,   0,  17,  18,   4,
          2,  16,  29,   3,   2,  10,  13,  16,   7,  21,   9,   0,  10,
         18,  11,  26,  23,  27,   2,  25,   2,   3,  27,   3,  18,  14,
          3,  20,  17,  18,  27,  14,   9,  26,   1,   4,  10,  22,  11,
          8,  11,   2,  19,  16,   0,  22,   0,   6,  19,  14,  10,  19,
         24,  28,   8,  13,  24,  29,   2,   3,   2,  11,  13,  16,   8,
          8,  19,   8,  26,   2, 149],
       [  2,   7,   0,   4,   9,   5,   2,   5,   6,  -1,   4,   2,   5,
          5,   5,   1,   1,   9,   5,   6,   4,   4,   3,   2,   2,   2,
          4,   4,   2,   2,   1,   8,   9,   5,   4,   6,   7,   4,   3,
          9,   2,   5,   1,   9,   4,   6,   6,   2,   8,   9,   7,   5,
          8,   4,   6,   3,   7,   5,   8,   4,   1,   6,   4,   8,   6,
          5,   9,   1,   9,   4,   6,   4,   3,   8,   1,   4,   1,   4,
          7,   2,   3,   5,   2,   4,   3,   5,   8,   5,   5,   2,   3,
          8,   3,   4,   8,   7,   7,   3,   4,   7,   1,   8,   0,   8,
          7,   6,   7,   6,   3,   8,   2,   3,   3,   6,   7,   5,   3,
          3,   2,   1,   1,   3,   9,   4,   1,   9,   9,   2,   1,   6,
          9,   3,   4,   6,   4,   9,   7,   5,   7,   4,   7,   3,   7,
          6,   6,   5,   7,   5,   2,   4,   4,   9,   6,   6,   7,   1,
          8,   6,   2,   6,   7,   6,   9,   8,   6,   4,   3,   4,   3,
          6,   5,   2,   6,   9,   4,   6,   9,   5,   2,   8,   9,   2,
          2,   2,   2,   8,   6,   1,   5,   2,   2,   7,   7,   1,   2,
          4,   8,   3,   5,   1,   0]])

In [8]:
# Reset the preprocessor's call counter before validating the copied observation.
# NOTE(review): presumably check_shape only validates when `_i` is a multiple of
# a validation interval (see the linked preprocessor source) — confirm.
preprocessor._i = 0
preprocessor.check_shape(sample_obs)

ValueError: ('Observation outside expected value range', Box(3, 201), array([[  4,  13,  15,   1,   5,   4,  14,  12,  14,  14,  12,  17,  15,
         17,   2,   9,   1,   5,   7,  14,   8,  16,  10,  19,   9,  16,
         12,   7,  16,   2,  13,   4,  19,  16,   4,  11,  13,   7,   4,
          6,  12,   1,  12,   9,  11,  12,   6,  16,   9,   3,  15,   4,
          4,   8,  10,  10,  10,   4,  15,  19,  13,   4,  10,  11,  14,
          8,   8,   2,  13,   3,   3,   2,   6,   9,   5,   1,  12,   3,
          6,  17,   9,   2,  18,  17,   4,   9,   5,  17,   4,   8,   4,
         19,  18,   2,  19,  18,   3,   6,   6,  13,  13,   3,  16,   8,
          8,  12,  18,   7,   8,  19,  12,  18,  10,  15,  10,  19,   1,
         10,  12,  18,  10,   1,  17,  11,   5,   4,   9,  15,   1,   9,
          8,  14,  18,   1,  12,   5,   1,  11,  15,  19,   5,   4,   8,
          9,  14,   6,   1,   9,  16,  16,  12,   5,   8,  14,  11,  15,
         18,   4,  10,  19,   3,  12,  19,  14,   2,   7,  11,  17,  17,
          4,  18,  11,  10,  13,  14,   7,   7,   8,   9,  14,   8,  17,
         16,   9,   4,   7,  18,   8,   3,  17,  19,  14,   5,   5,  13,
         18,   2,   6,   2,   8, 200],
       [ 12,  15,  21,   0,   3,  27,   3,   7,   9,  19,  21,  18,   4,
         23,   6,  24,  24,  12,  26,   1,   6,   7,  23,  14,  24,  17,
          5,  25,  13,   8,   9,  20,  19,  16,  19,   5,  15,  15,   0,
         18,   3,  24,  17,  19,  29,  19,  19,  14,   7,   0,   1,   9,
         25,   0,  10,  20,  23,   3,  11,  18,  23,  28,   2,   0,   0,
          4,  21,   5,   6,   8,  20,  17,  15,   4,   9,  10,  26,  24,
          1,   1,   7,   9,  25,   3,   6,  23,  11,  14,  18,  27,   0,
         14,   3,  21,  12,  25,  10,  20,  11,   4,   6,   4,  15,  20,
          3,  12,   4,  20,   8,  14,  15,  20,   3,  26,  23,  15,  13,
         21,  21,  16,  17,   5,   9,   3,   0,   5,   0,  17,  18,   4,
          2,  16,  29,   3,   2,  10,  13,  16,   7,  21,   9,   0,  10,
         18,  11,  26,  23,  27,   2,  25,   2,   3,  27,   3,  18,  14,
          3,  20,  17,  18,  27,  14,   9,  26,   1,   4,  10,  22,  11,
          8,  11,   2,  19,  16,   0,  22,   0,   6,  19,  14,  10,  19,
         24,  28,   8,  13,  24,  29,   2,   3,   2,  11,  13,  16,   8,
          8,  19,   8,  26,   2, 149],
       [  2,   7,   0,   4,   9,   5,   2,   5,   6,  -1,   4,   2,   5,
          5,   5,   1,   1,   9,   5,   6,   4,   4,   3,   2,   2,   2,
          4,   4,   2,   2,   1,   8,   9,   5,   4,   6,   7,   4,   3,
          9,   2,   5,   1,   9,   4,   6,   6,   2,   8,   9,   7,   5,
          8,   4,   6,   3,   7,   5,   8,   4,   1,   6,   4,   8,   6,
          5,   9,   1,   9,   4,   6,   4,   3,   8,   1,   4,   1,   4,
          7,   2,   3,   5,   2,   4,   3,   5,   8,   5,   5,   2,   3,
          8,   3,   4,   8,   7,   7,   3,   4,   7,   1,   8,   0,   8,
          7,   6,   7,   6,   3,   8,   2,   3,   3,   6,   7,   5,   3,
          3,   2,   1,   1,   3,   9,   4,   1,   9,   9,   2,   1,   6,
          9,   3,   4,   6,   4,   9,   7,   5,   7,   4,   7,   3,   7,
          6,   6,   5,   7,   5,   2,   4,   4,   9,   6,   6,   7,   1,
          8,   6,   2,   6,   7,   6,   9,   8,   6,   4,   3,   4,   3,
          6,   5,   2,   6,   9,   4,   6,   9,   5,   2,   8,   9,   2,
          2,   2,   2,   8,   6,   1,   5,   2,   2,   7,   7,   1,   2,
          4,   8,   3,   5,   1,   0]]))

In [10]:
# Cross-check the same observation against a freshly created environment's own
# observation space, independently of the trainer's preprocessor.
env = gym.make(env_name)
env.observation_space.contains(sample_obs)

False

In [44]:
preprocessor._obs_space

Box(3, 201)

In [45]:
def _check_shape(preprocessor, state):
#     if y._i % VALIDATION_INTERVAL == 0:
    if type(state) is list and isinstance(y._obs_space, gym.spaces.Box):
        state = np.array(state)
    try:
        if not preprocessor._obs_space.contains(state):
            raise ValueError(
            "Observation outside expected value range",
            preprocessor._obs_space, state)
    except AttributeError:
        raise ValueError("message")
    preprocessor._i += 1

In [46]:
# Validate the environment's current `state` attribute with the helper above,
# which checks on every call.
_check_shape(preprocessor, env.state)

In [48]:
# Roll out 1,000 episodes with random actions and pass every stepped
# observation through the trainer's preprocessor check.
env = gym.make(env_name)
for i in range(1000):
    done = False
    state = env.reset()
    while not done:
        action = env.sample_action()
        state, reward, done, _ = env.step(action)
        # check_shape appears to validate only when its call counter `_i` is a
        # multiple of a validation interval (see the linked preprocessor
        # source); resetting the counter forces a check on every observation
        # instead of a sparse sample, so an offending state cannot slip by.
        preprocessor._i = 0
        preprocessor.check_shape(state)

In [18]:
x = np.array([[  4,  13,  15,   1,   5,   4,  14,  12,  14,  14,  12,  17,  15,
         17,   2,   9,   1,   5,   7,  14,   8,  16,  10,  19,   9,  16,
         12,   7,  16,   2,  13,   4,  19,  16,   4,  11,  13,   7,   4,
          6,  12,   1,  12,   9,  11,  12,   6,  16,   9,   3,  15,   4,
          4,   8,  10,  10,  10,   4,  15,  19,  13,   4,  10,  11,  14,
          8,   8,   2,  13,   3,   3,   2,   6,   9,   5,   1,  12,   3,
          6,  17,   9,   2,  18,  17,   4,   9,   5,  17,   4,   8,   4,
         19,  18,   2,  19,  18,   3,   6,   6,  13,  13,   3,  16,   8,
          8,  12,  18,   7,   8,  19,  12,  18,  10,  15,  10,  19,   1,
         10,  12,  18,  10,   1,  17,  11,   5,   4,   9,  15,   1,   9,
          8,  14,  18,   1,  12,   5,   1,  11,  15,  19,   5,   4,   8,
          9,  14,   6,   1,   9,  16,  16,  12,   5,   8,  14,  11,  15,
         18,   4,  10,  19,   3,  12,  19,  14,   2,   7,  11,  17,  17,
          4,  18,  11,  10,  13,  14,   7,   7,   8,   9,  14,   8,  17,
         16,   9,   4,   7,  18,   8,   3,  17,  19,  14,   5,   5,  13,
         18,   2,   6,   2,   8, 200],
       [ 12,  15,  21,   0,   3,  27,   3,   7,   9,  19,  21,  18,   4,
         23,   6,  24,  24,  12,  26,   1,   6,   7,  23,  14,  24,  17,
          5,  25,  13,   8,   9,  20,  19,  16,  19,   5,  15,  15,   0,
         18,   3,  24,  17,  19,  29,  19,  19,  14,   7,   0,   1,   9,
         25,   0,  10,  20,  23,   3,  11,  18,  23,  28,   2,   0,   0,
          4,  21,   5,   6,   8,  20,  17,  15,   4,   9,  10,  26,  24,
          1,   1,   7,   9,  25,   3,   6,  23,  11,  14,  18,  27,   0,
         14,   3,  21,  12,  25,  10,  20,  11,   4,   6,   4,  15,  20,
          3,  12,   4,  20,   8,  14,  15,  20,   3,  26,  23,  15,  13,
         21,  21,  16,  17,   5,   9,   3,   0,   5,   0,  17,  18,   4,
          2,  16,  29,   3,   2,  10,  13,  16,   7,  21,   9,   0,  10,
         18,  11,  26,  23,  27,   2,  25,   2,   3,  27,   3,  18,  14,
          3,  20,  17,  18,  27,  14,   9,  26,   1,   4,  10,  22,  11,
          8,  11,   2,  19,  16,   0,  22,   0,   6,  19,  14,  10,  19,
         24,  28,   8,  13,  24,  29,   2,   3,   2,  11,  13,  16,   8,
          8,  19,   8,  26,   2,   8],
       [  2,   7,   1,   4,   9,   5,   2,   5,   6,   1,   4,   2,   5,
          5,   5,   1,   1,   9,   5,   7,   4,   4,   3,   2,   3,   2,
          4,   5,   2,   2,   1,   8,   9,   5,   4,   6,   7,   4,   3,
          9,   2,   5,   1,   9,   4,   6,   6,   2,   8,   9,   7,   5,
          8,   4,   6,   4,   7,   5,   8,   4,   1,   6,   4,   8,   6,
          6,   9,   1,   9,   4,   7,  -1,   3,   8,   1,   4,   1,   4,
          7,   2,   3,   5,   2,   4,   3,   5,   8,   5,   5,   2,   3,
          8,   3,   4,   8,   7,   7,   3,   4,   7,   1,   9,   1,   8,
          7,   6,   7,   6,   3,   8,   2,   3,   3,   6,   7,   5,   3,
          3,   2,   1,   1,   3,   9,   4,   1,   9,   9,   2,   1,   6,
          9,   3,   4,   6,   4,   9,   7,   5,   7,   4,   7,   3,   7,
          6,   6,   5,   7,   6,   2,   4,   4,   9,   6,   6,   7,   1,
          8,   6,   2,   6,   7,   7,   9,   8,   6,   4,   3,   4,   3,
          6,   5,   2,   6,   9,   4,   6,   9,   5,   2,   8,   9,   2,
          3,   2,   2,   8,   6,   1,   5,   2,   2,   7,   7,   1,   3,
          4,   8,   3,   5,   1,   0]])
x.shape

(3, 201)

In [20]:
env.observation_space

(3, 201)

In [10]:
env = gym.make(env_name)

In [11]:
env.observation_space

Box(3, 201)

In [12]:
x.max()

200

In [9]:
def bkp_heuristic(env):
    """Greedy value/weight-ratio heuristic for the bounded knapsack environment.

    Resets ``env``, ranks items by value-to-weight ratio (best first) and keeps
    selecting the best-ranked item that still has stock and fits the remaining
    capacity. Stops when the environment reports ``done`` or when no candidate
    item is left.

    Args:
        env: Environment whose ``spec.id`` is ``'Knapsack-v1'`` and which
            exposes ``item_values``, ``item_weights``, ``item_numbers``,
            ``item_limits``, ``max_weight``, ``current_weight``, ``step`` and
            ``observation_space``.

    Returns:
        tuple: ``(actions, rewards)`` — the items selected, in order, and the
        reward returned by each step.

    Raises:
        AssertionError: If ``env`` is not ``Knapsack-v1`` or a stepped
            observation falls outside ``env.observation_space``.
    """
    assert env.spec.id == 'Knapsack-v1', \
        '{} received. Heuristic designed for Knapsack-v1.'.format(env.spec.id)
    env.reset()

    # Rank items by value-weight ratio, highest ratio first.
    vw_ratio = env.item_values / env.item_weights
    vw_order = env.item_numbers[np.argsort(vw_ratio)[::-1]]
    actions = []
    rewards = []
    done = False
    while not done:
        # Nothing left that is both in stock and fits: stop instead of
        # indexing into an empty ranking (previously an IndexError).
        if len(vw_order) == 0:
            break
        max_item = vw_order[0]
        remaining = env.max_weight - env.current_weight
        # Drop the candidate when it is exhausted or no longer fits.
        if env.item_limits[max_item] == 0 or env.item_weights[max_item] > remaining:
            vw_order = vw_order[1:]
            continue
        # Select max_item
        state, reward, done, _ = env.step(max_item)
        assert env.observation_space.contains(state)
        actions.append(max_item)
        rewards.append(reward)

    return actions, rewards

In [11]:
a, r = bkp_heuristic(gym.make(env_name))

In [16]:
env.sample_action()

165

In [17]:
# Roll out 1,000 episodes with random actions and assert that every stepped
# observation lies inside the environment's declared observation space.
env = gym.make(env_name)
for i in range(1000):
    done = False
    state = env.reset()
    while not done:
        action = env.sample_action()
        state, reward, done, _ = env.step(action)
        # Include the episode index and the observation so a failure is
        # immediately diagnosable.
        assert env.observation_space.contains(state), (i, state)

In [2]:
env = gym.make('VMPacking-v1')

In [4]:
obs = (np.array([[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  1.55098013e-01,  4.57142860e-01],
       [ 1.00000000e+00,  1.80095343e-01,  1.14285710e-01],
       [ 1.00000000e+00,  1.68639428e-01,  4.57142860e-01],
       [ 1.00000000e+00,  1.45168387e-01,  5.71428600e-02],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  1.60860395e-01,  4.57142860e-01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  3.06611964e-01,  1.71428570e-01],
       [ 1.00000000e+00,  1.59571429e-01,  4.57142860e-01],
       [ 1.00000000e+00,  1.73140948e-01,  5.71428600e-02],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  6.56247413e-01,  7.14285710e-01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  1.44259654e-01,  4.57142860e-01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  1.70548587e-01,  4.57142860e-01],
       [ 1.00000000e+00,  1.74826422e-01,  1.14285710e-01],
       [ 1.00000000e+00,  1.29823027e-01,  1.14285710e-01],
       [ 1.00000000e+00,  1.48445172e-01,  5.71428600e-02],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  5.03358110e-01,  6.00000000e-01],
       [ 1.00000000e+00,  1.70531184e-01,  4.57142860e-01],
       [ 1.00000000e+00,  3.16336877e-01,  5.14285720e-01],
       [ 1.00000000e+00,  1.47469827e-01,  4.57142860e-01],
       [ 1.00000000e+00,  3.10112246e-01,  2.28571420e-01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  1.61752004e-01,  1.14285710e-01],
       [ 1.00000000e+00,  1.70775540e-01,  4.57142860e-01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  1.57151331e-01,  2.85714300e-02],
       [ 1.00000000e+00,  3.26866391e-01,  1.71428570e-01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  3.22780079e-01,  1.71428570e-01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  1.66847336e-01,  4.57142860e-01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  1.34825816e-01,  5.71428600e-02],
       [ 1.00000000e+00,  1.57145190e-01,  5.71428600e-02],
       [ 1.00000000e+00,  1.64246060e-01,  5.71428600e-02],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  1.56915612e-01,  1.14285710e-01],
       [ 1.00000000e+00,  1.27940899e-01,  4.57142860e-01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  3.03802183e-01,  9.14285720e-01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  1.34844522e-01,  5.71428600e-02],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  1.49018372e-01,  4.57142860e-01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  1.75761829e-01,  5.71428600e-02],
       [ 1.00000000e+00, -2.77555756e-17,  5.55111512e-17],
       [ 1.00000000e+00,  1.58064302e-01,  4.57142860e-01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  1.44583625e-01,  9.14285710e-01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  3.55295599e-01,  5.71428570e-01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  3.38368611e-01,  5.14285720e-01],
       [ 1.00000000e+00,  1.67349985e-01,  4.57142860e-01],
       [ 1.00000000e+00,  1.46163657e-01,  1.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  3.18188350e-01,  9.14285720e-01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  1.68547386e-01,  2.85714300e-02],
       [ 1.00000000e+00,  1.54996571e-01,  1.14285710e-01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  1.63522740e-01,  2.85714300e-02],
       [ 1.00000000e+00,  1.85800071e-01,  4.57142860e-01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  1.62238654e-01,  1.14285710e-01],
       [ 1.00000000e+00,  3.27169275e-01,  2.28571420e-01],
       [ 1.00000000e+00,  1.58042847e-01,  5.71428600e-02],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+00,  3.26400855e-01,  1.42857140e-01],
       [ 1.00000000e+00,  1.60645395e-01,  4.57142860e-01]]), np.array([0.17482642, 0.11428571]))

In [5]:
env.observation_space.contains(obs)

False

In [6]:
obs[0].max()

1.0

In [7]:
obs[0].min()

-2.77555756e-17

In [9]:
# Replace negative entries of the first observation component with 0; the
# minimum shown above is about -2.8e-17, i.e. floating-point round-off. The
# second component is passed through unchanged.
obs_new = (np.where(obs[0]<0,0,obs[0]), obs[1])

In [10]:
env.observation_space.contains(obs_new)

True