In [1]:
# Third-party dependencies for this notebook.
import matplotlib.pyplot as plt  # NOTE(review): not referenced in the cells shown here — confirm it is used later
import numpy as np  # NOTE(review): not referenced in the cells shown here — confirm it is used later
import gym

# Record the installed gym version next to the results.
print(gym.__version__)

0.17.3


In [2]:
# Each registry entry exposes an `id` attribute; gather all of them.
all_envs = gym.envs.registry.all()
env_ids = [spec.id for spec in all_envs]

# Report the total count together with the first dozen ids as a sample.
print(f'There are {len(env_ids)} gym environments. Such as {env_ids[:12]}')


There are 859 gym environments. Such as ['Copy-v0', 'RepeatCopy-v0', 'ReversedAddition-v0', 'ReversedAddition3-v0', 'DuplicatedInput-v0', 'Reverse-v0', 'CartPole-v0', 'CartPole-v1', 'MountainCar-v0', 'MountainCarContinuous-v0', 'Pendulum-v0', 'Acrobot-v1']


In [3]:
# Build the CartPole-v1 environment from its registered id.
env = gym.make('CartPole-v1')

In [4]:
# Look the observation space up once and describe it through a local alias.
observation_space = env.observation_space

print('observation space is:', observation_space)

# Check which gym space class the observations belong to.
print('is observation space discrete?', isinstance(observation_space, gym.spaces.Discrete))
print('is observation space continuous?', isinstance(observation_space, gym.spaces.Box))

print('observation space shape:', observation_space.shape)

# `high` and `low` are the per-dimension upper and lower bounds of the Box.
print('observation space high values?', observation_space.high)
print('observation space low values?', observation_space.low)


observation space is: Box(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)
is observation space discrete? False
is observation space continuous? True
observation space shape: (4,)
observation space high values? [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]
observation space low values? [-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]


In [5]:
# Look the action space up once and describe it through a local alias.
action_space = env.action_space

print('action space is:', action_space)

# Check which gym space class the actions belong to.
print('is action space discrete?', isinstance(action_space, gym.spaces.Discrete))
print('is action space continuous?', isinstance(action_space, gym.spaces.Box))

# NOTE: despite the "shape" label, `.n` is the number of discrete actions.
print('action space shape:', action_space.n)


action space is: Discrete(2)
is action space discrete? True
is action space continuous? False
action space shape: 2


In [6]:
# One value per line: the episode step limit, the reward threshold and the
# non-determinism flag from the environment's spec.
print(
    env.spec.max_episode_steps,
    env.spec.reward_threshold,
    env.spec.nondeterministic,
    sep='\n',
)


500
475.0
False


In [7]:
# Start from a fresh CartPole-v1 instance.
env = gym.make('CartPole-v1')

# Seed the environment before the first reset.
env.seed(1234)

# reset() hands back the initial observation.
state = env.reset()

# Show the observation's Python type, its shape and its values.
print('state type:', type(state))
print('state shape:', state.shape)
print('state:', state)


state type: <class 'numpy.ndarray'>
state shape: (4,)
state: [-0.01315549 -0.04012824  0.04801855 -0.0044493 ]


In [8]:
# Draw a random action from the action space (for continuous spaces the draw is between low and high).
# NOTE(review): env.seed() may not seed the action space's own sampler, so this draw might not be reproducible — confirm.
action = env.action_space.sample()

print('selected action:', action)

selected action: 0


In [9]:
# Apply the chosen action; step() returns four values, unpacked here as the
# next observation, the reward, the episode-finished flag and an info object.
state, reward, done, info = env.step(action)

print('state:', state)
print('reward:', reward)
print('done:', done)
print('info:', info)

state: [-0.01395805 -0.23590479  0.04792957  0.30298905]
reward: 1.0
done: False
info: {}


In [10]:
# Roll out a random policy on Space Invaders and report each episode's total reward.
env = gym.make('SpaceInvadersNoFrameskip-v4')

# Seed the environment AND the action-space sampler: env.seed() alone leaves
# action_space.sample() drawing from an unseeded generator, so the random
# policy (and therefore the episode rewards) would differ on every run.
env.seed(1234)
env.action_space.seed(1234)

n_episodes = 10

try:
    for episode in range(n_episodes):

        episode_reward = 0
        done = False
        state = env.reset()

        # Play one full episode, picking a random action at every step.
        while not done:
            action = env.action_space.sample()
            state, reward, done, _ = env.step(action)
            episode_reward += reward

        print(f'episode: {episode+1}, reward: {episode_reward}')
finally:
    # Release the environment's resources even if the run is interrupted.
    env.close()


episode: 1, reward: 230.0
episode: 2, reward: 105.0
episode: 3, reward: 55.0
episode: 4, reward: 15.0
episode: 5, reward: 435.0
episode: 6, reward: 235.0
episode: 7, reward: 380.0
episode: 8, reward: 105.0
episode: 9, reward: 110.0
episode: 10, reward: 210.0
