## Imports

In [1]:
import numpy as np
import time
import os
import gym
import sys
from gym import error, spaces
from gym import utils
from gym.utils import seeding
try:
    import atari_py
except ImportError as e:
    raise error.DependencyNotInstalled(
            "{}. (HINT: you can install Atari dependencies by running "
            "'pip install gym[atari]'.)".format(e))

## Get RAM

In [3]:
def to_ram(ale):
    ram_size = ale.getRAMSize()
    ram = np.zeros((ram_size), dtype=np.uint8)
    ale.getRAM(ram)
    return ram

 ### Fragmented game vizualization

Agent has 5 lives


In [6]:
env = gym.make('Breakout-v0')
env.reset()
for i in range(200):
    if i % 100 == 0:
        print(i)
    env.render()
    env.step(env.action_space.sample()) # take a random action
    time.sleep(0.1)
print("Finished")
env.close()

0
100
Finished


### Game info vizualization

* info has remaining lives of agent
* reward seems to be 0
* observation seems to be matrix of 0 or 142


In [2]:
env = gym.make('Breakout-v0')
print_observation = False
for i_episode in range(3):
    observation = env.reset()
    for t in range(1000):
        env.render()
        if (t + 1) % 100 == 0:
            print(f"info: {info}")
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        if reward > 0:
            print(reward, info)
        time.sleep(0.001)
        if done:
            print(f"observations shape: {observation.shape}")
            print(f"reward: {reward}")
            print(f"info: {info}")
            print("Episode finished after {} timesteps".format(t+1))
            if print_observation:
                for x in observation:
                    if np.amax(x) > 0:
                        print(x)
                        break
            break
env.close()

1.0 {'ale.lives': 4}
info: {'ale.lives': 4}
info: {'ale.lives': 2}
1.0 {'ale.lives': 2}
info: {'ale.lives': 1}
1.0 {'ale.lives': 1}
1.0 {'ale.lives': 1}
info: {'ale.lives': 1}
1.0 {'ale.lives': 1}
info: {'ale.lives': 1}
observations shape: (210, 160, 3)
reward: 0.0
info: {'ale.lives': 0}
Episode finished after 507 timesteps
info: {'ale.lives': 3}
1.0 {'ale.lives': 2}
info: {'ale.lives': 1}
1.0 {'ale.lives': 1}
observations shape: (210, 160, 3)
reward: 0.0
info: {'ale.lives': 0}
Episode finished after 257 timesteps
1.0 {'ale.lives': 4}
info: {'ale.lives': 4}
info: {'ale.lives': 1}
observations shape: (210, 160, 3)
reward: 0.0
info: {'ale.lives': 0}
Episode finished after 206 timesteps


### Env data

In [7]:
print(env.action_space)
print(env.observation_space)
print(env.observation_space.shape)
# print(env.observation_space.high)
# print(env.observation_space.low)

Discrete(4)
Box(0, 255, (210, 160, 3), uint8)
(210, 160, 3)


### Printing the entire np array

In [8]:
from gym import envs
# print(envs.registry.all())

In [6]:
np.set_printoptions(threshold=sys.maxsize) 
observation = env.reset()
for t in range(1):
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
observation.shape

(210, 160, 3)

### Playing more with gym env

In [19]:
env = gym.make("Breakout-v4").env
env.render()
print(f"Action Space {env.action_space}")
print(f"State Space {env.observation_space}")
time.sleep(2)
env.close()

Action Space Discrete(4)
State Space Box(0, 255, (210, 160, 3), uint8)


In [5]:
env = gym.make("Breakout-v0")
print("env:", env)
print("env.env:", env.env)


observation = env.reset()
for t in range(10):
    env.render()
    action = env.action_space.sample()
    print(action)
    observation, reward, done, info = env.step(t % 2)
    if reward > 0:
        print("Not negative reward", reward, info)
    time.sleep(1)
    if done:
        break


time.sleep(2)
env.close()
print("help:", help(env.env))

env: <TimeLimit<AtariEnv<Breakout-v0>>>
env.env: <AtariEnv<Breakout-v0>>
0
2
1
2
1
3
0
2
2
1
Help on AtariEnv in module gym.envs.atari.atari_env object:

class AtariEnv(gym.core.Env, gym.utils.ezpickle.EzPickle)
 |  AtariEnv(game='pong', mode=None, difficulty=None, obs_type='ram', frameskip=(2, 5), repeat_action_probability=0.0, full_action_space=False)
 |  
 |  The main OpenAI Gym class. It encapsulates an environment with
 |  arbitrary behind-the-scenes dynamics. An environment can be
 |  partially or fully observed.
 |  
 |  The main API methods that users of this class need to know are:
 |  
 |      step
 |      reset
 |      render
 |      close
 |      seed
 |  
 |  And set the following attributes:
 |  
 |      action_space: The Space object corresponding to valid actions
 |      observation_space: The Space object corresponding to valid observations
 |      reward_range: A tuple corresponding to the min and max possible rewards
 |  
 |  Note: a default reward range set to [-inf