This notebook is an introduction to using OpenAI Gym for reinforcement learning (RL) experiments.

In [1]:
# Imports for running OpenAI Gym environments locally

import gym
import numpy as np
import random

In [21]:
# Choose an environment and make its randomness reproducible

'''

List of example environments

(Source - https://gym.openai.com/envs/#classic_control)

'Acrobot-v1'
'CartPole-v1'
'MountainCar-v0'

'''

SEED = 0   # single seed shared by every random number generator seeded in this cell

env = gym.make('CartPole-v1')
env.seed(SEED)                # seeds the environment itself
env.action_space.seed(SEED)   # the action space keeps its own generator, so without this
                              # env.action_space.sample() differs from run to run

state_shape = env.observation_space.shape[0]   # size of the first axis of the observation space
no_of_actions = env.action_space.n             # number of actions in the action space

print(state_shape)
print(no_of_actions)

print(env.action_space.sample())

4
2
0


In [29]:
# State, action and reward dynamics: a single step of the agent-environment loop
'''
The agent picks an action based on the state it observes.

The environment holds the current state internally.
- Each time it receives an action, it computes the new state and stores it as the current state.
- It hands the new state and a reward back to the agent, which then chooses its next action.

'''

# Resetting the environment yields the initial state
state = env.reset()
print(state, "----", sep="\n")

# Sample a random action from the action space
action = env.action_space.sample()
print(action, "----", sep="\n")

# env.step takes the chosen action and returns the new state, the reward,
# a done flag and an info object
next_state, reward, done, info = env.step(action)
print(next_state, reward, done, info, sep="\n")

[ 0.02816094 -0.04843859 -0.00730189  0.01953373]
----
0
----
[ 0.02719217 -0.24345507 -0.00691121  0.30990391]
1.0
False
{}


In [23]:
# Rendering the environment
#
# Runs a random policy for a fixed number of steps, drawing every frame.
# An episode can end before the step budget is used up, and stepping an
# environment after it has reported done is undefined behaviour in Gym, so the
# environment is reset each time an episode ends.

N_RENDER_STEPS = 1000   # total number of frames to draw, across all episodes

env.reset()
try:
    for _ in range(N_RENDER_STEPS):
        env.render()
        random_action = env.action_space.sample()  # Ideally, these should be good actions that the agent decides
        episode_over = env.step(random_action)[2]  # step returns (next_state, reward, done, info)
        if episode_over:
            env.reset()   # start a fresh episode instead of stepping a finished one
finally:
    env.close()   # release the render window even if the loop is interrupted