In [2]:
import gym
# Create an environment with gym.make('EnvironmentName-vN'), where N is the version.
# The list of all available environments is published online.
env = gym.make('FrozenLake-v0')
# Draw the game board.
env.render()


[41mS[0mFFF
FHFH
FFFH
HFFG


In [3]:
# The observation space describes the states the agent can observe.
# It is discrete here: Discrete(16), one state per tile of the 4x4 board.
env.observation_space

Discrete(16)

In [4]:
# The action space describes the moves the agent can choose from.
# It is also discrete: Discrete(4). The cells below use 1 for down and 2 for right.
env.action_space

Discrete(4)

In [5]:
# env.P[state][action] lists the possible outcomes of taking `action` in `state`.
# Here: action right (2) from the start tile S (state 0).
# Each tuple is (probability, next_state, reward, done).
# The transition is stochastic. Each of these three outcomes has probability 1/3:
#   - state 4 (one tile down),
#   - state 1 (one tile right),
#   - state 0 (stay in place).
# The third element of each tuple is the reward, which is 0.0 for every outcome.
# The final False says the outcome is not terminal: none of these tiles is a hole or the goal.
env.P[0][2]

[(0.3333333333333333, 4, 0.0, False),
 (0.3333333333333333, 1, 0.0, False),
 (0.3333333333333333, 0, 0.0, False)]

In [6]:
# Let's look at the same table for state 3 and action 1 (down).
# State 7 is a hole, so the last element of its tuple is True, meaning it is a terminal state.
# Again, every outcome has probability 1/3 and there is no reward.
env.P[3][1]

[(0.3333333333333333, 2, 0.0, False),
 (0.3333333333333333, 7, 0.0, True),
 (0.3333333333333333, 3, 0.0, False)]

In [7]:
# Enough exploring; time to play an episode.
# env.reset() starts a new episode; its return value is kept as the current state.
state = env.reset()

In [8]:
# Perform action 1 (down) with env.step(action), then draw the board.
# The value returned by env.step() is ignored here; the next cells inspect it.
env.step(1)
env.render()

  (Down)
SFFF
[41mF[0mHFH
FFFH
HFFG


In [9]:
# Transitions are stochastic, so the agent does not always move in the
# requested direction; the rendered board shows where it actually ended up.
# Let's take another step, action 2 (right), and keep what env.step() returns.
# env.step() returns a tuple (next_state, reward, done, info) rather than a
# bare state, so it is stored under a name that says so.
step_result = env.step(2)
env.render()
print(step_result)

  (Right)
[41mS[0mFFF
FHFH
FFFH
HFFG
(0, 0.0, False, {'prob': 0.3333333333333333})


In [10]:
# The tuple printed by the previous cell is (next_state, reward, done, info).
# In the recorded run it was (0, 0.0, False, {'prob': 0.333...}): the agent
# ended up in state 0 through a transition of probability 1/3 and got a reward of 0.0.
# The False indicates that we are not at a terminal state.
# We can break up this tuple into separate variables:
next_state, reward, done, info = env.step(1)
env.render()

  (Down)
[41mS[0mFFF
FHFH
FFFH
HFFG


In [11]:
# Show each value returned by env.step() next to its label.
step_fields = (
    ('next_state: ', next_state),
    ('reward: ', reward),
    ('done: ', done),
    ('info ', info),
)
for label, value in step_fields:
    print(label, value)

next_state:  0
reward:  0.0
done:  False
info  {'prob': 0.3333333333333333}


In [12]:
# Those are the four values returned by env.step().
# In the recorded run done is False, so that episode is not over yet;
# done is True when the agent lands on a terminal tile (a hole or the goal).
# Next, let's see how to pick a random action.
# Reset first so that the random step starts from a fresh episode.
env.reset()
# env.action_space.sample() picks an action at random.
random_action = env.action_space.sample()
next_state, reward, done, info = env.step(random_action)
env.render()

  (Right)
SFFF
[41mF[0mHFH
FFFH
HFFG


In [13]:
# The requested action and the tile the agent lands on can differ, because
# transitions are stochastic.
# Let's go further and play one complete episode using only random actions.

state = env.reset()
print('Time Step 0 :')
env.render()

# Allow at most 20 moves in the episode
num_timesteps = 20

for step_number in range(1, num_timesteps + 1):

    # Pick a random action and apply it
    random_action = env.action_space.sample()
    next_state, reward, done, info = env.step(random_action)

    print('Time Step {}:'.format(step_number))
    env.render()

    # Stop as soon as the episode has ended
    if done:
        break

Time Step 0 :

[41mS[0mFFF
FHFH
FFFH
HFFG
Time Step 1:
  (Down)
SFFF
[41mF[0mHFH
FFFH
HFFG
Time Step 2:
  (Up)
[41mS[0mFFF
FHFH
FFFH
HFFG
Time Step 3:
  (Up)
S[41mF[0mFF
FHFH
FFFH
HFFG
Time Step 4:
  (Down)
SFFF
F[41mH[0mFH
FFFH
HFFG


In [14]:
# In the recorded run above the agent stepped onto a hole, so that game was lost.
# When training, you run many episodes instead of a single one.
# The code is the same as before, wrapped in an outer loop over episodes.
# Nothing is displayed here; to watch the episodes, call env.render() after
# the reset and after every step.
num_episodes = 10
num_timesteps = 20

for episode in range(num_episodes):
    state = env.reset()

    for step_index in range(num_timesteps):
        random_action = env.action_space.sample()
        next_state, reward, done, info = env.step(random_action)

        # The episode has ended; move on to the next one
        if done:
            break
            

In [16]:
# Once we are finished with the environment, call env.close().
env.close()