In [106]:
# Shell escapes used to prepare the script file for the epsilon-greedy task.
# Create 2-epsilon_greedy.py if it does not exist yet (touch leaves existing content alone).
!touch 2-epsilon_greedy.py
# Mark every .py file in the current working directory as executable.
!chmod +x *.py

In [69]:
#!/usr/bin/env python3
""" Load the Environment """
import gym

def load_frozen_lake(desc=None, map_name=None, is_slippery=False):
    """
    Create the pre-made FrozenLake environment from OpenAI's gym.

    @desc: None, or a list of lists holding a custom description of
           the map to load for the environment
    @map_name: None, or a string naming the pre-made map to load
    *** When desc and map_name are both None, the environment loads
        a randomly generated 8x8 map
    @is_slippery: boolean telling whether the ice is slippery
    Returns:
            the environment built by gym.make
    """
    # Every caller-supplied option is forwarded to gym.make by keyword.
    lake_options = {
        "desc": desc,
        "map_name": map_name,
        "is_slippery": is_slippery,
    }
    return gym.make("FrozenLake-v0", **lake_options)
    

In [73]:
import numpy as np

# Demo of load_frozen_lake with four different argument combinations.
# Seed NumPy's global RNG first; presumably this is what makes the randomly
# generated maps below reproducible -- TODO confirm gym draws from np.random.
np.random.seed(0)
# 1) No desc / map_name, is_slippery left at its default (False).
env = load_frozen_lake()
print(env.desc)
# Entry of env.P indexed by [0][0]; the recorded output shows a list of
# 4-tuples (presumably P[state][action] -> transitions -- verify against gym).
print(env.P[0][0])
# 2) Same call with slippery ice; the recorded output shows three
#    equally weighted entries instead of one.
env = load_frozen_lake(is_slippery=True)
print(env.desc)
print(env.P[0][0])
# 3) Custom 3x3 map passed as a list of lists.
desc = [['S', 'F', 'F'], ['F', 'H', 'H'], ['F', 'F', 'G']]
env = load_frozen_lake(desc=desc)
print(env.desc)
# 4) Pre-made map selected by name.
env = load_frozen_lake(map_name='4x4')
print(env.desc)
# Reset the last environment and draw its current state.
env.reset()
env.render()

[[b'S' b'F' b'F' b'F' b'F' b'F' b'F' b'H']
 [b'H' b'F' b'F' b'F' b'F' b'H' b'F' b'F']
 [b'F' b'H' b'F' b'H' b'H' b'F' b'F' b'F']
 [b'F' b'F' b'F' b'H' b'F' b'F' b'F' b'F']
 [b'F' b'F' b'F' b'F' b'F' b'F' b'H' b'F']
 [b'F' b'F' b'F' b'F' b'F' b'F' b'F' b'F']
 [b'F' b'F' b'F' b'F' b'H' b'F' b'F' b'F']
 [b'F' b'F' b'F' b'F' b'F' b'F' b'F' b'G']]
[(1.0, 0, 0.0, False)]
[[b'S' b'F' b'H' b'F' b'H' b'F' b'H' b'F']
 [b'H' b'F' b'F' b'F' b'F' b'F' b'F' b'F']
 [b'F' b'F' b'F' b'F' b'F' b'F' b'F' b'F']
 [b'F' b'H' b'F' b'F' b'F' b'F' b'F' b'F']
 [b'F' b'F' b'H' b'F' b'F' b'F' b'F' b'H']
 [b'F' b'F' b'F' b'F' b'F' b'H' b'F' b'H']
 [b'F' b'F' b'H' b'F' b'H' b'F' b'H' b'F']
 [b'F' b'F' b'H' b'F' b'F' b'F' b'F' b'G']]
[(0.3333333333333333, 0, 0.0, False), (0.3333333333333333, 0, 0.0, False), (0.3333333333333333, 8, 0.0, True)]
[[b'S' b'F' b'F']
 [b'F' b'H' b'H']
 [b'F' b'F' b'G']]
[[b'S' b'F' b'F' b'F']
 [b'F' b'H' b'F' b'H']
 [b'F' b'F' b'F' b'H']
 [b'H' b'F' b'F' b'G']]

[41mS[0mFFF
FHFH
FFFH
HFF

In [56]:
import gym
# Random-policy CartPole demo: render the environment for 1000 steps while
# sampling actions uniformly from the action space.
env = gym.make('CartPole-v0')
env.reset()
for _ in range(1000):
    env.render()
    # Take a random action. Index 2 of the tuple returned by step() is the
    # episode-finished flag (observation, reward, done, ...).
    done = env.step(env.action_space.sample())[2]
    if done:
        # Stepping an environment whose episode has already ended is
        # undefined behaviour in gym, so start a fresh episode instead.
        env.reset()




In [57]:
# Close the environment bound to `env` by the previous cell.
env.close()

In [55]:
# Interactive exploration: print the help text of the object returned by
# gym.make (a TimeLimit wrapper according to the output recorded below).
# NOTE(review): leftover debugging aid; consider removing it from the final notebook.
help(gym.make("FrozenLake-v0"))

Help on TimeLimit in module gym.wrappers.time_limit object:

class TimeLimit(gym.core.Wrapper)
 |  Wraps the environment to allow a modular transformation.
 |  
 |  This class is the base class for all wrappers. The subclass could override
 |  some methods to change the behavior of the original environment without touching the
 |  original code.
 |  
 |  .. note::
 |  
 |      Don't forget to call ``super().__init__(env)`` if the subclass overrides :meth:`__init__`.
 |  
 |  Method resolution order:
 |      TimeLimit
 |      gym.core.Wrapper
 |      gym.core.Env
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, env, max_episode_steps=None)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  reset(self, **kwargs)
 |      Resets the environment to an initial state and returns an initial
 |      observation.
 |      
 |      Note that this function should not reset the environment's random
 |      number generator(s); random variabl

In [71]:
#!/usr/bin/env python3
""" Initialize Q-table """
import numpy as np


def q_init(env):
    """
    Build the initial Q-table for an environment.

    @env: is the FrozenLakeEnv instance; its observation_space.n and
          action_space.n give the table dimensions
    Returns:
            a numpy.ndarray of zeros with one row per state and one
            column per action
    """
    n_states = env.observation_space.n
    n_actions = env.action_space.n
    table_shape = (n_states, n_actions)
    return np.zeros(table_shape)


In [72]:

# Demo of q_init: build a Q-table for four environments and print its shape.
# The outputs recorded below are (64, 4), (64, 4), (9, 4) and (16, 4).
# 1) Default call (no desc / map_name).
env = load_frozen_lake()
Q = q_init(env)
print(Q.shape)
# 2) Same map options with slippery ice.
env = load_frozen_lake(is_slippery=True)
Q = q_init(env)
print(Q.shape)
# 3) Custom 3x3 map.
desc = [['S', 'F', 'F'], ['F', 'H', 'H'], ['F', 'F', 'G']]
env = load_frozen_lake(desc=desc)
Q = q_init(env)
print(Q.shape)
# 4) Pre-made '4x4' map.
env = load_frozen_lake(map_name='4x4')
Q = q_init(env)
print(Q.shape)

(64, 4)
(64, 4)
(9, 4)
(16, 4)


In [104]:
#!/usr/bin/env python3
""" Epsilon Greedy """
import numpy as np


def epsilon_greedy(Q, state, epsilon):
    """
    Choose the next action with the epsilon-greedy rule.

    @Q: is a numpy.ndarray containing the q-table
    @state: is the current state (row index into Q)
    @epsilon: is the threshold compared against the sampled p
    *** p is sampled with numpy.random.uniform; the algorithm explores
        when p < epsilon and exploits otherwise
    *** When exploring, the action is drawn with numpy.random.randint
        over all columns of Q
    Returns:
            the next action index
    """
    # One uniform draw always happens first, whichever branch is taken.
    should_explore = np.random.uniform() < epsilon
    if not should_explore:
        # Exploit: best-valued action for this state.
        return np.argmax(Q[state])

    # Explore: any action, uniformly at random.
    action_count = Q.shape[1]
    return np.random.randint(action_count)
     

In [105]:
import numpy as np

# Demo of epsilon_greedy on the custom 3x3 map.
desc = [['S', 'F', 'F'], ['F', 'H', 'H'], ['F', 'F', 'G']]
env = load_frozen_lake(desc=desc)
Q = q_init(env)
# Hand-set the values of row 7 so that column 2 holds the largest entry.
Q[7] = np.array([0.5, 0.7, 1, -1])
# Re-seed before each call so each result is reproducible on its own;
# the outputs recorded below are 2 (seed 0) and 0 (seed 1).
np.random.seed(0)
print(epsilon_greedy(Q, 7, 0.5))
np.random.seed(1)
print(epsilon_greedy(Q, 7, 0.5))

2
0
