# Introduction to Deep Reinforcement Learning
Book: "Introducción al aprendizaje por refuerzo profundo", Jordi Torres, 2021, Ed. Watch This Space
## Part 1, Chapter 2: Formalización del aprendizaje por refuerzo
### Frozen Lake: agent following the good plan in an environment with uncertainty (slippery lake)

### <font color='darkblue'>PRELIMINARY ELEMENTS</font>

In [39]:
# Import Gym library: https://gym.openai.com
import gym

In [40]:
# Creation of a class Agent that follows the good plan
class Agent:
    """Scripted agent that replays a fixed sequence of moves (the "good plan").

    The plan is: down, down, right, down, right, right. Each call to
    ``select_action`` returns the integer code of the next move in the plan
    and advances an internal cursor, wrapping back to the first move once the
    end of the plan has been reached.
    """

    def __init__(self):
        # Move name -> integer action code returned by select_action().
        self.actions = {'left':0, 'down':1, 'right':2, 'up':3}
        self.good_plan = 2 * ['down'] + ['right'] + ['down'] + 2 * ['right']
        self.step = 0  # index into good_plan of the next move to return

    def select_action(self):
        """Return the action code of the next planned move and advance the cursor."""
        action = self.good_plan[self.step]
        # Wrap on the plan's real length rather than a literal 6, so the
        # cursor stays valid if good_plan is replaced by a shorter or longer plan.
        self.step = (self.step + 1) % len(self.good_plan)
        return self.actions[action]

    def reset(self):
        """Rewind the cursor so the next returned move is the first of the plan."""
        self.step = 0

### <font color='darkblue'>RUNNING THE AGENT FOLLOWING THE GOOD PLAN IN A NON-SLIPPERY ENVIRONMENT</font>

In [41]:
# Instantiate the scripted agent that replays the fixed "good plan"
agent = Agent()

# Build the FrozenLake environment from Gym with the slippery surface turned off
env = gym.make('FrozenLake-v0', is_slippery=False)

# Play one single episode, rendering the grid after every move
env.reset()        # put the environment back in its initial state
env.render()       # show the grid before the first move
is_done = False    # set by env.step() once the episode has ended
t = 0              # number of time steps taken so far

while True:
    # Ask the agent for its next move and apply it to the environment
    action = agent.select_action()
    state, reward, is_done, _ = env.step(action)
    env.render()
    t += 1
    if is_done:
        break

print('\nTotal time steps: ', t)

# The episode counts as solved when it ends in state 15
# (presumably the goal cell 'G' in the bottom-right corner of the rendered 4x4 grid)
print('SOLVED!!!    :)' if state == 15 else 'NOT SOLVED  :(')
        


[41mS[0mFFF
FHFH
FFFH
HFFG
  (Down)
SFFF
[41mF[0mHFH
FFFH
HFFG
  (Down)
SFFF
FHFH
[41mF[0mFFH
HFFG
  (Right)
SFFF
FHFH
F[41mF[0mFH
HFFG
  (Down)
SFFF
FHFH
FFFH
H[41mF[0mFG
  (Right)
SFFF
FHFH
FFFH
HF[41mF[0mG
  (Right)
SFFF
FHFH
FFFH
HFF[41mG[0m

Total time steps:  6
SOLVED!!!    :)


### <font color='darkblue'>RUNNING THE AGENT IN A SLIPPERY ENVIRONMENT FOR A NUMBER OF EPISODES AND COUNTING SUCCESS RATE</font>

In [42]:
# Function for running one whole episode
def run_episode(agent, env):
    """Run one complete episode and return the outcome of its last step.

    Both the environment and the agent are reset first, then the agent's
    actions are fed to the environment until it reports the episode as done.

    Args:
        agent: object providing ``reset()`` and ``select_action()``.
        env: object providing ``reset()`` and ``step(action)``, where ``step``
            returns a 4-tuple ``(state, reward, is_done, info)``.

    Returns:
        Tuple ``(state, reward, is_done)`` taken from the final ``env.step``
        call of the episode.
    """
    env.reset()        # reset environment before running episode
    agent.reset()      # restart the agent's plan from its first action
    is_done = False    # episode completion flag

    # At least one step is always taken, so state and reward are defined on return.
    while not is_done:
        action = agent.select_action()
        state, reward, is_done, _ = env.step(action)
    return (state, reward, is_done)

In [43]:
# Fresh scripted agent for the batch of episodes
agent = Agent()

# FrozenLake environment from Gym, this time with the slippery surface enabled
env = gym.make('FrozenLake-v0', is_slippery=True)

n_episodes = 10000   # how many episodes to play

# Count the episodes whose final state is 15
solved = 0
for episode in range(n_episodes):
    state, reward, is_done = run_episode(agent, env)
    solved += int(state == 15)

print(f'\n Solved {solved} times in {n_episodes} episodes. Success rate: {solved / n_episodes * 100 :.2f}%')



 Solved 387 times in 10000 episodes. Success rate: 3.87%
