In [1]:
import gym
import numpy as np

In [2]:
# Environment the agent interacts with (reset/step are called from main()).
env = gym.make("FrozenLake-v0")

# --- Problem size -----------------------------------------------------------
n_states = 16  # 4*4 grid; keep in sync with the environment, do not change.

# --- Episode budget ---------------------------------------------------------
min_episodes = 100     # no average score is computed until i exceeds this
max_episodes = 100000  # upper bound on the number of training episodes
max_steps = 10000      # upper bound on the steps taken inside one episode

# --- Learning parameters ----------------------------------------------------
alpha = 0.5    # step size applied to the temporal-difference error
gamma = 0.9    # discount applied to the next state-action value
espilon = 0.5  # probability of a random action in select_action_sarsa()

# --- Stopping and reporting -------------------------------------------------
treshold = 0.76  # training stops once the average score reaches this value
verbosity = 200  # a progress line is printed every `verbosity` episodes

# One entry per state, each holding four action-values that all start at 1:
# {0: [1, 1, 1, 1], 1: [1, 1, 1, 1], ..., 15: [1, 1, 1, 1]}
q_table = {state: [1, 1, 1, 1] for state in range(n_states)}

# Final reward of every finished episode, in order.
scores = []

In [3]:
def select_action_sarsa(observation=None):
    """Choose an action epsilon-greedily from the Q-table.

    With probability `espilon` a random action is sampled from
    `env.action_space`; otherwise the greedy action is returned.

    Args:
        observation: Key of the current state in `q_table`. When given, the
            greedy action is the argmax of that state's action-values. When
            omitted, the function keeps its earlier behaviour for callers
            that pass no argument: the argmax of the action-values summed
            over every state (which does not depend on the current state).

    Returns:
        The selected action: whatever `env.action_space.sample()` yields on
        an exploratory draw, otherwise the index returned by `np.argmax`.
    """
    # Exploration: ignore the table entirely for this draw.
    if np.random.rand() < espilon:
        return env.action_space.sample()

    if observation is None:
        # Legacy, state-independent fallback kept for backward compatibility.
        summed_values = np.sum(list(q_table.values()), axis=0)
        return np.argmax(summed_values)

    # Exploitation: best known action for the state we are actually in.
    return np.argmax(q_table[observation])

In [4]:
def select_action_learning(observation):
    """Return the greedy action for `observation`.

    Looks up the action-values stored in `q_table` for the given state and
    returns the index of the largest one (the first such index on ties, as
    `np.argmax` does).
    """
    action_values = q_table[observation]
    greedy_action = np.argmax(action_values)
    return greedy_action

In [5]:
def main():
    """Train the tabular agent until its recent average score is high enough.

    Runs up to `max_episodes` episodes of at most `max_steps` steps each.
    After every environment step the value of the state-action pair that was
    just executed is moved towards
    `reward + gamma * Q(next_state, next_action)` with step size `alpha`
    (the bootstrap term is dropped on terminal transitions). The final
    reward of each finished episode is appended to `scores`.

    Once more than `min_episodes` episodes have run, the mean reward of the
    last 100 finished episodes (including the one that just ended) is
    computed; training stops as soon as it reaches `treshold`.

    Returns:
        tuple: `(i, avg_score)` where `i` is the index of the last episode
        run and `avg_score` is the most recently computed average (0 if no
        average was ever computed).
    """
    # NOTE(review): actions are always chosen greedily via
    # select_action_learning(); `espilon` and select_action_sarsa() are not
    # used here. Confirm whether exploration was intended.
    window = 100   # number of most recent episodes that are averaged
    i = 0          # keeps the final return well-defined if max_episodes is 0
    avg_score = 0

    for i in range(max_episodes):
        # Start from the initial state/action so that the very first
        # transition of the episode is learned as well.
        prev_observation = env.reset()
        prev_action = select_action_learning(prev_observation)

        for t in range(max_steps):
            observation, reward, done, info = env.step(prev_action)
            action = select_action_learning(observation)

            q_old = q_table[prev_observation][prev_action]
            # (1 - done) removes the bootstrap term when the episode ended.
            target = reward + gamma * (1 - done) * q_table[observation][action]
            q_table[prev_observation][prev_action] = q_old + alpha * (target - q_old)

            prev_observation, prev_action = observation, action

            if done:
                scores.append(reward)

                if i > min_episodes:
                    # Include the episode that just finished in the window.
                    avg_score = np.mean(scores[-window:])

                    if avg_score >= treshold:
                        return i, avg_score

                    if i % verbosity == 0:
                        print("Episode #{:04d} | Steps = {:03d} | Reward = {:01d} | Average score = {}".format(i, t, int(reward), avg_score))

                break
    return i, avg_score

In [6]:
# Run training. main() returns the index of the episode it stopped at and the
# average score it last computed.
episode, score = main()

# Summary line: the first placeholder receives the score, the second the episode.
print("The SARSA algorithm reached an average score during the last 100 episodes of {} in {} episodes".format(score, episode))

Episode #0200 | Steps = 001 | Reward = 0 | Average score = 0.0
Episode #0400 | Steps = 020 | Reward = 0 | Average score = 0.0
Episode #0600 | Steps = 002 | Reward = 0 | Average score = 0.01
Episode #0800 | Steps = 002 | Reward = 0 | Average score = 0.02
Episode #1000 | Steps = 002 | Reward = 0 | Average score = 0.02
Episode #1200 | Steps = 002 | Reward = 0 | Average score = 0.0
Episode #1400 | Steps = 003 | Reward = 0 | Average score = 0.02
Episode #1600 | Steps = 002 | Reward = 0 | Average score = 0.03
Episode #1800 | Steps = 001 | Reward = 0 | Average score = 0.04
Episode #2000 | Steps = 008 | Reward = 0 | Average score = 0.06
Episode #2200 | Steps = 002 | Reward = 0 | Average score = 0.01
Episode #2400 | Steps = 001 | Reward = 0 | Average score = 0.03
Episode #2600 | Steps = 005 | Reward = 0 | Average score = 0.0
Episode #2800 | Steps = 003 | Reward = 0 | Average score = 0.03
Episode #3000 | Steps = 002 | Reward = 0 | Average score = 0.02
Episode #3200 | Steps = 010 | Reward = 0 | A

Episode #25800 | Steps = 001 | Reward = 0 | Average score = 0.03
Episode #26000 | Steps = 003 | Reward = 0 | Average score = 0.0
Episode #26200 | Steps = 016 | Reward = 0 | Average score = 0.01
Episode #26400 | Steps = 002 | Reward = 0 | Average score = 0.01
Episode #26600 | Steps = 003 | Reward = 0 | Average score = 0.01
Episode #26800 | Steps = 007 | Reward = 0 | Average score = 0.05
Episode #27000 | Steps = 001 | Reward = 0 | Average score = 0.03
Episode #27200 | Steps = 005 | Reward = 0 | Average score = 0.01
Episode #27400 | Steps = 003 | Reward = 0 | Average score = 0.05
Episode #27600 | Steps = 006 | Reward = 0 | Average score = 0.0
Episode #27800 | Steps = 002 | Reward = 0 | Average score = 0.05
Episode #28000 | Steps = 002 | Reward = 0 | Average score = 0.01
Episode #28200 | Steps = 007 | Reward = 0 | Average score = 0.04
Episode #28400 | Steps = 003 | Reward = 0 | Average score = 0.0
Episode #28600 | Steps = 009 | Reward = 0 | Average score = 0.04
Episode #28800 | Steps = 008

Episode #51200 | Steps = 014 | Reward = 0 | Average score = 0.0
Episode #51400 | Steps = 005 | Reward = 0 | Average score = 0.01
Episode #51600 | Steps = 003 | Reward = 0 | Average score = 0.03
Episode #51800 | Steps = 002 | Reward = 0 | Average score = 0.02
Episode #52000 | Steps = 003 | Reward = 0 | Average score = 0.02
Episode #52200 | Steps = 009 | Reward = 0 | Average score = 0.02
Episode #52400 | Steps = 002 | Reward = 0 | Average score = 0.0
Episode #52600 | Steps = 010 | Reward = 1 | Average score = 0.02
Episode #52800 | Steps = 019 | Reward = 0 | Average score = 0.01
Episode #53000 | Steps = 016 | Reward = 0 | Average score = 0.0
Episode #53200 | Steps = 001 | Reward = 0 | Average score = 0.03
Episode #53400 | Steps = 001 | Reward = 0 | Average score = 0.03
Episode #53600 | Steps = 003 | Reward = 0 | Average score = 0.04
Episode #53800 | Steps = 001 | Reward = 0 | Average score = 0.03
Episode #54000 | Steps = 009 | Reward = 0 | Average score = 0.03
Episode #54200 | Steps = 008

Episode #77400 | Steps = 008 | Reward = 0 | Average score = 0.03
Episode #77600 | Steps = 004 | Reward = 0 | Average score = 0.04
Episode #77800 | Steps = 002 | Reward = 0 | Average score = 0.03
Episode #78000 | Steps = 004 | Reward = 0 | Average score = 0.02
Episode #78200 | Steps = 001 | Reward = 0 | Average score = 0.01
Episode #78400 | Steps = 017 | Reward = 0 | Average score = 0.0
Episode #78600 | Steps = 002 | Reward = 0 | Average score = 0.0
Episode #78800 | Steps = 004 | Reward = 0 | Average score = 0.0
Episode #79000 | Steps = 007 | Reward = 0 | Average score = 0.01
Episode #79200 | Steps = 002 | Reward = 0 | Average score = 0.02
Episode #79400 | Steps = 001 | Reward = 0 | Average score = 0.04
Episode #79600 | Steps = 001 | Reward = 0 | Average score = 0.03
Episode #79800 | Steps = 013 | Reward = 0 | Average score = 0.02
Episode #80000 | Steps = 005 | Reward = 0 | Average score = 0.0
Episode #80200 | Steps = 002 | Reward = 0 | Average score = 0.03
Episode #80400 | Steps = 001 