In [1]:
import gym
from q_learning import TabularQAgent
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [2]:
#automatic reloading
%load_ext autoreload
%autoreload 2

In [3]:
def moving_average(a, n=3) :
    ret = np.cumsum(a, dtype=float)
    ret[n:] = ret[n:] - ret[:-n]
    return ret[n - 1:] / n

def rolling_average(a, n=3):
    ret = []
    t = 0
    while t < len(a):
        ret.append(np.mean(a[t:t+n]))
        t += n
    return ret

# 4x4 Frozen Lake

In [4]:
polynomial_learning_rates = [0.0, 0.1, 0.3, 0.5, 0.7, 0.8, 0.9]
plt.figure(figsize=(15, 7))
env = gym.make('FrozenLake-v0') # creating a grid word environment
env = FLenv
for polynomial_learning_rate in polynomial_learning_rates:
    agent_q = TabularQAgent(env.observation_space, env.action_space, n_episodes=20000, 
                           init_std=1, discount=1, n_itr=1000)
    reached_goals = agent_q.learn(env, polynomial_learning_rate = polynomial_learning_rate)
    plt.plot(rolling_average(reached_goals, 100))
plt.legend(map(str, polynomial_learning_rates), loc = "lower right")

INFO:gym.envs.registration:Making new env: FrozenLake-v0
[2016-09-06 12:06:15,909] Making new env: FrozenLake-v0


NameError: name 'FLenv' is not defined

<matplotlib.figure.Figure at 0x7f16559c7850>

## Analyzing the behavior of eligibility traces with Frozen Lake Problem

1. The behavior of learning curve with eligibility traces ($\lambda$) is not consistent.

In [None]:
eligibility_traces = 0.1*np.arange(0,10)
plt.figure(figsize=(15, 7))
env_TD = gym.make('FrozenLake-v0') # creating a grid word environment
for eligibity_trace in eligibility_traces:
    agent_q_TD = TabularQAgent(env.observation_space, env.action_space, n_episodes=20000, 
                               init_std=1, discount=1, n_itr=1000, eligibility_trace = eligibity_trace, eps = 0.01)
    reached_goals_td = agent_q_TD.sarsa_lambda(env_TD)
    plt.plot(moving_average(reached_goals_td, 1000))
plt.legend(map(str, eligibility_traces), loc = 'upper left')

In [None]:
eligibility_traces = 0.1*np.arange(0,10)
plt.figure(figsize=(15, 7))
env_TD = gym.make('FrozenLake-v0') # creating a grid word environment
for eligibity_trace in eligibility_traces:
    agent_q_TD = TabularQAgent(env.observation_space, env.action_space, n_episodes=20000, 
                               init_std=1, discount=1, n_itr=1000, eligibility_trace = eligibity_trace, eps = 0.01)
    reached_goals_td = agent_q_TD.sarsa_lambda(env_TD)
    agent_q_TD.accuracy(env_TD, 100)
    plt.plot(moving_average(reached_goals_td, 1000))
plt.legend(map(str, eligibility_traces), loc = 'upper left')

In [None]:
from gym.envs.registration import register, spec

MY_ENV_NAME='FrozenLakeNonSlippery-v0'
try:
    spec(MY_ENV_NAME)
except:
    register(
        id=MY_ENV_NAME,
        entry_point='gym.envs.toy_text:FrozenLakeEnv',
        kwargs={'map_name': '4x4', 'is_slippery': False},
    )
FLenv = gym.make(MY_ENV_NAME)

In [None]:
q_agent = TabularQAgent(FLenv.observation_space, FLenv.action_space, init_std=0, discount=0.9, 
                        n_episodes=20000, eps=1, polynomial_learning_rate=0.0, eligibility_trace=0.9)
t = q_agent.learn(FLenv)
q_agent.accuracy(FLenv, 100)

In [None]:
q_agent.accuracy(FLenv, 1000)

In [None]:
cum_reward = 0.0
max_iteration = 1000
q = q_agent.q
for n_episode in xrange(100):
    present_state = FLenv.reset()
    for n_itr in xrange(max_iteration):
        action = np.argmax(q[present_state])
        next_state, reward, done, _ = FLenv.step(action)
        present_state = next_state
        if done:
            cum_reward += reward
            break

In [None]:
env.render()

# 8x8 Frozen Lake

In [None]:
env = gym.make('FrozenLake8x8-v0') # creating a grid word environment
q_learning = TabularQAgent(env.observation_space, env.action_space, n_episodes=100000, 
                           init_std=1, discount=1, n_itr=10000)
q_learning.learn(env)
q_learning.accuracy(env, 100)
