# Frozen Lake: Reinforcement Learning

In [1]:
from ReinforcementLearning import *
%matplotlib notebook

## Deterministic case 

### Q-Learning 

In [2]:
# Deterministic variant of the environment: slipping is switched off, so this
# `env` is the one shared by every strategy in the "Deterministic case" section.
env = FrozenLake.make(is_slippery=False)

In [3]:
# Q-learning configuration for the deterministic lake.
# Only `gamma` departs from the library defaults; the two epsilon bounds are
# spelled out for visibility even though they match the defaults.
strategy = QLearning(
    num_of_episodes=500,
    policy=UniformRandomPolicy(env),
    learning_rate=1.0,
    decay_rate=0.01,
    gamma=0.95,        # library default is 1.0
    epsilon_min=0.01,  # equals the default
    epsilon_max=1.0,   # equals the default
)

In [4]:
# Bind the deterministic environment to the Q-learning strategy defined in the
# previous cell; the resulting agent is what gets trained next.
agent = Agent(env, strategy)

In [5]:
# Train the Q-learning agent. The call renders an interactive figure
# (see the Javascript output below).
# NOTE(review): plot_frequency=50 presumably means "refresh the plot every
# 50 episodes" -- confirm against Agent.learn.
agent.learn(plot_frequency=50)

<IPython.core.display.Javascript object>

### Monte Carlo 

In [6]:
# Monte Carlo configuration for the deterministic lake, using the same episode
# budget, learning rate, decay rate and discount as the Q-learning run above.
strategy = MonteCarlo(
    num_of_episodes=500,
    policy=UniformRandomPolicy(env),
    learning_rate=1.0,
    decay_rate=0.01,
    gamma=0.95,
)

# Rebinds `agent`: from here on it refers to the Monte Carlo learner.
agent = Agent(env, strategy)

In [7]:
# Train the Monte Carlo agent on the deterministic environment.
# NOTE(review): plot_frequency=50 presumably controls how often the live plot
# is refreshed (every 50 episodes) -- confirm against Agent.learn.
agent.learn(plot_frequency=50)

<IPython.core.display.Javascript object>

### N-Step Q-Learning 

In [8]:
# N-step Q-learning configuration for the deterministic lake. Apart from the
# extra `Nstep` argument, the settings match the other deterministic runs.
strategy = NStepQLearning(
    num_of_episodes=500,
    policy=UniformRandomPolicy(env),
    learning_rate=1.0,
    Nstep=4,
    decay_rate=0.01,
    gamma=0.95,
)

# Rebinds `agent`: from here on it refers to the N-step learner.
agent = Agent(env, strategy)

In [9]:
# Train the N-step Q-learning agent on the deterministic environment.
# NOTE(review): plot_frequency=50 presumably controls how often the live plot
# is refreshed (every 50 episodes) -- confirm against Agent.learn.
agent.learn(plot_frequency=50)

<IPython.core.display.Javascript object>

### Value Iteration 

In [10]:
# Value-iteration configuration for the deterministic lake. Unlike the three
# strategies above, no `learning_rate` is supplied here.
strategy = ValueIteration(
    num_of_episodes=500,
    policy=UniformRandomPolicy(env),
    decay_rate=0.01,
    gamma=0.95,
)

# Rebinds `agent`: from here on it refers to the value-iteration learner.
agent = Agent(env, strategy)

In [11]:
# Train the value-iteration agent on the deterministic environment.
# NOTE(review): plot_frequency=50 presumably controls how often the live plot
# is refreshed (every 50 episodes) -- confirm against Agent.learn.
agent.learn(plot_frequency=50)

<IPython.core.display.Javascript object>

## Stochastic case 

### Q-Learning 

In [12]:
# Rebind `env` to the slippery (stochastic) variant of the lake. Every strategy
# and agent created after this cell is built against this new environment.
env = FrozenLake.make(is_slippery=True)

In [13]:
# Q-learning configuration for the stochastic lake. Compared with the
# deterministic run, the learning rate is lowered to 0.7 and the discount is
# raised to 1.0.
strategy = QLearning(
    num_of_episodes=500,
    policy=UniformRandomPolicy(env),
    learning_rate=0.7,
    decay_rate=0.01,
    gamma=1.0,
)

# Agent for the slippery environment driven by the strategy above.
agent = Agent(env, strategy)

In [14]:
# Train the Q-learning agent on the slippery environment.
# NOTE(review): plot_frequency=50 presumably controls how often the live plot
# is refreshed (every 50 episodes) -- confirm against Agent.learn.
agent.learn(plot_frequency=50)

<IPython.core.display.Javascript object>

### Monte Carlo 

In [15]:
# Monte Carlo configuration for the stochastic lake: twice the episode budget
# of the deterministic run (1000 vs 500) and a halved learning rate (0.5).
strategy = MonteCarlo(
    num_of_episodes=1000,
    policy=UniformRandomPolicy(env),
    learning_rate=0.5,
    decay_rate=0.01,
    gamma=0.95,
)

# Rebinds `agent`: from here on it refers to the Monte Carlo learner.
agent = Agent(env, strategy)

In [16]:
# Train the Monte Carlo agent on the slippery environment.
# NOTE(review): plot_frequency=50 presumably controls how often the live plot
# is refreshed (every 50 episodes) -- confirm against Agent.learn.
agent.learn(plot_frequency=50)

<IPython.core.display.Javascript object>

### N-Step Q-Learning 

In [17]:
# N-step Q-learning configuration for the stochastic lake.
#
# This cell sits under the "N-Step Q-Learning" heading, so it must build an
# NStepQLearning strategy. It used to instantiate MonteCarlo (a verbatim copy
# of the preceding cell), which meant the section silently re-ran Monte Carlo
# and the N-step method was never evaluated on the slippery environment.
#
# The episode budget, learning rate, decay rate and discount are kept from the
# original cell; `Nstep=4` mirrors the deterministic N-step run earlier in the
# notebook so the two cases are directly comparable.
strategy = NStepQLearning(num_of_episodes=1000,
                          policy=UniformRandomPolicy(env),
                          learning_rate=0.5,
                          Nstep=4,
                          decay_rate=0.01,
                          gamma=0.95)

# Rebinds `agent`: from here on it refers to the N-step learner.
agent = Agent(env, strategy)

In [18]:
# Train the agent built in the previous cell on the slippery environment.
# NOTE(review): plot_frequency=50 presumably controls how often the live plot
# is refreshed (every 50 episodes) -- confirm against Agent.learn.
agent.learn(plot_frequency=50)

<IPython.core.display.Javascript object>

### Value Iteration

In [19]:
# Value-iteration configuration for the stochastic lake. Same settings as the
# deterministic value-iteration run except for the discount, which is 1.0 here.
strategy = ValueIteration(
    num_of_episodes=500,
    policy=UniformRandomPolicy(env),
    decay_rate=0.01,
    gamma=1.0,
)

# Rebinds `agent`; learning is triggered a few cells further down, after the
# initial tables have been inspected.
agent = Agent(env, strategy)

In [20]:
# Dump the strategy's tables before any learning has taken place: first the
# state values arranged as a 4x4 grid, then the state-action table, each
# starting on its own line.
print(np.reshape(strategy.Vs, (4, 4), order="c"), strategy.Qsa, sep="\n")

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [21]:
# Show the policy's action-probability table before learning is run; the
# captured output is one row per state with 0.25 in each of the four columns.
print(strategy.policy.prob)

[[0.25 0.25 0.25 0.25]
 [0.25 0.25 0.25 0.25]
 [0.25 0.25 0.25 0.25]
 [0.25 0.25 0.25 0.25]
 [0.25 0.25 0.25 0.25]
 [0.25 0.25 0.25 0.25]
 [0.25 0.25 0.25 0.25]
 [0.25 0.25 0.25 0.25]
 [0.25 0.25 0.25 0.25]
 [0.25 0.25 0.25 0.25]
 [0.25 0.25 0.25 0.25]
 [0.25 0.25 0.25 0.25]
 [0.25 0.25 0.25 0.25]
 [0.25 0.25 0.25 0.25]
 [0.25 0.25 0.25 0.25]
 [0.25 0.25 0.25 0.25]]


In [22]:
# Train the value-iteration agent on the slippery environment, now that the
# untrained tables have been inspected in the cells above.
# NOTE(review): plot_frequency=50 presumably controls how often the live plot
# is refreshed (every 50 episodes) -- confirm against Agent.learn.
agent.learn(plot_frequency=50)

<IPython.core.display.Javascript object>

In [23]:
# Derive a policy from the learned state values using the strategy's MDP model.
# NOTE(review): the structure of the returned policy is not visible here --
# confirm against mdp.policy_improvement before reusing it elsewhere.
policy = strategy.mdp.policy_improvement(strategy.Vs)

In [24]:
# Draw the slippery lake with the learned state values and the policy derived
# from them in the previous cell.
env.plot(values=strategy.Vs, policy=policy)

<IPython.core.display.Javascript object>

In [25]:
# Diagnostic: report which matplotlib backend is active. With the
# `%matplotlib notebook` magic from the first cell this evaluates to 'nbAgg'.
# NOTE(review): this import sits at the very end of the notebook; if the check
# is worth keeping, consider moving the import up to the first cell.
import matplotlib
matplotlib.get_backend()

'nbAgg'