# RL example for Regularity Normalization (RN)
---
In this notebook, we demonstrate the empirical performance of a DQN agent with and without RN on OpenAI Gym's LunarLander-v2 environment.

### 1. Import the Necessary Packages

In [1]:
import gym
import random
import torch
import numpy as np
import math 
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline

### 2. Instantiate the Environment and Agent

Initialize the environment

In [2]:
# Create the LunarLander-v2 environment; later cells (the untrained rollout and dqn())
# read this module-level `env`.
env = gym.make('LunarLander-v2')
# Seed the environment with a fixed value.
env.seed(0)
# Report the observation-space shape and the number of available actions.
print('State shape: ', env.observation_space.shape)
print('Number of actions: ', env.action_space.n)

State shape:  (8,)
Number of actions:  4


Here we specify the agent to use:

In [3]:
from dqn_agent import Agent

# Agent variant used only for this preview rollout; dqn() builds its own agents.
agent = Agent(state_size=8, action_size=4, seed=0,qnet='DQN-RNLN')

# watch an untrained agent
# try/finally guarantees env.close() runs even if act/render/step raises,
# so whatever env.render() opened is not left behind after an error.
try:
    state = env.reset()
    for j in range(200):  # cap the preview at 200 steps
        action = agent.act(state)
        env.render()
        state, reward, done, _ = env.step(action)
        if done:
            break
finally:
    env.close()

### 3. Train the Agent with DQN variants

Train the agents from scratch. 

In [4]:
def dqn(agentName, n_episodes=1000, max_t=1000,eps_start=1.0, eps_end=0.01, eps_decay=0.95, runs=10,
        environment=None, agent_factory=None):
    """Deep Q-Learning, repeated over several independent runs.

    For every run a fresh agent is built (seeded with the run index) and trained
    for `n_episodes` episodes with epsilon-greedy action selection. Epsilon is
    reset to `eps_start` at the beginning of each run.

    Params
    ======
        agentName (str): forwarded to the agent constructor as `qnet`
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        eps_start (float): starting value of epsilon, for epsilon-greedy action selection
        eps_end (float): minimum value of epsilon
        eps_decay (float): multiplicative factor (per episode) for decreasing epsilon
        runs (int): number of independent runs
        environment: object exposing `reset()` and `step(action)`; defaults to the
            notebook-level `env`
        agent_factory: callable invoked as
            `agent_factory(state_size=8, action_size=4, seed=run, qnet=agentName)`;
            defaults to the notebook-level `Agent`

    Returns
    =======
        scores_runs (np.ndarray): shape (runs, n_episodes), total reward of every episode
        solved (np.ndarray): one entry per run that reached the solve threshold, holding
            `i_episode - window_size` for the first episode at which the mean over the
            last `window_size` episodes was >= 200; runs that never got there add nothing
    """
    # Fall back to the notebook-level `env` / `Agent` when nothing is injected.
    if environment is None:
        environment = env
    if agent_factory is None:
        agent_factory = Agent

    window_size = 10          # number of most recent episodes averaged for reporting
    solved_threshold = 200.0  # windowed mean score at which a run counts as solved

    # np.zeros instead of 0*np.ndarray(...): multiplying uninitialised memory by zero
    # keeps any NaN/inf garbage that happens to be in the buffer.
    scores_runs = np.zeros((runs, n_episodes)) # containing scores for each runs
    solved = []
    for run in range(runs):                       # plain ints, so the seed is a Python int
        eps = eps_start                           # restart exploration for every run
        scores_window = deque(maxlen=window_size) # last n scores
        agent = agent_factory(state_size=8, action_size=4, seed=run, qnet=agentName)
        firstPast = True                          # True until this run first reaches the threshold
        scores = []                               # list containing scores from each episode
        for i_episode in range(1, n_episodes+1):
            state = environment.reset()
            score = 0
            for t in range(max_t):
                action = agent.act(state, eps)
                next_state, reward, done, _ = environment.step(action)
                agent.step(state, action, reward, next_state, done)
                state = next_state
                score += reward
                if done:
                    break
            scores_window.append(score)       # save most recent score
            scores.append(score)              # save most recent score
            eps = max(eps_end, eps_decay*eps) # decrease epsilon
            window_mean = np.mean(scores_window)
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, window_mean), end="")
            if i_episode % 100 == 0:
                print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, window_mean))
            # Only judge a full window; otherwise i_episode - window_size could be negative.
            window_full = len(scores_window) == window_size
            if firstPast and window_full and window_mean >= solved_threshold:
                firstPast = False
                solved.append(i_episode-window_size)
                print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(i_episode-window_size, window_mean))
        scores_runs[run,:]=scores
    return scores_runs,np.array(solved)


In [9]:
# Experiment settings shared by the training and plotting cells.
n = 5    # runs per agent variant (passed to dqn() as `runs`)
m = 1000  # episodes per run (passed to dqn() as `n_episodes`)
alpha = 0.1  # opacity of the shaded bands drawn with fill_between


In [13]:
def run_variant(variant_name):
    """Print a banner and the variant name, then train that variant with dqn().

    Uses the notebook-level `m` (episodes) and `n` (runs) and returns dqn()'s
    `(scores, solved)` pair unchanged.
    """
    print('--------------------------')
    print(variant_name)
    return dqn(variant_name, n_episodes=m, runs=n)


# Results can be persisted with np.save, e.g. np.save('./scores/dqn.npy', scores1)
# and np.save('./scores/dqn_solved.npy', solved1).
scores1, solved1 = run_variant('DQN')
scores2, solved2 = run_variant('DQN-LN')
scores3, solved3 = run_variant('DQN-RN')
scores4, solved4 = run_variant('DQN-RLN')
scores5, solved5 = run_variant('DQN-RNLN')


--------------------------
DQN
--------------------------
DQN-LN
--------------------------
DQN-RN
--------------------------
DQN-RLN
--------------------------
DQN-RNLN


In [None]:
# plot the scores
# One entry per agent variant: (legend label, per-run score matrix, solved episodes, colour).
variants = [
    ('DQN', scores1, solved1, 'black'),
    ('DQN+LN', scores2, solved2, 'blue'),
    ('DQN+RN', scores3, solved3, 'red'),
    ('DQN+RLN', scores4, solved4, 'green'),
    ('DQN+RN+LN', scores5, solved5, 'orange'),
]


def standard_error(values, axis=None):
    """Standard error of the mean: np.std divided by sqrt of the samples actually present.

    Returns NaN for empty input instead of triggering NumPy's empty-slice warning.
    """
    values = np.asarray(values, dtype=float)
    count = values.size if axis is None else values.shape[axis]
    if count == 0:
        return float('nan')
    return np.std(values, axis=axis) / math.sqrt(count)


fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)

for label, variant_scores, _, color in variants:
    # Episodes are numbered from 1 during training, so the x-axis is 1-based as well.
    episodes = np.arange(1, variant_scores.shape[1] + 1)
    mean_curve = np.mean(variant_scores, 0)
    band = standard_error(variant_scores, 0)  # spread across runs, per episode
    ax.plot(episodes, mean_curve, color=color, label=label)
    ax.fill_between(episodes, mean_curve - band, mean_curve + band, alpha=alpha, color=color)

ax.set_title('LunarLander-v2: mean score per episode (shaded: standard error over runs)')
ax.set_ylabel('Score')
ax.set_xlabel('Episode')
ax.legend()

fig.savefig('scores.png')

# Score of the final episode, averaged over runs.
for label, variant_scores, _, _ in variants:
    final_scores = variant_scores[:, -1]
    print(label + ' scores ', np.mean(final_scores), ' +/- ', standard_error(final_scores))

# Episodes needed to solve. Only runs that reached the threshold contribute an entry,
# so the standard error uses len(solved) rather than the total number of runs.
for label, variant_scores, variant_solved, _ in variants:
    total_runs = variant_scores.shape[0]
    if len(variant_solved) == 0:
        print(label + ' solves in ', 'n/a (0 of {} runs solved)'.format(total_runs))
    else:
        print(label + ' solves in ', np.mean(variant_solved), ' +/- ', standard_error(variant_solved),
              ' ({} of {} runs solved)'.format(len(variant_solved), total_runs))