## LunarLanderContinuous-v2
[Source notebook on GitHub](https://github.com/Rafael1s/Deep-Reinforcement-Learning-Udacity/blob/master/LunarLanderContinuous-v2-DDPG/LunarLanderContinuous-v2-DDPG_2560epis.ipynb)


In [None]:
import gym
import random
import torch
import numpy as np

import time
from ddpg_agent_2 import Agent, ReplayBuffer, device

from collections import deque

# NOTE(review): not referenced anywhere in the visible cells -- confirm it is still needed.
start_timestep=1e4

from  collections  import deque

In [None]:
env = gym.make('LunarLanderContinuous-v2')
seed = 0
env.seed(seed)
torch.manual_seed(seed)
np.random.seed(seed)

state = env.reset()
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0] 
max_action = float(env.action_space.high[0])
threshold = env.spec.reward_threshold
print('threshold: ', threshold)

print('Size of each action:', action_dim)
# examine the state space 
print('Each observes a state with length: {}', state_dim)
print('The state for the first agent looks like:', state[0])

import matplotlib.pyplot as plt
%matplotlib inline
agent = Agent(state_size=state_dim, action_size=action_dim, random_seed=8)

In [None]:
def ddpg(n_episodes=20000, print_every=10):
    """Train the module-level ``agent`` on the module-level ``env``.

    Each episode runs until the environment reports ``done``. After every
    environment step the transition is handed to ``agent.step``. Training
    stops early once 100 episodes have been recorded and their mean score
    reaches the module-level ``threshold``.

    Args:
        n_episodes: Maximum number of episodes to run.
        print_every: A progress line is printed every ``print_every``
            episodes (and on the episode that solves the environment).

    Returns:
        A ``(scores_array, avg_scores_array)`` tuple: the total reward of
        each episode, and the mean over (up to) the last 100 episodes as
        measured after each episode.
    """
    scores_deque = deque(maxlen=100)   # sliding window used for the average
    scores_array = []
    avg_scores_array = []

    time_start = time.time()

    for i_episode in range(1, n_episodes+1):
        state = env.reset()
        agent.reset()

        timestep = 0
        total_reward = 0

        while True:
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            total_reward += reward                          # full episode reward

            # NOTE(review): an earlier revision computed a time-limit-aware
            # flag (0 when the step cap is hit) but never used it; the raw
            # `done` is what has always been passed to the agent. Confirm
            # whether agent.step should treat time-limit truncation as terminal.
            agent.step(state, action, reward, next_state, done, timestep)
            state = next_state                              # roll over to next time step
            timestep += 1

            if done:                                        # episode finished
                break

        scores_deque.append(total_reward)
        scores_array.append(total_reward)

        avg_score = np.mean(scores_deque)
        avg_scores_array.append(avg_score)

        # Only a full 100-episode window counts towards "solved".
        solved = len(scores_deque) == 100 and avg_score >= threshold

        if i_episode % print_every == 0 or solved:
            s = (int)(time.time() - time_start)
            print('Episode {} Score: {:.2f} Average Score: {:.2f}, Time: {:02}:{:02}:{:02} ***    '\
                  .format(i_episode, total_reward, avg_score, s//3600, s%3600//60, s%60))

        if solved:
            print('Environment solved !   ')
            break

    return scores_array, avg_scores_array

# Run training with the default arguments; both score histories are plotted in the next cell.
scores, avg_scores = ddpg()

In [None]:
# Draw the per-episode score and its running average on a single axes.
fig = plt.figure()
ax = fig.add_subplot(111)
for series, label in ((scores, "Score"), (avg_scores, "Avg on 100 episodes")):
    # The x axis is the 1-based episode number.
    ax.plot(np.arange(1, len(series)+1), series, label=label)
ax.legend(bbox_to_anchor=(1.05, 1))   # anchor the legend just outside the plot area
ax.set_ylabel('Score')
ax.set_xlabel('Episodes #')
plt.show()

In [None]:
def save(agent, filename, directory):
    """Write the state dicts of the agent's four networks to ``directory``.

    One file per network is created, named ``<filename>_<network>.pth`` for
    ``actor_local``, ``actor_target``, ``critic_local`` and ``critic_target``.

    Args:
        agent: Object exposing the four networks above as attributes, each
            providing ``state_dict()``.
        filename: Common prefix for the checkpoint files.
        directory: Destination directory. It is created (including parents)
            when missing; an empty string means the current directory.
    """
    import os

    # Without this, torch.save raises when the directory does not exist yet,
    # which would lose the weights at the very end of a training run.
    if directory:
        os.makedirs(directory, exist_ok=True)

    networks = (
        ('actor_local', agent.actor_local),
        ('actor_target', agent.actor_target),
        ('critic_local', agent.critic_local),
        ('critic_target', agent.critic_target),
    )
    for suffix, network in networks:
        path = os.path.join(directory, '{}_{}.pth'.format(filename, suffix))
        torch.save(network.state_dict(), path)


# Write the state dicts of the agent's four networks under dir_chk_2/.
save(agent, 'LunarLanderContinuous-v2', 'dir_chk_2')