# Continuous Control

In [None]:
# Optional GPU check, left disabled; remove the leading '#' to run it.
#!nvidia-smi
# Quietly install the package found in the current directory, then PyTorch.
!pip -q install .
!pip -q install torch
# Alternative PyTorch install from the cu124 wheel index, left disabled.
#!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124

In [None]:
from unityagents import UnityEnvironment
import random
import torch
import numpy as np
from collections import deque
import matplotlib.pyplot as plt

from ddpg_agent import Agent

# Switch between the multi-agent build (True) and the single-agent build (False).
multiagent = False

# Pick the executable that matches the chosen mode and launch it.
reacher_binary = (
    '/data/Reacher_Linux_NoVis/Reacher.x86_64'
    if multiagent
    else '/data/Reacher_One_Linux_NoVis/Reacher_One_Linux_NoVis.x86_64'
)
env = UnityEnvironment(file_name=reacher_binary)

# The first brain exposed by the environment is the one used throughout.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Reset once in training mode to obtain an initial snapshot for inspection.
env_info = env.reset(train_mode=True)[brain_name]

# How many agents the environment reports.
num_agents = len(env_info.agents)
print(f'Number of agents: {num_agents}')

# Dimensionality of a single action.
action_size = brain.vector_action_space_size
print(f'Size of each action: {action_size}')

# Observations come back as one row per agent; the row length is the state size.
states = env_info.vector_observations
state_size = states.shape[1]
print(f'There are {states.shape[0]} agents. Each observes a state with length: {state_size}')
print('The state for the first agent looks like:', states[0])

In [None]:
# Number of training episodes; also used as the default `n_episodes` of `ddpg`.
num_episodes = 200

# Build the agent from the dimensions reported by the environment.
agent = Agent(
    state_size=state_size,
    action_size=action_size,
    random_seed=0,
    num_agents=num_agents,
)

### Train the Agent with DDPG

Note that `env.reset` must be called with `train_mode=True` to restart the environment for training.

When finished, you can close the environment.

In [None]:
def ddpg(n_episodes=num_episodes, print_every=100):
    """Train the notebook-level ``agent`` on the Unity ``env`` and return the scores.

    Relies on the globals ``env``, ``brain_name``, ``agent``, ``multiagent`` and
    ``num_agents`` defined in earlier cells.

    Whenever the running average reaches 30.0 and exceeds the best average seen
    so far, the local actor and critic weights are written to
    ``checkpoint_actor.pth`` and ``checkpoint_critic.pth``.

    Args:
        n_episodes: Number of training episodes to run.
        print_every: Length of the sliding window over which the running
            average score is computed. Progress is printed after every episode.

    Returns:
        A list with one entry per episode; each entry is a NumPy array of
        length ``num_agents`` holding the reward accumulated during that episode.
    """
    scores_window = deque(maxlen=print_every)  # most recent episode scores
    scores = []
    max_score = 0
    for i_episode in range(1, n_episodes+1):
        env_info = env.reset(train_mode=True)[brain_name]            # reset the environment
        # In single-agent mode only the first (and only) row is passed around.
        states = env_info.vector_observations if multiagent else env_info.vector_observations[0]
        agent.reset()
        score = np.zeros(num_agents)

        while True:
            actions = agent.act(states)
            env_info = env.step(actions)[brain_name]               # send the action to the environment
            # get the next state
            next_states = env_info.vector_observations if multiagent else env_info.vector_observations[0]
            rewards = env_info.rewards if multiagent else env_info.rewards[0] # get the reward
            dones = env_info.local_done if multiagent else env_info.local_done[0] # see if episode has finished

            agent.step(states, actions, rewards, next_states, dones)

            states = next_states
            score += rewards
            # Stop as soon as any agent reports that its episode is over.
            if np.any(dones):
                break

        scores_window.append(score)
        scores.append(score)
        avgscore_ep = np.mean(score)
        # Fixed: the original read the undefined name `scores_deque` here.
        avgscore_deque = np.mean(scores_window)
        # Fixed: report the requested episode count, not the global default.
        print(f"Episode: {i_episode}/{n_episodes}\tScore: {avgscore_ep:.2f}\tAverage Score: {avgscore_deque:.2f}")
        if avgscore_deque>=30.0:
            print(f"***Solved! Episodes taken: {i_episode:d} Average Score: {avgscore_deque:.3f}***")
            # Only overwrite the checkpoints when the running average improves.
            if avgscore_deque > max_score:
                max_score = avgscore_deque
                torch.save(agent.actor_local.state_dict(), 'checkpoint_actor.pth')
                torch.save(agent.critic_local.state_dict(), 'checkpoint_critic.pth')

    return scores

# Run training with the default arguments and keep the per-episode scores for plotting.
scores = ddpg()

In [None]:
# One x-position per finished episode, counted from 1.
episode_numbers = np.arange(1, len(scores) + 1)

# Draw the scores on a single set of axes using the explicit figure/axes API.
fig, ax = plt.subplots()
ax.plot(episode_numbers, scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()