## MuJoCo: training a DDPG agent on InvertedDoublePendulum-v2

In [None]:
import random
import torch
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline
import gym
import numpy as np
import ddpg_agent
from ddpg_agent import Agent

In [None]:
# Create the environment and perform one seeded reset.
envName = 'InvertedDoublePendulum-v2'
env = gym.make(envName)
env.reset(seed=0)

# Take the first entry of each space's shape as the state/action dimension.
stateDim, actionDim = (
    space.shape[0] for space in (env.observation_space, env.action_space)
)

print('The dimension of the state space is: ', stateDim)
print('The dimension of the action space is: ', actionDim)

In [None]:
# Instantiate the DDPG agent, sized from the environment's state/action dimensions.
agents = Agent(
    state_size=stateDim,
    action_size=actionDim,
    random_seed=0,
    num_agents=1,
)

In [None]:
# Training schedule: number of episodes, step cap per episode, and reporting interval.
n_episodes, max_t, print_every = 20000, 1000, 100

# Per-episode scores: the full history plus a rolling window of the last `print_every`.
scores = []
scores_deque = deque(maxlen=print_every)

# A checkpoint is written only when the rolling mean exceeds this value;
# the training loop raises it each time a checkpoint is saved.
last_mean_score = 30

# Points of the learning curve: rolling means and the episode numbers they belong to.
plot_score, mark = [], []

In [None]:
for i_episode in range(1, n_episodes+1):
    # Start a new episode: fresh observation, agent reset (noise reset, per the original note).
    states = env.reset()
    agents.reset()
    score = 0

    for t in range(max_t):
        # The environment is stepped with the raw action squeezed along axis 0;
        # the agent's own step() receives the unsqueezed raw action.
        actionRaw = agents.act(states)
        actions = np.squeeze(actionRaw, axis=0)
        next_states, rewards, dones, _ = env.step(actions)

        agents.step(states, actionRaw, rewards, next_states, dones)

        score += rewards
        states = next_states
        if np.any(dones):
            break

    # Record the episode score in both the full history and the rolling window.
    episode_score = np.mean(score)
    scores_deque.append(episode_score)
    scores.append(episode_score)

    # In-place progress line, made permanent (and logged for plotting) every `print_every` episodes.
    rolling_mean = np.mean(scores_deque)
    print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, rolling_mean), end="")
    if i_episode % print_every == 0:
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, rolling_mean))
        plot_score.append(rolling_mean)
        mark.append(i_episode)

    # Checkpointing only starts once the rolling window is full.
    if len(scores_deque) != print_every:
        continue
    mean_score = rolling_mean
    if mean_score > last_mean_score:
        # New best rolling mean: save both networks and raise the bar.
        torch.save(agents.actor_local.state_dict(), 'checkpoint_actor.pth')
        torch.save(agents.critic_local.state_dict(), 'checkpoint_critic.pth')
        last_mean_score = mean_score



In [None]:
# Draw the rolling-average score against episode number and save it as a PNG.
fig, ax = plt.subplots()
ax.plot(mark, plot_score, linewidth=2)
ax.set_ylabel('Score', fontsize=16)
ax.set_xlabel('Episode #', fontsize=16)
fig.savefig('DDPG.png', dpi=300)

###  Replay the trained agent
Using the trained actor and critic networks (loaded from the saved checkpoints), we can replay the agent.

In [1]:
import random
import torch
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline
import gym
import numpy as np
import ddpg_agent
from ddpg_agent import Agent

Current device is: cuda:0
The GPU model is:  NVIDIA GeForce RTX 3070 Ti


In [2]:
# Recreate the environment for the replay and perform one seeded reset.
envName = 'InvertedDoublePendulum-v2'
env = gym.make(envName)
env.reset(seed=0)

# The first entry of each space's shape gives the state/action dimension.
obsSpace, actSpace = env.observation_space, env.action_space
stateDim, actionDim = obsSpace.shape[0], actSpace.shape[0]

print('The dimension of the state space is: ', stateDim)
print('The dimension of the action space is: ', actionDim)

  and should_run_async(code)


The dimension of the state space is:  11
The dimension of the action space is:  1


In [3]:
# Build an agent with the same sizes as the environment; its weights are loaded afterwards.
agents = Agent(
    state_size=stateDim,
    action_size=actionDim,
    random_seed=0,
    num_agents=1,
)

In [4]:
# Load the trained weights from file.
# map_location='cpu' lets a checkpoint saved from GPU tensors be read on a machine
# without CUDA; load_state_dict then copies the values into the networks' existing
# parameters, so the networks stay on whatever device they were created on.
agents.actor_local.load_state_dict(torch.load('checkpoint_actor.pth', map_location='cpu'))
agents.critic_local.load_state_dict(torch.load('checkpoint_critic.pth', map_location='cpu'))

try:
    # Replay up to 100 episodes of at most 200 rendered steps each.
    for i in range(100):
        states = env.reset()
        for j in range(200):
            actionRaw = agents.act(states)
            # The environment is stepped with the raw action squeezed along axis 0.
            actions = np.squeeze(actionRaw, axis=0)
            env.render()
            states, rewards, dones, _ = env.step(actions)
            if np.any(dones):
                break
finally:
    # Always release the render window, even if the replay raises or is interrupted.
    env.close()



Creating window glfw


In [None]:
import numpy as np
import random
# Scratch cell: two ways of drawing five random numbers.
x = 5
# Five draws from Python's random.random(), collected into a float array.
a = np.fromiter((random.random() for _ in range(5)), dtype=float)
# Five draws from NumPy's normal sampler (mean 0, standard deviation 1).
b = np.random.normal(loc=0, scale=1, size=5)

In [None]:
# Show the five values drawn with random.random().
print(a)

In [None]:
# Show the five values drawn with np.random.normal(0, 1, 5).
print(b)