In [1]:
import math, random

import gym
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd 
import torch.nn.functional as F

from common.layers import NoisyLinear
from common.replay_buffer import ReplayBuffer
import gym_Aircraft

from itertools import count
import time
import itertools
import matplotlib
import matplotlib.pyplot as plt

In [2]:
# True when a CUDA device is visible to PyTorch; read by the helper below.
USE_CUDA = torch.cuda.is_available()


def Variable(*args, **kwargs):
    """Build an ``autograd.Variable`` and move it to the GPU when CUDA is on.

    All positional and keyword arguments are forwarded unchanged to
    ``autograd.Variable``.
    """
    wrapped = autograd.Variable(*args, **kwargs)
    if USE_CUDA:
        return wrapped.cuda()
    return wrapped

In [3]:
# Gym id of the environment to train on.
# NOTE(review): presumably registered as a side effect of `import gym_Aircraft` — confirm.
env_id = "acav-v0"
# Shared environment instance used by the model-construction and training cells.
env = gym.make(env_id)



In [4]:
class RainbowDQN(nn.Module):
    """Dueling, distributional Q-network with NoisyLinear heads.

    Two plain linear layers feed a value stream and an advantage stream.
    The streams are combined as ``value + advantage - mean(advantage)`` and
    soft-maxed over the atom axis, so the network outputs one categorical
    distribution over ``num_atoms`` support points per action.

    Args:
        num_inputs: size of the flat observation vector.
        num_actions: number of discrete actions.
        num_atoms: number of support points of each return distribution.
        Vmin: lowest support value.
        Vmax: highest support value.
    """

    def __init__(self, num_inputs, num_actions, num_atoms, Vmin, Vmax):
        super(RainbowDQN, self).__init__()

        self.num_inputs   = num_inputs
        self.num_actions  = num_actions
        self.num_atoms    = num_atoms
        self.Vmin         = Vmin
        self.Vmax         = Vmax

        # Shared feature extractor.
        self.linear1 = nn.Linear(num_inputs, 32)
        self.linear2 = nn.Linear(32, 64)

        # Value stream: one set of atom logits per state.
        self.noisy_value1 = NoisyLinear(64, 64, use_cuda=USE_CUDA)
        self.noisy_value2 = NoisyLinear(64, self.num_atoms, use_cuda=USE_CUDA)

        # Advantage stream: one set of atom logits per (state, action).
        self.noisy_advantage1 = NoisyLinear(64, 64, use_cuda=USE_CUDA)
        self.noisy_advantage2 = NoisyLinear(64, self.num_atoms * self.num_actions, use_cuda=USE_CUDA)

    def forward(self, x):
        """Return per-action atom probabilities.

        Args:
            x: batch of observations; the first dimension is the batch.

        Returns:
            Tensor of shape ``(batch, num_actions, num_atoms)`` whose last
            axis sums to one.
        """
        batch_size = x.size(0)

        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))

        value = F.relu(self.noisy_value1(x))
        value = self.noisy_value2(value)

        advantage = F.relu(self.noisy_advantage1(x))
        advantage = self.noisy_advantage2(advantage)

        value     = value.view(batch_size, 1, self.num_atoms)
        advantage = advantage.view(batch_size, self.num_actions, self.num_atoms)

        # Dueling combination; the mean is taken over the action axis.
        x = value + advantage - advantage.mean(1, keepdim=True)
        # Normalise over atoms. `dim` is given explicitly because the implicit
        # choice is deprecated (for this 2-D view it was already dim 1).
        x = F.softmax(x.view(-1, self.num_atoms), dim=1).view(-1, self.num_actions, self.num_atoms)

        return x

    def reset_noise(self):
        """Call ``reset_noise()`` on each of the four NoisyLinear layers."""
        self.noisy_value1.reset_noise()
        self.noisy_value2.reset_noise()
        self.noisy_advantage1.reset_noise()
        self.noisy_advantage2.reset_noise()

    def act(self, state):
        """Pick the action with the highest expected return for one state.

        Args:
            state: a single observation (anything ``torch.FloatTensor`` accepts).

        Returns:
            The index of the greedy action, as a NumPy integer scalar.
        """
        # `volatile=True` no longer has any effect; no_grad() is what actually
        # prevents an autograd graph from being recorded during action selection.
        with torch.no_grad():
            state = Variable(torch.FloatTensor(state).unsqueeze(0))
            dist = self.forward(state).cpu()
        # Expected value of each action = sum over atoms of probability * support.
        dist = dist * torch.linspace(self.Vmin, self.Vmax, self.num_atoms)
        action = dist.sum(2).max(1)[1].numpy()[0]
        return action

In [5]:
# Categorical support: `num_atoms` points spanning [Vmin, Vmax].
num_atoms = 51
Vmin, Vmax = -10, 10

# Online and target networks share one architecture; the online network is
# constructed first, then the target.
current_model, target_model = (
    RainbowDQN(env.observation_space.shape[0], env.action_space.n, num_atoms, Vmin, Vmax)
    for _ in range(2)
)

if USE_CUDA:
    current_model, target_model = current_model.cuda(), target_model.cuda()

# Only the online network is optimised.
optimizer = optim.Adam(current_model.parameters(), lr=0.001)

# Replay memory constructed with capacity argument 10000.
replay_buffer = ReplayBuffer(10000)

In [6]:
def update_target(current_model, target_model):
    """Overwrite ``target_model``'s state with a copy of ``current_model``'s.

    Args:
        current_model: module whose ``state_dict()`` is read.
        target_model: module that receives it through ``load_state_dict``.
    """
    online_state = current_model.state_dict()
    target_model.load_state_dict(online_state)
    
# Start training with the target network holding the same weights as the online network.
update_target(current_model, target_model)

In [7]:
def projection_distribution(next_state, rewards, dones, gamma=0.99):
    """Project the target network's return distribution onto the fixed support.

    For every transition the greedy next action (by expected value under
    ``target_model``) is chosen, its atom probabilities are shifted by
    ``reward + (1 - done) * gamma * z`` and the shifted mass is split between
    the two neighbouring atoms of the fixed support.

    Reads the module-level ``target_model``, ``Vmin``, ``Vmax`` and
    ``num_atoms``.

    Args:
        next_state: batch of next observations fed to ``target_model``.
        rewards: 1-D CPU float tensor of rewards, one per transition.
        dones: 1-D CPU float tensor, 1.0 where the episode ended.
        gamma: discount factor (defaults to the previously hard-coded 0.99).

    Returns:
        CPU tensor of shape ``(batch, num_atoms)``; each row is non-negative
        and sums to one.
    """
    delta_z = float(Vmax - Vmin) / (num_atoms - 1)
    support = torch.linspace(Vmin, Vmax, num_atoms)

    # No graph is needed for the bootstrap target.
    with torch.no_grad():
        next_probs = target_model(next_state).detach().cpu()

    # Greedy action by expected value. The *probabilities* of that action are
    # gathered, not the support-weighted values used to rank the actions.
    next_action = (next_probs * support).sum(2).max(1)[1]
    next_action = next_action.view(-1, 1, 1).expand(next_probs.size(0), 1, next_probs.size(2))
    next_dist = next_probs.gather(1, next_action).squeeze(1)

    rewards = rewards.unsqueeze(1).expand_as(next_dist)
    dones   = dones.unsqueeze(1).expand_as(next_dist)
    support = support.unsqueeze(0).expand_as(next_dist)

    Tz = (rewards + (1 - dones) * gamma * support).clamp(min=Vmin, max=Vmax)
    # Fractional atom index; clamped so rounding can never leave [0, num_atoms - 1].
    b = ((Tz - Vmin) / delta_z).clamp(min=0, max=num_atoms - 1)

    lower = b.floor()
    upper_weight = b - lower          # share of the mass that goes to the upper atom
    lower = lower.long()
    # When b is an integer the upper weight is 0, so all mass stays on `lower`
    # (floor/ceil weights would both be 0 there and silently drop the mass).
    upper = (lower + 1).clamp(max=num_atoms - 1)

    proj_dist = torch.zeros_like(next_dist)
    proj_dist.scatter_add_(1, lower, next_dist * (1 - upper_weight))
    proj_dist.scatter_add_(1, upper, next_dist * upper_weight)

    return proj_dist

In [8]:
def compute_td_loss(batch_size):
    """Run one optimisation step on a replay mini-batch and return the loss.

    Samples ``batch_size`` transitions from the module-level ``replay_buffer``,
    builds the projected target distribution, and minimises the cross-entropy
    between it and the online network's distribution for the taken actions.
    Afterwards ``reset_noise()`` is called on both networks.

    Args:
        batch_size: number of transitions to sample.

    Returns:
        The scalar loss tensor computed before the optimiser step.
    """
    state, action, reward, next_state, done = replay_buffer.sample(batch_size)

    state      = Variable(torch.FloatTensor(np.float32(state)))
    next_state = Variable(torch.FloatTensor(np.float32(next_state)))
    action     = Variable(torch.LongTensor(action))
    reward     = torch.FloatTensor(reward)
    done       = torch.FloatTensor(np.float32(done))

    # The target is a constant for this step. `volatile=True` no longer has
    # any effect, so gradient tracking is disabled explicitly instead.
    with torch.no_grad():
        proj_dist = projection_distribution(next_state, reward, done)

    dist = current_model(state)
    # Select the distribution of the action that was actually taken.
    action = action.unsqueeze(1).unsqueeze(1).expand(batch_size, 1, num_atoms)
    dist = dist.gather(1, action).squeeze(1)
    # Keep log() finite without rewriting the forward values behind autograd's
    # back (the in-place `.data.clamp_(0.01, 0.99)` also biased the loss).
    log_dist = dist.clamp(min=1e-8).log()
    loss = -(Variable(proj_dist) * log_dist).sum(1)
    loss = loss.mean()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    current_model.reset_noise()
    target_model.reset_noise()

    return loss

In [None]:
num_episodes = 50000
batch_size = 32
gamma=0.99            # not read anywhere in this cell
TARGET_UPDATE=10      # copy online -> target weights every TARGET_UPDATE episodes

losses = []           # loss value of every optimisation step
total_res=[]          # one (steps, columns) array of logged env data per episode
reward_list=[]        # total reward of every episode
for i_episode in range(num_episodes):
    total_reward=0

    # Initialise the environment and the state.
    # Rows of logged data for this episode; stacked once when the episode ends
    # instead of re-allocating the array with np.vstack on every step.
    step_rows = []
    state = env.reset()

    for t in count():
        # Select and perform an action.
        action = current_model.act(state)
        next_state, reward, done, res = env.step(action)

        # Store the transition in replay memory.
        replay_buffer.push(state, action, reward, next_state, done)

        # Move to the next state.
        state = next_state

        # Data save: flatten the step info into a single row ordered as
        # cmd, r, elev, azim, Pm..., Pt..., h.
        cmd_list,r_list,elev_list,azim_list,Pm_list,Pt_list,h_list=res
        Pm_list=Pm_list.tolist()
        Pt_list=Pt_list.tolist()
        merged_data=itertools.chain([cmd_list],[r_list],[elev_list],[azim_list],Pm_list,Pt_list,[h_list])
        step_rows.append(np.array(list(merged_data)))

        total_reward+=reward

        if done:
            res_list=np.vstack(step_rows).astype(np.float64, copy=False)

            total_res.append(res_list)
            reward_list.append(total_reward)

            now = time.localtime()
            print ("%04d/%02d/%02d %02d:%02d:%02d" % (now.tm_year, now.tm_mon, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec))
            print("episode : {} | final step : {} | total reward : {}".format(i_episode, t, total_reward))
            break

        # Train only once the buffer holds more than one batch of transitions.
        if len(replay_buffer) > batch_size:
            loss = compute_td_loss(batch_size)
            losses.append(loss.item())

    # Update the target network: copy all weights and biases.
    if i_episode % TARGET_UPDATE == 0:
        update_target(current_model, target_model)

print('Complete')
env.close()

  


2020/10/05 11:47:07
episode : 0 | final step : 97 | total reward : 15.0
2020/10/05 11:47:08
episode : 1 | final step : 98 | total reward : 3.0
2020/10/05 11:47:09
episode : 2 | final step : 99 | total reward : 2.0
2020/10/05 11:47:10
episode : 3 | final step : 97 | total reward : 5.0
2020/10/05 11:47:11
episode : 4 | final step : 98 | total reward : 3.0
2020/10/05 11:47:12
episode : 5 | final step : 100 | total reward : 1.0
2020/10/05 11:47:13
episode : 6 | final step : 99 | total reward : 2.0
2020/10/05 11:47:14
episode : 7 | final step : 98 | total reward : 3.0
2020/10/05 11:47:15
episode : 8 | final step : 99 | total reward : 2.0
2020/10/05 11:47:16
episode : 9 | final step : 95 | total reward : 6.0
2020/10/05 11:47:17
episode : 10 | final step : 100 | total reward : 1.0
2020/10/05 11:47:18
episode : 11 | final step : 100 | total reward : 1.0
2020/10/05 11:47:19
episode : 12 | final step : 96 | total reward : 5.0
2020/10/05 11:47:20
episode : 13 | final step : 93 | total reward : 8.

2020/10/05 11:49:08
episode : 114 | final step : 98 | total reward : 3.0
2020/10/05 11:49:09
episode : 115 | final step : 99 | total reward : 2.0
2020/10/05 11:49:10
episode : 116 | final step : 99 | total reward : 2.0
2020/10/05 11:49:12
episode : 117 | final step : 100 | total reward : 1.0
2020/10/05 11:49:13
episode : 118 | final step : 99 | total reward : 2.0
2020/10/05 11:49:14
episode : 119 | final step : 97 | total reward : 4.0
2020/10/05 11:49:15
episode : 120 | final step : 100 | total reward : 1.0
2020/10/05 11:49:16
episode : 121 | final step : 97 | total reward : 4.0
2020/10/05 11:49:17
episode : 122 | final step : 99 | total reward : 2.0
2020/10/05 11:49:19
episode : 123 | final step : 100 | total reward : 1.0
2020/10/05 11:49:20
episode : 124 | final step : 99 | total reward : 2.0
2020/10/05 11:49:21
episode : 125 | final step : 100 | total reward : 1.0
2020/10/05 11:49:23
episode : 126 | final step : 99 | total reward : 2.0
2020/10/05 11:49:24
episode : 127 | final step 

2020/10/05 11:51:08
episode : 226 | final step : 97 | total reward : 4.0
2020/10/05 11:51:09
episode : 227 | final step : 97 | total reward : 4.0
2020/10/05 11:51:10
episode : 228 | final step : 97 | total reward : 4.0
2020/10/05 11:51:11
episode : 229 | final step : 99 | total reward : 2.0
2020/10/05 11:51:12
episode : 230 | final step : 100 | total reward : 1.0
2020/10/05 11:51:13
episode : 231 | final step : 97 | total reward : 4.0
2020/10/05 11:51:14
episode : 232 | final step : 99 | total reward : 2.0
2020/10/05 11:51:15
episode : 233 | final step : 95 | total reward : 6.0
2020/10/05 11:51:16
episode : 234 | final step : 94 | total reward : 7.0
2020/10/05 11:51:17
episode : 235 | final step : 100 | total reward : 1.0
2020/10/05 11:51:19
episode : 236 | final step : 99 | total reward : 2.0
2020/10/05 11:51:20
episode : 237 | final step : 100 | total reward : 1.0
2020/10/05 11:51:21
episode : 238 | final step : 98 | total reward : 3.0
2020/10/05 11:51:22
episode : 239 | final step :

2020/10/05 12:00:14
episode : 338 | final step : 99 | total reward : 2.0
2020/10/05 12:00:21
episode : 339 | final step : 99 | total reward : 2.0
2020/10/05 12:00:29
episode : 340 | final step : 99 | total reward : 2.0
2020/10/05 12:00:37
episode : 341 | final step : 99 | total reward : 2.0
2020/10/05 12:00:45
episode : 342 | final step : 99 | total reward : 2.0
2020/10/05 12:00:54
episode : 343 | final step : 100 | total reward : 1.0
2020/10/05 12:01:01
episode : 344 | final step : 100 | total reward : 1.0
2020/10/05 12:01:09
episode : 345 | final step : 100 | total reward : 1.0
2020/10/05 12:01:17
episode : 346 | final step : 96 | total reward : 5.0
2020/10/05 12:01:25
episode : 347 | final step : 100 | total reward : 1.0
2020/10/05 12:01:33
episode : 348 | final step : 99 | total reward : 2.0
2020/10/05 12:01:42
episode : 349 | final step : 98 | total reward : 3.0
2020/10/05 12:01:49
episode : 350 | final step : 97 | total reward : 4.0
2020/10/05 12:01:57
episode : 351 | final step 

2020/10/05 12:14:49
episode : 450 | final step : 99 | total reward : 2.0
2020/10/05 12:14:56
episode : 451 | final step : 97 | total reward : 4.0
2020/10/05 12:15:03
episode : 452 | final step : 99 | total reward : 2.0
2020/10/05 12:15:11
episode : 453 | final step : 99 | total reward : 2.0
2020/10/05 12:15:19
episode : 454 | final step : 98 | total reward : 3.0
2020/10/05 12:15:27
episode : 455 | final step : 100 | total reward : 1.0
2020/10/05 12:15:35
episode : 456 | final step : 99 | total reward : 2.0
2020/10/05 12:15:43
episode : 457 | final step : 98 | total reward : 3.0
2020/10/05 12:15:51
episode : 458 | final step : 97 | total reward : 4.0
2020/10/05 12:15:59
episode : 459 | final step : 94 | total reward : 7.0
2020/10/05 12:16:06
episode : 460 | final step : 97 | total reward : 4.0
2020/10/05 12:16:14
episode : 461 | final step : 99 | total reward : 2.0
2020/10/05 12:16:22
episode : 462 | final step : 100 | total reward : 1.0
2020/10/05 12:16:29
episode : 463 | final step : 

2020/10/05 12:29:25
episode : 562 | final step : 97 | total reward : 4.0
2020/10/05 12:29:32
episode : 563 | final step : 99 | total reward : 2.0
2020/10/05 12:29:40
episode : 564 | final step : 99 | total reward : 2.0
2020/10/05 12:29:48
episode : 565 | final step : 98 | total reward : 3.0
2020/10/05 12:29:55
episode : 566 | final step : 95 | total reward : 6.0
2020/10/05 12:30:03
episode : 567 | final step : 99 | total reward : 2.0
2020/10/05 12:30:11
episode : 568 | final step : 98 | total reward : 3.0
2020/10/05 12:30:19
episode : 569 | final step : 100 | total reward : 1.0
2020/10/05 12:30:27
episode : 570 | final step : 99 | total reward : 2.0
2020/10/05 12:30:35
episode : 571 | final step : 99 | total reward : 2.0
2020/10/05 12:30:42
episode : 572 | final step : 99 | total reward : 2.0
2020/10/05 12:30:51
episode : 573 | final step : 99 | total reward : 2.0
2020/10/05 12:30:59
episode : 574 | final step : 95 | total reward : 6.0
2020/10/05 12:31:06
episode : 575 | final step : 1

2020/10/05 12:43:58
episode : 674 | final step : 98 | total reward : 3.0
2020/10/05 12:44:06
episode : 675 | final step : 99 | total reward : 2.0
2020/10/05 12:44:14
episode : 676 | final step : 100 | total reward : 1.0
2020/10/05 12:44:21
episode : 677 | final step : 99 | total reward : 2.0
2020/10/05 12:44:29
episode : 678 | final step : 100 | total reward : 1.0
2020/10/05 12:44:37
episode : 679 | final step : 100 | total reward : 1.0
2020/10/05 12:44:45
episode : 680 | final step : 99 | total reward : 2.0
2020/10/05 12:44:53
episode : 681 | final step : 98 | total reward : 3.0
2020/10/05 12:45:00
episode : 682 | final step : 92 | total reward : 9.0
2020/10/05 12:45:08
episode : 683 | final step : 97 | total reward : 4.0
2020/10/05 12:45:16
episode : 684 | final step : 99 | total reward : 2.0
2020/10/05 12:45:24
episode : 685 | final step : 95 | total reward : 6.0
2020/10/05 12:45:32
episode : 686 | final step : 95 | total reward : 6.0
2020/10/05 12:45:39
episode : 687 | final step :

2020/10/05 12:58:30
episode : 786 | final step : 92 | total reward : 9.0
2020/10/05 12:58:38
episode : 787 | final step : 98 | total reward : 3.0
2020/10/05 12:58:46
episode : 788 | final step : 99 | total reward : 2.0
2020/10/05 12:58:54
episode : 789 | final step : 97 | total reward : 4.0
2020/10/05 12:59:01
episode : 790 | final step : 99 | total reward : 2.0
2020/10/05 12:59:09
episode : 791 | final step : 98 | total reward : 3.0
2020/10/05 12:59:17
episode : 792 | final step : 99 | total reward : 2.0
2020/10/05 12:59:25
episode : 793 | final step : 100 | total reward : 1.0
2020/10/05 12:59:32
episode : 794 | final step : 94 | total reward : 7.0
2020/10/05 12:59:40
episode : 795 | final step : 97 | total reward : 4.0
2020/10/05 12:59:48
episode : 796 | final step : 100 | total reward : 1.0
2020/10/05 12:59:56
episode : 797 | final step : 100 | total reward : 1.0
2020/10/05 13:00:03
episode : 798 | final step : 96 | total reward : 5.0
2020/10/05 13:00:11
episode : 799 | final step :

2020/10/05 13:13:16
episode : 898 | final step : 100 | total reward : 1.0
2020/10/05 13:13:25
episode : 899 | final step : 100 | total reward : 1.0
2020/10/05 13:13:32
episode : 900 | final step : 92 | total reward : 9.0
2020/10/05 13:13:40
episode : 901 | final step : 99 | total reward : 2.0
2020/10/05 13:13:48
episode : 902 | final step : 91 | total reward : 10.0
2020/10/05 13:13:56
episode : 903 | final step : 99 | total reward : 2.0
2020/10/05 13:14:04
episode : 904 | final step : 100 | total reward : 1.0
2020/10/05 13:14:12
episode : 905 | final step : 95 | total reward : 6.0
2020/10/05 13:14:20
episode : 906 | final step : 100 | total reward : 1.0
2020/10/05 13:14:28
episode : 907 | final step : 100 | total reward : 1.0
2020/10/05 13:14:36
episode : 908 | final step : 97 | total reward : 4.0
2020/10/05 13:14:43
episode : 909 | final step : 99 | total reward : 2.0
2020/10/05 13:14:51
episode : 910 | final step : 99 | total reward : 2.0
2020/10/05 13:14:58
episode : 911 | final ste

2020/10/05 13:27:59
episode : 1010 | final step : 97 | total reward : 4.0
2020/10/05 13:28:07
episode : 1011 | final step : 99 | total reward : 2.0
2020/10/05 13:28:15
episode : 1012 | final step : 100 | total reward : 1.0
2020/10/05 13:28:22
episode : 1013 | final step : 97 | total reward : 4.0
2020/10/05 13:28:30
episode : 1014 | final step : 98 | total reward : 3.0
2020/10/05 13:28:37
episode : 1015 | final step : 92 | total reward : 9.0
2020/10/05 13:28:45
episode : 1016 | final step : 100 | total reward : 1.0
2020/10/05 13:28:53
episode : 1017 | final step : 100 | total reward : 1.0
2020/10/05 13:29:01
episode : 1018 | final step : 97 | total reward : 4.0
2020/10/05 13:29:08
episode : 1019 | final step : 99 | total reward : 2.0
2020/10/05 13:29:16
episode : 1020 | final step : 94 | total reward : 7.0
2020/10/05 13:29:23
episode : 1021 | final step : 96 | total reward : 5.0
2020/10/05 13:29:31
episode : 1022 | final step : 99 | total reward : 2.0
2020/10/05 13:29:39
episode : 1023 

2020/10/05 13:42:22
episode : 1121 | final step : 99 | total reward : 2.0
2020/10/05 13:42:30
episode : 1122 | final step : 96 | total reward : 5.0
2020/10/05 13:42:38
episode : 1123 | final step : 100 | total reward : 1.0
2020/10/05 13:42:45
episode : 1124 | final step : 96 | total reward : 5.0
2020/10/05 13:42:53
episode : 1125 | final step : 99 | total reward : 2.0
2020/10/05 13:43:00
episode : 1126 | final step : 99 | total reward : 2.0
2020/10/05 13:43:08
episode : 1127 | final step : 100 | total reward : 1.0
2020/10/05 13:43:16
episode : 1128 | final step : 98 | total reward : 3.0
2020/10/05 13:43:24
episode : 1129 | final step : 96 | total reward : 5.0
2020/10/05 13:43:32
episode : 1130 | final step : 99 | total reward : 2.0
2020/10/05 13:43:40
episode : 1131 | final step : 100 | total reward : 1.0
2020/10/05 13:43:48
episode : 1132 | final step : 100 | total reward : 1.0
2020/10/05 13:43:56
episode : 1133 | final step : 100 | total reward : 1.0
2020/10/05 13:44:04
episode : 113

2020/10/05 13:56:58
episode : 1232 | final step : 96 | total reward : 5.0
2020/10/05 13:57:06
episode : 1233 | final step : 98 | total reward : 3.0
2020/10/05 13:57:14
episode : 1234 | final step : 96 | total reward : 5.0
2020/10/05 13:57:21
episode : 1235 | final step : 95 | total reward : 6.0
2020/10/05 13:57:29
episode : 1236 | final step : 95 | total reward : 6.0
2020/10/05 13:57:36
episode : 1237 | final step : 99 | total reward : 2.0
2020/10/05 13:57:44
episode : 1238 | final step : 97 | total reward : 4.0
2020/10/05 13:57:51
episode : 1239 | final step : 99 | total reward : 2.0
2020/10/05 13:57:59
episode : 1240 | final step : 96 | total reward : 5.0
2020/10/05 13:58:07
episode : 1241 | final step : 99 | total reward : 2.0
2020/10/05 13:58:15
episode : 1242 | final step : 100 | total reward : 1.0
2020/10/05 13:58:23
episode : 1243 | final step : 93 | total reward : 8.0
2020/10/05 13:58:30
episode : 1244 | final step : 91 | total reward : 10.0
2020/10/05 13:58:38
episode : 1245 |

2020/10/05 14:11:27
episode : 1343 | final step : 99 | total reward : 2.0
2020/10/05 14:11:36
episode : 1344 | final step : 100 | total reward : 1.0
2020/10/05 14:11:43
episode : 1345 | final step : 91 | total reward : 10.0
2020/10/05 14:11:51
episode : 1346 | final step : 100 | total reward : 1.0
2020/10/05 14:11:59
episode : 1347 | final step : 99 | total reward : 2.0
2020/10/05 14:12:07
episode : 1348 | final step : 100 | total reward : 1.0
2020/10/05 14:12:15
episode : 1349 | final step : 99 | total reward : 2.0
2020/10/05 14:12:23
episode : 1350 | final step : 100 | total reward : 1.0
2020/10/05 14:12:31
episode : 1351 | final step : 99 | total reward : 2.0
2020/10/05 14:12:39
episode : 1352 | final step : 96 | total reward : 5.0
2020/10/05 14:12:47
episode : 1353 | final step : 100 | total reward : 1.0
2020/10/05 14:12:55
episode : 1354 | final step : 99 | total reward : 2.0
2020/10/05 14:13:03
episode : 1355 | final step : 98 | total reward : 3.0
2020/10/05 14:13:11
episode : 13

2020/10/05 14:25:59
episode : 1454 | final step : 97 | total reward : 4.0
2020/10/05 14:26:07
episode : 1455 | final step : 99 | total reward : 2.0
2020/10/05 14:26:15
episode : 1456 | final step : 100 | total reward : 1.0
2020/10/05 14:26:23
episode : 1457 | final step : 100 | total reward : 1.0
2020/10/05 14:26:31
episode : 1458 | final step : 100 | total reward : 1.0
2020/10/05 14:26:39
episode : 1459 | final step : 98 | total reward : 3.0
2020/10/05 14:26:46
episode : 1460 | final step : 100 | total reward : 1.0
2020/10/05 14:26:55
episode : 1461 | final step : 100 | total reward : 1.0
2020/10/05 14:27:03
episode : 1462 | final step : 100 | total reward : 1.0
2020/10/05 14:27:11
episode : 1463 | final step : 95 | total reward : 6.0
2020/10/05 14:27:19
episode : 1464 | final step : 99 | total reward : 2.0
2020/10/05 14:27:26
episode : 1465 | final step : 99 | total reward : 2.0
2020/10/05 14:27:34
episode : 1466 | final step : 98 | total reward : 3.0
2020/10/05 14:27:42
episode : 14

2020/10/05 14:40:29
episode : 1565 | final step : 100 | total reward : 1.0
2020/10/05 14:40:36
episode : 1566 | final step : 97 | total reward : 4.0
2020/10/05 14:40:45
episode : 1567 | final step : 99 | total reward : 2.0
2020/10/05 14:40:53
episode : 1568 | final step : 100 | total reward : 1.0
2020/10/05 14:41:00
episode : 1569 | final step : 96 | total reward : 5.0
2020/10/05 14:41:08
episode : 1570 | final step : 96 | total reward : 5.0
2020/10/05 14:41:16
episode : 1571 | final step : 100 | total reward : 1.0
2020/10/05 14:41:24
episode : 1572 | final step : 100 | total reward : 1.0
2020/10/05 14:41:33
episode : 1573 | final step : 96 | total reward : 5.0
2020/10/05 14:41:40
episode : 1574 | final step : 95 | total reward : 6.0
2020/10/05 14:41:47
episode : 1575 | final step : 100 | total reward : 1.0
2020/10/05 14:41:56
episode : 1576 | final step : 100 | total reward : 1.0
2020/10/05 14:42:04
episode : 1577 | final step : 98 | total reward : 3.0
2020/10/05 14:42:12
episode : 15

2020/10/05 14:54:48
episode : 1676 | final step : 95 | total reward : 6.0
2020/10/05 14:54:55
episode : 1677 | final step : 90 | total reward : 11.0
2020/10/05 14:55:03
episode : 1678 | final step : 100 | total reward : 1.0
2020/10/05 14:55:11
episode : 1679 | final step : 99 | total reward : 2.0
2020/10/05 14:55:19
episode : 1680 | final step : 100 | total reward : 1.0
2020/10/05 14:55:27
episode : 1681 | final step : 98 | total reward : 3.0
2020/10/05 14:55:35
episode : 1682 | final step : 99 | total reward : 2.0
2020/10/05 14:55:43
episode : 1683 | final step : 99 | total reward : 2.0
2020/10/05 14:55:51
episode : 1684 | final step : 99 | total reward : 2.0
2020/10/05 14:55:59
episode : 1685 | final step : 100 | total reward : 1.0
2020/10/05 14:56:06
episode : 1686 | final step : 94 | total reward : 7.0
2020/10/05 14:56:14
episode : 1687 | final step : 98 | total reward : 3.0
2020/10/05 14:56:22
episode : 1688 | final step : 100 | total reward : 1.0
2020/10/05 14:56:29
episode : 168

2020/10/05 15:09:10
episode : 1787 | final step : 98 | total reward : 3.0
2020/10/05 15:09:18
episode : 1788 | final step : 97 | total reward : 4.0
2020/10/05 15:09:26
episode : 1789 | final step : 99 | total reward : 2.0
2020/10/05 15:09:34
episode : 1790 | final step : 99 | total reward : 2.0
2020/10/05 15:09:42
episode : 1791 | final step : 99 | total reward : 2.0
2020/10/05 15:09:50
episode : 1792 | final step : 100 | total reward : 1.0
2020/10/05 15:09:57
episode : 1793 | final step : 93 | total reward : 8.0
2020/10/05 15:10:04
episode : 1794 | final step : 91 | total reward : 10.0
2020/10/05 15:10:11
episode : 1795 | final step : 95 | total reward : 6.0
2020/10/05 15:10:19
episode : 1796 | final step : 99 | total reward : 2.0
2020/10/05 15:10:26
episode : 1797 | final step : 93 | total reward : 8.0
2020/10/05 15:10:34
episode : 1798 | final step : 98 | total reward : 3.0
2020/10/05 15:10:42
episode : 1799 | final step : 99 | total reward : 2.0
2020/10/05 15:10:49
episode : 1800 |

2020/10/05 15:23:32
episode : 1898 | final step : 94 | total reward : 7.0
2020/10/05 15:23:40
episode : 1899 | final step : 100 | total reward : 1.0
2020/10/05 15:23:47
episode : 1900 | final step : 91 | total reward : 10.0
2020/10/05 15:23:55
episode : 1901 | final step : 98 | total reward : 3.0
2020/10/05 15:24:03
episode : 1902 | final step : 99 | total reward : 2.0
2020/10/05 15:24:11
episode : 1903 | final step : 98 | total reward : 3.0
2020/10/05 15:24:18
episode : 1904 | final step : 99 | total reward : 2.0
2020/10/05 15:24:26
episode : 1905 | final step : 99 | total reward : 2.0
2020/10/05 15:24:34
episode : 1906 | final step : 100 | total reward : 1.0
2020/10/05 15:24:42
episode : 1907 | final step : 99 | total reward : 2.0
2020/10/05 15:24:49
episode : 1908 | final step : 98 | total reward : 3.0
2020/10/05 15:24:57
episode : 1909 | final step : 100 | total reward : 1.0
2020/10/05 15:25:05
episode : 1910 | final step : 99 | total reward : 2.0
2020/10/05 15:25:13
episode : 1911

2020/10/05 15:39:02
episode : 2009 | final step : 98 | total reward : 3.0
2020/10/05 15:39:11
episode : 2010 | final step : 99 | total reward : 2.0
2020/10/05 15:39:21
episode : 2011 | final step : 98 | total reward : 3.0
2020/10/05 15:39:31
episode : 2012 | final step : 99 | total reward : 2.0
2020/10/05 15:39:40
episode : 2013 | final step : 100 | total reward : 1.0
2020/10/05 15:39:50
episode : 2014 | final step : 98 | total reward : 3.0
2020/10/05 15:39:59
episode : 2015 | final step : 97 | total reward : 4.0
2020/10/05 15:40:09
episode : 2016 | final step : 100 | total reward : 1.0
2020/10/05 15:40:18
episode : 2017 | final step : 99 | total reward : 2.0
2020/10/05 15:40:28
episode : 2018 | final step : 96 | total reward : 5.0
2020/10/05 15:40:39
episode : 2019 | final step : 100 | total reward : 1.0
2020/10/05 15:40:48
episode : 2020 | final step : 99 | total reward : 2.0
2020/10/05 15:40:58
episode : 2021 | final step : 99 | total reward : 2.0
2020/10/05 15:41:08
episode : 2022 

2020/10/05 15:56:59
episode : 2120 | final step : 100 | total reward : 1.0
2020/10/05 15:57:08
episode : 2121 | final step : 95 | total reward : 6.0
2020/10/05 15:57:18
episode : 2122 | final step : 95 | total reward : 6.0
2020/10/05 15:57:27
episode : 2123 | final step : 100 | total reward : 1.0
2020/10/05 15:57:37
episode : 2124 | final step : 98 | total reward : 3.0
2020/10/05 15:57:47
episode : 2125 | final step : 100 | total reward : 1.0
2020/10/05 15:57:56
episode : 2126 | final step : 96 | total reward : 5.0
2020/10/05 15:58:06
episode : 2127 | final step : 100 | total reward : 1.0
2020/10/05 15:58:16
episode : 2128 | final step : 99 | total reward : 2.0
2020/10/05 15:58:26
episode : 2129 | final step : 100 | total reward : 1.0
2020/10/05 15:58:36
episode : 2130 | final step : 98 | total reward : 3.0
2020/10/05 15:58:45
episode : 2131 | final step : 96 | total reward : 5.0
2020/10/05 15:58:54
episode : 2132 | final step : 100 | total reward : 1.0
2020/10/05 15:59:04
episode : 21

2020/10/05 16:14:16
episode : 2231 | final step : 100 | total reward : 1.0
2020/10/05 16:14:26
episode : 2232 | final step : 100 | total reward : 1.0
2020/10/05 16:14:36
episode : 2233 | final step : 99 | total reward : 2.0
2020/10/05 16:14:46
episode : 2234 | final step : 97 | total reward : 4.0
2020/10/05 16:14:56
episode : 2235 | final step : 98 | total reward : 3.0
2020/10/05 16:15:07
episode : 2236 | final step : 99 | total reward : 2.0
2020/10/05 16:15:16
episode : 2237 | final step : 99 | total reward : 2.0
2020/10/05 16:15:25
episode : 2238 | final step : 95 | total reward : 6.0
2020/10/05 16:15:34
episode : 2239 | final step : 100 | total reward : 1.0
2020/10/05 16:15:44
episode : 2240 | final step : 99 | total reward : 2.0
2020/10/05 16:15:53
episode : 2241 | final step : 100 | total reward : 1.0
2020/10/05 16:16:03
episode : 2242 | final step : 100 | total reward : 1.0
2020/10/05 16:16:13
episode : 2243 | final step : 100 | total reward : 1.0
2020/10/05 16:16:22
episode : 22

2020/10/05 16:31:09
episode : 2342 | final step : 93 | total reward : 8.0
2020/10/05 16:31:17
episode : 2343 | final step : 100 | total reward : 1.0
2020/10/05 16:31:25
episode : 2344 | final step : 99 | total reward : 2.0
2020/10/05 16:31:33
episode : 2345 | final step : 99 | total reward : 2.0
2020/10/05 16:31:41
episode : 2346 | final step : 97 | total reward : 4.0
2020/10/05 16:31:49
episode : 2347 | final step : 100 | total reward : 1.0
2020/10/05 16:31:56
episode : 2348 | final step : 95 | total reward : 6.0
2020/10/05 16:32:04
episode : 2349 | final step : 99 | total reward : 2.0
2020/10/05 16:32:12
episode : 2350 | final step : 97 | total reward : 4.0
2020/10/05 16:32:20
episode : 2351 | final step : 100 | total reward : 1.0
2020/10/05 16:32:27
episode : 2352 | final step : 93 | total reward : 8.0
2020/10/05 16:32:35
episode : 2353 | final step : 100 | total reward : 1.0
2020/10/05 16:32:43
episode : 2354 | final step : 100 | total reward : 1.0
2020/10/05 16:32:51
episode : 235

2020/10/05 16:47:17
episode : 2453 | final step : 97 | total reward : 4.0
2020/10/05 16:47:27
episode : 2454 | final step : 98 | total reward : 3.0
2020/10/05 16:47:36
episode : 2455 | final step : 100 | total reward : 1.0
2020/10/05 16:47:45
episode : 2456 | final step : 99 | total reward : 2.0
2020/10/05 16:47:55
episode : 2457 | final step : 100 | total reward : 1.0
2020/10/05 16:48:04
episode : 2458 | final step : 100 | total reward : 1.0
2020/10/05 16:48:12
episode : 2459 | final step : 91 | total reward : 10.0
2020/10/05 16:48:21
episode : 2460 | final step : 98 | total reward : 3.0
2020/10/05 16:48:31
episode : 2461 | final step : 97 | total reward : 4.0
2020/10/05 16:48:40
episode : 2462 | final step : 99 | total reward : 2.0
2020/10/05 16:48:50
episode : 2463 | final step : 98 | total reward : 3.0
2020/10/05 16:48:59
episode : 2464 | final step : 100 | total reward : 1.0
2020/10/05 16:49:08
episode : 2465 | final step : 94 | total reward : 7.0
2020/10/05 16:49:17
episode : 246

2020/10/05 17:04:23
episode : 2564 | final step : 97 | total reward : 4.0
2020/10/05 17:04:32
episode : 2565 | final step : 100 | total reward : 1.0
2020/10/05 17:04:41
episode : 2566 | final step : 95 | total reward : 6.0
2020/10/05 17:04:50
episode : 2567 | final step : 99 | total reward : 2.0
2020/10/05 17:05:00
episode : 2568 | final step : 99 | total reward : 2.0
2020/10/05 17:05:10
episode : 2569 | final step : 98 | total reward : 3.0
2020/10/05 17:05:19
episode : 2570 | final step : 91 | total reward : 10.0
2020/10/05 17:05:28
episode : 2571 | final step : 99 | total reward : 2.0
2020/10/05 17:05:38
episode : 2572 | final step : 100 | total reward : 1.0
2020/10/05 17:05:47
episode : 2573 | final step : 96 | total reward : 5.0
2020/10/05 17:05:57
episode : 2574 | final step : 100 | total reward : 1.0
2020/10/05 17:06:06
episode : 2575 | final step : 98 | total reward : 3.0
2020/10/05 17:06:15
episode : 2576 | final step : 100 | total reward : 1.0
2020/10/05 17:06:24
episode : 257

2020/10/05 17:21:23
episode : 2675 | final step : 96 | total reward : 5.0
2020/10/05 17:21:33
episode : 2676 | final step : 99 | total reward : 2.0
2020/10/05 17:21:42
episode : 2677 | final step : 99 | total reward : 2.0
2020/10/05 17:21:51
episode : 2678 | final step : 94 | total reward : 7.0
2020/10/05 17:22:00
episode : 2679 | final step : 99 | total reward : 2.0
2020/10/05 17:22:10
episode : 2680 | final step : 98 | total reward : 3.0
2020/10/05 17:22:20
episode : 2681 | final step : 97 | total reward : 4.0
2020/10/05 17:22:29
episode : 2682 | final step : 100 | total reward : 1.0
2020/10/05 17:22:38
episode : 2683 | final step : 100 | total reward : 1.0
2020/10/05 17:22:47
episode : 2684 | final step : 94 | total reward : 7.0
2020/10/05 17:22:55
episode : 2685 | final step : 95 | total reward : 6.0
2020/10/05 17:23:04
episode : 2686 | final step : 96 | total reward : 5.0
2020/10/05 17:23:13
episode : 2687 | final step : 99 | total reward : 2.0
2020/10/05 17:23:22
episode : 2688 |

2020/10/05 17:37:42
episode : 2786 | final step : 100 | total reward : 1.0
2020/10/05 17:37:51
episode : 2787 | final step : 96 | total reward : 5.0
2020/10/05 17:37:59
episode : 2788 | final step : 91 | total reward : 10.0
2020/10/05 17:38:07
episode : 2789 | final step : 99 | total reward : 2.0
2020/10/05 17:38:15
episode : 2790 | final step : 100 | total reward : 1.0
2020/10/05 17:38:24
episode : 2791 | final step : 99 | total reward : 2.0
2020/10/05 17:38:33
episode : 2792 | final step : 96 | total reward : 5.0
2020/10/05 17:38:41
episode : 2793 | final step : 99 | total reward : 2.0
2020/10/05 17:38:50
episode : 2794 | final step : 100 | total reward : 1.0
2020/10/05 17:38:59
episode : 2795 | final step : 100 | total reward : 1.0
2020/10/05 17:39:09
episode : 2796 | final step : 100 | total reward : 1.0
2020/10/05 17:39:17
episode : 2797 | final step : 97 | total reward : 4.0
2020/10/05 17:39:26
episode : 2798 | final step : 96 | total reward : 5.0
2020/10/05 17:39:35
episode : 27

In [None]:
# Raw total reward of every episode.
reward_fig, reward_ax = plt.subplots(figsize=(15, 10))
reward_ax.set_xlabel("Episode")
reward_ax.set_ylabel("Total rewards")
reward_ax.plot(reward_list)

In [None]:
def moving_average(data,n):
    """Return the simple moving average of ``data`` over windows of ``n`` samples.

    Element ``i`` of the result is the mean of ``data[i:i+n]``. Only full
    windows are used, so the result has ``len(data) - n + 1`` elements.

    Args:
        data: 1-D sequence of numbers (anything ``np.asarray`` can turn into a
            float array).
        n: Window length in samples; must be a positive integer.

    Returns:
        1-D float ``numpy.ndarray`` of window means. It is empty when ``data``
        holds fewer than ``n`` samples.

    Raises:
        ValueError: If ``n`` is not positive.
    """
    if n <= 0:
        raise ValueError("n must be a positive integer")

    values = np.asarray(data, dtype=float)
    # "+ 1" so that the final full window (ending on the last sample) is kept.
    num_windows = len(values) - n + 1
    if num_windows <= 0:
        # Not enough samples for a single window. This must be checked before
        # np.convolve, whose 'valid' mode would otherwise swap its two inputs.
        return np.zeros(0)

    # Sliding-window sums in one vectorised pass, then divide by the window size.
    return np.convolve(values, np.ones(n), mode="valid") / n

In [None]:
# Smooth the per-episode rewards with a fixed-size moving-average window,
# then plot the smoothed curve.
average_number = 100
filtered_data = moving_average(reward_list, average_number)

plt.figure(figsize=(15, 10))
plt.gca().set(xlabel="Episode", ylabel="Total rewards")
plt.plot(filtered_data)

In [None]:
# Time histories for one recorded episode, drawn as five stacked panels.
# NOTE(review): the meaning of each column is taken from this cell's own axis
# labels; the layout of `total_res` is defined elsewhere — confirm there.
Deg2Rad = np.pi/180
Rad2Deg = 1/Deg2Rad

# Episode to display. `plt_res` is also read by the trajectory plots in the
# next cell, so the name is kept.
plt_res = total_res[99]

# One entry per panel: (column of plt_res, scale factor, legend label, y-axis label).
panels = [
    (0, 1, r'$\dot{h}_{cmd}$', r'$\dot{h}_{cmd}$ ($m/s$)'),
    (10, 1, r'$h$', r'$h$ (m)'),
    (1, 1, r'$r$', r'$r$ (m)'),
    (2, Rad2Deg, 'elevation', 'elevation (deg)'),
    (3, Rad2Deg, 'azimuth', 'azimuth (deg)'),
]

fig, axes = plt.subplots(len(panels), 1, figsize=(15, 9), dpi=100)
for ax, (col, scale, label, ylabel) in zip(axes, panels):
    ax.plot(plt_res[:, col]*scale, label=label)
    ax.set_ylabel(ylabel)
    ax.grid()
    # A legend belongs to a single Axes, so it is requested on every panel.
    ax.legend()

plt.show()

In [None]:
from mpl_toolkits.mplot3d import Axes3D

# Trajectories of both vehicles for the episode held in `plt_res`.
# NOTE(review): columns 4-6 are plotted as the player and 7-9 as the target;
# the third column of each triple is negated and labelled 'Up' below —
# presumably a North-East-Down frame, confirm against the environment.

# 3-D view. The axes are created with add_subplot(projection='3d') because
# plt.gca() no longer accepts keyword arguments in recent Matplotlib releases.
fig3d = plt.figure(figsize=(12,9), dpi=100)
ax3d = fig3d.add_subplot(111, projection='3d')
ax3d.plot(plt_res[:,5], plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
ax3d.plot(plt_res[:,8], plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
ax3d.set_xlabel('East')
ax3d.set_ylabel('North')
ax3d.set_zlabel('Up')
ax3d.set_xlim(-2000,2000)
ax3d.set_ylim(0,4000)
ax3d.legend()
plt.show()

# Top view: East vs. North.
fig_top, ax_top = plt.subplots(figsize=(12,9), dpi=100)
ax_top.plot(plt_res[:,5], plt_res[:,4], label='player', linewidth=3)
ax_top.plot(plt_res[:,8], plt_res[:,7], label='target', linewidth=3)
ax_top.set_xlabel('East')
ax_top.set_ylabel('North')
ax_top.grid()
ax_top.legend()
ax_top.axis('equal')
plt.show()

# Side view: North vs. Up.
fig_side, ax_side = plt.subplots(figsize=(12,9), dpi=100)
ax_side.plot(plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
ax_side.plot(plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
ax_side.set_xlabel('North')
ax_side.set_ylabel('Up')
ax_side.grid()
ax_side.legend()
ax_side.axis('equal')
plt.show()

In [None]:
# Time histories for one recorded episode, drawn as five stacked panels.
# NOTE(review): the meaning of each column is taken from this cell's own axis
# labels; the layout of `total_res` is defined elsewhere — confirm there.
Deg2Rad = np.pi/180
Rad2Deg = 1/Deg2Rad

# Episode to display. `plt_res` is also read by the trajectory plots in the
# next cell, so the name is kept.
plt_res = total_res[9996]

# One entry per panel: (column of plt_res, scale factor, legend label, y-axis label).
panels = [
    (0, 1, r'$\dot{h}_{cmd}$', r'$\dot{h}_{cmd}$ ($m/s$)'),
    (10, 1, r'$h$', r'$h$ (m)'),
    (1, 1, r'$r$', r'$r$ (m)'),
    (2, Rad2Deg, 'elevation', 'elevation (deg)'),
    (3, Rad2Deg, 'azimuth', 'azimuth (deg)'),
]

fig, axes = plt.subplots(len(panels), 1, figsize=(15, 9), dpi=100)
for ax, (col, scale, label, ylabel) in zip(axes, panels):
    ax.plot(plt_res[:, col]*scale, label=label)
    ax.set_ylabel(ylabel)
    ax.grid()
    # A legend belongs to a single Axes, so it is requested on every panel.
    ax.legend()

plt.show()

In [None]:
from mpl_toolkits.mplot3d import Axes3D

# Trajectories of both vehicles for the episode held in `plt_res`.
# NOTE(review): columns 4-6 are plotted as the player and 7-9 as the target;
# the third column of each triple is negated and labelled 'Up' below —
# presumably a North-East-Down frame, confirm against the environment.

# 3-D view. The axes are created with add_subplot(projection='3d') because
# plt.gca() no longer accepts keyword arguments in recent Matplotlib releases.
fig3d = plt.figure(figsize=(12,9), dpi=100)
ax3d = fig3d.add_subplot(111, projection='3d')
ax3d.plot(plt_res[:,5], plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
ax3d.plot(plt_res[:,8], plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
ax3d.set_xlabel('East')
ax3d.set_ylabel('North')
ax3d.set_zlabel('Up')
ax3d.set_xlim(-2000,2000)
ax3d.set_ylim(0,4000)
ax3d.legend()
plt.show()

# Top view: East vs. North.
fig_top, ax_top = plt.subplots(figsize=(12,9), dpi=100)
ax_top.plot(plt_res[:,5], plt_res[:,4], label='player', linewidth=3)
ax_top.plot(plt_res[:,8], plt_res[:,7], label='target', linewidth=3)
ax_top.set_xlabel('East')
ax_top.set_ylabel('North')
ax_top.grid()
ax_top.legend()
ax_top.axis('equal')
plt.show()

# Side view: North vs. Up.
fig_side, ax_side = plt.subplots(figsize=(12,9), dpi=100)
ax_side.plot(plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
ax_side.plot(plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
ax_side.set_xlabel('North')
ax_side.set_ylabel('Up')
ax_side.grid()
ax_side.legend()
ax_side.axis('equal')
plt.show()

In [None]:
# Time histories for one recorded episode, drawn as five stacked panels.
# NOTE(review): the meaning of each column is taken from this cell's own axis
# labels; the layout of `total_res` is defined elsewhere — confirm there.
Deg2Rad = np.pi/180
Rad2Deg = 1/Deg2Rad

# Episode to display. `plt_res` is also read by the trajectory plots in the
# next cell, so the name is kept.
plt_res = total_res[49999]

# One entry per panel: (column of plt_res, scale factor, legend label, y-axis label).
panels = [
    (0, 1, r'$\dot{h}_{cmd}$', r'$\dot{h}_{cmd}$ ($m/s$)'),
    (10, 1, r'$h$', r'$h$ (m)'),
    (1, 1, r'$r$', r'$r$ (m)'),
    (2, Rad2Deg, 'elevation', 'elevation (deg)'),
    (3, Rad2Deg, 'azimuth', 'azimuth (deg)'),
]

fig, axes = plt.subplots(len(panels), 1, figsize=(15, 9), dpi=100)
for ax, (col, scale, label, ylabel) in zip(axes, panels):
    ax.plot(plt_res[:, col]*scale, label=label)
    ax.set_ylabel(ylabel)
    ax.grid()
    # A legend belongs to a single Axes, so it is requested on every panel.
    ax.legend()

plt.show()

In [None]:
from mpl_toolkits.mplot3d import Axes3D

# Trajectories of both vehicles for the episode held in `plt_res`.
# NOTE(review): columns 4-6 are plotted as the player and 7-9 as the target;
# the third column of each triple is negated and labelled 'Up' below —
# presumably a North-East-Down frame, confirm against the environment.

# 3-D view. The axes are created with add_subplot(projection='3d') because
# plt.gca() no longer accepts keyword arguments in recent Matplotlib releases.
fig3d = plt.figure(figsize=(12,9), dpi=100)
ax3d = fig3d.add_subplot(111, projection='3d')
ax3d.plot(plt_res[:,5], plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
ax3d.plot(plt_res[:,8], plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
ax3d.set_xlabel('East')
ax3d.set_ylabel('North')
ax3d.set_zlabel('Up')
ax3d.set_xlim(-2000,2000)
ax3d.set_ylim(0,4000)
ax3d.legend()
plt.show()

# Top view: East vs. North.
fig_top, ax_top = plt.subplots(figsize=(12,9), dpi=100)
ax_top.plot(plt_res[:,5], plt_res[:,4], label='player', linewidth=3)
ax_top.plot(plt_res[:,8], plt_res[:,7], label='target', linewidth=3)
ax_top.set_xlabel('East')
ax_top.set_ylabel('North')
ax_top.grid()
ax_top.legend()
ax_top.axis('equal')
plt.show()

# Side view: North vs. Up.
fig_side, ax_side = plt.subplots(figsize=(12,9), dpi=100)
ax_side.plot(plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
ax_side.plot(plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
ax_side.set_xlabel('North')
ax_side.set_ylabel('Up')
ax_side.grid()
ax_side.legend()
ax_side.axis('equal')
plt.show()