In [1]:
import math, random

import gym
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd 
import torch.nn.functional as F

from common.layers import NoisyLinear
from common.replay_buffer import ReplayBuffer
import gym_Aircraft

from itertools import count
import time
import itertools
import matplotlib
import matplotlib.pyplot as plt

In [2]:
USE_CUDA = torch.cuda.is_available()
Variable = lambda *args, **kwargs: autograd.Variable(*args, **kwargs).cuda() if USE_CUDA else autograd.Variable(*args, **kwargs)

In [3]:
env_id = "acav-v0"
env = gym.make(env_id)



In [4]:
class RainbowDQN(nn.Module):
    def __init__(self, num_inputs, num_actions, num_atoms, Vmin, Vmax):
        super(RainbowDQN, self).__init__()
        
        self.num_inputs   = num_inputs
        self.num_actions  = num_actions
        self.num_atoms    = num_atoms
        self.Vmin         = Vmin
        self.Vmax         = Vmax
        
        self.linear1 = nn.Linear(num_inputs, 32)
        self.linear2 = nn.Linear(32, 64)
        
        self.noisy_value1 = NoisyLinear(64, 64, use_cuda=USE_CUDA)
        self.noisy_value2 = NoisyLinear(64, self.num_atoms, use_cuda=USE_CUDA)
        
        self.noisy_advantage1 = NoisyLinear(64, 64, use_cuda=USE_CUDA)
        self.noisy_advantage2 = NoisyLinear(64, self.num_atoms * self.num_actions, use_cuda=USE_CUDA)
        
    def forward(self, x):
        batch_size = x.size(0)
        
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        
        value = F.relu(self.noisy_value1(x))
        value = self.noisy_value2(value)
        
        advantage = F.relu(self.noisy_advantage1(x))
        advantage = self.noisy_advantage2(advantage)
        
        value     = value.view(batch_size, 1, self.num_atoms)
        advantage = advantage.view(batch_size, self.num_actions, self.num_atoms)
        
        x = value + advantage - advantage.mean(1, keepdim=True)
        x = F.softmax(x.view(-1, self.num_atoms)).view(-1, self.num_actions, self.num_atoms)
        
        return x
        
    def reset_noise(self):
        self.noisy_value1.reset_noise()
        self.noisy_value2.reset_noise()
        self.noisy_advantage1.reset_noise()
        self.noisy_advantage2.reset_noise()
    
    def act(self, state):
        state = Variable(torch.FloatTensor(state).unsqueeze(0), volatile=True)
        dist = self.forward(state).data.cpu()
        dist = dist * torch.linspace(self.Vmin, self.Vmax, self.num_atoms)
        action = dist.sum(2).max(1)[1].numpy()[0]
        return action

In [5]:
num_atoms = 51
Vmin = -10
Vmax = 10

current_model = RainbowDQN(env.observation_space.shape[0], env.action_space.n, num_atoms, Vmin, Vmax)
target_model  = RainbowDQN(env.observation_space.shape[0], env.action_space.n, num_atoms, Vmin, Vmax)

if USE_CUDA:
    current_model = current_model.cuda()
    target_model  = target_model.cuda()
    
optimizer = optim.Adam(current_model.parameters(), 0.001)

replay_buffer = ReplayBuffer(10000)

In [6]:
def update_target(current_model, target_model):
    target_model.load_state_dict(current_model.state_dict())
    
update_target(current_model, target_model)

In [7]:
def projection_distribution(next_state, rewards, dones):
    batch_size  = next_state.size(0)
    
    delta_z = float(Vmax - Vmin) / (num_atoms - 1)
    support = torch.linspace(Vmin, Vmax, num_atoms)
    
    next_dist   = target_model(next_state).data.cpu() * support
    next_action = next_dist.sum(2).max(1)[1]
    next_action = next_action.unsqueeze(1).unsqueeze(1).expand(next_dist.size(0), 1, next_dist.size(2))
    next_dist   = next_dist.gather(1, next_action).squeeze(1)
        
    rewards = rewards.unsqueeze(1).expand_as(next_dist)
    dones   = dones.unsqueeze(1).expand_as(next_dist)
    support = support.unsqueeze(0).expand_as(next_dist)
    
    Tz = rewards + (1 - dones) * 0.99 * support
    Tz = Tz.clamp(min=Vmin, max=Vmax)
    b  = (Tz - Vmin) / delta_z
    l  = b.floor().long()
    u  = b.ceil().long()
        
    offset = torch.linspace(0, (batch_size - 1) * num_atoms, batch_size).long()\
                    .unsqueeze(1).expand(batch_size, num_atoms)

    proj_dist = torch.zeros(next_dist.size())    
    proj_dist.view(-1).index_add_(0, (l + offset).view(-1), (next_dist * (u.float() - b)).view(-1))
    proj_dist.view(-1).index_add_(0, (u + offset).view(-1), (next_dist * (b - l.float())).view(-1))
        
    return proj_dist

In [8]:
def compute_td_loss(batch_size):
    state, action, reward, next_state, done = replay_buffer.sample(batch_size) 

    state      = Variable(torch.FloatTensor(np.float32(state)))
    next_state = Variable(torch.FloatTensor(np.float32(next_state)), volatile=True)
    action     = Variable(torch.LongTensor(action))
    reward     = torch.FloatTensor(reward)
    done       = torch.FloatTensor(np.float32(done))

    proj_dist = projection_distribution(next_state, reward, done)
    
    dist = current_model(state)
    action = action.unsqueeze(1).unsqueeze(1).expand(batch_size, 1, num_atoms)
    dist = dist.gather(1, action).squeeze(1)
    dist.data.clamp_(0.01, 0.99)
    loss = -(Variable(proj_dist) * dist.log()).sum(1)
    loss  = loss.mean()
        
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    current_model.reset_noise()
    target_model.reset_noise()
    
    return loss

In [None]:
num_episodes = 50000
batch_size = 32
gamma=0.99
TARGET_UPDATE=10

losses = []
total_res=[]
reward_list=[]
for i_episode in range(num_episodes):
    total_reward=0
    
    # 환경과 상태 초기화
    res_list=np.zeros(11)
    state = env.reset()

    for t in count():
        # 행동 선택과 수행
        
        action = current_model.act(state)
        
        next_state, reward, done, res = env.step(action)
        
        # 메모리에 변이 저장
        replay_buffer.push(state, action, reward, next_state, done)
        

        # 새로운 상태 관찰
        if not done:
            next_state = next_state
        else:
            next_state = None

        

        # 다음 상태로 이동
        state = next_state

     
        # Data save
        
        cmd_list,r_list,elev_list,azim_list,Pm_list,Pt_list,h_list=res
        Pm_list=Pm_list.tolist()
        Pt_list=Pt_list.tolist()
        merged_data=itertools.chain([cmd_list],[r_list],[elev_list],[azim_list],Pm_list,Pt_list,[h_list])
        merged_data=np.array(list(merged_data))
        res_list=np.vstack([res_list,merged_data])
        
        total_reward+=reward
        
        if done:
            res_list=np.delete(res_list,0,0)
            
            total_res.append(res_list)
            reward_list.append(total_reward)
            
            now = time.localtime()
            print ("%04d/%02d/%02d %02d:%02d:%02d" % (now.tm_year, now.tm_mon, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec))
            print("episode : {} | final step : {} | total reward : {}".format(i_episode, t, total_reward))
            break
        if len(replay_buffer) > batch_size:
            loss = compute_td_loss(batch_size)
            losses.append(loss.item())
            
        
    #목표 네트워크 업데이트, 모든 웨이트와 바이어스 복사
    if i_episode % TARGET_UPDATE == 0:
        update_target(current_model, target_model)

print('Complete')
env.close()

  


2020/10/05 11:51:54
episode : 0 | final step : 97 | total reward : 45.63230462442321
2020/10/05 11:51:59
episode : 1 | final step : 99 | total reward : -4.288625482462095
2020/10/05 11:52:04
episode : 2 | final step : 96 | total reward : -9.180895630449442
2020/10/05 11:52:09
episode : 3 | final step : 99 | total reward : -8.817127704061392
2020/10/05 11:52:14
episode : 4 | final step : 100 | total reward : -3.936954315538628
2020/10/05 11:52:19
episode : 5 | final step : 96 | total reward : -13.076788164673033
2020/10/05 11:52:24
episode : 6 | final step : 96 | total reward : -14.597752504526582
2020/10/05 11:52:30
episode : 7 | final step : 98 | total reward : -4.50272809680942
2020/10/05 11:52:34
episode : 8 | final step : 91 | total reward : -19.974712253697405
2020/10/05 11:52:39
episode : 9 | final step : 100 | total reward : -6.475468974188985
2020/10/05 11:52:44
episode : 10 | final step : 99 | total reward : -7.592494842836942
2020/10/05 11:52:49
episode : 11 | final step : 99

2020/10/05 12:02:39
episode : 94 | final step : 97 | total reward : -6.9923260435764405
2020/10/05 12:02:46
episode : 95 | final step : 100 | total reward : -7.016112771939646
2020/10/05 12:02:54
episode : 96 | final step : 100 | total reward : -6.9898639038881925
2020/10/05 12:03:02
episode : 97 | final step : 99 | total reward : -3.184618003880672
2020/10/05 12:03:10
episode : 98 | final step : 100 | total reward : -8.210403008903938
2020/10/05 12:03:18
episode : 99 | final step : 100 | total reward : -3.9411496090036167
2020/10/05 12:03:26
episode : 100 | final step : 99 | total reward : -3.2241926429667416
2020/10/05 12:03:34
episode : 101 | final step : 99 | total reward : -4.67080426660533
2020/10/05 12:03:42
episode : 102 | final step : 100 | total reward : -3.9870964616479796
2020/10/05 12:03:49
episode : 103 | final step : 99 | total reward : -10.270012923795335
2020/10/05 12:03:57
episode : 104 | final step : 98 | total reward : -4.871713936950542
2020/10/05 12:04:05
episode 

2020/10/05 12:14:36
episode : 187 | final step : 99 | total reward : -3.453511248487885
2020/10/05 12:14:43
episode : 188 | final step : 91 | total reward : -22.70501744394771
2020/10/05 12:14:51
episode : 189 | final step : 95 | total reward : -12.104196096317338
2020/10/05 12:14:58
episode : 190 | final step : 98 | total reward : -6.847758688167573
2020/10/05 12:15:06
episode : 191 | final step : 96 | total reward : -9.266065282348976
2020/10/05 12:15:14
episode : 192 | final step : 98 | total reward : -4.374519149701555
2020/10/05 12:15:21
episode : 193 | final step : 97 | total reward : -4.880514724200495
2020/10/05 12:15:29
episode : 194 | final step : 100 | total reward : -4.307882381944708
2020/10/05 12:15:36
episode : 195 | final step : 92 | total reward : -18.055286670291252
2020/10/05 12:15:44
episode : 196 | final step : 97 | total reward : -5.6378299283753535
2020/10/05 12:15:52
episode : 197 | final step : 96 | total reward : -8.7773924486047
2020/10/05 12:16:00
episode : 

2020/10/05 12:26:40
episode : 280 | final step : 100 | total reward : -3.8878823545429384
2020/10/05 12:26:47
episode : 281 | final step : 98 | total reward : -3.8749758506753125
2020/10/05 12:26:56
episode : 282 | final step : 98 | total reward : -9.399883088535901
2020/10/05 12:27:04
episode : 283 | final step : 99 | total reward : -4.287697395083555
2020/10/05 12:27:11
episode : 284 | final step : 97 | total reward : -5.573961824244094
2020/10/05 12:27:19
episode : 285 | final step : 99 | total reward : -3.4739753855308493
2020/10/05 12:27:26
episode : 286 | final step : 94 | total reward : -12.19169395016452
2020/10/05 12:27:34
episode : 287 | final step : 100 | total reward : -4.039369198915381
2020/10/05 12:27:42
episode : 288 | final step : 95 | total reward : -10.475352307401792
2020/10/05 12:27:49
episode : 289 | final step : 94 | total reward : -12.893508848675836
2020/10/05 12:27:57
episode : 290 | final step : 98 | total reward : -8.274508280861028
2020/10/05 12:28:04
episo

2020/10/05 12:38:48
episode : 373 | final step : 99 | total reward : -9.881104789998588
2020/10/05 12:38:55
episode : 374 | final step : 98 | total reward : -7.517748867970184
2020/10/05 12:39:03
episode : 375 | final step : 99 | total reward : -3.5177770907118884
2020/10/05 12:39:11
episode : 376 | final step : 99 | total reward : -3.6808523380105385
2020/10/05 12:39:19
episode : 377 | final step : 99 | total reward : -3.22159599041116
2020/10/05 12:39:27
episode : 378 | final step : 100 | total reward : -5.914130522589943
2020/10/05 12:39:34
episode : 379 | final step : 93 | total reward : -14.952449480153732
2020/10/05 12:39:41
episode : 380 | final step : 96 | total reward : -9.253800485369297
2020/10/05 12:39:49
episode : 381 | final step : 99 | total reward : -4.345230685810137
2020/10/05 12:39:56
episode : 382 | final step : 98 | total reward : -4.152499523770729
2020/10/05 12:40:04
episode : 383 | final step : 94 | total reward : -13.46058120811557
2020/10/05 12:40:12
episode :

2020/10/05 12:50:44
episode : 466 | final step : 100 | total reward : -4.724211745044741
2020/10/05 12:50:52
episode : 467 | final step : 95 | total reward : -12.07672576856423
2020/10/05 12:50:59
episode : 468 | final step : 92 | total reward : -17.826000276395334
2020/10/05 12:51:07
episode : 469 | final step : 99 | total reward : -4.98434689714243
2020/10/05 12:51:15
episode : 470 | final step : 100 | total reward : -3.999435849408698
2020/10/05 12:51:22
episode : 471 | final step : 90 | total reward : -21.661961672931312
2020/10/05 12:51:30
episode : 472 | final step : 95 | total reward : -17.43968842776816
2020/10/05 12:51:38
episode : 473 | final step : 100 | total reward : -3.91894018725219
2020/10/05 12:51:45
episode : 474 | final step : 92 | total reward : -19.906373844294933
2020/10/05 12:51:52
episode : 475 | final step : 98 | total reward : -4.866687818061692
2020/10/05 12:51:59
episode : 476 | final step : 96 | total reward : -8.753303924590327
2020/10/05 12:52:06
episode 

2020/10/05 13:02:43
episode : 559 | final step : 96 | total reward : -7.037986824886104
2020/10/05 13:02:50
episode : 560 | final step : 99 | total reward : -10.033721317343534
2020/10/05 13:02:58
episode : 561 | final step : 95 | total reward : -15.723058571761058
2020/10/05 13:03:06
episode : 562 | final step : 100 | total reward : -3.886115308427435
2020/10/05 13:03:13
episode : 563 | final step : 95 | total reward : -14.857193301100267
2020/10/05 13:03:21
episode : 564 | final step : 99 | total reward : -11.176278033275366
2020/10/05 13:03:29
episode : 565 | final step : 100 | total reward : -3.932067457439217
2020/10/05 13:03:36
episode : 566 | final step : 100 | total reward : -3.886438511569409
2020/10/05 13:03:44
episode : 567 | final step : 100 | total reward : -3.8866825299679277
2020/10/05 13:03:52
episode : 568 | final step : 97 | total reward : -6.128506802942411
2020/10/05 13:03:59
episode : 569 | final step : 99 | total reward : -3.7741896631159193
2020/10/05 13:04:07
ep

2020/10/05 13:14:45
episode : 652 | final step : 98 | total reward : -5.947888397414189
2020/10/05 13:14:52
episode : 653 | final step : 98 | total reward : -10.453808061653888
2020/10/05 13:15:00
episode : 654 | final step : 100 | total reward : -4.263766308513667
2020/10/05 13:15:08
episode : 655 | final step : 100 | total reward : -3.8998331565615274
2020/10/05 13:15:16
episode : 656 | final step : 99 | total reward : -3.707716971576559
2020/10/05 13:15:24
episode : 657 | final step : 99 | total reward : -3.383615452310991
2020/10/05 13:15:32
episode : 658 | final step : 95 | total reward : -9.505936155767372
2020/10/05 13:15:39
episode : 659 | final step : 95 | total reward : -9.854363438507534
2020/10/05 13:15:46
episode : 660 | final step : 97 | total reward : -8.337369074528752
2020/10/05 13:15:54
episode : 661 | final step : 99 | total reward : -3.6639056970855677
2020/10/05 13:16:02
episode : 662 | final step : 98 | total reward : -6.7002336305966566
2020/10/05 13:16:09
episod

2020/10/05 13:26:49
episode : 745 | final step : 99 | total reward : -3.529552568294406
2020/10/05 13:26:57
episode : 746 | final step : 97 | total reward : -9.701325247983931
2020/10/05 13:27:05
episode : 747 | final step : 98 | total reward : -3.81108562965818
2020/10/05 13:27:13
episode : 748 | final step : 100 | total reward : -3.93248907813134
2020/10/05 13:27:21
episode : 749 | final step : 99 | total reward : -3.6015488155343007
2020/10/05 13:27:30
episode : 750 | final step : 99 | total reward : -4.883295386252115
2020/10/05 13:27:37
episode : 751 | final step : 100 | total reward : -6.298601555068643
2020/10/05 13:27:46
episode : 752 | final step : 99 | total reward : -19.412641535225617
2020/10/05 13:27:54
episode : 753 | final step : 97 | total reward : -5.246001141246126
2020/10/05 13:28:02
episode : 754 | final step : 100 | total reward : -3.9125816377164284
2020/10/05 13:28:10
episode : 755 | final step : 100 | total reward : -3.9044868464650193
2020/10/05 13:28:17
episod

2020/10/05 13:38:57
episode : 838 | final step : 98 | total reward : -4.418666679619207
2020/10/05 13:39:05
episode : 839 | final step : 98 | total reward : -7.904638061946457
2020/10/05 13:39:13
episode : 840 | final step : 99 | total reward : -3.2139801337364418
2020/10/05 13:39:20
episode : 841 | final step : 99 | total reward : -3.126998406755675
2020/10/05 13:39:28
episode : 842 | final step : 99 | total reward : -3.284199181445885
2020/10/05 13:39:36
episode : 843 | final step : 100 | total reward : -3.9972790099027407
2020/10/05 13:39:43
episode : 844 | final step : 100 | total reward : -4.14186901147184
2020/10/05 13:39:51
episode : 845 | final step : 99 | total reward : -5.307018866174744
2020/10/05 13:39:59
episode : 846 | final step : 99 | total reward : -3.3095454341886352
2020/10/05 13:40:06
episode : 847 | final step : 97 | total reward : -10.0014797622935
2020/10/05 13:40:15
episode : 848 | final step : 100 | total reward : -3.88835710571
2020/10/05 13:40:23
episode : 84

2020/10/05 13:51:12
episode : 931 | final step : 97 | total reward : -5.009775816136425
2020/10/05 13:51:21
episode : 932 | final step : 98 | total reward : -3.924971932501748
2020/10/05 13:51:30
episode : 933 | final step : 98 | total reward : -4.652917661690779
2020/10/05 13:51:38
episode : 934 | final step : 99 | total reward : -5.010887278907006
2020/10/05 13:51:45
episode : 935 | final step : 94 | total reward : -12.823444311055425
2020/10/05 13:51:53
episode : 936 | final step : 96 | total reward : -7.845999848626505
2020/10/05 13:52:01
episode : 937 | final step : 97 | total reward : -5.645017810226165
2020/10/05 13:52:09
episode : 938 | final step : 99 | total reward : -8.972070821115722
2020/10/05 13:52:17
episode : 939 | final step : 97 | total reward : -10.048348411597132
2020/10/05 13:52:25
episode : 940 | final step : 99 | total reward : -5.042752054304586
2020/10/05 13:52:33
episode : 941 | final step : 100 | total reward : -4.096766113763948
2020/10/05 13:52:41
episode :

2020/10/05 14:03:26
episode : 1024 | final step : 100 | total reward : -3.9703027395305526
2020/10/05 14:03:34
episode : 1025 | final step : 100 | total reward : -3.9462558138410344
2020/10/05 14:03:42
episode : 1026 | final step : 93 | total reward : -24.84207210054818
2020/10/05 14:03:50
episode : 1027 | final step : 99 | total reward : -3.374207580069097
2020/10/05 14:03:57
episode : 1028 | final step : 97 | total reward : -4.884266040571504
2020/10/05 14:04:05
episode : 1029 | final step : 99 | total reward : -4.858582978042392
2020/10/05 14:04:14
episode : 1030 | final step : 95 | total reward : -9.672716668499831
2020/10/05 14:04:21
episode : 1031 | final step : 95 | total reward : -13.086368806312791
2020/10/05 14:04:29
episode : 1032 | final step : 100 | total reward : -3.941700150554155
2020/10/05 14:04:37
episode : 1033 | final step : 100 | total reward : -4.6416804898437505
2020/10/05 14:04:44
episode : 1034 | final step : 98 | total reward : -3.6310199422990195
2020/10/05 1

2020/10/05 14:15:29
episode : 1116 | final step : 100 | total reward : -4.113762972991836
2020/10/05 14:15:37
episode : 1117 | final step : 99 | total reward : -5.266642079388518
2020/10/05 14:15:44
episode : 1118 | final step : 96 | total reward : -13.142320585158018
2020/10/05 14:15:52
episode : 1119 | final step : 96 | total reward : -8.139251925430642
2020/10/05 14:16:00
episode : 1120 | final step : 99 | total reward : -3.596609427948522
2020/10/05 14:16:08
episode : 1121 | final step : 100 | total reward : -3.927662497745345
2020/10/05 14:16:16
episode : 1122 | final step : 98 | total reward : -4.212029600228703
2020/10/05 14:16:23
episode : 1123 | final step : 97 | total reward : -13.71335210537086
2020/10/05 14:16:32
episode : 1124 | final step : 100 | total reward : -6.939350357211481
2020/10/05 14:16:40
episode : 1125 | final step : 98 | total reward : -4.400312963632544
2020/10/05 14:16:47
episode : 1126 | final step : 100 | total reward : -4.2928617950275765
2020/10/05 14:1

2020/10/05 14:27:28
episode : 1208 | final step : 100 | total reward : -3.8891572665879295
2020/10/05 14:27:36
episode : 1209 | final step : 98 | total reward : -7.507329043950989
2020/10/05 14:27:44
episode : 1210 | final step : 99 | total reward : -6.483059838025369
2020/10/05 14:27:51
episode : 1211 | final step : 92 | total reward : -21.021259578088276
2020/10/05 14:27:58
episode : 1212 | final step : 96 | total reward : -15.96234984476142
2020/10/05 14:28:06
episode : 1213 | final step : 100 | total reward : -4.705092415568743
2020/10/05 14:28:15
episode : 1214 | final step : 100 | total reward : -3.8910215515161894
2020/10/05 14:28:23
episode : 1215 | final step : 100 | total reward : -3.88659972807946
2020/10/05 14:28:30
episode : 1216 | final step : 97 | total reward : -20.96383806854901
2020/10/05 14:28:38
episode : 1217 | final step : 99 | total reward : -4.5791675534692615
2020/10/05 14:28:46
episode : 1218 | final step : 100 | total reward : -4.193678318300314
2020/10/05 14

2020/10/05 14:39:31
episode : 1300 | final step : 100 | total reward : -5.578536100728343
2020/10/05 14:39:38
episode : 1301 | final step : 99 | total reward : -3.36293224962796
2020/10/05 14:39:46
episode : 1302 | final step : 100 | total reward : -3.8949968580464684
2020/10/05 14:39:54
episode : 1303 | final step : 99 | total reward : -3.7670959134659654
2020/10/05 14:40:01
episode : 1304 | final step : 100 | total reward : -3.900659597924644
2020/10/05 14:40:09
episode : 1305 | final step : 97 | total reward : -7.036521425965361
2020/10/05 14:40:17
episode : 1306 | final step : 100 | total reward : -5.167713870376616
2020/10/05 14:40:25
episode : 1307 | final step : 100 | total reward : -3.894452409168295
2020/10/05 14:40:32
episode : 1308 | final step : 95 | total reward : -11.350579073955927
2020/10/05 14:40:40
episode : 1309 | final step : 100 | total reward : -3.9491878551257287
2020/10/05 14:40:48
episode : 1310 | final step : 100 | total reward : -5.919354086844635
2020/10/05 

2020/10/05 14:51:26
episode : 1392 | final step : 100 | total reward : -4.710619768116061
2020/10/05 14:51:34
episode : 1393 | final step : 98 | total reward : -4.274083926966938
2020/10/05 14:51:42
episode : 1394 | final step : 100 | total reward : -3.886133289826674
2020/10/05 14:51:49
episode : 1395 | final step : 100 | total reward : -3.9195295465752906
2020/10/05 14:51:57
episode : 1396 | final step : 100 | total reward : -3.99800810774434
2020/10/05 14:52:06
episode : 1397 | final step : 95 | total reward : -9.76379004844668
2020/10/05 14:52:13
episode : 1398 | final step : 98 | total reward : -8.291175340609868
2020/10/05 14:52:21
episode : 1399 | final step : 100 | total reward : -3.929333654581299
2020/10/05 14:52:29
episode : 1400 | final step : 93 | total reward : -19.421993717051762
2020/10/05 14:52:36
episode : 1401 | final step : 100 | total reward : -3.889197375028118
2020/10/05 14:52:44
episode : 1402 | final step : 100 | total reward : -4.10486304671069
2020/10/05 14:5

2020/10/05 15:03:22
episode : 1484 | final step : 100 | total reward : -4.361035354796186
2020/10/05 15:03:30
episode : 1485 | final step : 97 | total reward : -6.6699177201697495
2020/10/05 15:03:37
episode : 1486 | final step : 98 | total reward : -4.450209485858537
2020/10/05 15:03:45
episode : 1487 | final step : 99 | total reward : -6.782236582258875
2020/10/05 15:03:53
episode : 1488 | final step : 100 | total reward : -3.930037563125069
2020/10/05 15:04:02
episode : 1489 | final step : 100 | total reward : -3.9029532131736175
2020/10/05 15:04:10
episode : 1490 | final step : 97 | total reward : -10.738706702199792
2020/10/05 15:04:17
episode : 1491 | final step : 98 | total reward : -4.650114334659065
2020/10/05 15:04:25
episode : 1492 | final step : 93 | total reward : -17.049535938530013
2020/10/05 15:04:33
episode : 1493 | final step : 100 | total reward : -4.779069607763404
2020/10/05 15:04:41
episode : 1494 | final step : 99 | total reward : -3.934908220911126
2020/10/05 15

2020/10/05 15:15:16
episode : 1576 | final step : 98 | total reward : -3.9045278681161193
2020/10/05 15:15:24
episode : 1577 | final step : 99 | total reward : -7.820778176734095
2020/10/05 15:15:32
episode : 1578 | final step : 95 | total reward : -15.11210020662044
2020/10/05 15:15:40
episode : 1579 | final step : 96 | total reward : -11.28145756304967
2020/10/05 15:15:48
episode : 1580 | final step : 97 | total reward : -9.282215175778461
2020/10/05 15:15:56
episode : 1581 | final step : 100 | total reward : -3.8877201235745815
2020/10/05 15:16:04
episode : 1582 | final step : 98 | total reward : -7.095247529168816
2020/10/05 15:16:12
episode : 1583 | final step : 100 | total reward : -4.286846156680909
2020/10/05 15:16:19
episode : 1584 | final step : 99 | total reward : -8.805651231203143
2020/10/05 15:16:27
episode : 1585 | final step : 100 | total reward : -6.895999840110207
2020/10/05 15:16:35
episode : 1586 | final step : 100 | total reward : -4.595915509969075
2020/10/05 15:1

2020/10/05 15:27:17
episode : 1668 | final step : 98 | total reward : -6.363853236219967
2020/10/05 15:27:25
episode : 1669 | final step : 97 | total reward : -11.796259314974606
2020/10/05 15:27:32
episode : 1670 | final step : 97 | total reward : -6.087254530975713
2020/10/05 15:27:40
episode : 1671 | final step : 99 | total reward : -3.372885374292281
2020/10/05 15:27:48
episode : 1672 | final step : 98 | total reward : -6.490200244109047
2020/10/05 15:27:55
episode : 1673 | final step : 99 | total reward : -3.7407548384409637
2020/10/05 15:28:03
episode : 1674 | final step : 100 | total reward : -6.990640531372705
2020/10/05 15:28:10
episode : 1675 | final step : 98 | total reward : -4.993785535221917
2020/10/05 15:28:18
episode : 1676 | final step : 99 | total reward : -3.278424003517202
2020/10/05 15:28:26
episode : 1677 | final step : 100 | total reward : -3.897487824211865
2020/10/05 15:28:34
episode : 1678 | final step : 99 | total reward : -6.432881410907484
2020/10/05 15:28:

2020/10/05 15:40:18
episode : 1760 | final step : 98 | total reward : -4.2704141400721625
2020/10/05 15:40:28
episode : 1761 | final step : 99 | total reward : -6.024548406163547
2020/10/05 15:40:38
episode : 1762 | final step : 98 | total reward : -16.52198037952678
2020/10/05 15:40:47
episode : 1763 | final step : 93 | total reward : -15.635340860551011
2020/10/05 15:40:56
episode : 1764 | final step : 100 | total reward : -3.893221964845779
2020/10/05 15:41:06
episode : 1765 | final step : 100 | total reward : -4.028207969963773
2020/10/05 15:41:15
episode : 1766 | final step : 99 | total reward : -3.1231347388152813
2020/10/05 15:41:25
episode : 1767 | final step : 100 | total reward : -5.04054627750196
2020/10/05 15:41:34
episode : 1768 | final step : 98 | total reward : -5.157222821200364
2020/10/05 15:41:44
episode : 1769 | final step : 97 | total reward : -5.167392670713937
2020/10/05 15:41:54
episode : 1770 | final step : 98 | total reward : -9.408754156406744
2020/10/05 15:42

2020/10/05 15:54:57
episode : 1852 | final step : 100 | total reward : -4.966186445366887
2020/10/05 15:55:07
episode : 1853 | final step : 99 | total reward : -13.288091296310668
2020/10/05 15:55:17
episode : 1854 | final step : 100 | total reward : -3.8909605614788063
2020/10/05 15:55:27
episode : 1855 | final step : 98 | total reward : -3.741103452009426
2020/10/05 15:55:37
episode : 1856 | final step : 100 | total reward : -5.5032720258761
2020/10/05 15:55:47
episode : 1857 | final step : 99 | total reward : -4.643874233745308
2020/10/05 15:55:56
episode : 1858 | final step : 98 | total reward : -4.84414229032744
2020/10/05 15:56:06
episode : 1859 | final step : 100 | total reward : -3.9498374222261106
2020/10/05 15:56:17
episode : 1860 | final step : 100 | total reward : -4.810448355309973
2020/10/05 15:56:27
episode : 1861 | final step : 100 | total reward : -4.330632976812723
2020/10/05 15:56:37
episode : 1862 | final step : 100 | total reward : -8.10319859548602
2020/10/05 15:5

2020/10/05 16:09:09
episode : 1944 | final step : 95 | total reward : -19.968176791944416
2020/10/05 16:09:16
episode : 1945 | final step : 95 | total reward : -11.230801789843682
2020/10/05 16:09:26
episode : 1946 | final step : 100 | total reward : -7.366441671745264
2020/10/05 16:09:36
episode : 1947 | final step : 98 | total reward : -4.187661130459944
2020/10/05 16:09:46
episode : 1948 | final step : 99 | total reward : -3.1761810147446567
2020/10/05 16:09:56
episode : 1949 | final step : 99 | total reward : -4.871128204868171
2020/10/05 16:10:06
episode : 1950 | final step : 99 | total reward : -3.9639203132278453
2020/10/05 16:10:15
episode : 1951 | final step : 100 | total reward : -4.274689213825795
2020/10/05 16:10:24
episode : 1952 | final step : 96 | total reward : -11.756700031437134
2020/10/05 16:10:33
episode : 1953 | final step : 97 | total reward : -5.753338035544658
2020/10/05 16:10:42
episode : 1954 | final step : 99 | total reward : -3.1385022614353346
2020/10/05 16

2020/10/05 16:23:47
episode : 2036 | final step : 100 | total reward : -4.348729113648105
2020/10/05 16:23:57
episode : 2037 | final step : 100 | total reward : -3.943510802614071
2020/10/05 16:24:07
episode : 2038 | final step : 98 | total reward : -3.525765617862973
2020/10/05 16:24:17
episode : 2039 | final step : 99 | total reward : -5.622784127841783
2020/10/05 16:24:26
episode : 2040 | final step : 97 | total reward : -6.534726764082933
2020/10/05 16:24:35
episode : 2041 | final step : 97 | total reward : -7.6391248097966695
2020/10/05 16:24:44
episode : 2042 | final step : 94 | total reward : -12.795378558057692
2020/10/05 16:24:53
episode : 2043 | final step : 98 | total reward : -7.058328860168345
2020/10/05 16:25:03
episode : 2044 | final step : 99 | total reward : -3.2185988357789057
2020/10/05 16:25:13
episode : 2045 | final step : 98 | total reward : -4.568383271569701
2020/10/05 16:25:22
episode : 2046 | final step : 94 | total reward : -17.075350671653197
2020/10/05 16:2

2020/10/05 16:36:27
episode : 2128 | final step : 100 | total reward : -3.9142694149116437
2020/10/05 16:36:36
episode : 2129 | final step : 99 | total reward : -6.3499531757662595
2020/10/05 16:36:46
episode : 2130 | final step : 100 | total reward : -3.966347147934883
2020/10/05 16:36:55
episode : 2131 | final step : 95 | total reward : -12.843724960569915
2020/10/05 16:37:04
episode : 2132 | final step : 99 | total reward : -4.100180131740871
2020/10/05 16:37:13
episode : 2133 | final step : 99 | total reward : -3.3558858284100523
2020/10/05 16:37:22
episode : 2134 | final step : 95 | total reward : -9.813144737176529
2020/10/05 16:37:31
episode : 2135 | final step : 94 | total reward : -15.478325088780934
2020/10/05 16:37:40
episode : 2136 | final step : 100 | total reward : -3.9704923242758383
2020/10/05 16:37:50
episode : 2137 | final step : 98 | total reward : -5.757311935860557
2020/10/05 16:37:59
episode : 2138 | final step : 98 | total reward : -3.7462926820852402
2020/10/05 

2020/10/05 16:50:36
episode : 2220 | final step : 99 | total reward : -3.9937338207312223
2020/10/05 16:50:45
episode : 2221 | final step : 100 | total reward : -7.22228447456304
2020/10/05 16:50:54
episode : 2222 | final step : 94 | total reward : -13.080629708046075
2020/10/05 16:51:04
episode : 2223 | final step : 100 | total reward : -3.9233159609178756
2020/10/05 16:51:13
episode : 2224 | final step : 98 | total reward : -5.153671639816679
2020/10/05 16:51:22
episode : 2225 | final step : 100 | total reward : -3.886508591832282
2020/10/05 16:51:31
episode : 2226 | final step : 89 | total reward : -24.472173182868374
2020/10/05 16:51:40
episode : 2227 | final step : 99 | total reward : -3.1300123459152758
2020/10/05 16:51:50
episode : 2228 | final step : 100 | total reward : -3.9363877139755346
2020/10/05 16:51:59
episode : 2229 | final step : 98 | total reward : -6.176460736549984
2020/10/05 16:52:08
episode : 2230 | final step : 98 | total reward : -3.7241156860011415
2020/10/05 

2020/10/05 17:04:42
episode : 2312 | final step : 97 | total reward : -10.518013640640277
2020/10/05 17:04:52
episode : 2313 | final step : 99 | total reward : -4.394574057300105
2020/10/05 17:05:02
episode : 2314 | final step : 99 | total reward : -4.910030647794102
2020/10/05 17:05:11
episode : 2315 | final step : 99 | total reward : -5.239487681852026
2020/10/05 17:05:20
episode : 2316 | final step : 100 | total reward : -4.045636779650792
2020/10/05 17:05:30
episode : 2317 | final step : 99 | total reward : -3.6478945578100763
2020/10/05 17:05:39
episode : 2318 | final step : 96 | total reward : -7.459763697890821
2020/10/05 17:05:49
episode : 2319 | final step : 99 | total reward : -3.2614496901162653
2020/10/05 17:05:57
episode : 2320 | final step : 98 | total reward : -5.423588249725256
2020/10/05 17:06:06
episode : 2321 | final step : 99 | total reward : -3.181322969477719
2020/10/05 17:06:15
episode : 2322 | final step : 92 | total reward : -19.975522435154446
2020/10/05 17:06

2020/10/05 17:18:44
episode : 2404 | final step : 97 | total reward : -5.209087501768465
2020/10/05 17:18:54
episode : 2405 | final step : 100 | total reward : -3.8863044152987527
2020/10/05 17:19:04
episode : 2406 | final step : 96 | total reward : -9.939556768921477
2020/10/05 17:19:13
episode : 2407 | final step : 94 | total reward : -17.986854919863504
2020/10/05 17:19:22
episode : 2408 | final step : 98 | total reward : -4.87241757493446
2020/10/05 17:19:32
episode : 2409 | final step : 100 | total reward : -4.205982507711681
2020/10/05 17:19:42
episode : 2410 | final step : 98 | total reward : -6.01981969887332
2020/10/05 17:19:51
episode : 2411 | final step : 100 | total reward : -4.862975067110412
2020/10/05 17:20:01
episode : 2412 | final step : 100 | total reward : -4.912274562092136
2020/10/05 17:20:09
episode : 2413 | final step : 95 | total reward : -15.03893204848687
2020/10/05 17:20:19
episode : 2414 | final step : 98 | total reward : -10.191694683518719
2020/10/05 17:20

2020/10/05 17:32:37
episode : 2496 | final step : 100 | total reward : -5.223942372633928
2020/10/05 17:32:45
episode : 2497 | final step : 98 | total reward : -5.150417949816273
2020/10/05 17:32:54
episode : 2498 | final step : 97 | total reward : -8.561659635458454
2020/10/05 17:33:02
episode : 2499 | final step : 95 | total reward : -10.536138288020098
2020/10/05 17:33:11
episode : 2500 | final step : 100 | total reward : -3.887483903638909
2020/10/05 17:33:19
episode : 2501 | final step : 99 | total reward : -6.404523092095445
2020/10/05 17:33:27
episode : 2502 | final step : 96 | total reward : -11.35898878791889
2020/10/05 17:33:36
episode : 2503 | final step : 100 | total reward : -4.7234931392610235
2020/10/05 17:33:44
episode : 2504 | final step : 100 | total reward : -3.9461352856116036
2020/10/05 17:33:52
episode : 2505 | final step : 99 | total reward : -3.5902699714932567
2020/10/05 17:34:00
episode : 2506 | final step : 97 | total reward : -7.143143111788959
2020/10/05 17

In [None]:
plt.figure(figsize=(15,10))
plt.xlabel("Episode")
plt.ylabel("Total rewards")
plt.plot(reward_list)

In [None]:
def moving_average(data,n):
    len_data=len(data)-n
    res_data=np.zeros(len_data)

    for i in range(len_data):
        res_data[i]=sum(data[i:i+n])/n
    return res_data

In [None]:
average_number=100
filtered_data=moving_average(reward_list,average_number)
plt.figure(figsize=(15,10))
plt.xlabel("Episode")
plt.ylabel("Total rewards")
plt.plot(filtered_data)

In [None]:
# plots
Deg2Rad = np.pi/180
Rad2Deg = 1/Deg2Rad

plt_res=total_res[99]


plt.figure(figsize=(15,9), dpi=100)

plt.subplot(511)
plt.plot(plt_res[:,0], label=r'$\dot{h}_{cmd}$')
plt.ylabel(r'$\dot{h}_{cmd}$ ($m/s$)'), plt.grid()

plt.subplot(512)
plt.plot(plt_res[:,10],label=r'$\{h}$')
plt.ylabel(r'$h$ (m)'), plt.grid()

plt.subplot(513)
plt.plot(plt_res[:,1],label=r'$\{r}$')
plt.ylabel(r'$r$ (m)'), plt.grid()

plt.subplot(514)
plt.plot(plt_res[:,2]*Rad2Deg, label='elevation')
plt.ylabel('elevation (deg)'), plt.grid()

plt.subplot(515)
plt.plot(plt_res[:,3]*Rad2Deg, label='azimuth')
plt.ylabel('azimuth (deg)'), plt.grid()

plt.legend()
plt.show()

In [None]:
from mpl_toolkits.mplot3d import Axes3D

plt.figure(figsize=(12,9), dpi=100)
plt.gca(projection='3d')
plt.plot(plt_res[:,5], plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
plt.plot(plt_res[:,8], plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
plt.xlabel('East')
plt.ylabel('North')
plt.xlim(-2000,2000)
plt.ylim(0,4000)
plt.legend()
plt.show()

plt.figure(figsize=(12,9), dpi=100)
plt.plot(plt_res[:,5], plt_res[:,4], label='player', linewidth=3)
plt.plot(plt_res[:,8], plt_res[:,7], label='target', linewidth=3)
plt.xlabel('East')
plt.ylabel('North')
plt.grid(), plt.legend(), plt.axis('equal')
plt.show()

plt.figure(figsize=(12,9), dpi=100)
plt.plot(plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
plt.plot(plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
plt.xlabel('North')
plt.ylabel('Up')
plt.grid(), plt.legend(), plt.axis('equal')
plt.show()

In [None]:
# plots
Deg2Rad = np.pi/180
Rad2Deg = 1/Deg2Rad

plt_res=total_res[9996]


plt.figure(figsize=(15,9), dpi=100)

plt.subplot(511)
plt.plot(plt_res[:,0], label=r'$\dot{h}_{cmd}$')
plt.ylabel(r'$\dot{h}_{cmd}$ ($m/s$)'), plt.grid()

plt.subplot(512)
plt.plot(plt_res[:,10],label=r'$\{h}$')
plt.ylabel(r'$h$ (m)'), plt.grid()

plt.subplot(513)
plt.plot(plt_res[:,1],label=r'$\{r}$')
plt.ylabel(r'$r$ (m)'), plt.grid()

plt.subplot(514)
plt.plot(plt_res[:,2]*Rad2Deg, label='elevation')
plt.ylabel('elevation (deg)'), plt.grid()

plt.subplot(515)
plt.plot(plt_res[:,3]*Rad2Deg, label='azimuth')
plt.ylabel('azimuth (deg)'), plt.grid()

plt.legend()
plt.show()

In [None]:
from mpl_toolkits.mplot3d import Axes3D

plt.figure(figsize=(12,9), dpi=100)
plt.gca(projection='3d')
plt.plot(plt_res[:,5], plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
plt.plot(plt_res[:,8], plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
plt.xlabel('East')
plt.ylabel('North')
plt.xlim(-2000,2000)
plt.ylim(0,4000)
plt.legend()
plt.show()

plt.figure(figsize=(12,9), dpi=100)
plt.plot(plt_res[:,5], plt_res[:,4], label='player', linewidth=3)
plt.plot(plt_res[:,8], plt_res[:,7], label='target', linewidth=3)
plt.xlabel('East')
plt.ylabel('North')
plt.grid(), plt.legend(), plt.axis('equal')
plt.show()

plt.figure(figsize=(12,9), dpi=100)
plt.plot(plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
plt.plot(plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
plt.xlabel('North')
plt.ylabel('Up')
plt.grid(), plt.legend(), plt.axis('equal')
plt.show()

In [None]:
# plots
Deg2Rad = np.pi/180
Rad2Deg = 1/Deg2Rad

plt_res=total_res[49999]


plt.figure(figsize=(15,9), dpi=100)

plt.subplot(511)
plt.plot(plt_res[:,0], label=r'$\dot{h}_{cmd}$')
plt.ylabel(r'$\dot{h}_{cmd}$ ($m/s$)'), plt.grid()

plt.subplot(512)
plt.plot(plt_res[:,10],label=r'$\{h}$')
plt.ylabel(r'$h$ (m)'), plt.grid()

plt.subplot(513)
plt.plot(plt_res[:,1],label=r'$\{r}$')
plt.ylabel(r'$r$ (m)'), plt.grid()

plt.subplot(514)
plt.plot(plt_res[:,2]*Rad2Deg, label='elevation')
plt.ylabel('elevation (deg)'), plt.grid()

plt.subplot(515)
plt.plot(plt_res[:,3]*Rad2Deg, label='azimuth')
plt.ylabel('azimuth (deg)'), plt.grid()

plt.legend()
plt.show()

In [None]:
from mpl_toolkits.mplot3d import Axes3D

plt.figure(figsize=(12,9), dpi=100)
plt.gca(projection='3d')
plt.plot(plt_res[:,5], plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
plt.plot(plt_res[:,8], plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
plt.xlabel('East')
plt.ylabel('North')
plt.xlim(-2000,2000)
plt.ylim(0,4000)
plt.legend()
plt.show()

plt.figure(figsize=(12,9), dpi=100)
plt.plot(plt_res[:,5], plt_res[:,4], label='player', linewidth=3)
plt.plot(plt_res[:,8], plt_res[:,7], label='target', linewidth=3)
plt.xlabel('East')
plt.ylabel('North')
plt.grid(), plt.legend(), plt.axis('equal')
plt.show()

plt.figure(figsize=(12,9), dpi=100)
plt.plot(plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
plt.plot(plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
plt.xlabel('North')
plt.ylabel('Up')
plt.grid(), plt.legend(), plt.axis('equal')
plt.show()