In [1]:
import gym
import math
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple
from itertools import count
import time
import itertools
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
import gym_Aircraft

In [2]:
# Instantiate the "acav-v0" environment and choose the device all tensors and networks are moved to.
env = gym.make("acav-v0")
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")



In [3]:
# One environment step: (state, action, next_state, reward).
Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))


class ReplayMemory(object):
    """Fixed-capacity ring buffer of ``Transition`` tuples.

    Once ``capacity`` transitions have been stored, each new one overwrites
    the oldest entry.
    """

    def __init__(self, capacity):
        """Create an empty buffer.

        Args:
            capacity: maximum number of transitions kept; must be >= 1.

        Raises:
            ValueError: if ``capacity`` is smaller than 1.
        """
        if capacity < 1:
            raise ValueError(
                "capacity must be a positive integer, got {!r}".format(capacity))
        self.capacity = capacity
        self.memory = []
        self.position = 0  # index of the slot the next push writes to

    def push(self, *args):
        """Store one transition.

        Args:
            *args: the four ``Transition`` fields, in order
                (state, action, next_state, reward).

        Raises:
            TypeError: if the number of arguments does not match
                ``Transition``; the buffer is left unchanged in that case.
        """
        # Build the tuple first so a bad call cannot leave a placeholder behind.
        entry = Transition(*args)
        if len(self.memory) < self.capacity:
            self.memory.append(entry)
        else:
            self.memory[self.position] = entry
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        """Return ``batch_size`` distinct stored transitions chosen at random.

        Raises:
            ValueError: (from ``random.sample``) if ``batch_size`` exceeds
                the number of stored transitions.
        """
        return random.sample(self.memory, batch_size)

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.memory)

In [4]:
class DQN(nn.Module):
    """Fully connected Q-network: observation vector in, one Q-value per action out.

    The layout is Linear(num_inputs, 40) -> ReLU, six Linear(40, 40) -> ReLU
    pairs, then Linear(40, num_actions). Module indices inside ``self.layers``
    are 0..14, so state-dict keys are the same as with an explicitly listed
    Sequential of these layers.
    """

    HIDDEN_SIZE = 40        # width of every hidden layer
    NUM_HIDDEN_BLOCKS = 6   # number of Linear(40, 40) + ReLU pairs

    def __init__(self, num_inputs, num_actions):
        """Build the network.

        Args:
            num_inputs: size of the observation vector.
            num_actions: number of discrete actions (size of the output).
        """
        super(DQN, self).__init__()

        # Layer sizes come from the constructor arguments, not from a global
        # environment, so the module can be built on its own.
        width = self.HIDDEN_SIZE
        modules = [nn.Linear(num_inputs, width), nn.ReLU()]
        for _ in range(self.NUM_HIDDEN_BLOCKS):
            modules.append(nn.Linear(width, width))
            modules.append(nn.ReLU())
        modules.append(nn.Linear(width, num_actions))
        self.layers = nn.Sequential(*modules)

    def forward(self, x):
        """Return the Q-values for ``x`` by passing it through ``self.layers``."""
        return self.layers(x)

In [5]:
# Training hyperparameters.
BATCH_SIZE, GAMMA = 128, 0.999   # replay sample size; discount applied to next-state values
# Epsilon-greedy schedule: threshold decays exponentially from EPS_START towards EPS_END
# with time constant EPS_DECAY (measured in action selections).
EPS_START, EPS_END, EPS_DECAY = 0.9, 0.05, 200
TARGET_UPDATE = 10               # copy policy weights into the target net every N episodes

In [6]:
# Two identically shaped networks (policy first, then target), each wrapped in DataParallel.
policy_net, target_net = (
    nn.DataParallel(DQN(env.observation_space.shape[0], env.action_space.n).to(device))
    for _ in range(2)
)
# The target network starts as an exact copy of the policy network and stays in eval mode.
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

DataParallel(
  (module): DQN(
    (layers): Sequential(
      (0): Linear(in_features=5, out_features=40, bias=True)
      (1): ReLU()
      (2): Linear(in_features=40, out_features=40, bias=True)
      (3): ReLU()
      (4): Linear(in_features=40, out_features=40, bias=True)
      (5): ReLU()
      (6): Linear(in_features=40, out_features=40, bias=True)
      (7): ReLU()
      (8): Linear(in_features=40, out_features=40, bias=True)
      (9): ReLU()
      (10): Linear(in_features=40, out_features=40, bias=True)
      (11): ReLU()
      (12): Linear(in_features=40, out_features=40, bias=True)
      (13): ReLU()
      (14): Linear(in_features=40, out_features=3, bias=True)
    )
  )
)

In [7]:
# Number of discrete actions; select_action draws random actions from range(n_actions).
n_actions = env.action_space.n

In [8]:
# Replay buffer holding up to 10000 transitions, and an Adam optimizer
# (library-default settings) over the policy network's parameters.
memory = ReplayMemory(capacity=10000)
optimizer = optim.Adam(params=policy_net.parameters())

In [9]:
# Global count of actions selected so far; select_action increments it and uses it to decay epsilon.
steps_done = 0

In [10]:
def select_action(state):
    """Choose an action for ``state`` with an epsilon-greedy rule.

    The exploration threshold decays exponentially from EPS_START to EPS_END
    as the global ``steps_done`` counter grows; the counter is incremented on
    every call.

    Args:
        state: tensor passed unchanged to ``policy_net``.

    Returns:
        A 1x1 long tensor holding the chosen action index.
    """
    global steps_done
    draw = random.random()
    decay = math.exp(-1. * steps_done / EPS_DECAY)
    eps_threshold = EPS_END + (EPS_START - EPS_END) * decay
    steps_done += 1

    if draw <= eps_threshold:
        # Explore: uniformly random action.
        return torch.tensor([[random.randrange(n_actions)]], device=device, dtype=torch.long)

    # Exploit: max(1) returns (values, indices) along dim 1; the indices
    # identify the action with the highest predicted Q-value.
    with torch.no_grad():
        return policy_net(state).max(1)[1].view(1, 1)

In [11]:
def optimize_model():
    """Run one optimization step of the policy network on a replay batch.

    Does nothing until the replay memory holds at least BATCH_SIZE
    transitions. Otherwise samples BATCH_SIZE transitions, regresses
    Q(s, a) from ``policy_net`` onto ``reward + GAMMA * max_a' Q_target(s', a')``
    (with a zero bootstrap term for terminal transitions) using the Huber
    loss, clamps gradients to [-1, 1], and applies the optimizer.
    """
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)

    # Turn a list of Transitions into one Transition of tuples (one tuple per field).
    batch = Transition(*zip(*transitions))

    # Terminal transitions are stored with next_state=None.
    non_final_mask = torch.tensor([s is not None for s in batch.next_state],
                                  device=device, dtype=torch.bool)
    non_final_next_states = [s for s in batch.next_state if s is not None]

    state_batch = torch.cat(batch.state)
    action_batch = torch.cat(batch.action)
    reward_batch = torch.cat(batch.reward)

    # Q-value of the action that was actually taken in each sampled state.
    state_action_values = policy_net(state_batch).gather(1, action_batch)

    # Bootstrap value of each next state; stays 0 for terminal transitions.
    next_state_values = torch.zeros(BATCH_SIZE, device=device)
    if non_final_next_states:
        # torch.cat rejects an empty list, so the target pass is skipped when
        # every sampled transition is terminal.
        with torch.no_grad():
            next_state_values[non_final_mask] = \
                target_net(torch.cat(non_final_next_states)).max(1)[0]

    # Expected Q-values.
    expected_state_action_values = (next_state_values * GAMMA) + reward_batch

    # Huber loss.
    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values.unsqueeze(1))

    # Optimize the model.
    optimizer.zero_grad()
    loss.backward()
    for param in policy_net.parameters():
        # A parameter that did not take part in the loss has no gradient.
        if param.grad is not None:
            param.grad.data.clamp_(-1, 1)
    optimizer.step()

In [None]:
# Main training loop: run episodes, store transitions, optimize after every
# step, and log the per-step simulation values returned by the environment.
num_episodes = 10000
total_res=[]     # one (steps, 11) float array of logged values per episode
reward_list=[]   # sum of rewards of each episode, as plain Python floats
for i_episode in range(num_episodes):
    total_reward = 0.0
    episode_rows = []

    # Reset the environment and get the initial state as a 1 x obs_dim tensor.
    state = env.reset()
    state = torch.from_numpy(state.astype(np.float32)).unsqueeze(0).to(device)
    for t in count():
        # Select and perform an action.
        action = select_action(state)
        next_state, reward, done, res = env.step(action.item())

        # Accumulate the return as a float so reward_list never holds device tensors.
        total_reward += float(reward)
        reward = torch.tensor([reward], dtype=torch.float32).to(device)

        # Terminal transitions are stored with next_state=None; only convert
        # the observation when it will actually be used.
        if done:
            next_state = None
        else:
            next_state = torch.from_numpy(next_state.astype(np.float32)).unsqueeze(0).to(device)

        # Store the transition in replay memory.
        memory.push(state, action, next_state, reward)

        # Move to the next state.
        state = next_state

        # One optimization step of the policy network.
        optimize_model()

        # Log this step: 4 scalars, two 3-element arrays, 1 scalar -> 11 columns.
        cmd_list,r_list,elev_list,azim_list,Pm_list,Pt_list,h_list=res
        merged_data = itertools.chain([cmd_list],[r_list],[elev_list],[azim_list],
                                      Pm_list.tolist(),Pt_list.tolist(),[h_list])
        episode_rows.append(np.array(list(merged_data), dtype=np.float64))

        if done:
            # Stack once per episode instead of growing an array every step.
            res_list = np.vstack(episode_rows)

            total_res.append(res_list)
            reward_list.append(total_reward)

            now = time.localtime()
            print ("%04d/%02d/%02d %02d:%02d:%02d" % (now.tm_year, now.tm_mon, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec))
            print("episode : {} | final step : {} | total reward : {}".format(i_episode, t, total_reward))
            break

    # Update the target network: copy all weights and biases from the policy network.
    if i_episode % TARGET_UPDATE == 0:
        target_net.load_state_dict(policy_net.state_dict())

print('Complete')
env.close()

2020/10/01 22:20:10
episode : 0 | final step : 99 | total reward : -1.0
2020/10/01 22:20:19
episode : 1 | final step : 95 | total reward : 0.0
2020/10/01 22:20:22
episode : 2 | final step : 98 | total reward : -1.0
2020/10/01 22:20:25
episode : 3 | final step : 100 | total reward : 0.0
2020/10/01 22:20:28
episode : 4 | final step : 99 | total reward : 0.0
2020/10/01 22:20:31
episode : 5 | final step : 98 | total reward : 0.0
2020/10/01 22:20:34
episode : 6 | final step : 98 | total reward : -1.0
2020/10/01 22:20:37
episode : 7 | final step : 98 | total reward : -1.0
2020/10/01 22:20:40
episode : 8 | final step : 100 | total reward : -1.0
2020/10/01 22:20:43
episode : 9 | final step : 101 | total reward : 0.0
2020/10/01 22:20:46
episode : 10 | final step : 97 | total reward : 0.0
2020/10/01 22:20:49
episode : 11 | final step : 99 | total reward : -1.0
2020/10/01 22:20:52
episode : 12 | final step : 99 | total reward : -1.0
2020/10/01 22:20:54
episode : 13 | final step : 97 | total rewar

2020/10/01 22:25:44
episode : 113 | final step : 98 | total reward : -1.0
2020/10/01 22:25:47
episode : 114 | final step : 98 | total reward : -1.0
2020/10/01 22:25:50
episode : 115 | final step : 101 | total reward : 0.0
2020/10/01 22:25:53
episode : 116 | final step : 100 | total reward : 0.0
2020/10/01 22:25:56
episode : 117 | final step : 100 | total reward : 0.0
2020/10/01 22:25:59
episode : 118 | final step : 98 | total reward : -1.0
2020/10/01 22:26:02
episode : 119 | final step : 101 | total reward : 0.0
2020/10/01 22:26:05
episode : 120 | final step : 99 | total reward : -1.0
2020/10/01 22:26:07
episode : 121 | final step : 97 | total reward : 0.0
2020/10/01 22:26:10
episode : 122 | final step : 97 | total reward : 0.0
2020/10/01 22:26:13
episode : 123 | final step : 98 | total reward : 0.0
2020/10/01 22:26:16
episode : 124 | final step : 98 | total reward : -1.0
2020/10/01 22:26:19
episode : 125 | final step : 98 | total reward : 0.0
2020/10/01 22:26:22
episode : 126 | final 

2020/10/01 22:31:11
episode : 225 | final step : 96 | total reward : 0.0
2020/10/01 22:31:14
episode : 226 | final step : 100 | total reward : 0.0
2020/10/01 22:31:17
episode : 227 | final step : 99 | total reward : 0.0
2020/10/01 22:31:20
episode : 228 | final step : 98 | total reward : -1.0
2020/10/01 22:31:23
episode : 229 | final step : 98 | total reward : -1.0
2020/10/01 22:31:26
episode : 230 | final step : 98 | total reward : -1.0
2020/10/01 22:31:29
episode : 231 | final step : 101 | total reward : 0.0
2020/10/01 22:31:32
episode : 232 | final step : 99 | total reward : -1.0
2020/10/01 22:31:35
episode : 233 | final step : 97 | total reward : 0.0
2020/10/01 22:31:38
episode : 234 | final step : 100 | total reward : -1.0
2020/10/01 22:31:41
episode : 235 | final step : 98 | total reward : -1.0
2020/10/01 22:31:44
episode : 236 | final step : 98 | total reward : -1.0
2020/10/01 22:31:47
episode : 237 | final step : 98 | total reward : 0.0
2020/10/01 22:31:50
episode : 238 | final

2020/10/01 22:36:45
episode : 337 | final step : 97 | total reward : 0.0
2020/10/01 22:36:48
episode : 338 | final step : 98 | total reward : -1.0
2020/10/01 22:36:51
episode : 339 | final step : 99 | total reward : 0.0
2020/10/01 22:36:55
episode : 340 | final step : 97 | total reward : 0.0
2020/10/01 22:36:58
episode : 341 | final step : 98 | total reward : -1.0
2020/10/01 22:37:00
episode : 342 | final step : 97 | total reward : 0.0
2020/10/01 22:37:04
episode : 343 | final step : 98 | total reward : -1.0
2020/10/01 22:37:07
episode : 344 | final step : 99 | total reward : -1.0
2020/10/01 22:37:10
episode : 345 | final step : 98 | total reward : -1.0
2020/10/01 22:37:13
episode : 346 | final step : 99 | total reward : 0.0
2020/10/01 22:37:16
episode : 347 | final step : 99 | total reward : -1.0
2020/10/01 22:37:19
episode : 348 | final step : 98 | total reward : -1.0
2020/10/01 22:37:22
episode : 349 | final step : 98 | total reward : -1.0
2020/10/01 22:37:25
episode : 350 | final s

2020/10/01 22:42:14
episode : 449 | final step : 98 | total reward : -1.0
2020/10/01 22:42:17
episode : 450 | final step : 98 | total reward : 0.0
2020/10/01 22:42:20
episode : 451 | final step : 98 | total reward : -1.0
2020/10/01 22:42:23
episode : 452 | final step : 98 | total reward : -1.0
2020/10/01 22:42:26
episode : 453 | final step : 99 | total reward : -1.0
2020/10/01 22:42:29
episode : 454 | final step : 98 | total reward : -1.0
2020/10/01 22:42:32
episode : 455 | final step : 98 | total reward : -1.0
2020/10/01 22:42:35
episode : 456 | final step : 97 | total reward : 0.0
2020/10/01 22:42:38
episode : 457 | final step : 99 | total reward : -1.0
2020/10/01 22:42:41
episode : 458 | final step : 98 | total reward : -1.0
2020/10/01 22:42:44
episode : 459 | final step : 99 | total reward : -1.0
2020/10/01 22:42:47
episode : 460 | final step : 98 | total reward : -1.0
2020/10/01 22:42:50
episode : 461 | final step : 98 | total reward : -1.0
2020/10/01 22:42:53
episode : 462 | fina

2020/10/01 22:47:42
episode : 561 | final step : 99 | total reward : 0.0
2020/10/01 22:47:45
episode : 562 | final step : 99 | total reward : -1.0
2020/10/01 22:47:48
episode : 563 | final step : 98 | total reward : 0.0
2020/10/01 22:47:51
episode : 564 | final step : 100 | total reward : -1.0
2020/10/01 22:47:54
episode : 565 | final step : 98 | total reward : -1.0
2020/10/01 22:47:57
episode : 566 | final step : 99 | total reward : -1.0
2020/10/01 22:48:00
episode : 567 | final step : 98 | total reward : -1.0
2020/10/01 22:48:03
episode : 568 | final step : 99 | total reward : -1.0
2020/10/01 22:48:06
episode : 569 | final step : 99 | total reward : -1.0
2020/10/01 22:48:09
episode : 570 | final step : 102 | total reward : 0.0
2020/10/01 22:48:12
episode : 571 | final step : 99 | total reward : -1.0
2020/10/01 22:48:15
episode : 572 | final step : 100 | total reward : -1.0
2020/10/01 22:48:18
episode : 573 | final step : 99 | total reward : 0.0
2020/10/01 22:48:21
episode : 574 | fin

2020/10/01 22:53:09
episode : 673 | final step : 99 | total reward : -1.0
2020/10/01 22:53:12
episode : 674 | final step : 99 | total reward : 0.0
2020/10/01 22:53:15
episode : 675 | final step : 100 | total reward : -1.0
2020/10/01 22:53:18
episode : 676 | final step : 98 | total reward : -1.0
2020/10/01 22:53:21
episode : 677 | final step : 100 | total reward : 0.0
2020/10/01 22:53:24
episode : 678 | final step : 97 | total reward : 0.0
2020/10/01 22:53:27
episode : 679 | final step : 99 | total reward : -1.0
2020/10/01 22:53:30
episode : 680 | final step : 98 | total reward : -1.0
2020/10/01 22:53:33
episode : 681 | final step : 100 | total reward : 0.0
2020/10/01 22:53:36
episode : 682 | final step : 100 | total reward : -1.0
2020/10/01 22:53:39
episode : 683 | final step : 99 | total reward : -1.0
2020/10/01 22:53:42
episode : 684 | final step : 99 | total reward : 0.0
2020/10/01 22:53:45
episode : 685 | final step : 99 | total reward : 0.0
2020/10/01 22:53:47
episode : 686 | fina

2020/10/01 22:58:37
episode : 785 | final step : 99 | total reward : -1.0
2020/10/01 22:58:40
episode : 786 | final step : 98 | total reward : -1.0
2020/10/01 22:58:43
episode : 787 | final step : 96 | total reward : 0.0
2020/10/01 22:58:45
episode : 788 | final step : 98 | total reward : -1.0
2020/10/01 22:58:48
episode : 789 | final step : 98 | total reward : -1.0
2020/10/01 22:58:51
episode : 790 | final step : 98 | total reward : 0.0
2020/10/01 22:58:54
episode : 791 | final step : 99 | total reward : 0.0
2020/10/01 22:58:57
episode : 792 | final step : 98 | total reward : -1.0
2020/10/01 22:59:00
episode : 793 | final step : 99 | total reward : -1.0
2020/10/01 22:59:03
episode : 794 | final step : 98 | total reward : -1.0
2020/10/01 22:59:06
episode : 795 | final step : 100 | total reward : 0.0
2020/10/01 22:59:09
episode : 796 | final step : 98 | total reward : 0.0
2020/10/01 22:59:12
episode : 797 | final step : 98 | total reward : -1.0
2020/10/01 22:59:15
episode : 798 | final 

2020/10/01 23:04:03
episode : 896 | final step : 98 | total reward : 0.0
2020/10/01 23:04:06
episode : 897 | final step : 100 | total reward : 0.0
2020/10/01 23:04:09
episode : 898 | final step : 99 | total reward : -1.0
2020/10/01 23:04:12
episode : 899 | final step : 99 | total reward : -1.0
2020/10/01 23:04:15
episode : 900 | final step : 99 | total reward : 0.0
2020/10/01 23:04:18
episode : 901 | final step : 99 | total reward : 0.0
2020/10/01 23:04:20
episode : 902 | final step : 99 | total reward : 0.0
2020/10/01 23:04:24
episode : 903 | final step : 98 | total reward : -1.0
2020/10/01 23:04:26
episode : 904 | final step : 98 | total reward : -1.0
2020/10/01 23:04:29
episode : 905 | final step : 99 | total reward : -1.0
2020/10/01 23:04:32
episode : 906 | final step : 98 | total reward : -1.0
2020/10/01 23:04:35
episode : 907 | final step : 98 | total reward : 0.0
2020/10/01 23:04:38
episode : 908 | final step : 98 | total reward : -1.0
2020/10/01 23:04:41
episode : 909 | final s

2020/10/01 23:09:29
episode : 1007 | final step : 99 | total reward : 0.0
2020/10/01 23:09:32
episode : 1008 | final step : 98 | total reward : -1.0
2020/10/01 23:09:35
episode : 1009 | final step : 98 | total reward : -1.0
2020/10/01 23:09:38
episode : 1010 | final step : 98 | total reward : -1.0
2020/10/01 23:09:41
episode : 1011 | final step : 98 | total reward : 0.0
2020/10/01 23:09:44
episode : 1012 | final step : 99 | total reward : -1.0
2020/10/01 23:09:47
episode : 1013 | final step : 98 | total reward : -1.0
2020/10/01 23:09:50
episode : 1014 | final step : 100 | total reward : -1.0
2020/10/01 23:09:53
episode : 1015 | final step : 98 | total reward : 0.0
2020/10/01 23:09:56
episode : 1016 | final step : 98 | total reward : -1.0
2020/10/01 23:09:59
episode : 1017 | final step : 102 | total reward : 0.0
2020/10/01 23:10:02
episode : 1018 | final step : 99 | total reward : -1.0
2020/10/01 23:10:05
episode : 1019 | final step : 99 | total reward : 0.0
2020/10/01 23:10:08
episode 

2020/10/01 23:14:57
episode : 1117 | final step : 98 | total reward : -1.0
2020/10/01 23:15:00
episode : 1118 | final step : 100 | total reward : 0.0
2020/10/01 23:15:03
episode : 1119 | final step : 99 | total reward : 0.0
2020/10/01 23:15:06
episode : 1120 | final step : 99 | total reward : -1.0
2020/10/01 23:15:09
episode : 1121 | final step : 98 | total reward : -1.0
2020/10/01 23:15:12
episode : 1122 | final step : 98 | total reward : -1.0
2020/10/01 23:15:15
episode : 1123 | final step : 98 | total reward : 0.0
2020/10/01 23:15:18
episode : 1124 | final step : 99 | total reward : 0.0
2020/10/01 23:15:21
episode : 1125 | final step : 98 | total reward : 0.0
2020/10/01 23:15:24
episode : 1126 | final step : 98 | total reward : -1.0
2020/10/01 23:15:27
episode : 1127 | final step : 100 | total reward : -1.0
2020/10/01 23:15:30
episode : 1128 | final step : 98 | total reward : -1.0
2020/10/01 23:15:33
episode : 1129 | final step : 99 | total reward : -1.0
2020/10/01 23:15:36
episode 

2020/10/01 23:20:21
episode : 1227 | final step : 98 | total reward : -1.0
2020/10/01 23:20:24
episode : 1228 | final step : 100 | total reward : 0.0
2020/10/01 23:20:27
episode : 1229 | final step : 100 | total reward : 0.0
2020/10/01 23:20:30
episode : 1230 | final step : 98 | total reward : -1.0
2020/10/01 23:20:33
episode : 1231 | final step : 98 | total reward : -1.0
2020/10/01 23:20:36
episode : 1232 | final step : 100 | total reward : -1.0
2020/10/01 23:20:39
episode : 1233 | final step : 99 | total reward : 0.0
2020/10/01 23:20:42
episode : 1234 | final step : 98 | total reward : -1.0
2020/10/01 23:20:45
episode : 1235 | final step : 99 | total reward : -1.0
2020/10/01 23:20:48
episode : 1236 | final step : 100 | total reward : -1.0
2020/10/01 23:20:51
episode : 1237 | final step : 98 | total reward : -1.0
2020/10/01 23:20:54
episode : 1238 | final step : 98 | total reward : 0.0
2020/10/01 23:20:57
episode : 1239 | final step : 100 | total reward : 0.0
2020/10/01 23:21:00
episo

2020/10/01 23:25:52
episode : 1337 | final step : 100 | total reward : 0.0
2020/10/01 23:25:55
episode : 1338 | final step : 98 | total reward : -1.0
2020/10/01 23:25:58
episode : 1339 | final step : 99 | total reward : -1.0
2020/10/01 23:26:01
episode : 1340 | final step : 99 | total reward : -1.0
2020/10/01 23:26:04
episode : 1341 | final step : 98 | total reward : -1.0
2020/10/01 23:26:07
episode : 1342 | final step : 98 | total reward : -1.0
2020/10/01 23:26:10
episode : 1343 | final step : 98 | total reward : -1.0
2020/10/01 23:26:13
episode : 1344 | final step : 98 | total reward : 0.0
2020/10/01 23:26:16
episode : 1345 | final step : 98 | total reward : -1.0
2020/10/01 23:26:19
episode : 1346 | final step : 99 | total reward : -1.0
2020/10/01 23:26:22
episode : 1347 | final step : 98 | total reward : -1.0
2020/10/01 23:26:25
episode : 1348 | final step : 100 | total reward : 0.0
2020/10/01 23:26:28
episode : 1349 | final step : 98 | total reward : -1.0
2020/10/01 23:26:31
episod

2020/10/01 23:31:17
episode : 1447 | final step : 98 | total reward : 0.0
2020/10/01 23:31:20
episode : 1448 | final step : 99 | total reward : 0.0
2020/10/01 23:31:23
episode : 1449 | final step : 98 | total reward : -1.0
2020/10/01 23:31:26
episode : 1450 | final step : 98 | total reward : -1.0
2020/10/01 23:31:29
episode : 1451 | final step : 99 | total reward : -1.0
2020/10/01 23:31:32
episode : 1452 | final step : 97 | total reward : 0.0
2020/10/01 23:31:35
episode : 1453 | final step : 98 | total reward : -1.0
2020/10/01 23:31:37
episode : 1454 | final step : 98 | total reward : -1.0
2020/10/01 23:31:41
episode : 1455 | final step : 98 | total reward : -1.0
2020/10/01 23:31:43
episode : 1456 | final step : 98 | total reward : -1.0
2020/10/01 23:31:46
episode : 1457 | final step : 98 | total reward : -1.0
2020/10/01 23:31:49
episode : 1458 | final step : 98 | total reward : -1.0
2020/10/01 23:31:52
episode : 1459 | final step : 100 | total reward : 0.0
2020/10/01 23:31:55
episode 

2020/10/01 23:36:41
episode : 1557 | final step : 98 | total reward : -1.0
2020/10/01 23:36:44
episode : 1558 | final step : 100 | total reward : 0.0
2020/10/01 23:36:47
episode : 1559 | final step : 98 | total reward : -1.0
2020/10/01 23:36:50
episode : 1560 | final step : 99 | total reward : -1.0
2020/10/01 23:36:53
episode : 1561 | final step : 98 | total reward : -1.0
2020/10/01 23:36:56
episode : 1562 | final step : 98 | total reward : -1.0
2020/10/01 23:36:59
episode : 1563 | final step : 99 | total reward : -1.0
2020/10/01 23:37:02
episode : 1564 | final step : 97 | total reward : 0.0
2020/10/01 23:37:05
episode : 1565 | final step : 97 | total reward : 0.0
2020/10/01 23:37:08
episode : 1566 | final step : 100 | total reward : 0.0
2020/10/01 23:37:11
episode : 1567 | final step : 98 | total reward : -1.0
2020/10/01 23:37:14
episode : 1568 | final step : 99 | total reward : -1.0
2020/10/01 23:37:17
episode : 1569 | final step : 100 | total reward : 0.0
2020/10/01 23:37:20
episode

2020/10/01 23:42:06
episode : 1667 | final step : 99 | total reward : -1.0
2020/10/01 23:42:09
episode : 1668 | final step : 98 | total reward : -1.0
2020/10/01 23:42:11
episode : 1669 | final step : 98 | total reward : -1.0
2020/10/01 23:42:14
episode : 1670 | final step : 100 | total reward : 0.0
2020/10/01 23:42:17
episode : 1671 | final step : 99 | total reward : -1.0
2020/10/01 23:42:20
episode : 1672 | final step : 99 | total reward : -1.0
2020/10/01 23:42:23
episode : 1673 | final step : 99 | total reward : -1.0
2020/10/01 23:42:26
episode : 1674 | final step : 99 | total reward : -1.0
2020/10/01 23:42:29
episode : 1675 | final step : 98 | total reward : -1.0
2020/10/01 23:42:32
episode : 1676 | final step : 98 | total reward : -1.0
2020/10/01 23:42:35
episode : 1677 | final step : 98 | total reward : 0.0
2020/10/01 23:42:38
episode : 1678 | final step : 98 | total reward : -1.0
2020/10/01 23:42:41
episode : 1679 | final step : 98 | total reward : -1.0
2020/10/01 23:42:44
episod

2020/10/01 23:47:30
episode : 1777 | final step : 98 | total reward : -1.0
2020/10/01 23:47:32
episode : 1778 | final step : 97 | total reward : 0.0
2020/10/01 23:47:35
episode : 1779 | final step : 94 | total reward : 0.0
2020/10/01 23:47:38
episode : 1780 | final step : 98 | total reward : -1.0
2020/10/01 23:47:41
episode : 1781 | final step : 98 | total reward : 0.0
2020/10/01 23:47:44
episode : 1782 | final step : 99 | total reward : -1.0
2020/10/01 23:47:47
episode : 1783 | final step : 98 | total reward : -1.0
2020/10/01 23:47:50
episode : 1784 | final step : 98 | total reward : -1.0
2020/10/01 23:47:53
episode : 1785 | final step : 98 | total reward : -1.0
2020/10/01 23:47:56
episode : 1786 | final step : 97 | total reward : 0.0
2020/10/01 23:47:59
episode : 1787 | final step : 98 | total reward : -1.0
2020/10/01 23:48:02
episode : 1788 | final step : 99 | total reward : -1.0
2020/10/01 23:48:05
episode : 1789 | final step : 98 | total reward : 0.0
2020/10/01 23:48:08
episode : 

2020/10/01 23:52:53
episode : 1887 | final step : 100 | total reward : 0.0
2020/10/01 23:52:56
episode : 1888 | final step : 98 | total reward : -1.0
2020/10/01 23:52:59
episode : 1889 | final step : 100 | total reward : 0.0
2020/10/01 23:53:02
episode : 1890 | final step : 97 | total reward : 0.0
2020/10/01 23:53:05
episode : 1891 | final step : 98 | total reward : -1.0
2020/10/01 23:53:08
episode : 1892 | final step : 98 | total reward : -1.0
2020/10/01 23:53:11
episode : 1893 | final step : 98 | total reward : -1.0
2020/10/01 23:53:14
episode : 1894 | final step : 98 | total reward : -1.0
2020/10/01 23:53:17
episode : 1895 | final step : 98 | total reward : -1.0
2020/10/01 23:53:20
episode : 1896 | final step : 98 | total reward : -1.0
2020/10/01 23:53:23
episode : 1897 | final step : 98 | total reward : -1.0
2020/10/01 23:53:26
episode : 1898 | final step : 100 | total reward : 0.0
2020/10/01 23:53:29
episode : 1899 | final step : 99 | total reward : 0.0
2020/10/01 23:53:32
episode

In [None]:
# Total reward of every training episode.
# Entries of reward_list may be one-element tensors (possibly on the GPU);
# float() turns each into a plain scalar that matplotlib can plot.
episode_returns = [float(r) for r in reward_list]

plt.figure(figsize=(15,10))
plt.xlabel("Episode")
plt.ylabel("Total rewards")
plt.plot(episode_returns)

In [None]:
def moving_average(data,n):
    """Trailing moving average of ``data`` with window length ``n``.

    Element ``i`` of the result is the mean of ``data[max(0, i-n+1) : i+1]``,
    so the first ``n-1`` values average over the shorter prefix that is
    available and every later value averages exactly ``n`` samples.

    Args:
        data: sequence of numbers (anything ``float()`` accepts, e.g.
            one-element tensors).
        n: window length, must be >= 1.

    Returns:
        numpy array of the same length as ``data`` (empty for empty input).

    Raises:
        ValueError: if ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be >= 1, got {!r}".format(n))

    values = [float(x) for x in data]
    res_data = np.zeros(len(values))

    running = 0.0  # sum of the samples currently inside the window
    for i, value in enumerate(values):
        running += value
        if i >= n:
            # Drop the sample that just left the window.
            running -= values[i - n]
        res_data[i] = running / min(i + 1, n)
    return res_data

In [None]:
# Smooth the per-episode rewards with a 200-episode window.
average_number = 200
filtered_data = moving_average(data=reward_list, n=average_number)

In [None]:
# Smoothed per-episode reward curve.
plt.figure(figsize=(15,10))
plt.gca().set(xlabel="Episode", ylabel="Total rewards")
plt.plot(filtered_data)

In [None]:
# Time-series plots of the values logged during one episode.
Deg2Rad = np.pi/180
Rad2Deg = 1/Deg2Rad

# Log of the first episode; later trajectory cells read plt_res as well.
plt_res=total_res[0]

plt.figure(figsize=(15,9), dpi=100)

# One stacked panel per entry: (subplot position, series, line label, y-axis label).
# Columns follow the order used when the log rows were built:
# 0 command, 1 r, 2 elevation, 3 azimuth, 10 h.
panels = (
    (511, plt_res[:,0], r'$\dot{h}_{cmd}$', r'$\dot{h}_{cmd}$ ($m/s$)'),
    (512, plt_res[:,10], r'$\{h}$', r'$h$ (m)'),
    (513, plt_res[:,1], r'$\{r}$', r'$r$ (m)'),
    (514, plt_res[:,2]*Rad2Deg, 'elevation', 'elevation (deg)'),
    (515, plt_res[:,3]*Rad2Deg, 'azimuth', 'azimuth (deg)'),
)
for position, series, line_label, axis_label in panels:
    plt.subplot(position)
    plt.plot(series, label=line_label)
    plt.ylabel(axis_label)
    plt.grid()

# Applies to the current axes only, i.e. the last panel.
plt.legend()
plt.show()


In [None]:
# Trajectory plots for the episode currently held in plt_res
# (set by the preceding "plots" cell).

# Kept for older Matplotlib releases, where this import registers the '3d' projection.
from mpl_toolkits.mplot3d import Axes3D

# Columns 4-6 are plotted as 'player' and 7-9 as 'target'. The third component
# is negated before plotting (presumably a down-positive frame — TODO confirm).

# 3D view.
plt.figure(figsize=(12,9), dpi=100)
# Create the 3D axes explicitly: plt.gca(projection='3d') is rejected by Matplotlib >= 3.6.
plt.subplot(111, projection='3d')
plt.plot(plt_res[:,5], plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
plt.plot(plt_res[:,8], plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
plt.xlabel('East')
plt.ylabel('North')
plt.xlim(-2000,2000)
plt.ylim(0,4000)
plt.legend()
plt.show()

# Top view (East vs North).
plt.figure(figsize=(12,9), dpi=100)
plt.plot(plt_res[:,5], plt_res[:,4], label='player', linewidth=3)
plt.plot(plt_res[:,8], plt_res[:,7], label='target', linewidth=3)
plt.xlabel('East')
plt.ylabel('North')
plt.grid(), plt.legend(), plt.axis('equal')
plt.show()

# Side view (North vs Up).
plt.figure(figsize=(12,9), dpi=100)
plt.plot(plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
plt.plot(plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
plt.xlabel('North')
plt.ylabel('Up')
plt.grid(), plt.legend(), plt.axis('equal')
plt.show()

In [None]:
# Time-series plots of the values logged during one episode.
Deg2Rad = np.pi/180
Rad2Deg = 1/Deg2Rad

# Log of episode index 10; later trajectory cells read plt_res as well.
plt_res=total_res[10]

plt.figure(figsize=(15,9), dpi=100)

# One stacked panel per entry: (subplot position, series, line label, y-axis label).
# Columns follow the order used when the log rows were built:
# 0 command, 1 r, 2 elevation, 3 azimuth, 10 h.
panels = (
    (511, plt_res[:,0], r'$\dot{h}_{cmd}$', r'$\dot{h}_{cmd}$ ($m/s$)'),
    (512, plt_res[:,10], r'$\{h}$', r'$h$ (m)'),
    (513, plt_res[:,1], r'$\{r}$', r'$r$ (m)'),
    (514, plt_res[:,2]*Rad2Deg, 'elevation', 'elevation (deg)'),
    (515, plt_res[:,3]*Rad2Deg, 'azimuth', 'azimuth (deg)'),
)
for position, series, line_label, axis_label in panels:
    plt.subplot(position)
    plt.plot(series, label=line_label)
    plt.ylabel(axis_label)
    plt.grid()

# Applies to the current axes only, i.e. the last panel.
plt.legend()
plt.show()

In [None]:
# Trajectory plots for the episode currently held in plt_res
# (set by the preceding "plots" cell).

# Kept for older Matplotlib releases, where this import registers the '3d' projection.
from mpl_toolkits.mplot3d import Axes3D

# Columns 4-6 are plotted as 'player' and 7-9 as 'target'. The third component
# is negated before plotting (presumably a down-positive frame — TODO confirm).

# 3D view.
plt.figure(figsize=(12,9), dpi=100)
# Create the 3D axes explicitly: plt.gca(projection='3d') is rejected by Matplotlib >= 3.6.
plt.subplot(111, projection='3d')
plt.plot(plt_res[:,5], plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
plt.plot(plt_res[:,8], plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
plt.xlabel('East')
plt.ylabel('North')
plt.xlim(-2000,2000)
plt.ylim(0,4000)
plt.legend()
plt.show()

# Top view (East vs North).
plt.figure(figsize=(12,9), dpi=100)
plt.plot(plt_res[:,5], plt_res[:,4], label='player', linewidth=3)
plt.plot(plt_res[:,8], plt_res[:,7], label='target', linewidth=3)
plt.xlabel('East')
plt.ylabel('North')
plt.grid(), plt.legend(), plt.axis('equal')
plt.show()

# Side view (North vs Up).
plt.figure(figsize=(12,9), dpi=100)
plt.plot(plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
plt.plot(plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
plt.xlabel('North')
plt.ylabel('Up')
plt.grid(), plt.legend(), plt.axis('equal')
plt.show()

In [None]:
# Time-series plots of the values logged during one episode.
Deg2Rad = np.pi/180
Rad2Deg = 1/Deg2Rad

# Log of episode index 100; later trajectory cells read plt_res as well.
plt_res=total_res[100]

plt.figure(figsize=(15,9), dpi=100)

# One stacked panel per entry: (subplot position, series, line label, y-axis label).
# Columns follow the order used when the log rows were built:
# 0 command, 1 r, 2 elevation, 3 azimuth, 10 h.
panels = (
    (511, plt_res[:,0], r'$\dot{h}_{cmd}$', r'$\dot{h}_{cmd}$ ($m/s$)'),
    (512, plt_res[:,10], r'$\{h}$', r'$h$ (m)'),
    (513, plt_res[:,1], r'$\{r}$', r'$r$ (m)'),
    (514, plt_res[:,2]*Rad2Deg, 'elevation', 'elevation (deg)'),
    (515, plt_res[:,3]*Rad2Deg, 'azimuth', 'azimuth (deg)'),
)
for position, series, line_label, axis_label in panels:
    plt.subplot(position)
    plt.plot(series, label=line_label)
    plt.ylabel(axis_label)
    plt.grid()

# Applies to the current axes only, i.e. the last panel.
plt.legend()
plt.show()

In [None]:
# Trajectory plots for the episode currently held in plt_res
# (set by the preceding "plots" cell).

# Kept for older Matplotlib releases, where this import registers the '3d' projection.
from mpl_toolkits.mplot3d import Axes3D

# Columns 4-6 are plotted as 'player' and 7-9 as 'target'. The third component
# is negated before plotting (presumably a down-positive frame — TODO confirm).

# 3D view.
plt.figure(figsize=(12,9), dpi=100)
# Create the 3D axes explicitly: plt.gca(projection='3d') is rejected by Matplotlib >= 3.6.
plt.subplot(111, projection='3d')
plt.plot(plt_res[:,5], plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
plt.plot(plt_res[:,8], plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
plt.xlabel('East')
plt.ylabel('North')
plt.xlim(-2000,2000)
plt.ylim(0,4000)
plt.legend()
plt.show()

# Top view (East vs North).
plt.figure(figsize=(12,9), dpi=100)
plt.plot(plt_res[:,5], plt_res[:,4], label='player', linewidth=3)
plt.plot(plt_res[:,8], plt_res[:,7], label='target', linewidth=3)
plt.xlabel('East')
plt.ylabel('North')
plt.grid(), plt.legend(), plt.axis('equal')
plt.show()

# Side view (North vs Up).
plt.figure(figsize=(12,9), dpi=100)
plt.plot(plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
plt.plot(plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
plt.xlabel('North')
plt.ylabel('Up')
plt.grid(), plt.legend(), plt.axis('equal')
plt.show()

In [None]:
# Time-series plots of the values logged during one episode.
Deg2Rad = np.pi/180
Rad2Deg = 1/Deg2Rad

# Log of episode index 1000; later trajectory cells read plt_res as well.
plt_res=total_res[1000]

plt.figure(figsize=(15,9), dpi=100)

# One stacked panel per entry: (subplot position, series, line label, y-axis label).
# Columns follow the order used when the log rows were built:
# 0 command, 1 r, 2 elevation, 3 azimuth, 10 h.
panels = (
    (511, plt_res[:,0], r'$\dot{h}_{cmd}$', r'$\dot{h}_{cmd}$ ($m/s$)'),
    (512, plt_res[:,10], r'$\{h}$', r'$h$ (m)'),
    (513, plt_res[:,1], r'$\{r}$', r'$r$ (m)'),
    (514, plt_res[:,2]*Rad2Deg, 'elevation', 'elevation (deg)'),
    (515, plt_res[:,3]*Rad2Deg, 'azimuth', 'azimuth (deg)'),
)
for position, series, line_label, axis_label in panels:
    plt.subplot(position)
    plt.plot(series, label=line_label)
    plt.ylabel(axis_label)
    plt.grid()

# Applies to the current axes only, i.e. the last panel.
plt.legend()
plt.show()

In [None]:
# Trajectory plots for the episode currently held in plt_res
# (set by the preceding "plots" cell).

# Kept for older Matplotlib releases, where this import registers the '3d' projection.
from mpl_toolkits.mplot3d import Axes3D

# Columns 4-6 are plotted as 'player' and 7-9 as 'target'. The third component
# is negated before plotting (presumably a down-positive frame — TODO confirm).

# 3D view.
plt.figure(figsize=(12,9), dpi=100)
# Create the 3D axes explicitly: plt.gca(projection='3d') is rejected by Matplotlib >= 3.6.
plt.subplot(111, projection='3d')
plt.plot(plt_res[:,5], plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
plt.plot(plt_res[:,8], plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
plt.xlabel('East')
plt.ylabel('North')
plt.xlim(-2000,2000)
plt.ylim(0,4000)
plt.legend()
plt.show()

# Top view (East vs North).
plt.figure(figsize=(12,9), dpi=100)
plt.plot(plt_res[:,5], plt_res[:,4], label='player', linewidth=3)
plt.plot(plt_res[:,8], plt_res[:,7], label='target', linewidth=3)
plt.xlabel('East')
plt.ylabel('North')
plt.grid(), plt.legend(), plt.axis('equal')
plt.show()

# Side view (North vs Up).
plt.figure(figsize=(12,9), dpi=100)
plt.plot(plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
plt.plot(plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
plt.xlabel('North')
plt.ylabel('Up')
plt.grid(), plt.legend(), plt.axis('equal')
plt.show()

In [None]:
# Time-series plots of the values logged during one episode.
Deg2Rad = np.pi/180
Rad2Deg = 1/Deg2Rad

# Log of episode index 9999; later trajectory cells read plt_res as well.
plt_res=total_res[9999]

plt.figure(figsize=(15,9), dpi=100)

# One stacked panel per entry: (subplot position, series, line label, y-axis label).
# Columns follow the order used when the log rows were built:
# 0 command, 1 r, 2 elevation, 3 azimuth, 10 h.
panels = (
    (511, plt_res[:,0], r'$\dot{h}_{cmd}$', r'$\dot{h}_{cmd}$ ($m/s$)'),
    (512, plt_res[:,10], r'$\{h}$', r'$h$ (m)'),
    (513, plt_res[:,1], r'$\{r}$', r'$r$ (m)'),
    (514, plt_res[:,2]*Rad2Deg, 'elevation', 'elevation (deg)'),
    (515, plt_res[:,3]*Rad2Deg, 'azimuth', 'azimuth (deg)'),
)
for position, series, line_label, axis_label in panels:
    plt.subplot(position)
    plt.plot(series, label=line_label)
    plt.ylabel(axis_label)
    plt.grid()

# Applies to the current axes only, i.e. the last panel.
plt.legend()
plt.show()

In [None]:
# Trajectory plots for the episode currently held in plt_res
# (set by the preceding "plots" cell).

# Kept for older Matplotlib releases, where this import registers the '3d' projection.
from mpl_toolkits.mplot3d import Axes3D

# Columns 4-6 are plotted as 'player' and 7-9 as 'target'. The third component
# is negated before plotting (presumably a down-positive frame — TODO confirm).

# 3D view.
plt.figure(figsize=(12,9), dpi=100)
# Create the 3D axes explicitly: plt.gca(projection='3d') is rejected by Matplotlib >= 3.6.
plt.subplot(111, projection='3d')
plt.plot(plt_res[:,5], plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
plt.plot(plt_res[:,8], plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
plt.xlabel('East')
plt.ylabel('North')
plt.xlim(-2000,2000)
plt.ylim(0,4000)
plt.legend()
plt.show()

# Top view (East vs North).
plt.figure(figsize=(12,9), dpi=100)
plt.plot(plt_res[:,5], plt_res[:,4], label='player', linewidth=3)
plt.plot(plt_res[:,8], plt_res[:,7], label='target', linewidth=3)
plt.xlabel('East')
plt.ylabel('North')
plt.grid(), plt.legend(), plt.axis('equal')
plt.show()

# Side view (North vs Up).
plt.figure(figsize=(12,9), dpi=100)
plt.plot(plt_res[:,4], -plt_res[:,6], label='player', linewidth=3)
plt.plot(plt_res[:,7], -plt_res[:,9], label='target', linewidth=3)
plt.xlabel('North')
plt.ylabel('Up')
plt.grid(), plt.legend(), plt.axis('equal')
plt.show()