In [None]:
import os
import torch
import random
import numpy as np
from collections import namedtuple, deque 
import math
import gym
from gym import logger,spaces
from sklearn import preprocessing
import matplotlib.pyplot as plt

from UAV_ENV.envs.DQNenv_fixedpower import UAVenv_fixedpower
from DQN_PyTorch_fixedpower import dqn_agent_fixedpower
#from DQN import DQNagent
import seaborn as sns 
from mpl_toolkits import mplot3d

In [None]:
# Build the UAV environment through the gym registry.
# Background on registering a custom environment with OpenAI gym:
# https://www.youtube.com/watch?v=kd4RrN-FTWY
env = gym.make('DqnUavEnv-v3')
env.seed(0)

# Observation-space summary, read before the first reset.
Observation_shape = env.observation_space.shape
Observations = Observation_shape[0]

# Initial state returned by the first reset.
state = env.reset()

# Action-space summary: the space object itself and its `n` attribute.
Actions_shape = env.action_space
Actions = Actions_shape.n

# Report everything that was just collected, one "label value" line each.
for label, value in (
    ('Observation_space:', Observations),
    ('Observation_shape:', Observation_shape),
    ('Action_space:', Actions_shape),
    ('Action_number:', Actions),
    ('Initial_state: ', state),
):
    print(label, value)

In [None]:
# Create the DQN agent.
# A single seed value drives both the global random number generators and the
# agent, so that a Restart & Run All gives repeatable exploration and network
# initialisation.
seed = 0
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# The agent is sized from the environment: number of observation components
# and number of discrete actions (both computed when the environment was
# created). The seed is passed by name instead of repeating the literal.
Agent_DQN = dqn_agent_fixedpower(Observations, Actions, seed=seed)

In [None]:
# Train the DQN agent.

# ---- hyper-parameters ----
num_episode=4500
step_per_episode=200
exploration_rate=1        # epsilon of the epsilon-greedy policy, decayed once per episode
epsilon_decay=0.995
Min_epsilon=0.001

# Checkpoint file for the main network. Defined up front so that it exists
# independently of how many episodes are run (a later cell loads from it).
FILE="model_parameters.pth"

# ---- per-episode logs ----
reward_per_episode=[]
Energy_per_episode=[]
Data_Rate_per_episode=[]
Avg_reward=[]             # running mean of reward_per_episode
Avg_energy=[]             # running mean of Energy_per_episode
Avg_Rate=[]               # running mean of Data_Rate_per_episode
losses_per_episode=[]     # summed training loss of each episode (0-dim tensors)

# UAV coordinates visited during the last episode (used for the trajectory plot)
x_location=[]
y_location=[]
z_location=[]

for i in range(num_episode):
    # start every episode from a freshly reset environment
    state=env.reset()
    cum_reward=0
    cum_rate=0
    episode_loss=0.0      # sum of the losses returned by the training steps of this episode
    sum_energy=0          # guard: keeps the value defined (and not stale) if no step is taken
    done=False
    is_last_episode=(i==num_episode-1)

    # NOTE: the loop bound is step_per_episode-1, i.e. at most 199 steps per episode.
    for j in range(step_per_episode-1):
        # epsilon-greedy action selection
        action=Agent_DQN.choose_action(state,exploration_rate)

        # apply the action; the info dict carries the data rate under "Rate"
        next_state,reward,done,Data_Rate=env.step(action)

        # store the transition in the replay buffer
        Agent_DQN.reply_buffer(state,action,reward,next_state,done)

        # NOTE(review): the first argument is the full state while the second is
        # only the first three components of the next state -- confirm that this
        # is what env.energy expects.
        energy_per_step,sum_energy=env.energy(state,next_state[0:3])

        state=next_state
        cum_reward+=reward
        cum_rate+=Data_Rate["Rate"]

        if is_last_episode:
            x_location.append(next_state[0])
            y_location.append(next_state[1])
            z_location.append(next_state[2])

        if done:
            break

        # One training step on a sampled batch. The result is None when no loss
        # was produced (presumably while the buffer is still too small -- TODO
        # confirm), so only real losses are accumulated. The previous code
        # overwrote the accumulator with the latest step loss and then doubled it.
        step_loss=Agent_DQN.sample_buffer()
        if step_loss is not None:
            if torch.is_tensor(step_loss):
                episode_loss+=float(step_loss.detach())
            else:
                episode_loss+=float(step_loss)

    # decay epsilon once per episode, never below the floor
    exploration_rate=max(exploration_rate*epsilon_decay,Min_epsilon)

    # Episode statistics and their running averages.
    # The loss is kept as a 0-dim tensor, the element type this list has always
    # held, so code that consumes the list keeps working; it is now always a
    # number (never None), which also keeps the formatted print below safe.
    losses_per_episode.append(torch.tensor(episode_loss,dtype=torch.float64))
    reward_per_episode.append(cum_reward)
    Avg_reward.append(np.mean(reward_per_episode))
    Energy_per_episode.append(sum_energy)
    Avg_energy.append(np.mean(Energy_per_episode))
    Data_Rate_per_episode.append(cum_rate)
    Avg_Rate.append(np.mean(Data_Rate_per_episode))

    # progress report every second episode
    if (i%2==0):
        print('\rEpisode {} \t Average Reward: {:.2f},\t Loss: {:.2f}, \t Energy:{:.2f}'.
              format(i, Avg_reward[-1],episode_loss,sum_energy))

# save weights and biases of the main network once training has finished
torch.save(Agent_DQN.main_network.state_dict(),FILE)

# Visualise the training curves and derive the normalised reward / plain-float
# loss series that are exported afterwards.

# apply the seaborn theme once; every figure created below picks it up
sns.set()


def _plot_curve(values, ylabel):
    """Plot one per-episode series on a new figure and return that figure."""
    fig = plt.figure()
    plt.plot(values)
    plt.xlabel('Number of Episode')
    plt.ylabel(ylabel)
    return fig


def _loss_to_float(value):
    """Convert one logged loss to a Python float.

    Accepts a tensor (detached and moved to the CPU first), a plain number or a
    0-dim array. None (no loss recorded for that episode) becomes NaN so the
    series stays aligned with the episode index instead of being shortened.
    """
    if value is None:
        return float('nan')
    if torch.is_tensor(value):
        return float(value.detach().cpu())
    return float(value)


# reward visualization
fig1 = _plot_curve(Avg_reward, 'Average Reward')

# Min-max normalisation of the average reward. min and max are computed once
# (the previous loop recomputed both for every element), and a flat or empty
# curve no longer leads to a division by zero.
avg_reward_values = np.asarray(Avg_reward, dtype=float)
if avg_reward_values.size == 0:
    norm = []
else:
    reward_min = avg_reward_values.min()
    reward_span = avg_reward_values.max() - reward_min
    if reward_span > 0:
        norm = ((avg_reward_values - reward_min) / reward_span).tolist()
    else:
        norm = [0.0] * avg_reward_values.size
fig2 = _plot_curve(norm, 'Normalized Average Reward')

# loss visualization
# the logged losses may be tensors; turn them into plain floats for plotting
# and for the CSV export
losses_per_episode = [_loss_to_float(k) for k in losses_per_episode]
fig3 = _plot_curve(losses_per_episode, 'Loss Function')

# energy usage visualization
fig4 = _plot_curve(Avg_energy, 'Average Energy Usage (kJ)')
fig5 = _plot_curve(Energy_per_episode, 'Energy Usage (kJ) ')

# data rate visualization
fig6 = _plot_curve(Avg_Rate, 'Average Data Rate (kb/s)')

plt.show()

In [None]:
# Plot the UAV trajectory recorded during the last training episode
# (x_location / y_location / z_location are filled by the training loop).
# FIXME (author's note): the plotted UAV appears to jump to the terminal state
# because the destination was appended to the three coordinate arrays.
# Debug output of the raw coordinates, disabled:
#print(x_location)
#print(y_location)
#print(z_location)
sns.set_style("whitegrid")
env.path_plot(x_location,y_location,z_location)

In [None]:
# Export every training curve to its own CSV file for later plotting
# (results of the run with lr=0.003).
csv_outputs = (
    ('Average_reward_fixed_power.csv', Avg_reward),
    ('reward_per_episode_fixed_power.csv', reward_per_episode),
    ('Normalized_Average_reward_fixedpower.csv', norm),
    ('losses_per_episode_fixed_power.csv', losses_per_episode),
    ('Energy_per_episode_fixed_power.csv', Energy_per_episode),
    ('Average_energy_fixed_power.csv', Avg_energy),
    ('Avg_Rate_fixed_power.csv', Avg_Rate),
)

# same files, same order and same delimiter as before, written in one loop
for csv_name, series in csv_outputs:
    np.savetxt(csv_name, series, delimiter=',')

In [None]:
# Restore the main network of the DQN agent from the checkpoint file:
# first read the stored state dict from disk, then copy it into the network.
saved_weights = torch.load(FILE)
Agent_DQN.main_network.load_state_dict(saved_weights)