In [1]:
import numpy as np
import pandas as pd 
import argparse  
import random
from sklearn.preprocessing import MinMaxScaler

from collections import deque
import tensorflow as tf 
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input, Lambda, Concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K

import gc


#https://stackoverflow.com/questions/69136518/tensorflow-2-getting-warningtensorflowx-out-of-the-last-x-calls-to-function
#tf.compat.v1.disable_eager_execution()

# import warnings
# warnings.filterwarnings('ignore')

In [2]:
# Record the TensorFlow and Python versions this notebook was executed with.
# The second line is an IPython shell escape, so it only works inside a notebook/IPython session.
print("tf version : ",tf.__version__)
!python --version

tf version :  2.8.0
Python 3.9.7


### NeuralNetwork


    

In [3]:

class NeuralNetwork(object):
    """Pair of identically built Keras Q-networks: an online model and a target model.

    ``self.model`` is the network that is trained; ``self.model_`` is a second
    network with the same architecture whose weights are overwritten from the
    online model by :meth:`update_target_model`.

    Parameters
    ----------
    input_shape : tuple
        Shape passed to ``keras.layers.Input`` (without the batch dimension).
    action_size : int
        Stored as ``self.action_size``; the width of the output layer is taken
        from ``args['uav_number'] + 2``.
    args : dict
        Configuration; this class reads ``learning_rate``, ``number_nodes``,
        ``dueling`` and ``uav_number``.
    """

    def __init__(self, input_shape, action_size, args):
        self.state_size = input_shape
        self.action_size = action_size
        # Keep the configuration on the instance so build_model() uses the
        # arguments given to this object instead of a notebook-global `args`.
        self.args = args
        self.learning_rate = args['learning_rate']
        self.num_nodes = args['number_nodes']
        self.model = self.build_model()
        self.model_ = self.build_model()

    def build_model(self):
        """Build and compile one Q-network (dueling or plain, per ``args['dueling']``).

        Returns
        -------
        keras.Model
            Compiled with MSE loss and an Adam optimizer using ``self.learning_rate``.
        """
        num_outputs = self.args['uav_number'] + 2

        # x is the input to the network
        x = Input(self.state_size)

        if self.args['dueling']:  # dueling-DQN

            # for V(s)
            a1 = Dense(self.num_nodes, activation="relu")(x)
            a2 = Dense(self.num_nodes, activation="relu")(a1)
            a3 = Dense(1, activation="linear")(a2)

            # for A(s,a)
            b1 = Dense(self.num_nodes, activation="relu")(x)
            b2 = Dense(self.num_nodes, activation="relu")(b1)
            b3 = Dense(num_outputs, activation="linear")(b2)

            # Column 0 carries V(s), the remaining columns carry A(s, a).
            c = Concatenate(axis=-1)([a3, b3])

            # for Q(s,a) = V(s) + A(s,a) - mean_a A(s,a).
            # The mean is taken over the action axis only; without `axis` it
            # would average over the batch as well and couple unrelated samples.
            z = Lambda(
                lambda a: K.expand_dims(a[:, 0], axis=-1)
                + a[:, 1:]
                - K.mean(a[:, 1:], axis=-1, keepdims=True),
                output_shape=(num_outputs, ))(c)

        else:  # standard DQN

            y1 = Dense(self.num_nodes, activation='relu')(x)
            y2 = Dense(self.num_nodes, activation='relu')(y1)
            z = Dense(num_outputs, activation="linear")(y2)

        model = Model(inputs=x, outputs=z)
        optimizer = Adam(learning_rate=self.learning_rate)
        model.compile(loss="mse", optimizer=optimizer)
        return model

    def train(self, x, y, sample_weight=None, epochs=100, verbose=0):
        """Fit the online model on inputs ``x`` and targets ``y``.

        The whole of ``x`` is used as a single batch (``batch_size=len(x)``)
        for ``epochs`` passes; ``sample_weight`` is forwarded to ``fit``.
        """
        self.model.fit(x, y, batch_size=len(x), sample_weight=sample_weight, epochs=epochs, verbose=verbose)

    def predict(self, state, target=False):
        """Return the prediction for ``state`` from the target model if ``target`` else the online model."""
        if target:
            return self.model_.predict(state)
        return self.model.predict(state)

    def predict_one_sample(self, state, target=False):
        """Return the prediction for ``state``; thin wrapper around :meth:`predict`.

        The result is returned to the caller (previously it was computed and
        discarded, so every caller received ``None``).
        """
        return self.predict(state, target=target)

    def update_target_model(self):
        """Copy the online model's weights into the target model."""
        self.model_.set_weights(self.model.get_weights())
           
        
        
        

### Uniform Experience Replay 

In [4]:
class UniformReplayMemory(object):
    """Bounded replay buffer whose entries are drawn uniformly at random.

    Samples live in a ``deque`` created with ``maxlen=capacity``, so appending
    to a full buffer discards the oldest entry.
    """

    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = deque(maxlen=self.capacity)

    def uer_remember(self, sample):
        """Append one sample to the buffer."""
        self.memory.append(sample)

    def uer_sample(self, batch_size):
        """Return up to ``batch_size`` distinct stored samples chosen at random.

        The request is capped at the number of samples currently stored.
        """
        draw_count = min(batch_size, len(self.memory))
        return random.sample(self.memory, draw_count)
    
        

### SumTree
<br>



In [5]:

class SumTree(object):
    """Binary sum tree stored in a flat array, with a parallel ring buffer of payloads.

    ``self.tree`` has ``2 * capacity - 1`` slots: the last ``capacity`` slots
    are leaves holding priorities, every other slot holds the sum of its two
    children, and slot 0 holds the total. ``self.data[k]`` is the payload for
    leaf ``k + capacity - 1``.
    """

    def __init__(self, capacity):
        self.write = 0  # next position in self.data to be (over)written
        self.capacity = capacity
        self.tree = np.zeros(2 * capacity - 1)
        self.data = np.zeros(capacity, dtype=object)

    def add(self, priority, data):
        """Store ``data`` at the write cursor with the given priority and advance the cursor.

        The cursor wraps to 0 after reaching ``capacity``, so older entries
        get overwritten.
        """
        leaf = self.write + self.capacity - 1
        self.data[self.write] = data
        self.update(leaf, priority)

        advanced = self.write + 1
        self.write = 0 if advanced >= self.capacity else advanced

    def total(self):
        """Return the value held at the root, i.e. the sum of all leaf priorities."""
        return self.tree[0]

    def update(self, idx, priority):
        """Set tree slot ``idx`` to ``priority`` and add the difference to every ancestor."""
        delta = priority - self.tree[idx]
        self.tree[idx] = priority

        node = idx
        while node != 0:
            node = (node - 1) // 2
            self.tree[node] += delta

    def retrieve(self, idx, s):
        """Walk down from slot ``idx`` and return the leaf slot selected by the value ``s``.

        At each node the walk goes left when ``s`` is at most the left child's
        sum; otherwise it goes right and subtracts the left child's sum from ``s``.
        """
        node, remaining = idx, s
        tree_len = len(self.tree)

        while True:
            left = 2 * node + 1
            if left >= tree_len:  # no children: `node` is a leaf
                return node

            left_sum = self.tree[left]
            if remaining <= left_sum:
                node = left
            else:
                node = left + 1
                remaining = remaining - left_sum

    def get(self, s):
        """Return ``(tree_index, priority, payload)`` for the leaf selected by ``s``."""
        leaf = self.retrieve(0, s)
        return leaf, self.tree[leaf], self.data[leaf - self.capacity + 1]
    
        
        
        

### Prioritized Experience Replay



In [6]:
import random  

class PrioritizedReplayMemory(object):
    """Replay memory that stores samples in a ``SumTree`` keyed by priority.

    A sample's priority is ``(TDerror + e) ** priority_scale`` where ``e`` is a
    small constant that keeps every priority above zero.
    """

    def __init__(self, capacity, priority_scale):
        self.capacity = capacity
        self.priority_scale = priority_scale
        self.max_priority = 0
        self.e = 0.01    # to avoid 0 probability of experiences
        self.memory = SumTree(self.capacity)

    def get_priority(self, TDerror):
        """Map a TD error to a priority: ``(TDerror + e) ** priority_scale``."""
        shifted_error = TDerror + self.e
        return shifted_error ** self.priority_scale

    def priority_remember(self, sample, TDerror):
        """Insert ``sample`` with the larger of ``self.max_priority`` and its own priority.

        NOTE(review): ``self.max_priority`` is initialised to 0 and never
        assigned again in this class, so unless it is changed from outside the
        stored value is simply the sample's own priority - confirm whether
        tracking a running maximum was intended.
        """
        stored_priority = max(self.max_priority, self.get_priority(TDerror))
        self.memory.add(stored_priority, sample)

    def priority_sample(self, batch_size):
        """Draw ``batch_size`` entries, one from each equal-width slice of the total priority.

        Returns
        -------
        list
            ``[samples, indices, priorities]`` where ``samples`` is a list of
            ``(tree_index, data)`` pairs and the other two lists hold the tree
            index and priority of each drawn entry.
        """
        # Width of one slice of the cumulative priority range.
        segment_width = self.memory.total() / batch_size

        drawn, leaf_indices, leaf_priorities = [], [], []
        for k in range(batch_size):
            s = random.uniform(segment_width * k, segment_width * (k + 1))
            leaf, leaf_priority, payload = self.memory.get(s)

            drawn.append((leaf, payload))
            leaf_indices.append(leaf)
            leaf_priorities.append(leaf_priority)

        return [drawn, leaf_indices, leaf_priorities]

    def priority_update(self, batch_indices, errors):
        """Recompute the priority of each tree index in ``batch_indices`` from the matching entry of ``errors``."""
        for position, leaf in enumerate(batch_indices):
            self.memory.update(leaf, self.get_priority(errors[position]))
        
        
        
        

### Agent

In [7]:

class Agent(object):
    """DQN agent with epsilon-greedy action selection and uniform or prioritized replay.

    Actions are the integers ``-1 .. uav_number``. They are used directly as
    column indices into the ``uav_number + 2`` wide Q-value rows, so action
    ``-1`` addresses the last column through negative indexing.

    Parameters
    ----------
    state_size : int
        Length of one state vector.
    action_size : int
        Forwarded to ``NeuralNetwork``.
    args : dict
        Configuration; see ``__init__`` for the keys that are read.
    """

    def __init__(self, state_size, action_size, args):
        # Keep the configuration on the instance so the methods below do not
        # depend on a notebook-global `args`.
        self.args = args
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = args['learning_rate']
        self.update_target_frequency = args['target_frequency']
        self.batch_size = args["batch_size"]
        self.gamma = args["gamma"]
        self.epsilon = args['epsilon']
        self.min_epsilon = args['min_epsilon']
        self.epsilon_decay = args['epsilon_decay']
        self.beta = args['min_beta']
        self.beta_max = args['beta_max']
        self.agent_num = args['agent_number']
        self.beta_increment = args['beta_increment']
        self.step = 0
        self.dqn_model = NeuralNetwork(action_size=self.action_size, args=args, input_shape=(self.state_size, ))
        # Not read anywhere in this class (dqn_model already owns its own
        # target network); kept so existing attribute access keeps working.
        self.target_dqn_model = NeuralNetwork(action_size=self.action_size, args=args, input_shape=(self.state_size, ))

        if args['memory_type'] == "per":
            self.memory = PrioritizedReplayMemory(args['memory_capacity'], args['priority_scale'])
        else:
            self.memory = UniformReplayMemory(args['memory_capacity'])

    def decay_epsilon(self):
        """Advance the step counter, anneal ``beta`` and ``epsilon``, and return the exploration rate.

        NOTE(review): both updates start from the already-annealed value and
        multiply by ``exp(-step * rate)``, so the decay compounds from call to
        call - confirm this schedule is intended.
        """
        self.step += 1

        if self.beta < self.beta_max:
            self.beta = self.beta_max + (self.beta - self.beta_max) * np.exp(-1. * self.step * self.beta_increment)

        if self.epsilon > self.min_epsilon:
            self.epsilon = self.min_epsilon + (self.epsilon - self.min_epsilon) * np.exp(-1. * self.step * self.epsilon_decay)
            return self.epsilon

        return self.min_epsilon

    def choose_action(self, state):
        """Pick an action for ``state`` epsilon-greedily.

        Returns
        -------
        int
            An action in ``-1 .. uav_number``.
        """
        exploration_rate = self.decay_epsilon()
        num_uav = self.args['uav_number']

        if exploration_rate > random.random():
            return random.randrange(-1, num_uav + 1)  # explore

        state = np.reshape(state, (1, self.state_size))
        q_values = self.dqn_model.predict(state)
        best_column = int(np.argmax(q_values))

        # The last Q column is the one addressed by action -1 during training
        # (negative indexing), so translate it back to the action id instead
        # of returning an out-of-range UAV number.
        return -1 if best_column == num_uav + 1 else best_column

    def _batch_error(self, samples):
        """Build training inputs/targets and TD errors for ``(state, action, reward, next_state, done)`` samples.

        Returns ``(x, y, errors, loss)``: ``x`` holds the states, ``y`` the
        online Q-values with the taken action's entry replaced by its
        bootstrapped target, ``errors`` the absolute change of that entry, and
        ``loss`` is ``sqrt(errors)`` when the batch has exactly one sample per
        agent, otherwise zeros.
        """
        batch_len = len(samples)

        states = np.array([sample[0] for sample in samples])
        next_states = np.array([sample[3] for sample in samples])
        action = [sample[1] for sample in samples]
        reward = [sample[2] for sample in samples]
        done = [sample[4] for sample in samples]

        # All predictions come from the agent's own networks; building fresh
        # NeuralNetwork objects here would yield untrained, random Q-values.
        target = np.array(self.dqn_model.predict(states))
        target_old = target.copy()
        target_ = self.dqn_model.predict(next_states)
        target_next = self.dqn_model.predict(next_states, target=True)

        use_double_dqn = self.args['double_dqn'] == "double_dqn"
        x = np.zeros((batch_len, self.state_size))

        for i in range(batch_len):
            if done[i]:
                target[i][action[i]] = reward[i]
            elif use_double_dqn:
                # Online network picks the action, target network evaluates it.
                target[i][action[i]] = reward[i] + self.gamma * target_next[i][np.argmax(target_[i])]
            else:
                target[i][action[i]] = reward[i] + self.gamma * (np.amax(target_next[i]))

            x[i] = states[i]

        # The regression target is the full Q-row with the taken action updated.
        y = target

        indices = np.arange(batch_len)
        taken = np.array(action)
        errors = np.abs(target_old[indices, taken] - target[indices, taken])

        loss = np.zeros(self.args['agent_number'])
        if batch_len == self.agent_num:
            loss = np.sqrt(errors)

        return x, y, errors, loss

    def per_batch_error(self, batch):
        """Compute ``(x, y, errors, loss)`` for a batch of ``(index, sample)`` pairs.

        Each ``sample`` is ``(state, action, reward, next_state, done)``.
        """
        return self._batch_error([entry[1] for entry in batch])

    def per_observe(self, sample):
        """Store every ``(index, sample)`` pair in the prioritized memory and return the batch loss."""
        _, _, errors, loss = self.per_batch_error(sample)

        for i in range(len(sample)):
            self.memory.priority_remember(sample[i][1], errors[i])

        return loss

    def per_replay(self):
        """Sample from the prioritized memory, train the online network, and refresh priorities."""
        [batch, batch_idx, batch_priorities] = self.memory.priority_sample(self.batch_size)
        x, y, errors, _ = self.per_batch_error(batch)

        # P(i): priority of each drawn sample normalised over the drawn batch.
        priorities = np.asarray(batch_priorities, dtype=float)
        probabilities = priorities / priorities.sum()

        # Importance-sampling weights (N * P(i)) ** -beta, scaled so the
        # largest weight is 1.
        b_values = (self.batch_size * probabilities) ** (-1 * self.beta)
        normalized_b_values = b_values / b_values.max()

        sample_weights = errors * normalized_b_values

        self.dqn_model.train(x, y, np.array(sample_weights))

        self.memory.priority_update(batch_idx, errors)

    def uer_batch_error(self, batch):
        """Compute ``(x, y, loss)`` for a batch of ``(state, action, reward, next_state, done)`` samples."""
        x, y, _, loss = self._batch_error(list(batch))
        return x, y, loss

    def uer_observe(self, sample):
        """Store every sample in the uniform memory and return the batch loss."""
        _, _, loss = self.uer_batch_error(sample)

        for i in range(len(sample)):
            self.memory.uer_remember(sample[i])

        return loss

    def uer_replay(self):
        """Sample uniformly from memory and train the online network on the batch."""
        batch = self.memory.uer_sample(self.batch_size)
        x, y, _ = self.uer_batch_error(batch)
        self.dqn_model.train(x, y)

    def update_target_model(self):
        """Copy online weights to the target network whenever ``step`` is a multiple of the update frequency."""
        if self.step % self.update_target_frequency == 0:
            self.dqn_model.update_target_model()
    
    
    
    
    
    
    

### Maths()

In [8]:
import math
class Maths(object):
    """Delay and energy formulas for local, UAV and cloud task execution.

    Every constant comes from the ``args`` dict given to the constructor and
    is stored on the instance under the attribute names set in ``__init__``.
    """

    def __init__(self, args):
        self.num_agents = args['agent_number']
        self.num_uav = args['uav_number']

        # The remaining settings are stored under the same name as their key.
        for key in (
            'grid_width',
            'uav_height',
            'uav_range',
            'local_compute',
            'uav_compute',
            'cloud_compute',
            'reference_distance',
            'los_channel_power',
            'uav_bandwidth',
            'cloud_bandwidth',
            'uav_power',
            'cloud_power',
            'noise_power',
            'propagation_time_factor',
            'local_energy_consumption_factor',
            'punishment_factor',
            'cloud_channel_gain',   # H(t) used in equation 6
        ):
            setattr(self, key, args[key])

    def uav_channel_gain(self, uav_pos, iiot_pos):
        """Return ``los_channel_power / (uav_height**2 + d**2)``.

        ``d`` is the horizontal distance computed from the first two
        coordinates of ``uav_pos`` and ``iiot_pos``.
        """
        dx = uav_pos[0] - iiot_pos[0]
        dy = uav_pos[1] - iiot_pos[1]
        distance = (dx**2 + dy**2)**(1/2)

        return self.los_channel_power / ((self.uav_height**2) + (distance)**2)

    def uav_computation_time(self, uav_pos, iiot_pos, task_size, cpu_cycle):
        """Return upload time plus UAV compute time for one task.

        Side effect: the channel gain used is stored in
        ``self.h_channel_condition``.
        """
        self.h_channel_condition = self.uav_channel_gain(uav_pos, iiot_pos)

        log_argument = 1 + ((self.uav_power * self.h_channel_condition) / (self.noise_power))
        uplink_rate = self.uav_bandwidth * (math.log(log_argument, 2))   # [equation - 5]

        upload_time = task_size / uplink_rate             # [equation - 7]
        compute_time = cpu_cycle / self.uav_compute       # [equation - 11]

        return upload_time + compute_time

    def cloud_transmission_time(self, task_size):
        """Return the upload time to the cloud plus ``propagation_time_factor``."""
        log_argument = 1 + ((self.cloud_power * self.cloud_channel_gain) / (self.noise_power))
        uplink_rate = self.cloud_bandwidth * (math.log(log_argument, 2))   # [equation - 6]

        return (task_size / uplink_rate) + self.propagation_time_factor    # [equation - 8]

    def local_computation_time(self, cpu_cycle):
        """Return ``cpu_cycle / local_compute``.  [equation - 9]"""
        return cpu_cycle / self.local_compute

    def uav_energy_consumption(self, uav_pos, iiot_pos, task_size, cpu_cycle):
        """Return ``uav_power`` times the UAV execution time.  [equation - 12]"""
        total_time = self.uav_computation_time(uav_pos, iiot_pos, task_size, cpu_cycle)
        return self.uav_power * (total_time)

    def cloud_energy_consumption(self, task_size):
        """Return ``cloud_power * (cloud_transmission_time + propagation_time_factor)``.  [equation - 13]

        NOTE(review): ``cloud_transmission_time`` already adds
        ``propagation_time_factor``, so the term is counted twice here -
        confirm against equation 13.
        """
        upload_time = self.cloud_transmission_time(task_size)
        return self.cloud_power * (upload_time + self.propagation_time_factor)

    def local_energy_consumption(self, cpu_cycle):
        """Return ``local_energy_consumption_factor * cpu_cycle**2``.  [equation - 10]"""
        return self.local_energy_consumption_factor * (cpu_cycle ** 2)
    
                   
        

### Environment() 


In [9]:


class Environment(object):
    """Task-offloading environment with randomly placed IIoT devices and UAVs.

    Each device (agent) holds one task and picks an action from
    ``-1 .. num_uav``: ``-1`` offloads to the cloud, ``0`` computes locally and
    ``k > 0`` offloads to UAV ``k``.

    Parameters
    ----------
    agrs : dict
        Configuration dict (the parameter name is a historical misspelling of
        ``args`` and is kept so the signature is unchanged). It is also passed
        on to ``Maths``.
    """

    # Distance a UAV is advanced along each axis by one next_state() call.
    UAV_STEP = 30

    def __init__(self, agrs):
        # Use the configuration that was actually passed in rather than a
        # notebook-global `args`, and keep it for the other methods.
        args = agrs
        self.args = args

        self.num_agents = args['agent_number']
        self.num_uav = args['uav_number']
        self.grid_width = args['grid_width']
        self.uav_height = args['uav_height']
        self.uav_range = args['uav_range']

        self.task_size, self.cpu_cycle, self.tolerant_delay = self.task_model()

        self.action_space = np.arange(-1, self.num_uav + 1)   # action_space = {-1, 0, 1, ...., N}

        self.users_observation = np.zeros([self.num_agents], np.int32)
        self.state_size = 3 * (self.num_uav) + 6
        # NOTE(review): this is the number of agents, while action_space has
        # num_uav + 2 entries - confirm which one consumers expect.
        self.action_size = self.num_agents

        self.UAVs_pos = self.UAVs_Position()
        self.iiot_pos = self.IIots_Position()

        self.Maths = Maths(args)

    def task_model(self):
        """Draw one random task per agent.

        Returns
        -------
        tuple of lists
            ``(task_size, cpu_cycle, tolerant_delay)``, each of length ``num_agents``.
        """
        task_size = random.sample(range(100, 80000), self.num_agents)   # task_size is taken in "Kb".
        cpu_cycle = random.sample(range(500000, 5000000000), self.num_agents)
        tolerant_delay = [round(random.uniform(0.1, 1), 7) for _ in range(self.num_agents)]

        return task_size, cpu_cycle, tolerant_delay

    def UAVs_Position(self):
        """Return a dict mapping UAV id ``1 .. num_uav`` to a random ``[x, y, uav_height]`` list."""
        xs = random.sample(range(self.grid_width), self.num_uav)
        ys = random.sample(range(self.grid_width), self.num_uav)

        return {k + 1: [xs[k], ys[k], self.uav_height] for k in range(self.num_uav)}

    def IIots_Position(self):
        """Return a dict mapping agent index ``0 .. num_agents - 1`` to a random ``(x, y)`` tuple."""
        iiot_pos = {}

        for i in range(self.num_agents):
            x = random.randint(1, self.grid_width)
            y = random.randint(1, self.grid_width)
            iiot_pos[i] = (x, y)

        return iiot_pos

    def _assemble_states(self, task_size, cpu_cycle, tolerant_delay, iiot_pos, uav_pos, uav_xy):
        """Build one state vector per agent from task data, positions and channel gains."""
        all_states = []
        for i in range(self.num_agents):
            state = [task_size[i], cpu_cycle[i], tolerant_delay[i], iiot_pos[i][0], iiot_pos[i][1]]
            state.extend(uav_xy)
            state.append(self.args['cloud_channel_gain'])
            state.extend(self.Maths.uav_channel_gain(pos, iiot_pos[i]) for pos in uav_pos)
            all_states.append(state)

        return all_states

    def state(self):
        """Return the current state vector of every agent.

        Each vector is: task size, CPU cycles, tolerant delay, device x, device
        y, the x/y of every UAV, the cloud channel gain, and the channel gain
        to every UAV.
        """
        uav_pos = [self.UAVs_pos[i] for i in range(1, self.num_uav + 1)]

        uav_xy = []
        for pos in uav_pos:
            uav_xy.append(pos[0])
            uav_xy.append(pos[1])

        return self._assemble_states(self.task_size, self.cpu_cycle, self.tolerant_delay,
                                     self.iiot_pos, uav_pos, uav_xy)

    def next_state(self):
        """Move the UAVs and return state vectors built from freshly drawn tasks and device positions.

        Each UAV coordinate is advanced by ``UAV_STEP`` when that keeps it
        within ``grid_width``; the position lists stored in ``self.UAVs_pos``
        are updated in place. The newly drawn tasks and device positions are
        only used for the returned vectors and are not stored on ``self``.
        """
        task_size, cpu_cycle, tolerant_delay = self.task_model()
        uav_pos = [self.UAVs_pos[i] for i in range(1, self.num_uav + 1)]
        iiot_pos = self.IIots_Position()

        uav_xy = []
        for pos in uav_pos:
            for axis in (0, 1):
                # since uavs are moving with some speed in fixed area
                if pos[axis] + self.UAV_STEP <= self.grid_width:
                    pos[axis] = pos[axis] + self.UAV_STEP
                uav_xy.append(pos[axis])

        return self._assemble_states(task_size, cpu_cycle, tolerant_delay, iiot_pos, uav_pos, uav_xy)

    def reward_calculate(self, execution_time, energy_consumption, tolerant_delay):
        """Return ``(reward, energy_consumption)``.

        The reward is ``1 / energy_consumption``, multiplied by
        ``punishment_factor`` when the execution time exceeds the tolerant delay.
        """
        reward = 1 / energy_consumption
        if execution_time > tolerant_delay:
            reward = reward * self.args['punishment_factor']

        return reward, energy_consumption

    def reward(self, agent_action, uav_pos, iiot_pos, task_size, cpu_cycle, tolerant_delay):
        """Return ``(reward, energy_consumption)`` for one agent's action.

        ``uav_pos`` and ``iiot_pos`` are only used when the action offloads to a UAV.
        """
        if agent_action == -1:   # offload to cloud
            execution_time = self.Maths.cloud_transmission_time(task_size)
            energy_consumption = self.Maths.cloud_energy_consumption(task_size)

        elif agent_action == 0:  # compute locally
            execution_time = self.Maths.local_computation_time(cpu_cycle)
            energy_consumption = self.Maths.local_energy_consumption(cpu_cycle)

        else:  # offload to UAV
            execution_time = self.Maths.uav_computation_time(uav_pos, iiot_pos, task_size, cpu_cycle)
            energy_consumption = self.Maths.uav_energy_consumption(uav_pos, iiot_pos, task_size, cpu_cycle)

        return self.reward_calculate(execution_time, energy_consumption, tolerant_delay)

    def done(self, actions):
        """Return one flag per agent: ``True`` when the chosen action misses its constraints.

        An action is within its constraints when the execution time does not
        exceed the agent's tolerant delay and, for UAV offloading, the device
        is also within ``uav_range`` of the chosen UAV.
        """
        flags = []
        for i, act in enumerate(actions):

            if act > 0:  # uav task
                a, b = self.UAVs_pos[act], self.iiot_pos[i]
                distance = ((a[0] - b[0])**2 + (a[1] - b[1])**2)**(1/2)
                execution_time = self.Maths.uav_computation_time(a, b, self.task_size[i], self.cpu_cycle[i])
                satisfied = (execution_time <= self.tolerant_delay[i]) and (distance <= self.uav_range)

            elif act == 0:  # compute_locally
                # Compare the local *computation time* (not the energy) with the delay bound.
                execution_time = self.Maths.local_computation_time(self.cpu_cycle[i])
                satisfied = execution_time <= self.tolerant_delay[i]

            else:  # cloud task
                execution_time = self.Maths.cloud_transmission_time(self.task_size[i])
                satisfied = execution_time <= self.tolerant_delay[i]

            flags.append(not satisfied)

        return flags

    def step(self, actions):
        """Evaluate every agent's action and produce the next state.

        Returns
        -------
        tuple
            ``(next_state, rewards, energy_consumption, done, task_size,
            cpu_cycle, tolerant_delay)``; the last three are the task lists
            currently stored on the environment.
        """
        rewards = []
        energy_consumption = []

        for i, act in enumerate(actions):
            if act > 0:               # uav task
                uav_pos, iiot_pos = self.UAVs_pos[act], self.iiot_pos[i]
            else:                     # local / cloud: positions are not used
                uav_pos, iiot_pos = [0, 0, 0], (0, 0)

            r, e_c = self.reward(act, uav_pos, iiot_pos, self.task_size[i], self.cpu_cycle[i], self.tolerant_delay[i])
            rewards.append(r)
            energy_consumption.append(e_c)

        done = self.done(actions)
        next_state = self.next_state()

        return next_state, rewards, energy_consumption, done, self.task_size, self.cpu_cycle, self.tolerant_delay

    def reset(self):
        """Re-draw the UAV and IIoT device positions."""
        self.UAVs_pos = self.UAVs_Position()
        self.iiot_pos = self.IIots_Position()
             

### ENV  

In [10]:
class ENV(object):
    """Training driver that wires an ``Environment`` to an ``Agent``.

    The configuration dictionary passed to the constructor is stored on the
    instance, so ``main`` no longer depends on a module-level ``args`` name.
    """

    def __init__(self, args):
        """Store the run configuration.

        Args:
            args: dict-like configuration.  ``main`` reads the keys
                ``agent_number``, ``uav_number``, ``max_timesteps``,
                ``memory_type``, ``step_b_update`` and ``target_frequency``
                and forwards the whole mapping to ``Environment`` and
                ``Agent``.
        """
        self.args = args
        self.step_b_update = args['step_b_update']

    def main(self):
        """Run one training episode of ``max_timesteps`` steps.

        Every step: each device picks an action from its current state row,
        the environment scores the joint action, the transitions are handed to
        the agent's replay memory (``per_observe`` or ``uer_observe`` depending
        on ``memory_type``), the online network is replayed every
        ``step_b_update`` steps and the target network is refreshed every
        ``target_frequency`` steps (step 0 excluded in both cases).

        Returns:
            tuple: eight lists with one entry per time step, in this order:
            ``(all_loss, all_energy_consumption, all_done, all_rewards,
            all_actions, all_task_size, all_cpu_cycle, all_tolerant_delay)``.
        """
        args = self.args
        agent_number = args['agent_number']
        # Each state row is reshaped to 3 values per UAV plus 6 extra features.
        state_shape = (agent_number, args['uav_number']*3 + 6)
        use_per = args['memory_type'] == "per"

        print("mdspr")
        print(f"--------------\n1. Number of iiot devices : {args['agent_number']}")
        print(f"2. Total Training steps : {args['max_timesteps']}\n\n\n")

        total_step = 0
        time_step = 0
        all_loss = []
        all_done = []
        all_rewards = []
        all_actions = []
        all_energy_consumption = []
        all_task_size = []
        all_cpu_cycle = []
        all_tolerant_delay = []

        while time_step < args['max_timesteps']:

            # NOTE(review): a brand-new Environment is built on every step while
            # `state` is carried over from the previous instance's next_state.
            # Kept as in the original; confirm that this is intended.
            env = Environment(args)
            if total_step == 0:
                state = env.state()
                agent = Agent(env.state_size, env.action_size, args)

            print("Training Step : ", time_step,"\n")

            actions = [
                agent.choose_action(tf.convert_to_tensor(state[i]))
                for i in range(agent_number)
            ]

            next_state, reward, energy_consumption, done, task_size, cpu_cycle, tolerant_delay = env.step(actions)

            # Keep the un-reshaped next state: it becomes `state` for the next step.
            raw_next_state = next_state

            state = tf.convert_to_tensor(np.reshape(state, state_shape))
            next_state = tf.convert_to_tensor(np.reshape(next_state, state_shape))

            replay_due = (total_step % self.step_b_update == 0) and (total_step != 0)

            if use_per:   # prioritised experience replay memory
                # Each sample is stored as (priority placeholder, transition).
                batch = [
                    (0, (state[i], actions[i], reward[i], next_state[i], done[i]))
                    for i in range(agent_number)
                ]
                loss = agent.per_observe(batch)
                if replay_due:
                    agent.per_replay()
            else:         # uniform experience replay
                batch = [
                    (state[i], actions[i], reward[i], next_state[i], done[i])
                    for i in range(agent_number)
                ]
                loss = agent.uer_observe(batch)
                if replay_due:
                    agent.uer_replay()

            # The original condition was
            #   (total_step % target_frequency) and (total_step != 0) == 0
            # which, because `==` binds tighter than `and`, could never be
            # true, so the target network was never synchronised.
            if (total_step % args['target_frequency'] == 0) and (total_step != 0):
                agent.update_target_model()

            all_task_size.append(task_size)
            all_cpu_cycle.append(cpu_cycle)
            all_tolerant_delay.append(tolerant_delay)
            all_energy_consumption.append(energy_consumption)
            all_actions.append(actions)
            all_rewards.append(reward)
            all_done.append(done)
            all_loss.append([loss[i] for i in range(agent_number)])

            time_step += 1
            total_step += 1
            state = raw_next_state

        print("\n","*"*110,"\n"*5)

        return all_loss, all_energy_consumption, all_done, all_rewards, all_actions, all_task_size, all_cpu_cycle, all_tolerant_delay
        
            
            
            

In [11]:
def str_to_bool(value):
    """Convert a command-line token to a real boolean.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so any
    non-empty value -- including ``"False"`` -- becomes ``True``.  This
    converter accepts the usual spellings case-insensitively instead.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a recognised spelling.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ("true", "t", "yes", "y", "1", "on"):
        return True
    if token in ("false", "f", "no", "n", "0", "off"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def build_arg_parser(agent_number=30):
    """Build the argument parser holding every hyper-parameter of the run.

    Args:
        agent_number: default for ``--agent_number`` (number of IIoT devices).

    Returns:
        argparse.ArgumentParser: parser with all options registered.
    """
    parser = argparse.ArgumentParser()

    # Accepts the "-f <file>" argument present in sys.argv when the code runs
    # inside a notebook kernel; the value is not used.
    parser.add_argument("-f")
    parser.add_argument("-lr", "--learning_rate", default=0.0001, type=float, help="learning rate")
    parser.add_argument("-tf", "--target_frequency", default=400, type=int, help="target weights replace steps") # in paper 500
    parser.add_argument("-bs", "--batch_size", default=50, type=int, help="batch size")
    parser.add_argument("-ga", "--gamma", default=0.7, type=float, help="reward decay rate")
    parser.add_argument("-e", "--epsilon", default=0.9, type=float, help="exploration rate")
    parser.add_argument("-c", "--memory_capacity", default=4000, type=int, help="replay memory capacity")
    parser.add_argument("-nn", "--number_nodes", default=100, type=int, help="number of nodes in each layer of neural network")
    parser.add_argument("-m", "--agent_number", default=agent_number, type=int, help="total number of iiot devices")  # 1000 used in paper
    parser.add_argument("-uav", "--uav_number", default=5, type=int, help="total number of UAVs")
    parser.add_argument("-g", "--grid_width", default=800, type=int, help="size of fixed area under consideration")
    parser.add_argument("-H", "--uav_height" , default=100, type=int, help="flying height of UAV  'in meters'")
    parser.add_argument("-r", "--uav_range", default=300, type=int, help="communication range of UAV 'in meters'")
    parser.add_argument("-cl", "--local_compute", default=500e+6, type=float, help="local computation capacity 'cycle/second'")  # 500MHz
    parser.add_argument("-cu", "--uav_compute", default=2e+9, type=float, help="UAV compution capacity 'cycle/second'")  # 2GHz
    parser.add_argument("-cc", "--cloud_compute", default=100e+9, type=float, help="cloud computation capacity 'cycle/second'")  #  100GHz
    parser.add_argument("-rd", "--reference_distance", default=1, type=float, help="channel gain reference distance 'meters'")
    parser.add_argument("-lcp", "--los_channel_power", default=1.42e-4, type=float, help="channel gain at the reference")
    parser.add_argument("-ub", "--uav_bandwidth", default=15e+6, type=float, help="bandwidth allocated for UAV uplin transmission rate 'cycles/second'")  # 15MHz
    parser.add_argument("-cb", "--cloud_bandwidth", default=10e+6, type=float, help="bandwidth allocated for cloud uplink transmission 'cycles/second'")  #  10MHz
    parser.add_argument("-up","--uav_power", default=0.01, type=float, help="uplink transmission power for UAV offloading  'W'")
    parser.add_argument("-cp", "--cloud_power", default=0.015, type=float, help="uplink transmission power for cloud offloading  'W'")
    parser.add_argument("-n", "--noise_power", default=1e-12, type=float, help="background noise power  'Watt-second'") # -90 dBm/Hz
    parser.add_argument("-ptf", "--propagation_time_factor", default=3.2e-5, type=float, help="uplink propogation delay factor  's/Kb'")  # 4e-9 s/bit
    parser.add_argument("-lec", "--local_energy_consumption_factor", default=1e-23, type=float, help="local energy consumption factor 'theta' J/cycle")

    # Values not given in the paper
    # ---------------------------------------------------------------------
    # The cloud_channel_gain default equals 148.1 + 40 * log10(2000).  The
    # original note describes the distance as 20 km, which does not match the
    # 2000 used in the computation if the formula expects km -- TODO confirm.
    parser.add_argument("-ccg","--cloud_channel_gain", default=280.141199827, type=float, help="cloud channel gain H(t)")
    parser.add_argument("-b_stp", "--step_b_update",default=400, type=int, help="steps between updating the network")
    parser.add_argument("-pf", "--punishment_factor", default=0.0001, type = float, help="if tolerant delay < energy consumption")
    parser.add_argument("-p", "--priority_scale", default=0.5, type=float, help="scale for prioritization")
    parser.add_argument("-m_e", "--min_epsilon", default=0.02, type=float, help="minimum value of exploration rate")
    parser.add_argument("-e_d", "--epsilon_decay", default=1e-4, type=float, help="exploration decay rate")
    parser.add_argument('-m_b', "--min_beta", default=0.4, type=float, help="minimum value of importance sampling")
    parser.add_argument("-b_d","--beta_increment", default=1e-4, type=float, help="for incrementing beta value")
    parser.add_argument("-b_max", "--beta_max", default=0.9, type=float, help="incrementing value of importance sampling beta")
    parser.add_argument("-ts", "--max_timesteps", default=3500, type=int, help="maximum timesteps in each epsisode")  ## value not given in paper
    parser.add_argument("-ed", "--episodes", default=1, type=int, help="total number of episodes")    ## value not given in paper

    parser.add_argument("-mt", "--memory_type", choices=['uer', 'per'], default="per", help="per: prioritised experience replay, uer: uniform experience replay")
    parser.add_argument('-ddqn', "--double_dqn", choices=['dqn',"double_dqn"], default="double_dqn", help ="double deep Q network or DQN ")
    # type=str_to_bool instead of type=bool, so that "--dueling False" is False.
    parser.add_argument("-dueling","--dueling", default=False, type=str_to_bool, help="Dueling option")
    # ---------------------------------------------------------------------

    return parser


if __name__ == "__main__":

    # One training run per device count in the list.
    for i in [30]:
        parser = build_arg_parser(agent_number=i)

        # `args` stays a module-level name: other classes in this notebook
        # read it as a global.
        args = vars(parser.parse_args())

        env = ENV(args)

        all_loss, all_energy_consumption, all_done, all_reward, all_action, all_task_size, all_cpu_cycle, all_tolerant_delay = env.main()

        # One row per training step, one column per device.
        df_loss = pd.DataFrame(all_loss)
        df_energy_consumption = pd.DataFrame(all_energy_consumption)
        df_done = pd.DataFrame(all_done)
        df_reward = pd.DataFrame(all_reward)
        df_action = pd.DataFrame(all_action)
        df_task_size = pd.DataFrame(all_task_size)
        df_cpu_cycle = pd.DataFrame(all_cpu_cycle)
        df_tolerant_delay = pd.DataFrame(all_tolerant_delay)

        # Optional CSV export (disabled).  The last two paths previously
        # pointed at the task-size file and would have overwritten it.
#         df_loss.to_csv("datasets/fig_8/xyz_md2spr_df_loss.csv")
#         df_energy_consumption.to_csv("datasets/fig_8/xyz_md2spr_df_energy_consumption.csv")
#         df_done.to_csv("datasets/fig_8/xyz_md2spr_df_done.csv")
#         df_reward.to_csv("datasets/fig_8/xyz_md2spr_reward.csv")
#         df_action.to_csv("datasets/fig_8/xyz_md2spr_df_action.csv")
#         df_task_size.to_csv("datasets/fig_8/xyz_md2spr_df_task_size.csv")
#         df_cpu_cycle.to_csv("datasets/fig_8/xyz_md2spr_df_cpu_cycle.csv")
#         df_tolerant_delay.to_csv("datasets/fig_8/xyz_md2spr_df_tolerant_delay.csv")


mdspr
--------------
1. Number of iiot devices : 50
2. Total Training steps : 500



Training Step :  0 

Training Step :  1 

Training Step :  2 

Training Step :  3 

Training Step :  4 

Training Step :  5 

Training Step :  6 

Training Step :  7 

Training Step :  8 

Training Step :  9 

Training Step :  10 

Training Step :  11 

Training Step :  12 

Training Step :  13 

Training Step :  14 

Training Step :  15 

Training Step :  16 

Training Step :  17 

Training Step :  18 

Training Step :  19 

Training Step :  20 

Training Step :  21 

Training Step :  22 

Training Step :  23 

Training Step :  24 

Training Step :  25 

Training Step :  26 

Training Step :  27 

Training Step :  28 

Training Step :  29 

Training Step :  30 

Training Step :  31 

Training Step :  32 

Training Step :  33 

Training Step :  34 

Training Step :  35 

Training Step :  36 

Training Step :  37 

Training Step :  38 

Training Step :  39 

Training Step :  40 

Training Step :  41 

T

Training Step :  357 

Training Step :  358 

Training Step :  359 

Training Step :  360 

Training Step :  361 

Training Step :  362 

Training Step :  363 

Training Step :  364 

Training Step :  365 

Training Step :  366 

Training Step :  367 

Training Step :  368 

Training Step :  369 

Training Step :  370 

Training Step :  371 

Training Step :  372 

Training Step :  373 

Training Step :  374 

Training Step :  375 

Training Step :  376 

Training Step :  377 

Training Step :  378 

Training Step :  379 

Training Step :  380 

Training Step :  381 

Training Step :  382 

Training Step :  383 

Training Step :  384 

Training Step :  385 

Training Step :  386 

Training Step :  387 

Training Step :  388 

Training Step :  389 

Training Step :  390 

Training Step :  391 

Training Step :  392 

Training Step :  393 

Training Step :  394 

Training Step :  395 

Training Step :  396 

Training Step :  397 

Training Step :  398 

Training Step :  399 

Training St