### Libraries

In [None]:
import numpy as np
import os 
import random 
import argparse 
import pandas as pd 
import glob
import matplotlib.pyplot as plt 


### NeuralNetwork


    

In [None]:
import os
from keras.models import Sequential, Model
from keras.layers import Dense, Lambda, Input, Concatenate, Activation
from keras.optimizers import adam_v2
import tensorflow as tf
from keras import backend as K


class NeuralNetwork(object):
    """Online/target pair of fully connected Keras networks for one DQN agent.

    ``model`` is the network that gets trained; ``model_`` is the target
    network, which only changes when ``update_target_model`` copies the
    online weights into it.
    """

    def __init__(self, state_size, action_size, args):
        """Store the hyper-parameters and build both networks.

        Args:
            state_size: length of the state vector (stored for reference).
            action_size: number of discrete actions (stored for reference).
            args: mapping providing ``batch_size``, ``learning_rate``,
                ``number_nodes`` and ``uav_number``.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.batch_size = args['batch_size']
        self.learning_rate = args['learning_rate']
        self.num_nodes = args['number_nodes']    # number of nodes in each hidden layer

        # The layer sizes must exist before build_model() runs: it reads them
        # from the instance instead of relying on a module-level ``args``.
        self.input_shape = 3*args['uav_number'] + 6
        self.output_shape = args['uav_number'] + 2

        self.model = self.build_model()
        self.model_ = self.build_model()

    def build_model(self):
        """Build and compile one network: two ReLU hidden layers and one output layer.

        Returns:
            A compiled Keras ``Model`` mapping ``input_shape`` features to
            ``output_shape`` values (one per action).
        """
        # x is the input to the network
        x = Input(shape=(self.input_shape,))

        # a series of fully connected layers estimating Q(s, a) for every action
        y1 = Dense(self.num_nodes, activation='relu')(x)
        y2 = Dense(self.num_nodes, activation='relu')(y1)
        # NOTE(review): softmax bounds the outputs to [0, 1] while the training
        # loss is MSE against Q-targets; a linear output is the usual choice
        # for Q-value regression - confirm this is intended.
        z = Dense(self.output_shape, activation="softmax")(y2)

        model = Model(inputs=x, outputs=z)
        optimizer = adam_v2.Adam(learning_rate=self.learning_rate)
        model.compile(loss="mse", optimizer=optimizer)
        return model

    def train(self, x, y, sample_weight=None, epochs=1, verbose=0):
        """Fit the online network on inputs ``x`` and targets ``y`` as one batch."""
        self.model.fit(x, y, batch_size=len(x), sample_weight=sample_weight, epochs=epochs, verbose=verbose)

    def predict(self, state, target=False):
        """Return the network output for ``state``.

        Args:
            state: batch of state vectors accepted by ``Model.predict``.
            target: when true, query the target network instead of the
                online network.
        """
        if target:  # get prediction from target network
            return self.model_.predict(state)
        return self.model.predict(state)

    def predict_one_sample(self, state, target=False):   # used for exploitation
        """Return the prediction for ``state``; same contract as ``predict``.

        The previous version dropped the result and returned ``None``, which
        made ``np.argmax`` on it always evaluate to 0.
        """
        return self.predict(state, target=target)

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.model_.set_weights(self.model.get_weights())
           
        
        
        

In [None]:
# state_size = 3*(5)+2
# action_size = 7

# obj = NeuralNetwork(state_size, action_size, args)


### SumTree
<br>



In [None]:
import numpy as np 

class SumTree(object):
    """Binary sum tree stored in a flat array over a fixed-size ring buffer.

    ``tree`` holds ``2*capacity - 1`` nodes: the last ``capacity`` entries are
    leaves carrying one priority each, and every internal node holds the sum
    of its two children, so ``tree[0]`` is the total priority. ``data`` holds
    the payload belonging to each leaf.
    """

    def __init__(self, capacity):
        self.write = 0                                  # next data slot to (over)write
        self.capacity = capacity
        self.tree = np.zeros(2*capacity - 1)
        self.data = np.zeros(capacity, dtype=object)

    def add(self, priority, data):
        """Store ``data`` with ``priority`` in the current slot and advance the cursor."""
        slot = self.write
        self.data[slot] = data
        self.update(slot + self.capacity - 1, priority)

        # wrap around to the first slot once the buffer is full
        self.write = (slot + 1) % self.capacity

    def total(self):
        """Return the sum of all leaf priorities (the root value)."""
        return self.tree[0]

    def update(self, idx, priority):
        """Set tree node ``idx`` to ``priority`` and fix up every ancestor sum."""
        delta = priority - self.tree[idx]
        self.tree[idx] = priority

        # walk up to the root, applying the same difference to each parent
        node = idx
        while node != 0:
            node = (node - 1) // 2
            self.tree[node] += delta

    def retrieve(self, idx, s):
        """Descend from node ``idx`` to the leaf whose cumulative range contains ``s``.

        Returns:
            The tree index of the selected leaf.
        """
        node_count = len(self.tree)
        node, remaining = idx, s
        while True:
            left = 2*node + 1
            if left >= node_count:          # no children: ``node`` is a leaf
                return node

            left_sum = self.tree[left]
            if remaining <= left_sum:
                node = left
            else:
                # skip the left subtree and look for the rest on the right
                node = left + 1
                remaining = remaining - left_sum

    def get(self, s):
        """Look up the leaf selected by cumulative value ``s``.

        Returns:
            ``(leaf tree index, leaf priority, stored data)``.
        """
        leaf = self.retrieve(0, s)
        return leaf, self.tree[leaf], self.data[leaf - self.capacity + 1]
        # here returning leaf index, priority value and experience of that leaf index
        
        
        

In [None]:
#obj = SumTree(args['memory_capacity'])

### PrioritizedExperienceReplay



In [None]:
import random  

class ReplayMemory(object):
    """Prioritized experience replay buffer backed by a ``SumTree``.

    Priorities are ``(TD error + e) ** priority_scale``. A new experience is
    stored with the largest priority handed to ``remember`` so far, and
    ``update`` later replaces it with the priority of its own TD error.
    """

    e = 0.05    # added to every TD error so that no experience gets zero probability

    def __init__(self, capacity, priority_scale):
        """
        Args:
            capacity: maximum number of stored experiences.
            priority_scale: exponent applied to the shifted TD error; balances
                high-priority sampling against uniform sampling.
        """
        self.capacity = capacity
        self.priority_scale = priority_scale
        self.max_priority = 0

        self.memory = SumTree(self.capacity)

    def get_priority(self, TDerror):
        """Convert a TD error into a sampling priority."""
        return (TDerror + self.e) ** self.priority_scale

    def remember(self, sample, TDerror):
        """Store ``sample`` with the largest priority seen so far.

        ``TDerror`` may be a scalar or an array; an array is reduced to its
        largest element so that the tree always receives a plain float.
        """
        priority = self.get_priority(float(np.max(TDerror)))
        # Record the running maximum. Previously the maximum was computed but
        # never written back, so ``max_priority`` stayed at 0 forever.
        self.max_priority = max(self.max_priority, priority)
        self.memory.add(self.max_priority, sample)

    def sample(self, batch_size):
        """Draw ``batch_size`` experiences, one from each equal-width priority segment.

        Returns:
            ``[batch, indices, priorities]`` where ``batch`` is a list of
            ``(tree index, experience)`` pairs and the other two lists hold
            the matching tree indices and leaf priorities. All three lists
            are empty when ``batch_size`` is not positive.
        """
        sample_batch = []
        sample_batch_indices = []
        sample_batch_priorities = []

        if batch_size <= 0:
            return [sample_batch, sample_batch_indices, sample_batch_priorities]

        segment_width = self.memory.total() / batch_size

        for segment in range(batch_size):
            s = random.uniform(segment_width * segment, segment_width * (segment + 1))
            idx, priority, data = self.memory.get(s)

            sample_batch.append((idx, data))
            sample_batch_indices.append(idx)
            sample_batch_priorities.append(priority)

        return [sample_batch, sample_batch_indices, sample_batch_priorities]

    def update(self, batch_indices, errors):
        """Re-prioritise the leaves in ``batch_indices`` from their new TD ``errors``."""
        for leaf_idx, error in zip(batch_indices, errors):
            self.memory.update(leaf_idx, self.get_priority(error))
        
        
        
        

In [None]:
#obj = ReplayMemory(args['memory_capacity'], 0.4)

### Agent



In [None]:
# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@


class Agent(object):
    """One DQN learner (one IIoT device) with prioritized experience replay.

    Actions are encoded the way ``Environment`` consumes them: ``-1`` offloads
    to the cloud, ``0`` computes locally and ``k >= 1`` offloads to UAV ``k``.
    Output unit ``j`` of the network therefore stands for action ``j - 1``.
    """

    ACTION_OFFSET = 1   # network output index = action id + ACTION_OFFSET

    def __init__(self, state_size, action_size, agent_idx, arguments):
        """
        Args:
            state_size: length of one state vector.
            action_size: number of discrete actions.
            agent_idx: position of this agent in the joint action list.
            arguments: mapping with the hyper-parameters read below.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.agent_idx = agent_idx
        self.num_uav = arguments['uav_number']

        self.learning_rate = arguments['learning_rate']
        self.update_target_frequency = arguments['target_frequency']
        self.batch_size = arguments["batch_size"]

        self.gamma = arguments["gamma"]
        self.epsilon = arguments['epsilon']
        self.min_epsilon = arguments['min_epsilon']
        self.epsilon_decay = arguments['epsilon_decay']
        self.beta = arguments['min_beta']
        self.beta_max = arguments['beta_max']

        self.step = 0

        self.dqn_model = NeuralNetwork(self.state_size, self.action_size, arguments)
        self.memory = ReplayMemory(arguments['memory_capacity'], arguments['priority_scale'])

    def decay_epsilon(self):
        """Advance the step counter, anneal beta and decay epsilon.

        Returns:
            The exploration rate to use for the current step.
        """
        self.step += 1

        # Anneal the importance-sampling exponent towards beta_max. This used
        # to sit after the return statements and therefore never executed.
        if self.beta < self.beta_max:
            self.beta = min(self.beta_max, self.beta + 0.01)

        if self.epsilon > self.min_epsilon:
            self.epsilon = self.min_epsilon + (self.epsilon - self.min_epsilon) * math.exp(-1. * self.step * self.epsilon_decay)
            return self.epsilon
        return self.min_epsilon

    def choose_action(self, state):
        """Pick an action id in ``-1 .. uav_number`` with an epsilon-greedy policy."""
        exploration_rate = self.decay_epsilon()

        if exploration_rate > random.random():
            return random.randrange(-1, self.num_uav + 1)  # explore

        # Exploit: map the best output unit back to the action id so that the
        # greedy branch produces the same encoding as the exploring branch.
        q_values = self.dqn_model.predict(state)
        return int(np.argmax(q_values)) - self.ACTION_OFFSET

    def batch_error(self, batch):
        """Build training inputs/targets and TD errors for ``batch``.

        Args:
            batch: list of ``(tree index, sample)`` pairs where ``sample`` is
                ``(state, actions, reward, next_state, done)`` and ``actions``
                is the joint action list indexed by agent.

        Returns:
            ``[x, y, errors]``: ``x`` holds the states, ``y`` the online
            Q-values with the taken action's entry replaced by its target,
            and ``errors`` the absolute TD error of each sample.
        """
        batch_len = len(batch)
        num_actions = self.num_uav + 2

        if batch_len == 0:
            return [np.zeros((0, self.state_size)), np.zeros((0, num_actions)), np.zeros(0)]

        samples = [entry[1] for entry in batch]
        x = np.array([s[0] for s in samples], dtype=float).reshape(batch_len, self.state_size)
        next_x = np.array([s[3] for s in samples], dtype=float).reshape(batch_len, self.state_size)

        # One forward pass per network instead of three passes per sample.
        q_values = np.asarray(self.dqn_model.predict(x), dtype=float).reshape(batch_len, num_actions)
        next_q_values = np.asarray(self.dqn_model.predict(next_x, target=True), dtype=float).reshape(batch_len, num_actions)

        # Start from the current predictions so that only the taken action
        # contributes to the loss.
        y = q_values.copy()
        errors = np.zeros(batch_len)

        for i, sample in enumerate(samples):
            action_idx = int(sample[1][self.agent_idx]) + self.ACTION_OFFSET
            reward = sample[2]
            done = sample[4]

            if done:
                target_q_value = reward
            else:
                target_q_value = reward + self.gamma * np.amax(next_q_values[i])

            errors[i] = np.abs(target_q_value - q_values[i, action_idx])
            y[i, action_idx] = target_q_value

        return [x, y, errors]

    def observe(self, sample):
        """Store ``sample`` in replay memory, prioritised by its current TD error."""
        _, _, errors = self.batch_error([(0, sample)])
        # hand over the scalar error, not the length-1 array
        self.memory.remember(sample, errors[0])

    def replay(self):
        """Sample a prioritized batch, train on it and refresh its priorities."""
        batch, batch_idx, batch_priorities = self.memory.sample(self.batch_size)

        total_priority = sum(batch_priorities)
        if not batch or total_priority <= 0:
            return   # nothing usable has been stored yet

        x, y, errors = self.batch_error(batch)

        normalized_batch_priorities = [float(p) / total_priority for p in batch_priorities]

        # importance-sampling weights, scaled so that the largest one is 1
        b_values = [(self.batch_size * p) ** (-1 * self.beta) for p in normalized_batch_priorities]
        largest = max(b_values)
        sample_weights = [float(b) / largest for b in b_values]

        # The MSE loss already carries the TD error, so only the
        # importance-sampling weight is applied per sample.
        self.dqn_model.train(x, y, np.array(sample_weights))

        self.memory.update(batch_idx, errors)

    def update_target_model(self):
        """Sync the target network when the step counter hits the update frequency."""
        if self.step % self.update_target_frequency == 0:
            self.dqn_model.update_target_model()
    
    
    
    
    
    

In [None]:
# state_size = 3*(5)+6
# action_size = 1000
# device_i =10

# obj = Agent(state_size, action_size, args)

### Maths()

In [None]:
import math
class Maths(object):
    """Channel-gain and energy-consumption formulas for the offloading model.

    The bracketed equation numbers in the comments are the ones used by the
    original author of this file.
    """

    # (attribute name, key in ``args``) pairs, copied in this order by __init__.
    _ARG_FIELDS = (
        ('num_agents', 'agent_number'),
        ('num_uav', 'uav_number'),
        ('grid_width', 'grid_width'),
        ('uav_height', 'uav_height'),
        ('uav_range', 'uav_range'),
        ('local_compute', 'local_compute'),
        ('uav_compute', 'uav_compute'),
        ('cloud_compute', 'cloud_compute'),
        ('reference_distance', 'reference_distance'),
        ('los_channel_power', 'los_channel_power'),
        ('uav_bandwidth', 'uav_bandwidth'),
        ('cloud_bandwidth', 'cloud_bandwidth'),
        ('uav_power', 'uav_power'),
        ('cloud_power', 'cloud_power'),
        ('noise_power', 'noise_power'),
        ('propagation_time_factor', 'propagation_time_factor'),
        ('local_energy_consumption_factor', 'local_energy_consumption_factor'),
        ('task_size', 'task_size'),
        ('cpu_cycle', 'cpu_cycle'),
        ('tolerant_delay', 'tolerant_delay'),
        ('punishment_factor', 'punishment_factor'),
    )

    def __init__(self, args):
        """Copy the model parameters out of the ``args`` mapping."""
        for attribute, key in self._ARG_FIELDS:
            setattr(self, attribute, args[key])

        # values not given in the paper
        # -----------------------------------------------------------------
        self.a = 0.2
        self.b = 0.3
        self.attenuation_coefficient = 0.2
        self.nue_los = 0.3
        self.nue_nlos = 0.3
        self.cloud_channel_gain = 0.3   # H(t) used in equation 6
        # -----------------------------------------------------------------

    def uav_channel_gain(self, uav_pos, iiot_pos):
        """Return the channel gain between one UAV and one IIoT device.

        Args:
            uav_pos: UAV position; only elements 0 and 1 (x, y) are read.
            iiot_pos: device position; only elements 0 and 1 (x, y) are read.
        """
        # [equation 1] is replaced by a constant line-of-sight probability.
        P_los = 0.63
        P_nlos = 1 - P_los

        dx = uav_pos[0] - iiot_pos[0]
        dy = uav_pos[1] - iiot_pos[1]
        distance = (dx**2 + dy**2)**(1/2)

        # The factor shared by [equation 2] and [equation 3]: reference power
        # attenuated over the straight-line UAV-to-device distance.
        slant_range = math.sqrt(self.uav_height**2 + (distance)**2)
        attenuated_power = self.los_channel_power * (slant_range ** (-self.attenuation_coefficient))

        PL_los = attenuated_power * self.nue_los        # [equation 2]
        PL_nlos = attenuated_power * self.nue_nlos      # [equation 3]

        return (P_los * PL_los) + (P_nlos * PL_nlos)    # [equation 4]

    def uav_energy_consumption(self, uav_pos, iiot_pos):
        """Return the energy spent offloading the task to the UAV at ``uav_pos``.

        Also stores the channel gain used in ``self.h_channel_condition``.
        """
        self.h_channel_condition = self.uav_channel_gain(uav_pos, iiot_pos)

        # NOTE(review): noise_power is used directly as a linear divisor, but
        # the CLI help in this file describes it as dBm/Hz with default -90.
        # A negative value makes snr_term < 1 and the rate negative - confirm units.
        snr_term = 1 + ((self.uav_power * self.h_channel_condition) / (self.noise_power))
        uplink_rate = self.uav_bandwidth * (math.log(snr_term, 2))     # [equation 5]

        transmission_time = self.task_size / uplink_rate                # [equation 7]
        computation_time = self.cpu_cycle / self.uav_compute            # [equation 11]

        return self.uav_power * (transmission_time + computation_time)  # [equation 12]

    def cloud_energy_consumption(self):
        """Return the energy spent offloading the task to the cloud."""
        snr_term = 1 + ((self.cloud_power * self.cloud_channel_gain) / (self.noise_power))
        uplink_rate = self.cloud_bandwidth * (math.log(snr_term, 2))    # [equation 6]

        delay = self.propagation_time_factor
        transmission_time = (self.task_size / uplink_rate) + delay      # [equation 8]

        # NOTE(review): the propagation term is already part of
        # transmission_time and is added a second time here - confirm
        # against equation 13.
        return self.cloud_power * (transmission_time + delay)           # [equation 13]

    def local_energy_consumption(self):
        """Return the energy spent computing the task on the device itself."""
        return self.local_energy_consumption_factor * (self.cpu_cycle ** 2)      # [equation 10]
    
                   
                   
        

In [None]:
#obj = Maths(args)

### Environment() 


In [None]:


class Environment(object):
    """Grid world of IIoT devices and UAVs used to score offloading decisions.

    Action encoding (see ``reward``): ``-1`` offloads to the cloud, ``0``
    computes locally and ``k >= 1`` offloads to UAV ``k``.
    """

    def __init__(self, agrs):
        """
        Args:
            agrs: mapping with the simulation parameters. The misspelled
                parameter name is kept so existing callers keep working; the
                mapping is now actually used instead of a module-level
                ``args``.
        """
        args = agrs
        self.args = args

        self.num_agents = args['agent_number']
        self.num_uav = args['uav_number']
        self.grid_width = args['grid_width']
        self.uav_height = args['uav_height']
        self.uav_range = args['uav_range']
        self.local_compute = args['local_compute']
        self.cloud_compute = args['cloud_compute']
        self.reference_distance = args['reference_distance']
        self.los_channel_power = args['los_channel_power']
        self.uav_bandwidth = args['uav_bandwidth']
        self.cloud_bandwidth = args['cloud_bandwidth']
        self.uav_power = args['uav_power']
        self.cloud_power = args['cloud_power']
        self.noise_power = args['noise_power']
        self.propagation_time_factor = args['propagation_time_factor']
        self.local_energy_consumption_factor = args['local_energy_consumption_factor']
        self.task_size = args['task_size']
        self.cpu_cycle = args['cpu_cycle']
        self.tolerant_delay = args['tolerant_delay']
        self.punishment_factor = args['punishment_factor']
        self.uav_compute = args["uav_compute"]

        # -----------------------
        # values not given in the paper
        self.a = 0.2
        self.b = 0.3
        self.attenuation_coefficient = 0.2
        self.nue_los = 0.3
        self.nue_nlos = 0.3
        self.cloud_channel_gain = 0.3   # H(t) used in equation 6
        # ------------------------

        self.action_space = np.arange(-1, self.num_uav+1)   # action_space = {-1, 0, 1, ...., N}
        self.users_space = np.zeros([self.num_agents], np.int32)
        self.users_observation = np.zeros([self.num_agents], np.int32)
        self.state_size = 3*(self.num_uav) + 6
        self.action_size = self.num_uav + 2

        self.UAVs_pos = self.UAVs_Position()
        self.iiot_pos = self.IIots_Position()
        self.terminal = False

        self.Maths = Maths(args)

    def UAVs_Position(self):
        """Draw a random ``(x, y, uav_height)`` for every UAV.

        Returns:
            Dict keyed ``1 .. num_uav``. Each coordinate axis is sampled
            without replacement from ``range(grid_width)``.
        """
        xs = random.sample(range(self.grid_width), self.num_uav)
        ys = random.sample(range(self.grid_width), self.num_uav)
        return {
            uav_id: (x, y, self.uav_height)
            for uav_id, (x, y) in enumerate(zip(xs, ys), start=1)
        }

    def IIots_Position(self):
        """Draw a random ``(x, y)`` in ``1 .. grid_width`` for every IIoT device.

        Returns:
            Dict keyed ``0 .. num_agents - 1``.
        """
        iiot_pos = {}
        for device in range(self.num_agents):
            x = random.randint(1, self.grid_width)
            y = random.randint(1, self.grid_width)
            iiot_pos[device] = (x, y)
        return iiot_pos

    def state(self):
        """Return one state array of shape ``(1, state_size)`` per device.

        Layout: task size, CPU cycles, tolerant delay, device x, device y,
        the (x, y) of every UAV, the cloud channel gain, then the channel
        gain from the device to every UAV.
        """
        uav_pos = [self.UAVs_pos[i] for i in range(1, self.num_uav+1)]
        uav_xy = []
        for pos in uav_pos:
            uav_xy.extend((pos[0], pos[1]))

        all_states = []
        for device in range(self.num_agents):
            device_pos = self.iiot_pos[device]
            state = [self.task_size, self.cpu_cycle, self.tolerant_delay, device_pos[0], device_pos[1]]
            state.extend(uav_xy)
            state.append(self.cloud_channel_gain)
            state.extend(self.Maths.uav_channel_gain(pos, device_pos) for pos in uav_pos)

            all_states.append(np.array(state).reshape(1, self.state_size))
        return all_states

    def reset(self):
        """Re-draw all UAV and device positions and clear the terminal flag.

        The previous version called ``UAV_Pos``/``IIOT_Pos``, which do not
        exist on this class, and therefore always raised ``AttributeError``.
        """
        self.UAVs_pos = self.UAVs_Position()
        self.iiot_pos = self.IIots_Position()
        self.terminal = False

    def step(self, actions):
        """Score one joint action.

        Args:
            actions: one action id per device, in device order.

        Returns:
            ``(next_state, rewards, done)`` where ``done`` is always a list
            with one flag per device (the caller indexes it per agent).
        """
        rewards = []
        for device, action in enumerate(actions):
            if action > 0:               # UAV task
                r = self.reward(action, self.UAVs_pos[action], self.iiot_pos[device])
            else:                        # local (0) or cloud (-1)
                r = self.reward(action)
            rewards.append(r)

        # NOTE(review): the termination rule is unchanged - the step is
        # terminal only when the rewards sum to zero (the original author
        # flagged this condition as undecided).
        episode_over = not sum(rewards)
        self.terminal = [episode_over] * len(actions)

        # NOTE(review): nothing moves between steps, so the next state equals
        # the current one (the original author flagged this as open).
        next_state = self.state()

        return next_state, rewards, self.terminal

    def reward_calculate(self, energy_consumption):
        """Turn an energy figure into a reward: its inverse, minus the penalty if too large."""
        # NOTE(review): the energy value is compared against tolerant_delay
        # (a time), and the CLI default for punishment_factor is negative so
        # subtracting it raises the reward - confirm both are intended.
        if energy_consumption <= self.tolerant_delay:
            return 1/energy_consumption
        return (1/energy_consumption) - self.punishment_factor

    def reward(self, agent_action, uav_pos=None, iiot_pos=None):
        """Return the reward for one device's action.

        ``uav_pos`` and ``iiot_pos`` are only used for UAV offloading.
        """
        # Use the Maths instance built in __init__; previously the Maths
        # methods were called unbound with this object, and a fresh Maths was
        # built from a module-level ``args`` on every UAV reward.
        if agent_action == -1:   # offload to cloud
            energy_consumption = self.Maths.cloud_energy_consumption()
        elif agent_action == 0:  # compute locally
            energy_consumption = self.Maths.local_energy_consumption()
        else:                    # offload to UAV
            energy_consumption = self.Maths.uav_energy_consumption(uav_pos, iiot_pos)
        return self.reward_calculate(energy_consumption)

    def sample(self):
        """Return a uniformly random action id for every device."""
        return np.random.choice(self.action_space, size=self.num_agents)
        

### ENV class

In [None]:

class ENV(object):
    """Training driver: owns the ``Environment`` and runs the episode loop."""

    def __init__(self, args):
        """
        Args:
            args: mapping with the run parameters; kept on the instance so
                that ``main`` does not depend on a module-level ``args``.
        """
        self.args = args
        self.action_space = np.arange(-1, args['uav_number']+1)   # action_space = {-1, 0, 1, ...., N}
        self.users_space = np.zeros([args['agent_number']], np.int32)
        self.users_observation = np.zeros([args['agent_number']], np.int32)
        self.state_size = 3*(args['uav_number']) + 6
        self.action_size = args['uav_number']+2
        self.env = Environment(args)
        # NOTE(review): this network is not used by main(); each agent owns
        # its own model. Kept in case external code reads ``dqn_model``.
        self.dqn_model = NeuralNetwork(self.state_size, self.action_size, args)
        self.step_b_updates = 1     # replay every this many time steps

    def main(self, agents): # MDSPR Algorithm
        """Run every training episode.

        Args:
            agents: one agent per IIoT device, in device order.

        Returns:
            A list with one entry per episode, each holding the per-step
            reward lists.
        """
        args = self.args
        total_step = 0
        rewards_list = []

        for episode in range(args['episodes']):

            state = self.env.state()  # initial state vector of every IIoT device
            done = False
            done_flags = []
            reward_all = []
            time_step = 0

            while not done and time_step < args['max_timesteps']:

                # epsilon-greedy action of every agent
                actions = [agents[i].choose_action(state[i]) for i in range(args['agent_number'])]

                next_state, reward, done_flags = self.env.step(actions)
                if isinstance(done_flags, (bool, np.bool_)):
                    # accept a single flag from the environment as well
                    done_flags = [bool(done_flags)] * len(agents)

                # A step at index 0 is skipped on purpose. The old condition
                # ``t % k and (t != 0) == 0`` parsed as
                # ``(t % k) and ((t != 0) == 0)`` and so never fired replay.
                is_update_step = time_step != 0
                do_replay = is_update_step and time_step % self.step_b_updates == 0
                do_target_sync = is_update_step and time_step % args['target_frequency'] == 0

                for i, agent in enumerate(agents): # experience replay
                    agent.observe((state[i], actions, reward[i], next_state[i], done_flags[i]))

                    # NOTE(review): choose_action() already calls
                    # decay_epsilon(), so the schedule advances twice per step.
                    agent.decay_epsilon()

                    if do_replay:
                        agent.replay()

                    if do_target_sync:
                        agent.update_target_model()

                # The episode ends once every device reports done. Testing
                # ``not done`` on the flag list itself ended every episode
                # after a single step.
                done = bool(done_flags) and all(done_flags)

                time_step += 1
                total_step += 1
                state = next_state
                reward_all.append(reward)

            rewards_list.append(reward_all)
            print(f"episode {episode} -----reward : {reward_all} --------Final step : {time_step}, --------done : {done_flags}\n\n\n\n\n")

        return rewards_list


### Main Function

In [None]:
if __name__ == "__main__":

    # Every option as (short flag, long flag, default, type, help text),
    # registered in exactly this order.
    option_table = [
        ("-lr", "--learning_rate", 0.0001, float, "learning rate"),
        ("-tf", "--target_frequency", 2, int, "target weights replace steps"),
        ("-bs", "--batch_size", 50, int, "batch size"),
        ("-ga", "--gamma", 0.7, float, "reward decay rate"),
        ("-e", "--epsilon", 0.9, float, "exploration rate"),
        ("-c", "--memory_capacity", 10000, int, "replay memory capacity"),
        ("-nn", "--number_nodes", 100, int, "number of nodes in each layer of neural network"),
        ("-m", "--agent_number", 3, int, "total number of iiot devices"),
        ("-uav", "--uav_number", 5, int, "total number of UAVs"),
        ("-g", "--grid_width", 800, int, "size of fixed area under consideration"),
        ("-H", "--uav_height", 100, int, "flying height of UAV"),
        ("-r", "--uav_range", 300, int, "communication range of UAV"),
        ("-cl", "--local_compute", 500, int, "local computation capacity 'MHz'"),
        ("-cu", "--uav_compute", 2, int, "UAV compution capacity 'GHz'"),
        ("-cc", "--cloud_compute", 100, int, "cloud computation capacity 'GHz'"),
        ("-rd", "--reference_distance", 1, float, "channel gain reference distance 'meters'"),
        ("-lcp", "--los_channel_power", 1.42e-4, float, "channel gain at the reference"),
        ("-ub", "--uav_bandwidth", 15, int, "bandwidth allocated for UAV uplin transmission rate 'MHz'"),
        ("-cb", "--cloud_bandwidth", 10, int, "bandwidth allocated for cloud uplink transmission 'MHz'"),
        ("-up", "--uav_power", 0.01, float, "uplink transmission power for UAV offloading  'W'"),
        ("-cp", "--cloud_power", 0.015, float, "uplink transmission power for cloud offloading  'W'"),
        ("-n", "--noise_power", -90, float, "background noise power  'dBm/Hz'"),
        ("-ptf", "--propagation_time_factor", 4e-9, float, "uplink propogation delay factor  's/bit'"),
        ("-lec", "--local_energy_consumption_factor", 1e-23, float, "local energy consumption factor 'theta' J/cycle"),

        # values not given in the paper
        ("-pf", "--punishment_factor", -2, float, "if tolerant delay < energy consumption"),
        ("-p", "--priority_scale", 0.4, float, "scale for prioritization"),
        ("-m_e", "--min_epsilon", 0.02, float, "minimum value of exploration rate"),
        ("-e_d", "--epsilon_decay", 0.0001, float, "exploration decay rate"),
        ("-m_b", "--min_beta", 0.4, float, "minimum value of importance sampling"),
        ("-b_max", "--beta_max", 1.0, float, "incrementing value of importance sampling beta"),
        ("-ts", "--max_timesteps", 20, int, "maximum timesteps in each epsisode"),
        ("-ed", "--episodes", 10, int, "total number of episodes"),

        # only a value range is given in the paper: the default is drawn
        # once, when this table is built
        ("-t", "--task_size", random.uniform(100, 800000), float, "offloading task size  'Kb'"),
        ("-cpu", "--cpu_cycle", random.uniform(5e+5, 5e+9), float, "cpu_cycle required by task "),
        ("-d", "--tolerant_delay", random.uniform(0.01, 1), float, "task tolerant value 'seconds'"),
    ]

    parser = argparse.ArgumentParser()

    # NOTE(review): presumably here to swallow the ``-f <file>`` argument a
    # Jupyter kernel passes on its command line - confirm.
    parser.add_argument("-f")
    for short_flag, long_flag, default_value, value_type, help_text in option_table:
        parser.add_argument(short_flag, long_flag, default=default_value, type=value_type, help=help_text)

    np.set_printoptions(precision=2)

    # ``args`` stays a module-level name: the classes above read it as a global.
    args = vars(parser.parse_args())

    env = ENV(args)

    # one learner per IIoT device
    all_agents = [
        Agent(env.state_size, env.action_size, agent_idx, args)
        for agent_idx in range(args['agent_number'])
    ]

    env.main(all_agents)
    
   

episode 0 -----reward : [[232685.67540325108, -1.7388674208078102e-07, -1.10621343691578e-11]] --------Final step : 1, --------done : [False, False, False]





episode 1 -----reward : [[-1.3493542511731201e-11, -1.3000350719590717e-11, -1.0842828372759324e-11]] --------Final step : 1, --------done : [False, False, False]





episode 2 -----reward : [[232685.67540325108, -1.0985940638149626e-11, -1.7388674208078102e-07]] --------Final step : 1, --------done : [False, False, False]





episode 3 -----reward : [[-1.0930837430661354e-11, -1.3331017319407473e-11, 232685.67540325108]] --------Final step : 1, --------done : [False, False, False]





episode 4 -----reward : [[-1.0930837430661354e-11, 232685.67540325108, -1.1893606080022056e-11]] --------Final step : 1, --------done : [False, False, False]





episode 5 -----reward : [[-1.3493542511731201e-11, -1.088728552926226e-11, -1.10621343691578e-11]] --------Final step : 1, --------done : [False, False, False]





episode 6 -----re