In [1]:
# Import Modules
import time
import math
import matplotlib.pyplot as plt
import pygame
import random
import numpy as np
from pygame.locals import *
from swarm import *
from DDPG import *


pygame 2.6.0 (SDL 2.28.4, Python 3.10.14)
Hello from the pygame community. https://www.pygame.org/contribute.html


2024-08-17 14:37:22.167604: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-08-17 14:37:22.172066: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-08-17 14:37:22.184240: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-17 14:37:22.203172: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-17 14:37:22.208744: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-17 14:37:22.225371: I tensorflow/core/platform/cpu_feature_gu

In [2]:

# Simulation Parameters
# Notebook-wide configuration. Every name below is a module-level global; the
# helper functions and the main loop in later cells read several of them
# directly instead of receiving them as arguments.
number_of_particles = 24    # forwarded to swarm(number_of_particles=...)
number_of_axes      = 2     # number of spatial axes; also passed to Agent as numberOfActions
delta_t             = 0.1   # amount rewardfunction adds to the episode time `t` on every call
t_final             = 5000  # rewardfunction flags the episode as done once t >= t_final
screen_size         = [1500,1200]  # forwarded to swarm(screensize=...); also the exclusive upper bounds for random targets in the main loop
initial_location    = [screen_size[0]/2,screen_size[1]/2]  # centre of the screen (half of each screen dimension)
# The two lists below are created empty and are not referenced by any cell
# visible in this part of the notebook.
list_min_distance   = []
list_ave_distance   = []
# Initial target: screen centre plus a non-negative random offset per axis.
# np.random.randint([900,1400]) draws one integer per axis from [0, 900) and
# [0, 1400); it is called once per axis and only component `ii` of each draw
# is kept.
# NOTE(review): centre + offset can exceed screen_size (e.g. 750 + 899 > 1500),
# whereas the main loop draws targets inside screen_size -- confirm intended.
xtrg                = [initial_location[ii] + np.random.randint([900,1400])[ii] for ii in range(number_of_axes)]
# `swarm` comes from the star import of the project-local `swarm` module; the
# meaning of display / CommRng / dim is defined there and is not visible here.
particles           = swarm(number_of_particles=number_of_particles, screensize=screen_size, target_location=xtrg,
                            display=True, CommRng=100, dim=number_of_axes)
# Key of the first swarm member whose role is 'rlagent'.
# Raises IndexError if no member has that role.
rlagent             = [key for key in particles.member.keys() if particles.member[key]['role']=='rlagent'][0]
leader              = particles.leader  # not referenced again in the visible cells
numberofneighbour   = 5     # how many neighbour entries stateappend takes (slice [0:numberofneighbour])
numberofleader      = 1     # enters the state-size arithmetic used when the Agent is created
clock               = pygame.time.Clock()  # not referenced again in the visible cells
numberofepochs      = 100000  # iteration count of the main `for epoch` loop
# Placeholders handed to stateappend(); stateappend discards its argument and
# builds the state from `particles`, so their initial contents are irrelevant.
state               = []
newstate            = []
train               = False  # when True, the main loop calls myagent.save() and myagent.learn() after each episode


-----------------------
SUMMARY FOR PARTICLES
-----------------------

particle id :  0
role        :  leader
target      :  target
wghts       :  [ 0.  0.  0.  0. 10.  0.]
dist2wp     :  1015.439930436177
-----------------------
particle id :  1
role        :  rlagent
target      :  leader
wghts       :  no weights
dist2wp     :  159.65399058614398
-----------------------
particle id :  2
role        :  follower
target      :  leader
wghts       :  [-8.527, 9.441, -1.126, 7.908, 5.326, -3.06]
dist2wp     :  243.26426915931114
-----------------------
particle id :  3
role        :  follower
target      :  leader
wghts       :  [-8.527, 9.441, -1.126, 7.908, 5.326, -3.06]
dist2wp     :  250.1324791094496
-----------------------
particle id :  4
role        :  follower
target      :  leader
wghts       :  [-8.527, 9.441, -1.126, 7.908, 5.326, -3.06]
dist2wp     :  154.07774504244458
-----------------------
particle id :  5
role        :  follower
target      :  leader
wghts       :  [-8

In [3]:
# Instance of DDPG is created.
# The state vector has particles.dim * (numberofneighbour + numberofleader) * 2
# entries: one relative-position and one relative-velocity value per axis for
# each neighbour and for the leader. The same expression is printed here and
# passed to Agent as numberOfStates, so the two must be kept in sync by hand.
print('----------------------------------------------------------------------------')
print('There will be %s states, %s for relative velocity, %s for relative position' % \
      (particles.dim*(numberofneighbour+numberofleader)*2,\
      particles.dim*(numberofneighbour+numberofleader),\
      particles.dim*(numberofneighbour+numberofleader)))
print('----------------------------------------------------------------------------')
### Some states are from the closest leader ###
print('%s of the states are gathered from the closest leader of the swarm' % (numberofleader*particles.dim*2))
print('----------------------------------------------------------------------------')
# `Agent`, `Adam` and `glorot_normal` are not imported by name in this notebook;
# presumably they arrive through `from DDPG import *` -- confirm in DDPG.py.
# The actor and critic each get their own layer-size list, activation,
# initializer and a separate Adam optimizer (learning rates 0.001 and 0.002).
# Actions are bounded to [lowerBound, upperBound] = [-3, 3], one per axis.
# NOTE(review): the exact meaning of 'concat', tau, gamma, annealing,
# loadsavedfile and disablenoise is defined inside Agent and is not visible
# here -- presumably tau is the target-network update rate and gamma the
# discount factor; verify against DDPG.py before tuning.
myagent           = Agent(actor_network  = {'nn'          :[300,200],
                                            'activation'  :'relu',
                                            'initializer' :glorot_normal,
                                            'optimizer'   :Adam(learning_rate=0.001)}, 
                          critic_network = {'nn'          :[200,300],
                                            'concat'      :[100,200,50],
                                            'activation'  :'relu',
                                            'initializer' :glorot_normal,
                                            'optimizer'   :Adam(learning_rate=0.002)},
                          loadsavedfile=False,
                          disablenoise=False,
                          lowerBound=-3,upperBound=3,
                          numberOfActions=number_of_axes,
                          numberOfStates=particles.dim*(numberofneighbour+numberofleader)*2,
                          buffer_capacity= 250000, batch_size= 256,
                          tau= 0.001, gamma= 0.95, annealing= 10000)

----------------------------------------------------------------------------
There will be 24 states, 12 for relative velocity, 12 for relative position
----------------------------------------------------------------------------
4 of the states are gathered from the closest leader of the swarm
----------------------------------------------------------------------------
Size of State Space ->  24
Size of Action Space ->  2
Max Value of Action ->  3
Min Value of Action ->  -3


In [4]:
#States are appended to the "states list"
def stateappend(state=None):
    """Build the RL agent's observation vector from the current swarm state.

    The vector is assembled from the module-level globals ``particles``,
    ``rlagent`` and ``numberofneighbour`` in this order:

    1. For each of the first ``numberofneighbour`` entries of the agent's
       ``'relative_position'`` / ``'relative_velocity'`` mappings, and for
       each axis of that entry: position component, then velocity component.
    2. For each axis of ``'distance2leader'`` / ``'velocity2leader'``:
       position component, then velocity component.

    Parameters
    ----------
    state : any, optional
        Ignored. Kept only so existing ``stateappend(state)`` calls keep
        working; the previous implementation overwrote it immediately, so
        its value never influenced the result.

    Returns
    -------
    numpy.ndarray
        1-D array of the interleaved position/velocity values. Its length
        follows from how many neighbour entries and axes are actually
        present; nothing here pads or validates it.
    """
    agent = particles.member[rlagent]
    features = []

    # Neighbour block: only the first `numberofneighbour` entries are used,
    # in the iteration order of the underlying mappings.
    neighbour_positions = list(agent['relative_position'].values())[:numberofneighbour]
    neighbour_velocities = list(agent['relative_velocity'].values())[:numberofneighbour]
    for relpos, relvel in zip(neighbour_positions, neighbour_velocities):
        for pos, vel in zip(relpos.values(), relvel.values()):
            features.extend((pos, vel))

    # Leader block: one (position, velocity) pair per axis.
    for pos, vel in zip(agent['distance2leader'].values(),
                        agent['velocity2leader'].values()):
        features.extend((pos, vel))

    return np.array(features)

In [5]:
# Reward Function 
def rewardfunction(dist2leader,dist2closest,score,t):
    """Score one simulation step and decide whether the episode is over.

    Reads the module-level globals ``delta_t`` (time added per call) and
    ``t_final`` (time limit).

    Parameters
    ----------
    dist2leader : float
        Distance from the agent to the leader.
    dist2closest : float
        Distance from the agent to its closest neighbour.
    score : float
        Cumulative score before this step.
    t : float
        Episode time before this step.

    Returns
    -------
    tuple
        ``(reward, score, done, t)``: the scaled reward for this step, the
        updated cumulative score, the termination flag and the advanced time.
    """
    # Hard failure: too far from the leader or too close to a neighbour.
    out_of_bounds = dist2leader >= 500.0 or dist2closest <= 2.0

    if out_of_bounds:
        raw_reward = -10000
    elif dist2closest < 10.0:
        # Between 2 and 10: the reward grows with the cube of the gap.
        raw_reward = dist2closest**3 - dist2leader
    else:
        # At 10 or beyond: both distances are subtracted from a fixed bonus.
        raw_reward = 1000 - dist2closest**1.5 - dist2leader

    # Scale so the hard-failure penalty becomes exactly -1.
    reward = raw_reward / 10000
    new_score = score + reward
    new_t = t + delta_t

    # Stop on a collapsed score, a hard failure, or the time limit.
    done = bool(new_score <= -100 or reward <= -1 or new_t >= t_final)

    return reward, new_score, done, new_t

In [None]:
# Main Function
# One iteration of the outer loop is one episode: the swarm is re-initialised
# with a fresh random target, then stepped until rewardfunction reports done.
for epoch in range(numberofepochs):
    # New target drawn inside the screen; randint is called once per axis and
    # only component `ii` of each draw is kept.
    xtrg        = [np.random.randint(screen_size)[ii] for ii in range(number_of_axes)]
    # Re-runs the constructor on the existing swarm object instead of creating
    # a new one.
    # NOTE(review): unlike the initial construction, `dim` is not passed here,
    # so this relies on swarm's default -- confirm it equals number_of_axes.
    particles.__init__(number_of_particles=number_of_particles,screensize=screen_size,target_location=xtrg,
                       display=True,CommRng=100,summary=False)
    # Roles are looked up again after re-initialisation.
    rlagent     = [key for key in particles.member.keys() if particles.member[key]['role']=='rlagent'][0]
    # stateappend ignores its argument and rebuilds the state from `particles`.
    state       = stateappend(state)
    done        = False
    t, score    = 0 , 0 
    # Per-episode flags, set to True further down when this episode ties or
    # beats the agent's best time / best score.
    # NOTE(review): presumably consumed by myagent.save() -- confirm in DDPG.py.
    myagent.mtd = False
    myagent.msd = False
    while not done:
        particles.rulebasedalgo()
        # The policy receives the state as a single row of numberOfStates values.
        action = myagent.policy(state.reshape(1,myagent.numberOfStates))

        # Write one action component per axis into the agent's 'deltavel' entry.
        for dim in range(particles.dim):
            particles.member[rlagent]['deltavel'][str(dim)] = action[int(dim)]

        # `done` is always False at this point (loop condition), so keepGoing
        # is always True here.
        particles.update(keepGoing=not done)
        # '2leader' is the Euclidean norm of the first two components of
        # distance2leader (hard-coded to two axes). '2closest' takes index 1 of
        # abs_distance_sorted -- presumably index 0 is the agent's own zero
        # distance; confirm in the swarm module.
        distance = {'2leader'  : (lambda x: np.sqrt(x[0]**2+x[1]**2))\
                                 (list(particles.member[rlagent]['distance2leader'].values())),
                    '2closest' : particles.member[rlagent]['abs_distance_sorted'][1]}
        newstate = stateappend(newstate)
        reward, score, done, t = rewardfunction(distance['2leader'],distance['2closest'],score,t)
        # The stored transition is (state, action, reward, next state); the
        # `done` flag is not part of the tuple.
        myagent.observation    = (state,action,reward,newstate)
        myagent.record_buffer()
        
        state = newstate
        # One log line per simulation step.
        print('ep= %s, act0= %.3f, act1= %.3f, vel0= %.3f, vel1= %.3f, rwd= %0.2f, scr= %0.2f, mt= %0.2f, ms= %0.2f, d2l= %0.2f, d2c= %0.2f, noisevar= %.2f, t= %0.1f' %\
             (epoch,action[0],action[1],particles.member[rlagent]['velocity']['0'],particles.member[rlagent]['velocity']['1'],reward,score,myagent.maxtime,myagent.maxscore,distance['2leader'],distance['2closest'],myagent.noisevariance,t))
        
        # Move the target whenever t modulo 100 falls in [0, delta_t).
        # NOTE(review): t is a float accumulated in delta_t increments, so this
        # test is sensitive to rounding -- confirm it fires exactly once per
        # 100 time units (a step counter would avoid the question).
        if t%100 >= 0.0 and t%100 < delta_t:
            print('\ntarget location changes\n')
            particles.trgt_loc                 = {str(ii) : np.random.randint(screen_size)[ii] for ii in\
                                                            range(particles.dim)}
            particles.targetposition['target'] = particles.trgt_loc

    # The while loop above only exits once `done` is True (there is no break),
    # so this condition always holds.
    if done:
        # Track the longest episode and the best score seen so far.
        # The 'saving models' messages are printed even when `train` is False;
        # myagent.save() itself is only called under `if train:`.
        if t >= myagent.maxtime:
            myagent.maxtime  = t
            myagent.mtd      = True
            print('saving models for mtd')
        if score >= myagent.maxscore:
            myagent.maxscore = score
            myagent.msd      =True
            print('saving models for msd')
        # NOTE(review): save() runs before learn(), and learn() is called once
        # per episode rather than once per step -- confirm this is intended.
        if train:
            myagent.save()
            myagent.learn()
    
        print('\n----- New Epoch ----- Epoch: %s\n' % (epoch+1))
            
    print('-----------------------------------------------------------------')

ep= 0, act0= -2.840, act1= -3.000, vel0= -20.000, vel1= 12.054, rwd= 0.08, scr= 0.08, mt= 0.00, ms= 0.00, d2l= 154.07, d2c= 15.74, noisevar= 0.60, t= 0.1
ep= 0, act0= -2.752, act1= -2.299, vel0= -20.000, vel1= 11.824, rwd= 0.08, scr= 0.15, mt= 0.00, ms= 0.00, d2l= 154.75, d2c= 18.89, noisevar= 0.60, t= 0.2
ep= 0, act0= -3.000, act1= -3.000, vel0= -20.000, vel1= 11.524, rwd= 0.07, scr= 0.23, mt= 0.00, ms= 0.00, d2l= 155.47, d2c= 22.36, noisevar= 0.60, t= 0.3
ep= 0, act0= -3.000, act1= 3.000, vel0= -20.000, vel1= 11.824, rwd= 0.07, scr= 0.30, mt= 0.00, ms= 0.00, d2l= 156.15, d2c= 25.99, noisevar= 0.60, t= 0.4
ep= 0, act0= -3.000, act1= -2.173, vel0= -20.000, vel1= 11.607, rwd= 0.07, scr= 0.37, mt= 0.00, ms= 0.00, d2l= 156.86, d2c= 24.46, noisevar= 0.60, t= 0.5
ep= 0, act0= -2.968, act1= -3.000, vel0= -20.000, vel1= 11.307, rwd= 0.07, scr= 0.45, mt= 0.00, ms= 0.00, d2l= 157.60, d2c= 22.15, noisevar= 0.60, t= 0.6
ep= 0, act0= -2.003, act1= -2.511, vel0= -20.000, vel1= 11.056, rwd= 0.07, sc