In [None]:
import traci

import math

import numpy as np

import pandas as pd
import matplotlib.pyplot as plt


import numpy as np
import parl
from parl import layers
from paddle import fluid

import paddle.fluid as fluid


In [None]:
class MujocoModel(parl.Model):
    """Actor-critic container handed to the DDPG algorithm.

    Wraps an ``ActorModel`` and a ``CriticModel`` and forwards the
    ``policy`` / ``value`` / ``get_actor_params`` calls to them.
    """

    def __init__(self, act_dim):
        """Create the actor (with ``act_dim`` outputs) and the critic."""
        self.actor_model = ActorModel(act_dim)
        self.critic_model = CriticModel()

    def policy(self, obs):
        """Return the actor's output for ``obs``."""
        action = self.actor_model.policy(obs)
        return action

    def value(self, obs, act):
        """Return the critic's Q estimate for the ``(obs, act)`` pair."""
        q_value = self.critic_model.value(obs, act)
        return q_value

    def get_actor_params(self):
        """Return the parameters of the actor sub-model only."""
        actor = self.actor_model
        return actor.parameters()


class ActorModel(parl.Model):
    """Policy network: two ReLU hidden layers followed by a tanh output layer."""

    def __init__(self, act_dim):
        """Create the three fully connected layers (400 -> 300 -> ``act_dim``)."""
        hidden_sizes = (400, 300)

        self.fc1 = layers.fc(size=hidden_sizes[0], act='relu')
        self.fc2 = layers.fc(size=hidden_sizes[1], act='relu')
        self.fc3 = layers.fc(size=act_dim, act='tanh')

    def policy(self, obs):
        """Feed ``obs`` through fc1 -> fc2 -> fc3 and return the tanh output."""
        return self.fc3(self.fc2(self.fc1(obs)))


class CriticModel(parl.Model):
    """Q network: the action is concatenated after the first hidden layer."""

    def __init__(self):
        """Create the three fully connected layers (400 -> 300 -> 1)."""
        first_width, second_width = 400, 300

        self.fc1 = layers.fc(size=first_width, act='relu')
        self.fc2 = layers.fc(size=second_width, act='relu')
        self.fc3 = layers.fc(size=1, act=None)

    def value(self, obs, act):
        """Return Q(obs, act) with the trailing size-1 axis squeezed away."""
        obs_features = self.fc1(obs)
        # The action joins the network after the first layer, along axis 1.
        joint = layers.concat([obs_features, act], axis=1)
        q_column = self.fc3(self.fc2(joint))
        return layers.squeeze(q_column, axes=[1])
    
    
    
class MujocoAgent(parl.Agent):
    """Agent that owns the prediction and learning programs for the algorithm."""

    def __init__(self, algorithm, obs_dim, act_dim):
        """Store the dimensions, let ``parl.Agent`` initialise, then sync targets.

        Args:
            algorithm: the algorithm object exposing ``predict``, ``learn``
                and ``sync_target``.
            obs_dim: length of one observation vector (must be ``int``).
            act_dim: length of one action vector (must be ``int``).
        """
        assert isinstance(obs_dim, int)
        assert isinstance(act_dim, int)
        self.obs_dim, self.act_dim = obs_dim, act_dim
        super(MujocoAgent, self).__init__(algorithm)

        # decay=0 copies the model into the target model completely at start-up.
        self.alg.sync_target(decay=0)

    def build_program(self):
        """Define the two fluid programs: one for acting, one for learning."""
        self.pred_program = fluid.Program()
        self.learn_program = fluid.Program()

        # Acting: observation in, action out.
        with fluid.program_guard(self.pred_program):
            obs = layers.data(
                name='obs', dtype='float32', shape=[self.obs_dim])
            self.pred_act = self.alg.predict(obs)

        # Learning: one transition batch in, critic cost out.
        with fluid.program_guard(self.learn_program):
            obs = layers.data(
                name='obs', dtype='float32', shape=[self.obs_dim])
            act = layers.data(
                name='act', dtype='float32', shape=[self.act_dim])
            reward = layers.data(name='reward', dtype='float32', shape=[])
            next_obs = layers.data(
                name='next_obs', dtype='float32', shape=[self.obs_dim])
            terminal = layers.data(name='terminal', dtype='bool', shape=[])
            learn_outputs = self.alg.learn(obs, act, reward, next_obs,
                                           terminal)
            _, self.critic_cost = learn_outputs

    def predict(self, obs):
        """Run the prediction program on one observation.

        A leading batch axis is added before feeding; the fetched action is
        squeezed and its absolute value is returned.
        """
        batched = np.expand_dims(obs, axis=0)
        fetched = self.fluid_executor.run(
            self.pred_program,
            feed={'obs': batched},
            fetch_list=[self.pred_act])
        return np.absolute(np.squeeze(fetched[0]))

    def learn(self, obs, act, reward, next_obs, terminal):
        """Run one learning step on a batch, sync the target, return the critic cost."""
        feed = {
            'obs': obs, 'act': act, 'reward': reward,
            'next_obs': next_obs, 'terminal': terminal,
        }
        fetched = self.fluid_executor.run(
            self.learn_program, feed=feed, fetch_list=[self.critic_cost])
        self.alg.sync_target()
        return fetched[0]


In [None]:
def calDis(p1, p2):
    """Return the Euclidean distance between two 2-D points.

    Each point is indexable with x at index 0 and y at index 1; the
    coordinates are converted with ``float`` first.
    """
    x1, x2 = float(p1[0]), float(p2[0])
    y1, y2 = float(p1[1]), float(p2[1])
    return math.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)


def calSpeedD(v1, v2):
    """Return the absolute difference of two speeds as a float."""
    return abs(float(v1) - float(v2))


def calAccD(a1, a2):
    """Return the absolute difference of two accelerations as a float."""
    gap = float(a1) - float(a2)
    return math.fabs(gap)


def getDifferenceData(obs):
    """Compute per-vehicle neighbourhood statistics and fleet-wide averages.

    Args:
        obs: list of ``[vehicle_id, position, speed, acceleration]`` entries,
            where ``position`` is indexable as ``(x, y)``.

    Returns:
        A tuple ``(diffList, result)``:

        * ``diffList`` has one entry per vehicle:
          ``[vehicle_id, mean distance, mean speed difference,
          mean acceleration difference, neighbour ids]``. The means are
          taken over the vehicles closer than the communication range
          (30) and are ``nan`` when a vehicle has no neighbour.
        * ``result`` is ``[obsMobility, connection]``. ``obsMobility`` is
          ``[average speed, average acceleration]`` over all vehicles
          (``[0, 0, 0]`` when ``obs`` is empty) and ``connection`` maps each
          vehicle id to its list of neighbour ids.
    """
    communicationRange = 30

    def _meanOrNan(values):
        # np.mean([]) also yields nan but emits a RuntimeWarning on every
        # isolated vehicle; return nan directly to keep the output quiet.
        return np.mean(values) if values else float("nan")

    diffList = []
    connection = {}
    speedSum = 0
    accSum = 0

    for v in obs:
        speedSum += v[2]
        accSum += v[3]

        neighbourIds = []
        distances = []
        speedGaps = []
        accGaps = []

        for other in obs:
            if other[0] == v[0]:
                continue
            distance = calDis(v[1], other[1])
            if distance < communicationRange:
                neighbourIds.append(other[0])
                distances.append(distance)
                speedGaps.append(calSpeedD(v[2], other[2]))
                accGaps.append(calAccD(v[3], other[3]))

        diffList.append([v[0], _meanOrNan(distances), _meanOrNan(speedGaps),
                         _meanOrNan(accGaps), neighbourIds])
        connection[v[0]] = neighbourIds

    vehicleNum = len(diffList)
    obsMobility = [0, 0, 0]
    if vehicleNum != 0:
        # The second entry is the average acceleration (the previous code
        # reported the average speed twice).
        obsMobility = [speedSum / vehicleNum, accSum / vehicleNum]

    return diffList, [obsMobility, connection]

In [None]:
def clusterCreation(diffList, clusterNumber, obsPD, obs, obsMobility, action):
    """Pick one cluster head per connectivity group using weighted scores.

    Vehicles are grouped by their number of neighbours. For the (at most)
    three groups with the highest neighbour count, the vehicle with the
    lowest score becomes the head, provided the group has more than one
    member. The score of a vehicle is::

        w1 * mean_distance / 30
        + w2 * mean_speed_diff / obsMobility[0]
        + w3 * mean_acc_diff / obsMobility[1]

    Args:
        diffList: entries ``[vehicle_id, mean distance, mean speed diff,
            mean acceleration diff, neighbour ids]``.
        clusterNumber: unused; kept for interface compatibility.
        obsPD: unused; kept for interface compatibility.
        obs: unused; kept for interface compatibility.
        obsMobility: indexable whose first two entries normalise the speed
            and acceleration terms. A zero entry is treated as 1. The
            argument is not modified.
        action: indexable with the three weights ``w1, w2, w3``.

    Returns:
        Dict mapping each chosen head's vehicle id to the neighbour count
        of its group.
    """
    w1, w2, w3 = action[0], action[1], action[2]
    communicationRange = 30

    # Guard against division by zero with local values, so the caller's list
    # (which highway() later returns) is left untouched.
    speedScale = 1 if obsMobility[0] == 0 else obsMobility[0]
    accScale = 1 if obsMobility[1] == 0 else obsMobility[1]

    # Group vehicle ids by how many neighbours they have.
    groups = {}
    for singleV in diffList:
        groups.setdefault(len(singleV[4]), []).append(singleV[0])

    # Only the three best-connected groups are considered.
    clusters = sorted(groups, reverse=True)[:3]

    scoreDic = {}
    for singleV in diffList:
        scoreDic[singleV[0]] = (w1 * singleV[1] / communicationRange
                                + w2 * singleV[2] / speedScale
                                + w3 * singleV[3] / accScale)

    clusterHeads = {}
    for c in clusters:
        members = groups[c]
        if len(members) <= 1:
            continue

        # Start from +inf rather than a finite sentinel so that a group whose
        # scores are all large still gets a head. nan scores never compare
        # lower and are therefore never selected.
        bestScore = float("inf")
        clusterHead = None
        for v in members:
            if scoreDic[v] < bestScore:
                clusterHead = v
                bestScore = scoreDic[v]

        if clusterHead is not None:
            clusterHeads[clusterHead] = c

    return clusterHeads

In [None]:
currentClusters = []


def highway(action, obs_dim=150):
    """Advance the SUMO simulation until at least one cluster head leaves.

    Every iteration reads all vehicles through TraCI, selects cluster heads
    with ``clusterCreation`` using ``action`` as the three score weights,
    updates the head bookkeeping and then performs one simulation step.

    The function reads and mutates the notebook-level dicts ``clusterDic``
    (head id -> number of iterations it has been seen in the simulation) and
    ``record`` (heads whose departure has not been reported yet). The
    calling cells reset both before each episode.

    Args:
        action: indexable with the three weights passed to ``clusterCreation``.
        obs_dim: length of the returned observation vector; shorter
            observations are zero-padded and longer ones are truncated.

    Returns:
        Tuple ``(obsOut, reward, done, mobility)``:

        * ``obsOut``: flat list of ``x, y, speed, acceleration`` per vehicle,
          exactly ``obs_dim`` long.
        * ``reward``: mean recorded lifetime of the heads that left, minus
          48.8 (0 if the simulation ran empty before any head left).
        * ``done``: True once ``getMinExpectedNumber()`` dropped to 3 or less
          during this call.
        * ``mobility``: first element of ``getDifferenceData``'s second
          return value for the last iteration.
    """
    # Offset subtracted from the mean lifetime; the evaluation cells add the
    # same value back. Its origin is not documented in the notebook.
    lifetimeBaseline = 48.8

    reward = 0
    done = False
    obsOut = []

    while True:
        ls = traci.vehicle.getIDList()
        obs = [
            [veh_id,
             traci.vehicle.getPosition(veh_id),
             traci.vehicle.getSpeed(veh_id),
             traci.vehicle.getAcceleration(veh_id)]
            for veh_id in ls
        ]

        difflist, obsMobility = getDifferenceData(obs)

        # "<= 3" instead of "== 3": the count can skip 3 when several
        # vehicles leave in the same step, which would leave the episode
        # without a terminal flag.
        remaining = traci.simulation.getMinExpectedNumber()
        if remaining <= 3:
            done = True

        # clusterCreation ignores its obsPD argument, so no DataFrame is
        # built here any more.
        c = clusterCreation(difflist, 8, None, obs, obsMobility[0], action)

        for h in c:
            if h not in clusterDic:
                clusterDic[h] = 0
                record[h] = 0

        present = set(ls)
        gone = []
        for h in clusterDic:
            if h in present:
                clusterDic[h] += 1
            elif h in record:
                # Report each departed head exactly once.
                gone.append(clusterDic[h])
                del record[h]

        traci.simulationStep()

        if gone:
            reward = np.mean(gone) - lifetimeBaseline
            for x in obs:
                obsOut.extend([x[1][0], x[1][1], x[2], x[3]])
            break

        if remaining == 0:
            # Nothing is left to simulate and no head is pending, so no
            # departure can ever be reported: stop instead of looping forever.
            done = True
            break

    # Fixed-size observation: pad with zeros, and truncate when more vehicles
    # are present than fit (a longer vector would not match the network input).
    obsOut = obsOut[:obs_dim]
    obsOut.extend([0] * (obs_dim - len(obsOut)))

    return obsOut, reward, done, obsMobility[0]

In [None]:
# Hyper-parameters of the DDPG training run.
# Learning rates passed to parl.algorithms.DDPG as actor_lr / critic_lr.
ACTOR_LR = 1e-4
CRITIC_LR = 1e-3
# Passed to DDPG as gamma.
GAMMA = 0.99
# Passed to DDPG as tau.
TAU = 0.001
# First argument of ReplayMemory (presumably its capacity — confirm against PARL).
MEMORY_SIZE = int(1e6)
# agent.learn() is only called once the replay memory holds more than this
# many transitions; the warm-up loop also runs until this size is reached.
MEMORY_WARMUP_SIZE = 1e4
# Number of transitions sampled from the replay memory per learning step.
BATCH_SIZE = 128
# Factor applied to the reward before it is stored in the replay memory.
REWARD_SCALE = 0.1
# NOTE(review): not referenced anywhere in the visible notebook.
ENV_SEED = 1

In [None]:
eachEpisodeClusterLife = []


def run_train_episode(agent, rpm):
    """Run one training episode in SUMO and return its total (unscaled) reward.

    Starts a SUMO instance through TraCI, repeatedly asks ``agent`` for the
    clustering weights, perturbs them with Gaussian noise (sigma 1.0, clipped
    to [0, 1]), advances the simulation with ``highway`` and stores each
    transition in ``rpm``. Once ``rpm`` holds more than
    ``MEMORY_WARMUP_SIZE`` transitions, one learning step is performed per
    environment step. The list of per-step rewards is appended to the
    notebook-level ``eachEpisodeClusterLife``.

    The TraCI connection is closed even if the episode raises, so a failed
    run does not leave a connection open that blocks the next
    ``traci.start``.

    Args:
        agent: object exposing ``predict(obs)`` and
            ``learn(obs, act, reward, next_obs, terminal)``.
        rpm: replay memory exposing ``append``, ``size`` and ``sample_batch``.

    Returns:
        Sum of the unscaled rewards of the episode.
    """
    traci.start(["sumo", "-c", "highway.sumocfg"])
    try:
        # The first call uses fixed weights only to obtain an initial observation.
        obs, _, _, _ = highway([0.8, 0.1, 0.1])
        ClusterLife = []
        total_reward = 0

        while True:
            # NOTE(review): MujocoAgent.predict adds a batch axis as well, so
            # the network is fed a doubly expanded array — confirm intended.
            batch_obs = np.expand_dims(obs, axis=0)
            action = agent.predict(batch_obs.astype('float32'))

            # Exploration noise, then clip to the valid weight range.
            action = np.clip(np.random.normal(action, 1.0), 0, 1.0)

            next_obs, reward, done, _ = highway(action)
            ClusterLife.append(reward)

            rpm.append(obs, action, REWARD_SCALE * reward, next_obs, done)

            if rpm.size() > MEMORY_WARMUP_SIZE:
                (batch_obs, batch_action, batch_reward, batch_next_obs,
                 batch_terminal) = rpm.sample_batch(BATCH_SIZE)
                agent.learn(batch_obs, batch_action, batch_reward,
                            batch_next_obs, batch_terminal)

            obs = next_obs
            total_reward += reward

            if done:
                break

        eachEpisodeClusterLife.append(ClusterLife)
    finally:
        traci.close()

    print("close")
    return total_reward

In [None]:
def run_evaluate_episode(agent):
    """Run one episode with the agent's noise-free actions and return the total reward.

    Starts a SUMO instance through TraCI, drives ``highway`` with the weights
    predicted by ``agent`` until it reports ``done``, and appends the list of
    per-step rewards to the notebook-level ``eachEpisodeClusterLife``.

    The TraCI connection is closed even if the episode raises, so a failed
    run does not leave a connection open that blocks the next
    ``traci.start``.

    Args:
        agent: object exposing ``predict(obs)``.

    Returns:
        Sum of the rewards of the episode.
    """
    traci.start(["sumo", "-c", "highway.sumocfg"])
    try:
        # The first call uses fixed weights only to obtain an initial observation.
        obs, _, _, _ = highway([0.8, 0.1, 0.1])
        total_reward = 0
        ClusterLife = []

        while True:
            batch_obs = np.expand_dims(obs, axis=0)
            action = agent.predict(batch_obs.astype('float32'))

            next_obs, reward, done, _ = highway(action)
            ClusterLife.append(reward)
            obs = next_obs
            total_reward += reward

            if done:
                break
    finally:
        traci.close()

    eachEpisodeClusterLife.append(ClusterLife)
    return total_reward

In [None]:
from parl.utils import check_version_for_fluid  # requires parl >= 1.4.1
check_version_for_fluid()

import argparse
import gym
import numpy as np
import time
import parl

from parl.utils import logger, ReplayMemory
from parl.env.continuous_wrappers import ActionMappingWrapper

In [None]:
# Build the model/algorithm/agent and fill the replay memory before training.
# highway() pads every observation to 150 values (4 per vehicle).
obs_dim = 150
# One weight per term of the score computed in clusterCreation().
act_dim = 3

# Total reward of every episode run by the training loop below.
rewardCollected = []
    
model = MujocoModel(act_dim)
algorithm = parl.algorithms.DDPG(
model, gamma=GAMMA, tau=TAU, actor_lr=ACTOR_LR, critic_lr=CRITIC_LR)

agent = MujocoAgent(algorithm, obs_dim, act_dim)

rpm = ReplayMemory(MEMORY_SIZE, obs_dim, act_dim)

# Warm-up: run episodes until the replay memory holds MEMORY_WARMUP_SIZE
# transitions; run_train_episode() does not call agent.learn() before that.
while rpm.size() < MEMORY_WARMUP_SIZE:
    print(rpm.size())
    # highway() reads and mutates clusterDic and record as globals, so they
    # are reset before every episode.
    clusterDic = {}
    # NOTE(review): performance is not read anywhere in the visible notebook.
    performance = {}
    record = {}
    run_train_episode(agent, rpm)

print('xxxx')    
episode = 0
# NOTE(review): RewardCollect is not used in the visible notebook
# (rewardCollected is the list that gets filled).
RewardCollect = []

In [None]:
# Main training loop: 10 training episodes followed by one evaluation
# episode, repeated until 1000 training episodes have been run.
while episode < 1000 :
    
    for i in range(10):
        # highway() keeps its head bookkeeping in these globals; start every
        # episode from a clean state.
        clusterDic = {}
        performance = {}
        record = {}
        train_reward = run_train_episode(agent, rpm)
        rewardCollected.append(train_reward)
        episode += 1
        logger.info('Episode: {} Reward: {}'.format(episode, train_reward))
        
    clusterDic = {}
    performance = {}
    record = {}
    evaluate_reward = run_evaluate_episode(agent)
    # NOTE(review): evaluation rewards go into the same list as the training
    # rewards, so the average plotted later mixes both — confirm intended.
    rewardCollected.append(evaluate_reward)
    logger.info('Episode {}, Evaluate reward: {}'.format(episode, evaluate_reward))

In [None]:
# Running mean of the collected episode rewards: entry k is the average of
# the first k + 1 rewards. rewardCollected is the list filled by the training
# loop (it holds both training and evaluation rewards); the previously
# referenced name rewardCollected1 is not defined anywhere in the notebook.
step = 1
total = 0
averageReward = []
for x in rewardCollected:
    total += x
    averageReward.append(total / step)
    step += 1

In [None]:
# Plot the running average reward per episode and save it as a JPEG.
plt.plot(range(0,len(averageReward)), averageReward, color='green', label='Average Reward')

plt.legend() # show the legend

plt.xlabel('Episode')
plt.ylabel('Average Reward')
plt.savefig("AveragerewardCollectedFinal.jpg")

In [None]:
# Baseline run: one SUMO episode driven by fixed clustering weights.
# Collects cluster lifetimes overall and bucketed by the fleet's average
# speed / acceleration (rounded to one decimal).

# highway() keeps its head bookkeeping in these globals; start clean.
clusterDic = {}
performance = {}
record = {}

ClusterLifeFinalModel = []
total_reward = 0
ClusterLife = []

SpeedLife = {}
AccLife = {}

traci.start(["sumo", "-c", "highway.sumocfg"])
try:
    obs, reward, done, ob = highway([0.8, 0.1, 0.1])

    while True:
        # NOTE(review): the loop uses [0.8, -2, 0.1] although the initial call
        # (and the commented-out alternative in the RL cell) uses
        # [0.8, 0.1, 0.1] — confirm which fixed weights are intended.
        next_obs, reward, done, ob = highway([0.8, -2, 0.1])

        # highway() subtracts 48.8 from the mean lifetime; add it back.
        lifetime = reward + 48.8
        speed = round(ob[0], 1)
        acc = round(ob[1], 1)

        # NOTE(review): the speed bucket stores lifetime / 2 while the
        # acceleration bucket stores the full lifetime — confirm intended.
        SpeedLife.setdefault(speed, []).append(lifetime / 2)
        AccLife.setdefault(acc, []).append(lifetime)

        ClusterLifeFinalModel.append(lifetime / 2)
        obs = next_obs
        total_reward += reward

        if done:
            break
finally:
    # Always release the TraCI connection so the cell can be re-run after
    # an error.
    traci.close()

print(total_reward)

In [None]:
# RL run: one SUMO episode driven by the weights predicted by the trained
# agent. Collects cluster lifetimes overall and bucketed by the fleet's
# average speed / acceleration (rounded to one decimal).

# highway() keeps its head bookkeeping in these globals; start clean.
clusterDic = {}
performance = {}
record = {}

ClusterLifeFinalModelRL = []
total_reward = 0
ClusterLife = []

SpeedLifeRL = {}
AccLifeRL = {}

traci.start(["sumo", "-c", "highway.sumocfg"])
try:
    # The first call uses fixed weights only to obtain an initial observation.
    obs, reward, done, ob = highway([0.8, 0.1, 0.1])

    while True:
        batch_obs = np.expand_dims(obs, axis=0)
        action = agent.predict(batch_obs.astype('float32'))

        next_obs, reward, done, ob = highway(action)

        # highway() subtracts 48.8 from the mean lifetime; add it back.
        lifetime = reward + 48.8
        speed = round(ob[0], 1)
        acc = round(ob[1], 1)

        # NOTE(review): the speed bucket stores lifetime / 2 while the
        # acceleration bucket stores the full lifetime — confirm intended.
        SpeedLifeRL.setdefault(speed, []).append(lifetime / 2)
        AccLifeRL.setdefault(acc, []).append(lifetime)

        ClusterLifeFinalModelRL.append(lifetime / 2)
        obs = next_obs
        total_reward += reward

        if done:
            break
finally:
    # Always release the TraCI connection so the cell can be re-run after
    # an error.
    traci.close()

print(total_reward)

In [None]:
# Running mean of the cluster lifetimes recorded in the RL-weights run.
# The previously referenced name CRL is not defined anywhere in the notebook;
# ClusterLifeFinalModelRL is the list filled by the RL evaluation cell and
# matches the 'RL Weights' label used when CRLAverage is plotted.
CRLAverage = []
step = 1
totalLifeRL = 0
for x in ClusterLifeFinalModelRL:
    totalLifeRL += x
    CRLAverage.append(totalLifeRL / step)
    step += 1

In [None]:
# Running mean of the cluster lifetimes recorded in the fixed-weights run.
# The previously referenced name CWRL is not defined anywhere in the notebook;
# ClusterLifeFinalModel is the list filled by the fixed-weights evaluation
# cell and matches the 'Fixed Weights' label used when CAverage is plotted.
CAverage = []
step = 1
totalLife = 0
for x in ClusterLifeFinalModel:
    totalLife += x
    CAverage.append(totalLife / step)
    step += 1

In [None]:
# Compare the running average cluster lifetime of the fixed-weights run
# (green) with the RL-weights run (red) and save the figure.
plt.plot(range(0,len(CAverage)), CAverage, color='green', label='Fixed Weights')
plt.plot(range(0,len(CRLAverage)), CRLAverage, color='red',label='RL Weights')

plt.legend() # show the legend

plt.xlabel('Cluster Change Time')
plt.ylabel('Average Cluster Life Time')
plt.savefig("xxx1.jpg")

In [None]:
# Plot data for the fixed-weights run: the speed buckets in ascending order
# (X) and the mean stored lifetime of each bucket (Y).
forSpeedLifeDrowX = [float(speedKey) for speedKey in sorted(SpeedLife)]
forSpeedLifeDrowY = [
    float(np.mean(SpeedLife[speedKey])) for speedKey in sorted(SpeedLife)
]

print(forSpeedLifeDrowX)

In [None]:
# Plot data for the RL-weights run: the speed buckets in ascending order (X)
# and the mean stored lifetime of each bucket (Y).
forSpeedLifeDrowXRL = [float(speedKey) for speedKey in sorted(SpeedLifeRL)]
forSpeedLifeDrowYRL = [
    float(np.mean(SpeedLifeRL[speedKey])) for speedKey in sorted(SpeedLifeRL)
]

print(forSpeedLifeDrowXRL)

In [None]:
# Cluster lifetime as a function of the fleet's average speed: fixed weights
# (green) versus RL weights (red); saved as a JPEG.
plt.plot(forSpeedLifeDrowX, forSpeedLifeDrowY, color='green', label='Fixed Weights')
plt.plot(forSpeedLifeDrowXRL, forSpeedLifeDrowYRL, color='red',label='RL Weights')

plt.legend() # show the legend

plt.xlabel('Average Speed')
plt.ylabel('Cluster Life Time')
plt.savefig("speed2.jpg")