In [None]:
import numpy as np 
from tqdm.notebook import tqdm
import joblib
import random
import os

import torch
from torch.utils import tensorboard
import scipy.io as scio

from arena import Arena
from env.env import Environment, SimplifiedEnvironment

In [None]:
def setup_seed(seed):
    """Seed every random number generator used by this notebook.

    Seeds PyTorch (CPU and all CUDA devices), NumPy's global generator and
    Python's ``random`` module, then configures cuDNN for repeatable runs.

    Args:
        seed: Integer seed applied to all generators.
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    # Restrict cuDNN to deterministic algorithms ...
    torch.backends.cudnn.deterministic = True
    # ... and disable autotuning, which may otherwise pick a different
    # algorithm from one run to the next.
    torch.backends.cudnn.benchmark = False

## hyper-parameters

In [None]:
# --- values handed to the Arena constructor ---
epsilon = 1.0
final_epsilon = 0.1
anneal_period = 30_000
gamma = 0.95
learning_rate = 1e-4
batch_size = 64

# --- training-loop schedule ---
episodes = 40_000               # number of training episodes (outer loop)
episode_length = 100            # environment steps per episode (inner loop)
train_interval = 50             # arena.train() is called every `train_interval` steps
reset_period = 10               # arena.reset() is called every `reset_period` episodes
target_update_frequency = 4     # arena.update_target_model() is called every N episodes

# --- environment ---
num_agent = 8
num_packet = [1, 2, 3, 4, 5, 6]     # fix as 6
reward_weights = [0.00, 1.00]       # [0.0, 1.0]

# NOTE(review): not referenced by any cell visible in this notebook -- confirm
# whether the replay memory size is configured elsewhere.
memory_size = 200_000


In [None]:
# Seed all RNGs before the environment and the agents are constructed.
setup_seed(2333)

# Directory that receives the training curves (.mat files) once training ends.
record_path = os.path.join('record', '{}_agents'.format(num_agent), 'train')
# Create it up front: scipy.io.savemat does not create missing parent
# directories, so without this a fresh checkout would only fail after the
# whole training run has finished.
os.makedirs(record_path, exist_ok=True)

env = Environment(num_vehicle=num_agent, reward_weights=reward_weights, training=True)
arena = Arena(
    env,
    num_agent=env.num_vehicle,
    batch_size=batch_size,
    learning_rate=learning_rate,
    epsilon=epsilon,
    final_epsilon=final_epsilon,
    gamma=gamma,
    anneal_period=anneal_period,
    training=True,
)

# TensorBoard event files are written under ./log.
writer = tensorboard.SummaryWriter('./log')

In [None]:
# Per-episode training curves; one entry is appended per episode.
loss_list = []
reward_list = []
v2i_rate_list = []
v2v_success_list = []

for episode in tqdm(range(episodes)):
    # Every `reset_period` episodes: release cached GPU memory, set the
    # environment's packet count to 6 and reset the arena.
    if (episode + 1) % reset_period == 0:
        torch.cuda.empty_cache()
        env.num_packet = 6
        arena.reset(episode)

    # Per-episode accumulators.
    loss = 0.
    v2i_rate = 0.
    v2v_success = 0.

    for step in range(episode_length):
        # Acting does not need gradients.
        with torch.no_grad():
            arena.step(episode)
        # Run a training update every `train_interval` steps and accumulate its loss.
        if (step + 1) % train_interval == 0:
            loss += arena.train()
        v2i_rate += env.v2i_rate.sum()
        # Track the largest number of entries of env.remaining_load that are
        # <= 0 at any step of this episode.
        v2v_success = max(v2v_success, np.sum(env.remaining_load <= 0))

    arena.update_epsilon()
    if (episode + 1) % target_update_frequency == 0:
        arena.update_target_model()

    # Compute each per-episode metric once and reuse it for the in-memory
    # curves and for TensorBoard.
    last_reward = arena.episode_reward[-1]
    mean_v2i_rate = v2i_rate / episode_length
    v2v_success_ratio = v2v_success / env.num_vehicle

    loss_list.append(loss)
    reward_list.append(last_reward)
    v2i_rate_list.append(mean_v2i_rate)
    v2v_success_list.append(v2v_success_ratio)

    writer.add_scalar('loss', loss, episode)
    writer.add_scalar('reward', last_reward, episode)
    writer.add_scalar('v2i_rate', mean_v2i_rate, episode)
    writer.add_scalar('v2v_success', v2v_success_ratio, episode)
# save models
arena.save_models()

# Make sure every buffered TensorBoard event is written to disk.
writer.flush()

# save data
# scipy.io.savemat does not create missing directories, so ensure the target exists.
os.makedirs(record_path, exist_ok=True)
for curve_name, curve_values in (
    ('loss', loss_list),
    ('reward', reward_list),
    ('v2i_rate', v2i_rate_list),
    ('v2v_success', v2v_success_list),
):
    # Writes e.g. <record_path>/loss_baseline.mat with the single key 'loss'.
    scio.savemat(
        os.path.join(record_path, '{}_baseline.mat'.format(curve_name)),
        {curve_name: curve_values},
    )


## evaluation

In [None]:
# Evaluation settings noted by the author:
#   8 agents: 10 seeds, 10 episodes
#   4 agents: 20 seeds, 20 episodes
# NOTE(review): the values below pair 8 agents with 20 seeds / 20 episodes,
# which does not match the note above -- confirm which one is intended.
num_agent = 8
num_seeds = 20
episode = 20                # evaluation episodes run per seed
episode_len = 100           # environment steps per evaluation episode
trained_episode = 40000     # episode index passed to arena.step() during evaluation
num_packet_list = [1, 2, 3, 4, 5, 6]

# One inner list per entry of num_packet_list; each inner list collects one
# value per seed.
v2i_rate_list = [[] for _ in num_packet_list]
v2v_rate_list = [[] for _ in num_packet_list]
v2v_success_list = [[] for _ in num_packet_list]

# When the number of vehicles grows to 8, brute-force search is not included
# as a comparison because of its computational complexity.
# v2v_rate_benchmark_list = [[] for _ in range(len(num_packet_list))]
# v2i_rate_benchmark_list = [[] for _ in range(len(num_packet_list))]
# v2v_success_benchmark_list = [[] for _ in range(len(num_packet_list))]

# Evaluation results are stored separately from the training curves.
record_path = os.path.join('record', '{}_agents'.format(num_agent), 'evaluate')

# Evaluate once per packet count; `i` indexes the matching result lists.
for i, num_packet in enumerate(tqdm(num_packet_list)):

    # Averages over all seeds for the current packet count.
    v2v_rate_mean = np.zeros(num_agent)
    v2i_rate_mean = np.zeros(num_agent)
    v2v_success_mean = 0.

    # v2v_rate_benchmark_mean = np.zeros(num_agent)
    # v2i_rate_benchmark_mean = np.zeros(num_agent)
    # v2v_success_benchmark_mean = 0.

    # leave=False removes each finished inner bar so the notebook output is
    # not filled with one progress bar per (packet count, seed) pair.
    for j in tqdm(range(num_seeds), leave=False):
        # A distinct, reproducible seed for every repetition.
        setup_seed(1234 + 5 * j)
        env = Environment(num_vehicle=num_agent, num_packet=num_packet, training=False)
        arena = Arena(env, num_agent=env.num_vehicle, epsilon=0.1, training=False)

        # Per-seed accumulators.
        v2v_rate = np.zeros(num_agent)
        v2i_rate = np.zeros(num_agent)
        v2v_success = 0.

        # v2v_rate_benchmark = np.zeros(num_agent)
        # v2i_rate_benchmark = np.zeros(num_agent)
        # v2v_success_benchmark = 0.

        for k in range(episode):
            v2v_success_temp = 0.
            # v2v_success_benchmark_temp = 0.
            for _ in range(episode_len):
                arena.step(trained_episode)
                v2v_rate += env.v2v_rate
                v2i_rate += env.v2i_rate
                # Largest number of entries of env.remaining_load that are
                # <= 0 at any step of this episode.
                v2v_success_temp = max(v2v_success_temp, np.sum(env.remaining_load <= 0))

                # v2v_rate_benchmark += env.v2v_rate_benchmark
                # v2i_rate_benchmark += env.v2i_rate_benchmark
                # v2v_success_benchmark_temp = max(v2v_success_benchmark_temp, np.sum(env.remaining_load_benchmark <= 0))

            v2v_success += v2v_success_temp
            # v2v_success_benchmark += v2v_success_benchmark_temp

        # Average the rates over all steps, and the success count over
        # episodes and vehicles.
        v2v_rate /= (episode_len * episode)
        v2i_rate /= (episode_len * episode)
        v2v_success /= (episode * env.num_vehicle)

        # Running mean across seeds.
        v2v_rate_mean += v2v_rate / num_seeds
        v2i_rate_mean += v2i_rate / num_seeds
        v2v_success_mean += v2v_success / num_seeds

        # Per-seed results: summed rates and success ratio in percent.
        v2v_rate_list[i].append(v2v_rate.sum())
        v2i_rate_list[i].append(v2i_rate.sum())
        v2v_success_list[i].append(v2v_success * 100)

        # v2v_rate_benchmark /= (episode_len * episode)
        # v2i_rate_benchmark /= (episode_len * episode)
        # v2v_success_benchmark /= (episode * env.num_vehicle)

        # v2v_rate_benchmark_mean += v2v_rate_benchmark / num_seeds
        # v2i_rate_benchmark_mean += v2i_rate_benchmark / num_seeds
        # v2v_success_benchmark_mean += v2v_success_benchmark / num_seeds

        # v2v_rate_benchmark_list[i].append(v2v_rate_benchmark.sum())
        # v2i_rate_benchmark_list[i].append(v2i_rate_benchmark.sum())
        # v2v_success_benchmark_list[i].append(v2v_success_benchmark * 100)

    print('Number of packets: {}'.format(num_packet))
    print('My method----V2I Sum Rate: {:.2f}Mbps, V2V Sum Rate: {:.2f}Mbps, V2V Success Probability: {:.2f}%'.format(v2i_rate_mean.sum(), v2v_rate_mean.sum(), v2v_success_mean * 100))
    # print('Brute force----V2I Sum Rate: {:.2f}Mbps, V2V Sum Rate: {:.2f}Mbps, V2V Success Probability: {:.2f}%'.format(v2i_rate_benchmark_mean.sum(), v2v_rate_benchmark_mean.sum(), v2v_success_benchmark_mean * 100))

# save data
# scipy.io.savemat does not create missing directories; create the target
# first so the evaluation results are not lost to a FileNotFoundError.
os.makedirs(record_path, exist_ok=True)
scio.savemat(os.path.join(record_path, 'v2v_rate_baseline.mat'), {'v2v_rate_baseline': v2v_rate_list})
scio.savemat(os.path.join(record_path, 'v2i_rate_baseline.mat'), {'v2i_rate_baseline': v2i_rate_list})
scio.savemat(os.path.join(record_path, 'v2v_success_baseline.mat'), {'v2v_success_baseline': v2v_success_list})
# scio.savemat(os.path.join(record_path, 'v2v_rate_bruteforce.mat'), {'v2v_rate_bruteforce': v2v_rate_benchmark_list})
# scio.savemat(os.path.join(record_path, 'v2i_rate_bruteforce.mat'), {'v2i_rate_bruteforce': v2i_rate_benchmark_list})
# scio.savemat(os.path.join(record_path, 'v2v_success_bruteforce.mat'), {'v2v_success_bruteforce': v2v_success_benchmark_list})