In [1]:
import torch

In [2]:
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt


In [3]:
from envs.env import D2DEnv
from algorithms.baselines import EarliestDeadlineFirstScheduler, GFAccess
from algorithms.ippo import iPPO


In [4]:
# Grant-free frame duration, in seconds.
# NOTE(review): presumably 4 symbols, each lasting 1/30 ms plus a 2.34 us
# guard/cyclic-prefix interval -- confirm against the system model.
symbol_time = 1 / 30 * 1e-3
guard_time = 2.34e-6
Tf_gf = 4 * (symbol_time + guard_time)

# How many such frames fit into 1 ms (shown as the cell output).
1e-3 / Tf_gf

7.008035881143711

In [5]:
# Scenario configuration: ten identical agents.
n_agents = 10

# Every agent gets the same deadline (7) and the same rate parameter (1/14).
deadlines = np.full(n_agents, 7)
lbdas = np.full(n_agents, 1 / 14)

# Left unset in this configuration; passed to the environment as None.
period = arrival_probs = offsets = None

# Fully connected topology: each agent's neighbourhood lists every agent index
# (its own index included).
neighbourhoods = [list(range(n_agents)) for _ in range(n_agents)]


In [6]:
# Keyword settings for the environment, gathered in one place so they are easy
# to scan and tweak.
env_kwargs = dict(
    period=period,
    arrival_probs=arrival_probs,
    offsets=offsets,
    episode_length=200,
    traffic_model='aperiodic',
    periodic_devices=[],
    reward_type=0,
    channel_switch=0,
    channel_decoding=1.,
    # Neighbourhoods is a list of size n_agents holding, for each agent,
    # the indices of its neighbours.
    neighbourhoods=neighbourhoods,
    verbose=False,
)

# Shared environment instance used by the baselines and by iPPO.
env = D2DEnv(n_agents, deadlines, lbdas, **env_kwargs)

In [8]:
# Earliest-Deadline-First baseline scheduler bound to the shared `env`.
# NOTE(review): use_channel=False presumably disables the channel model for
# this baseline -- confirm in algorithms.baselines.
edf = EarliestDeadlineFirstScheduler(env, use_channel=False, verbose=True)


In [9]:
# Evaluate the EDF baseline and keep its result for reporting.
# NOTE(review): 500 is presumably the number of evaluation episodes -- confirm
# against EarliestDeadlineFirstScheduler.run.
res_edf = edf.run(500)


Number of received packets: 71779.0
Number of channel_losses: 0


In [10]:
# Report the four EDF metrics, one per line, in the order they appear in
# `res_edf`.
print(
    f"URLLC score: {res_edf[0]}",
    f"Jain's index: {res_edf[1]}",
    f"Channel errors: {res_edf[2]}",
    f"Reward per episode: {res_edf[3]}",
    sep="\n",
)


URLLC score: 0.9967957201967149
Jain's index: 0.9997862827685239
Channel errors: 0
Reward per episode: 1414.96


In [11]:
# Grant-free access baseline bound to the shared `env`.
# NOTE(review): use_channel=False presumably disables the channel model, as
# for the EDF baseline -- confirm in algorithms.baselines.
gf = GFAccess(env, use_channel=False)


In [12]:
# Score every candidate in gf.transmission_prob_list.
# NOTE(review): 100 is presumably the number of episodes per candidate --
# confirm against GFAccess.get_best_transmission_probs.
cv = gf.get_best_transmission_probs(100)

# Adopt the candidate with the highest score for the runs that follow.
best_idx = np.argmax(cv)
gf.transmission_prob = gf.transmission_prob_list[best_idx]

print(f"Transmission probabilities: {gf.transmission_prob_list} \nURLLC scores: {cv}")


Transmission probabilities: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1] 
URLLC scores: [0.3666482224374049, 0.5064612326043738, 0.5461776714064479, 0.5122845871686622, 0.45994993742177726, 0.38629416364541125, 0.2977648605632828, 0.23084384093113486, 0.16863864142538976, 0.11496897441260545]


In [14]:
# Evaluate the grant-free baseline with the transmission probability currently
# set on `gf`.
# NOTE(review): 500 is presumably the number of evaluation episodes -- confirm
# against GFAccess.run.
res_gf = gf.run(500)


In [15]:
# Report the four grant-free metrics, one per line, in the order they appear
# in `res_gf`.
print(
    f"URLLC score: {res_gf[0]}",
    f"Jain's index: {res_gf[1]}",
    f"Channel errors: {res_gf[2]}",
    f"Reward per episode: {res_gf[3]}",
    sep="\n",
)


URLLC score: 0.5337340886740447
Jain's index: 0.9426200635570335
Channel errors: 0
Reward per episode: 271.32


In [18]:
# Display the `n` attribute of the first entry of the environment's action
# space -- presumably the number of discrete actions available to agent 0.
env.action_space[0].n


2

In [32]:
# Roll out one episode with randomly sampled actions and record, for every
# agent, the observation it sees at each step. Keys are the agent indices as
# strings ("0", "1", ...); each value is a list of float tensors, one per step.
observations = {str(i): [] for i in range(n_agents)}

# reset() returns the per-agent observations plus a (buffer, channel) state
# pair; only the observations are used in this cell.
obs, (buffer_state, channel_state) = env.reset()
done = False
while not done:
    # Store the observations the agents hold *before* acting in this step.
    for i in range(n_agents):
        observations[str(i)].append(torch.tensor(obs[i], dtype=torch.float))

    # Random joint action; no policy is involved in this rollout.
    actions = np.array(env.action_space.sample())

    # Only the next observations and the termination flag are needed here.
    obs, _, _, done, _ = env.step(actions)
    

In [37]:
# Stack agent "0"'s recorded per-step observation tensors along a new leading
# dimension and display the result.
torch.stack(observations['0'])


tensor([[0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 1., 1.],
        [0., 0., 0.,  ..., 0., 1., 0.]])

In [7]:
# Create the iPPO learner on the shared environment; only `env` is passed, so
# any other settings keep whatever defaults iPPO defines.
ippo = iPPO(env)


In [10]:
# Evaluate the current iPPO policy and display the returned metrics tuple.
# NOTE(review): the execution counts show this cell (In [10]) was run after
# the training cell that follows it (In [9]), so the displayed result reflects
# a (partially) trained policy. On Restart & Run All this cell would run
# before training -- consider moving it below the training cell.
# NOTE(review): 10 is presumably the number of evaluation episodes -- confirm
# against iPPO.test.
ippo.test(10)

(0.9936606397936977, 0.9993883413983079, 0, 86.4)

In [9]:
# Train the iPPO agents. The recorded output logs a rollout score and a test
# tuple every 100 episodes; the saved run was stopped manually
# (KeyboardInterrupt) around episode 800.
# NOTE(review): arguments are presumably (number of training episodes, 15 = ?)
# -- confirm the meaning of both against iPPO.train.
ippo.train(10000, 15)

Episode: 0, mean score rollout: 0.7488213021875807 Score test: (0.7469722589823404, 0.9444541646502899, 0, 39.62)
Episode: 100, mean score rollout: 0.8327039897350128 Score test: (0.8311155346140794, 0.9777690849931759, 0, 54.58)
Episode: 200, mean score rollout: 0.8567853930713938 Score test: (0.8645113433551288, 0.974426456337026, 0, 63.52)
Episode: 300, mean score rollout: 0.8734371014557402 Score test: (0.8723178806902085, 0.9782159383494688, 0, 66.6)
Episode: 400, mean score rollout: 0.9073268705795613 Score test: (0.9156660705619898, 0.9890156813598938, 0, 69.2)
Episode: 500, mean score rollout: 0.9329326821443029 Score test: (0.9296142931998415, 0.9918918502930987, 0, 72.86)
Episode: 600, mean score rollout: 0.9461803625275246 Score test: (0.9327030398804986, 0.9930824267038448, 0, 72.64)
Episode: 700, mean score rollout: 0.9200725689277418 Score test: (0.9330507912831562, 0.9913262219004569, 0, 75.48)
Episode: 800, mean score rollout: 0.956791463543989 Score test: (0.9549269218

KeyboardInterrupt: 