In [1]:
import torch

In [2]:
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt


In [3]:
from envs.env import D2DEnv
from algorithms.baselines import EarliestDeadlineFirstScheduler, GFAccess
from algorithms.ippo import iPPO


In [4]:
# Duration of one `Tf_gf` frame: four repetitions of a basic time unit.
# NOTE(review): the unit looks like (1/30) ms plus a 2.34 us overhead, in
# seconds -- the notebook does not state the physical meaning; confirm.
_unit_time = 1 / 30 * 1e-3 + 2.34e-6
Tf_gf = 4 * _unit_time
# Ratio of 1e-3 to the frame duration (shown as the cell output).
1e-3 / Tf_gf

7.008035881143711

In [5]:
# Scenario configuration consumed by the environment constructor below.
n_agents = 2
# Homogeneous agents: every agent gets the same deadline and the same `lbdas` entry.
deadlines = np.full(n_agents, 7)
lbdas = np.full(n_agents, 1 / 14)
# These three options are left unset in this scenario.
period = arrival_probs = offsets = None
# Every agent's neighbourhood lists all agent indices (its own included).
neighbourhoods = [list(range(n_agents)) for _ in range(n_agents)]


In [6]:
# Keyword options for the shared environment, gathered in one place so the
# constructor call itself stays short.
_env_kwargs = dict(
    period=period,
    arrival_probs=arrival_probs,
    offsets=offsets,
    episode_length=200,
    traffic_model='aperiodic',
    periodic_devices=[],
    reward_type=0,
    channel_switch=0,
    channel_decoding=1.,
    # Neighbourhoods is a list of size n_agents with the indices of the
    # neighbours for each agent.
    neighbourhoods=neighbourhoods,
    verbose=False,
)

# Single environment instance reused by every baseline and learner below.
env = D2DEnv(n_agents, deadlines, lbdas, **_env_kwargs)

In [8]:
# Baseline 1: earliest-deadline-first scheduler built on the shared `env`,
# with `use_channel` switched off and verbose reporting switched on.
edf = EarliestDeadlineFirstScheduler(env, use_channel=False, verbose=True)


In [9]:
# Run the EDF baseline. The result is indexed [0]..[3] by the report cell below.
# NOTE(review): 500 is presumably the number of evaluation episodes -- confirm
# against EarliestDeadlineFirstScheduler.run.
res_edf = edf.run(500)


Number of received packets: 71779.0
Number of channel_losses: 0


In [10]:
# Report the four EDF metrics, one per line, in the order they are stored in
# `res_edf`.
print(
    f"URLLC score: {res_edf[0]}",
    f"Jain's index: {res_edf[1]}",
    f"Channel errors: {res_edf[2]}",
    f"Reward per episode: {res_edf[3]}",
    sep="\n",
)


URLLC score: 0.9967957201967149
Jain's index: 0.9997862827685239
Channel errors: 0
Reward per episode: 1414.96


In [11]:
# Baseline 2: `GFAccess` baseline on the same `env`, with `use_channel`
# switched off (same setting as the EDF baseline above).
gf = GFAccess(env, use_channel=False)


In [12]:
# Score every candidate in `gf.transmission_prob_list`, then configure the
# baseline with the candidate whose score is highest.
# NOTE(review): 100 is presumably the number of episodes per candidate -- confirm.
cv = gf.get_best_transmission_probs(100)
_best = np.argmax(cv)
gf.transmission_prob = gf.transmission_prob_list[_best]
print(f"Transmission probabilities: {gf.transmission_prob_list} \nURLLC scores: {cv}")


Transmission probabilities: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1] 
URLLC scores: [0.3666482224374049, 0.5064612326043738, 0.5461776714064479, 0.5122845871686622, 0.45994993742177726, 0.38629416364541125, 0.2977648605632828, 0.23084384093113486, 0.16863864142538976, 0.11496897441260545]


In [14]:
# Run the GF baseline with the transmission probability selected above. The
# result is indexed [0]..[3] by the report cell below.
# NOTE(review): 500 is presumably the number of evaluation episodes -- confirm
# against GFAccess.run.
res_gf = gf.run(500)


In [15]:
# Report the four GF metrics, one per line, in the order they are stored in
# `res_gf`.
print(
    f"URLLC score: {res_gf[0]}",
    f"Jain's index: {res_gf[1]}",
    f"Channel errors: {res_gf[2]}",
    f"Reward per episode: {res_gf[3]}",
    sep="\n",
)


URLLC score: 0.5337340886740447
Jain's index: 0.9426200635570335
Channel errors: 0
Reward per episode: 271.32


In [18]:
# Inspect the `n` attribute of agent 0's action space (the recorded output is 2).
env.action_space[0].n


2

In [32]:
# Roll out one episode with randomly sampled actions and record, for every
# agent, the observation it receives at each step. Keys of `observations` are
# the agent indices as strings ("0", "1", ...).
observations = {f"{i}": [] for i in range(n_agents)}
# The second value returned by reset() is unpacked but not used in this rollout.
obs, (buffer_state, channel_state) = env.reset()
done = False
while not done:
    # Store what each agent observes *before* the joint action is applied.
    for i in range(n_agents):
        observations[f"{i}"].append(torch.tensor(obs[i], dtype=torch.float))
    # Draw one joint action from the environment's action space.
    actions = np.array(env.action_space.sample())
    # Only the next observations and the termination flag are needed here.
    obs, _next_state, _reward, done, _ = env.step(actions)
    

In [37]:
# Stack agent 0's recorded per-step observations along a new leading dimension
# so the whole episode can be inspected as one tensor.
torch.stack(observations['0'])


tensor([[0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 1., 1.],
        [0., 0., 0.,  ..., 0., 1., 0.]])

In [7]:
# iPPO learner on the shared `env`; only the environment is passed, so every
# other constructor argument keeps its default.
ippo = iPPO(env)


In [8]:
# Evaluate the learner before any training as a reference point; the recorded
# output is a 4-tuple.
# NOTE(review): 10 is presumably the number of test episodes -- confirm against iPPO.test.
ippo.test(10)

(0.9454191684465589, 0.9973912454640974, 0, 24.6)

In [9]:
# Train the learner. The recorded run was stopped manually (KeyboardInterrupt)
# after the episode-400 log line.
# NOTE(review): the arguments are presumably (number of episodes, a per-update
# setting) -- confirm their meaning against iPPO.train.
ippo.train(5000, 15)

Episode: 0, mean score rollout: 0.9749857076825769 Score test: (0.9610333616487122, 0.9988356306405209, 0, 26.66)
Episode: 100, mean score rollout: 0.9937134502923978 Score test: (0.9927645670759262, 0.99970229538208, 0, 27.54)
Episode: 200, mean score rollout: 1.0 Score test: (0.9981691368788143, 0.9998990856009695, 0, 28.08)
Episode: 300, mean score rollout: 1.0 Score test: (1.0, 1.0, 0, 28.14)
Episode: 400, mean score rollout: 1.0 Score test: (1.0, 1.0, 0, 27.88)


KeyboardInterrupt: 

In [7]:
# Sweep over `gamma`: build a fresh iPPO learner for each value, train it,
# test it, and keep the first element of the test result.
# NOTE(review): that first element is presumably the URLLC score, by analogy
# with the baseline result tuples -- confirm against iPPO.test.
results = []
for g in (0.3, 0.5, 0.7, 0.9):
    print(g)
    ippo = iPPO(env=env, gamma=g)
    res = ippo.train(5000, 15, 1000)
    tst = ippo.test(500)
    score = tst[0]
    results.append(score)

0.3
Episode: 0, mean score rollout: 0.7186243456937375 Score test: (0.7312317358998964, 0.9586494040843317, 0, 41.74)
Episode: 1000, mean score rollout: 0.9948203782850519 Score test: (0.9939148438351347, 0.9992543231873209, 0, 86.04)
Episode: 2000, mean score rollout: 0.9984305036236901 Score test: (0.996676841194882, 0.9996964671474271, 0, 84.9)
Episode: 3000, mean score rollout: 0.9992063492063491 Score test: (0.9977847517234539, 0.9997910092998188, 0, 88.62)
Episode: 4000, mean score rollout: 0.9985775356561873 Score test: (0.9969578212726963, 0.9998131686718807, 0, 83.62)
0.5
Episode: 0, mean score rollout: 0.7386664905606206 Score test: (0.7225508932035315, 0.9273728056733022, 0, 38.7)
Episode: 1000, mean score rollout: 0.9924856467409658 Score test: (0.9948691492954912, 0.9995808233865663, 0, 84.32)


KeyboardInterrupt: 