In [1]:
import os
import pickle
import random

import numpy as np
import torch

In [2]:
from algorithms.noma_ppo import *
from nomaenv import NomaEnv

In [3]:
# Seed every random number generator used in this notebook with one shared value.
SEED = 42
for seed_fn in (random.seed, np.random.seed):
    seed_fn(SEED)
# Kept as the last expression so the cell still echoes the torch Generator object.
torch.manual_seed(SEED)

<torch._C.Generator at 0x7fdb804b8d10>

In [4]:
# Scenario: k devices that all share the same deadline value.
k = 16
deadlines = np.array([6] * k)

# These three settings are passed to the environment as None.
period = None
arrival_probs = None
offsets = None

# Keyword options for the environment: aperiodic traffic over a collision
# channel, with path loss, shadowing and fast fading all switched off.
env_options = dict(
    lbda=1/9.3,
    period=period,
    arrival_probs=arrival_probs,
    offsets=offsets,
    episode_length=200,
    max_simultaneous_devices=3,
    traffic_model='aperiodic',
    channel_model='collision',
    distances=None,
    path_loss=False,
    shadowing=False,
    fast_fading=False,
    verbose=False,
)

env = NomaEnv(k, deadlines, **env_options)

In [5]:
# Hyperparameters for the PPO agent trained in this notebook.
ppo_hparams = {
    'lr_actor': 1e-4,
    'lr_critic': 1e-3,
    'hidden_size': 256,
    'gamma': 0.7,
    'K_epochs': 4,
    'eps_clip': 0.1,
    'prior_weight': 0.7,
    'beta': 0.1,
    'scheduler': False,
}

# Build the agent on top of the environment created above.
nomappo = NomaPPO(env, **ppo_hparams)

In [6]:
# Baseline: evaluate the freshly constructed agent before any training.
# NOTE(review): 10 is presumably the number of test episodes — confirm against NomaPPO.test.
nomappo.test(10)

(0.5233415233415233, 0.0, 0.0)

In [7]:
# Train the agent. The saved output below shows one progress line every 100 episodes.
# NOTE(review): the saved run ends in KeyboardInterrupt, so this assignment did not
# complete in that session; `res` only exists if the call is allowed to return.
res = nomappo.learn(50000, update_frequency=100, test_length=20, early_stopping=True)


Episode : 0, Reward : 0.9329999999999999, Train scores: 0.7546583850931677, Test score : 0.5588145432325085, policy lr: 0.0001
Episode : 100, Reward : 1.0238, Train scores: 0.7862851601908681, Test score : 0.5877072873877986, policy lr: 0.0001
Episode : 200, Reward : 1.08369, Train scores: 0.802408559177018, Test score : 0.6211746522411128, policy lr: 0.0001
Episode : 300, Reward : 1.1224149999999997, Train scores: 0.8169740045067706, Test score : 0.6614173228346456, policy lr: 0.0001
Episode : 400, Reward : 1.15458, Train scores: 0.8282787913983195, Test score : 0.7002316602316603, policy lr: 0.0001
Episode : 500, Reward : 1.2155200000000002, Train scores: 0.8487999639037799, Test score : 0.757679180887372, policy lr: 0.0001
Episode : 600, Reward : 1.249775, Train scores: 0.8605689032203944, Test score : 0.7973886328725038, policy lr: 0.0001
Episode : 700, Reward : 1.28103, Train scores: 0.8670837652469332, Test score : 0.8309669705574653, policy lr: 0.0001
Episode : 800, Reward : 1.2

Episode : 6700, Reward : 1.48535, Train scores: 0.952140247103517, Test score : 0.9956188389923329, policy lr: 0.0001
Episode : 6800, Reward : 1.476525, Train scores: 0.9528135162703942, Test score : 0.9900386559619387, policy lr: 0.0001
Episode : 6900, Reward : 1.481745, Train scores: 0.9520495061128104, Test score : 0.9904211646647407, policy lr: 0.0001
Episode : 7000, Reward : 1.4779, Train scores: 0.9480745909811934, Test score : 0.9878414890423296, policy lr: 0.0001
Episode : 7100, Reward : 1.4884300000000001, Train scores: 0.9516381407738751, Test score : 0.9929775280898876, policy lr: 0.0001
Episode : 7200, Reward : 1.4786599999999999, Train scores: 0.950522174878137, Test score : 0.9880346993718218, policy lr: 0.0001
Episode : 7300, Reward : 1.48552, Train scores: 0.9529252206211509, Test score : 0.9934843313682904, policy lr: 0.0001
Episode : 7400, Reward : 1.4863400000000002, Train scores: 0.9543759547558841, Test score : 0.9923977497339213, policy lr: 0.0001
Episode : 7500, 

KeyboardInterrupt: 

In [8]:
# Re-evaluate the agent after training, with a larger argument than the baseline call.
# NOTE(review): 100 is presumably the number of test episodes — confirm against NomaPPO.test.
nomappo.test(100)


(0.9929099876695437, 0.0, 0.0)

In [9]:
# Single evaluation with verbose=True: per the saved output, this prints the state,
# observation, prior, action, number discarded and reward at every timestep.
nomappo.test(1, verbose=True)


Timestep: 0
Current state: [[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
obs: tensor([-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
        -1., -1., -1., -1., -1., -1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
         1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  0.]), Prior: tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1.])
Action: tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1.])
Number discarded: 0
reward: 0.0


Timestep: 1
Current state: [[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.

obs: tensor([-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
        -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
         3.0000, -1.0000, -1.0000, -1.0000,  0.2000,  0.2000,  1.0000,  1.0000,
         0.3333,  1.0000,  0.2000,  1.0000,  1.0000,  1.0000,  0.2000,  1.0000,
         0.5000,  0.2000,  1.0000,  1.0000,  1.0000,  0.3333,  1.0000,  1.0000,
         2.0000]), Prior: tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1.])
Action: tensor([1., 0., 1., 1., 0., 1., 1., 1., 1., 1., 0., 1., 0., 1., 1., 0., 1., 1.,
        1., 1.])
Number discarded: 9
reward: 2.8


Timestep: 102
Current state: [[0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.

(0.9298245614035088, 0.0, 0.0)

In [8]:
# Save policy
# torch.save does not create missing parent folders, so make sure the target
# directory exists first; otherwise the save fails after a long training run.
# NOTE(review): the file name says "4" while the environment above uses k = 16 —
# confirm the naming scheme.
policy_path = 'results/models/policy_4_aperiodic_collision.pt'
os.makedirs(os.path.dirname(policy_path), exist_ok=True)
torch.save(nomappo.policy.state_dict(), policy_path)


In [None]:
# NOTE(review): hand-entered values with no label or assignment; presumably scores
# recorded from earlier runs — document where they come from or remove this cell.
np.array([1., 1., 0.997148908439231, 0.9627612101067923, 0.8730879305240304])

In [None]:
# NOTE(review): hand-entered values with no label or assignment; presumably scores
# recorded from earlier runs — document where they come from or remove this cell.
np.array([1., 1., 1., 0.9931465123362778, 0.9445454992379922])

In [12]:
# Second agent configuration. This rebinds `nomappo`, so the agent built earlier
# in the notebook is no longer reachable through that name.
# NOTE(review): hidden_size is 128 here but 256 for the agent constructed earlier;
# confirm which configuration produced the checkpoint loaded in the next cell.
reload_hparams = {
    'lr_actor': 1e-3,
    'lr_critic': 1e-3,
    'hidden_size': 128,
    'gamma': 0.3,
    'K_epochs': 4,
    'eps_clip': 0.1,
    'prior_weight': 1.,
    'beta': 0.1,
    'scheduler': False,
}

nomappo = NomaPPO(env, **reload_hparams)

In [16]:
# Restore the saved weights. The checkpoint is written from `nomappo.policy`
# elsewhere in this notebook, so load it into both `policy` and `policy_old`
# to keep the two networks in sync.
# map_location='cpu' lets the file be read on machines without the device it
# was saved from; load_state_dict then copies the values into the existing parameters.
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
checkpoint = torch.load('results/models/policy_4_aperiodic_collision.pt', map_location='cpu')
nomappo.policy.load_state_dict(checkpoint)
# Kept as the last expression so the cell still echoes the key-matching report.
nomappo.policy_old.load_state_dict(checkpoint)

<All keys matched successfully>

In [17]:
# Evaluate the agent after loading the checkpoint.
# NOTE(review): the saved output here is a scalar while the other test cells show a
# 3-tuple, so the outputs presumably come from different versions of NomaPPO.test.
nomappo.test(10)

1.0

In [9]:
# Scratch data: two independent 4x5 matrices of Bernoulli(0.2) draws (0/1 entries).
# `no` is sampled before `ns`, so the order of draws from the global RNG is unchanged.
sample_shape = (4, 5)
p_one = 0.2
no = np.random.binomial(n=1, p=p_one, size=sample_shape)
ns = np.random.binomial(n=1, p=p_one, size=sample_shape)

In [10]:
# Display the sampled `no` matrix.
no

array([[0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0]])

In [14]:
# Zero every row of `no` except rows 0 and 1.
# The previous form, `no[~np.array([0, 1])] = 0`, applied bitwise NOT to an integer
# array, producing the indices [-1, -2]. That zeroes the last two rows, which is the
# complement of rows {0, 1} only when the array has exactly 4 rows.
# An explicit boolean mask gives the complement for any number of rows.
# NOTE(review): assumes the intent was "keep rows 0 and 1" — confirm.
keep_rows = np.zeros(no.shape[0], dtype=bool)
keep_rows[[0, 1]] = True
no[~keep_rows] = 0

In [15]:
# Display `no` again after the row-zeroing step.
no

array([[0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])

In [29]:
# Display the sampled `ns` matrix.
ns

array([[0, 0, 0, 1, 1],
       [1, 0, 1, 0, 1],
       [0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0]])

In [30]:
# Indices of the rows of `no` whose sum is non-zero (for 0/1 data: rows holding at least one 1).
# nonzero() on the 1-D row sums returns a 1-tuple; unpack its single index array.
(noo,) = no.sum(axis=1).nonzero()

In [31]:
# Indices of the rows of `ns` whose sum is non-zero (for 0/1 data: rows holding at least one 1).
# nonzero() on the 1-D row sums returns a 1-tuple; unpack its single index array.
(nss,) = ns.sum(axis=1).nonzero()

In [32]:
# Display the non-zero row indices of `no`.
# NOTE(review): the saved output [1, 2, 3] does not match the `no` matrix displayed
# earlier (only row 0 is non-zero there), so the cells were presumably run out of
# order or `no` was regenerated — re-run from a fresh kernel to confirm.
noo

array([1, 2, 3])

In [33]:
# Display the non-zero row indices of `ns`.
nss

array([0, 1, 3])

In [34]:
# Row indices present in `nss` but absent from `noo`, i.e. rows that are non-zero
# in `ns` and all-zero in `no`. setdiff1d returns the sorted unique values.
np.setdiff1d(nss, noo)

array([0])