In [1]:
import numpy as np
from time import time
import torch
import pprint as pp

# Import the helper files
from utilities import get_time_string, print_elapsed_time

# Environment
* Observation Space: 33 dimensions of continuous type
* Action Space: 4 dimensions of continuous type
* Either one (1) or twenty (20) agents

In [2]:
INPUTDIM = 33
OUTPUTDIM = 4
NUMAGENTS = 20

In [3]:
#from mya2cnet import A2CNetwork
import mya2cnet

# Thx2: https://emacs.stackexchange.com/a/13483
import imp
imp.reload(mya2cnet)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

tstnet = mya2cnet.A2CNetwork(INPUTDIM, OUTPUTDIM).to(device)

## Test if a sample state of a single agent environment is propagated

In [4]:
tststate_np = np.array([
    [ 0.00000000e+00, -4.00000000e+00,  0.00000000e+00,  1.00000000e+00,
     -0.00000000e+00, -0.00000000e+00, -4.37113883e-08,  0.00000000e+00,
     0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
     0.00000000e+00,  0.00000000e+00, -1.00000000e+01,  0.00000000e+00,
     1.00000000e+00, -0.00000000e+00, -0.00000000e+00, -4.37113883e-08,
     0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
     0.00000000e+00,  0.00000000e+00,  7.90150642e+00, -1.00000000e+00,
     1.25147498e+00,  0.00000000e+00,  1.00000000e+00,  0.00000000e+00,
     -2.99753308e-01]
])

In [5]:
tstnet.forward(tststate_np)

(tensor([[ 0.1088, -0.0112, -0.0175,  0.0276]], dtype=torch.float64, device='cuda:0'),
 tensor(1.00000e-02 *
        [[-6.4648]], dtype=torch.float64, device='cuda:0'))

# Generate Random Numbers to feed into the network

In [6]:
NRUNS = 1000
NITER = 500

#MODE = 'fullpass'
MODE = 'forward'

np.random.seed( np.uint32( time() ) ) 

a_np = np.zeros( (NRUNS*NITER, NUMAGENTS, OUTPUTDIM) )
v_np = np.zeros( (NRUNS*NITER, NUMAGENTS, 1) )

for itr in range(NITER):
    print(str(NITER - (itr+1)).rjust(3), end = ' -> ')
    with torch.no_grad():
        for cnt in range(NRUNS):
            sta = itr * cnt
            
            if MODE == 'fullpass':
                randstate_np = np.random.randn(NUMAGENTS, INPUTDIM) * 10
                randstate_np = np.clip(randstate_np, -10, 10)
                a, v, _, _ = tstnet.fullpass(randstate_np)
            
            if MODE == 'forward':
                randstate_np = np.random.randn(NUMAGENTS, INPUTDIM)
                randstate_np = np.clip(randstate_np, -1, 1)
                a, v = tstnet.forward(randstate_np)
            
            a_np[sta+cnt] = a.detach().cpu().numpy()
            v_np[sta+cnt] = v.detach().cpu().numpy()
        
    #Count non zero values in returned actions
    print( f'pt_n0:{torch.nonzero(a).size()[0]}', end = ', ' )

print('FIN!')

        
 

499 -> pt_n0:80, 498 -> pt_n0:80, 497 -> pt_n0:80, 496 -> pt_n0:80, 495 -> pt_n0:80, 494 -> pt_n0:80, 493 -> pt_n0:80, 492 -> pt_n0:80, 491 -> pt_n0:80, 490 -> pt_n0:80, 489 -> pt_n0:80, 488 -> pt_n0:80, 487 -> pt_n0:80, 486 -> pt_n0:80, 485 -> pt_n0:80, 484 -> pt_n0:80, 483 -> pt_n0:80, 482 -> pt_n0:80, 481 -> pt_n0:80, 480 -> pt_n0:80, 479 -> pt_n0:80, 478 -> pt_n0:80, 477 -> pt_n0:80, 476 -> pt_n0:80, 475 -> pt_n0:80, 474 -> pt_n0:80, 473 -> pt_n0:80, 472 -> pt_n0:80, 471 -> pt_n0:80, 470 -> pt_n0:80, 469 -> pt_n0:80, 468 -> pt_n0:80, 467 -> pt_n0:80, 466 -> pt_n0:80, 465 -> pt_n0:80, 464 -> pt_n0:80, 463 -> pt_n0:80, 462 -> pt_n0:80, 461 -> pt_n0:80, 460 -> pt_n0:80, 459 -> pt_n0:80, 458 -> pt_n0:80, 457 -> pt_n0:80, 456 -> pt_n0:80, 455 -> pt_n0:80, 454 -> pt_n0:80, 453 -> pt_n0:80, 452 -> pt_n0:80, 451 -> pt_n0:80, 450 -> pt_n0:80, 449 -> pt_n0:80, 448 -> pt_n0:80, 447 -> pt_n0:80, 446 -> pt_n0:80, 445 -> pt_n0:80, 444 -> pt_n0:80, 443 -> pt_n0:80, 442 -> pt_n0:80, 441 -> pt_n0:8

In [7]:
for iter in range(NITER):
    sta = iter * NRUNS 
    end = sta + NRUNS
    print(f'Action mean {str(np.mean(a_np[sta:end])).ljust(24)} , std {str(np.std(a_np[sta:end])).ljust(24)} , np_n0:{np.count_nonzero(a_np[sta:end])}')


Action mean 0.0354966238328266       , std 0.046043545645408586     , np_n0:80000
Action mean 0.030708518665331433     , std 0.044487527629796075     , np_n0:69200
Action mean 0.0284633174140039       , std 0.04360540146099472      , np_n0:64160
Action mean 0.027217954786519744     , std 0.043039996100534064     , np_n0:61360
Action mean 0.026161252496702276     , std 0.04249986506344207      , np_n0:58960
Action mean 0.025503415603319424     , std 0.04217277562505235      , np_n0:57440
Action mean 0.024812908025846576     , std 0.04177971729897176      , np_n0:55920
Action mean 0.02428459374702209      , std 0.041508471826162865     , np_n0:54800
Action mean 0.023683419822997973     , std 0.041160837016166386     , np_n0:53360
Action mean 0.023357741450902646     , std 0.04098300923556149      , np_n0:52640
Action mean 0.02305977973525725      , std 0.04077878367100363      , np_n0:52000
Action mean 0.022670456922394867     , std 0.04057408483662483      , np_n0:51120
Action mean 0.02

In [8]:
for iter in range(NITER):
    sta = iter * NRUNS 
    end = sta + NRUNS
    print(f'Value mean {str(np.mean(v_np[sta:end])).ljust(24)}, std {str(np.std(v_np[sta:end])).ljust(24)}, np_n0:{np.count_nonzero(v_np[sta:end])}')


Value mean -0.04882552011273025    , std 0.003265987894884321    , np_n0:20000
Value mean -0.04222564686539625    , std 0.016954208315930683    , np_n0:17300
Value mean -0.03913557825226639    , std 0.0196658554007803      , np_n0:16040
Value mean -0.03744256728853709    , std 0.02083728003481021     , np_n0:15340
Value mean -0.035951838118850506   , std 0.021660237644298183    , np_n0:14740
Value mean -0.03501096648394148    , std 0.022112840601758957    , np_n0:14360
Value mean -0.03409433320765765    , std 0.022540936048318583    , np_n0:13980
Value mean -0.033452662728048074   , std 0.022842988373512143    , np_n0:13700
Value mean -0.03253622874150785    , std 0.023140026645748706    , np_n0:13340
Value mean -0.03212255061434414    , std 0.023313372904557265    , np_n0:13160
Value mean -0.03173236877742132    , std 0.02343517029968193     , np_n0:13000
Value mean -0.031151582345245715   , std 0.02356032400905004     , np_n0:12780
Value mean -0.030979237159048857   , std 0.023630583

In [9]:
start = int( NRUNS*NITER*np.random.rand() )
print('Start:', start)
print(a_np[start:start+100])

Start: 116772
[[[ 0.10274877  0.00330996 -0.00911094  0.04473358]
  [ 0.09811814  0.0064151  -0.01322325  0.03844621]
  [ 0.11280602 -0.00750075 -0.0175494   0.05459872]
  ...
  [ 0.10554838  0.00370329 -0.01735633  0.04983725]
  [ 0.0977287   0.00644305 -0.01441123  0.05036896]
  [ 0.10793834 -0.00430521 -0.01251002  0.04982541]]

 [[ 0.          0.          0.          0.        ]
  [ 0.          0.          0.          0.        ]
  [ 0.          0.          0.          0.        ]
  ...
  [ 0.          0.          0.          0.        ]
  [ 0.          0.          0.          0.        ]
  [ 0.          0.          0.          0.        ]]

 [[ 0.10228121  0.00173086 -0.0108745   0.04918752]
  [ 0.10259668 -0.00022313 -0.01372521  0.04798045]
  [ 0.10497249 -0.00236881 -0.01277935  0.04979437]
  ...
  [ 0.09910488  0.00412069 -0.0137073   0.05352739]
  [ 0.10578554 -0.00054135 -0.01228321  0.05681851]
  [ 0.1089966  -0.00715201 -0.01679508  0.05042937]]

 ...

 [[ 0.          0.  