In [1]:
import numpy as np
from time import time
import torch
import pprint as pp

# Import the helper files
from utilities import get_time_string, print_elapsed_time

# Environment
* Observation Space: 33 dimensions of continuous type
* Action Space: 4 dimensions of continuous type
* Either one (1) or twenty (20) agents

In [2]:
INPUTDIM = 33
OUTPUTDIM = 4
NUMAGENTS = 20

In [3]:
#from mya2cnet import A2CNetwork
import mya2cnet

# Thx2: https://emacs.stackexchange.com/a/13483
import imp
imp.reload(mya2cnet)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

tstnet = mya2cnet.A2CNetwork(INPUTDIM, OUTPUTDIM).to(device)

## Test if a sample state of a single agent environment is propagated

In [4]:
tststate_np = np.array([
    [ 0.00000000e+00, -4.00000000e+00,  0.00000000e+00,  1.00000000e+00,
     -0.00000000e+00, -0.00000000e+00, -4.37113883e-08,  0.00000000e+00,
     0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
     0.00000000e+00,  0.00000000e+00, -1.00000000e+01,  0.00000000e+00,
     1.00000000e+00, -0.00000000e+00, -0.00000000e+00, -4.37113883e-08,
     0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
     0.00000000e+00,  0.00000000e+00,  7.90150642e+00, -1.00000000e+00,
     1.25147498e+00,  0.00000000e+00,  1.00000000e+00,  0.00000000e+00,
     -2.99753308e-01]
])

In [5]:
tstnet.forward(tststate_np)

(tensor([[-0.1636,  0.1043,  0.1437, -0.0125]], dtype=torch.float64, device='cuda:0'),
 tensor(1.00000e-02 *
        [[-9.9490]], dtype=torch.float64, device='cuda:0'))

# Generate Random Numbers to feed into the network

In [6]:
NRUNS = 1000
NITER = 200

#MODE = 'fullpass'
MODE = 'forward'

np.random.seed( np.uint32( time() ) ) 

a_np = np.zeros( (NRUNS*NITER, NUMAGENTS, OUTPUTDIM) )
v_np = np.zeros( (NRUNS*NITER, NUMAGENTS, 1) )

for itr in range(NITER):
    print(str(NITER - (itr+1)).rjust(3), end = ' -> ')
    with torch.no_grad():
        for cnt in range(NRUNS):
            sta = itr * cnt
            
            if MODE == 'fullpass':
                randstate_np = np.random.randn(NUMAGENTS, INPUTDIM) * 10
                randstate_np = np.clip(randstate_np, -10, 10)
                a, v, _, _ = tstnet.fullpass(randstate_np)
            
            if MODE == 'forward':
                randstate_np = np.random.randn(NUMAGENTS, INPUTDIM)
                randstate_np = np.clip(randstate_np, -1, 1)
                a, v = tstnet.forward(randstate_np)
                ca = torch.mul(a, 6)
            
            a_np[sta+cnt] = a.detach().cpu().numpy()
            v_np[sta+cnt] = v.detach().cpu().numpy()
        
    #Count non zero values in returned actions
    print( f'pt:n0=\t{torch.nonzero(a).size()[0]}, mean=\t{torch.mean(a)}, std=\t{torch.std(a)}, np_std=\t{np.std(a.detach().cpu().numpy())}')
    print( f'Min: {torch.min(ca)} Max: {torch.max(ca)}')

print('FIN!')

        
 

199 -> pt:n0=	80, mean=	-0.0060905509581166625, std=	0.06799461659870971, np_std=	0.06756831385957075
Min: -0.6312941356463011 Max: 0.9991836432962728
198 -> pt:n0=	80, mean=	-0.005687530759310092, std=	0.061749675236888506, np_std=	0.06136252612110207
Min: -0.5933676960353327 Max: 0.8942394751320211
197 -> pt:n0=	80, mean=	0.0030098348986492287, std=	0.07188497198128307, np_std=	0.07143427805885966
Min: -0.7060659910523641 Max: 1.0298311964946147
196 -> pt:n0=	80, mean=	-0.0029851654887165144, std=	0.06879394045229965, np_std=	0.06836262622893899
Min: -0.764595000577023 Max: 0.8910055122879135
195 -> pt:n0=	80, mean=	0.0004194763615049774, std=	0.0708735235188427, np_std=	0.07042917102860329
Min: -0.6801026881998845 Max: 1.0407239227530034
194 -> pt:n0=	80, mean=	-0.0071364746624471105, std=	0.06602341399588084, np_std=	0.06560947001558219
Min: -0.5525230584994492 Max: 0.9313350247073787
193 -> pt:n0=	80, mean=	-0.005106994952367727, std=	0.06537928223011895, np_std=	0.064969376733303

In [7]:
for iter in range(NITER):
    sta = iter * NRUNS 
    end = sta + NRUNS
    print(f'Action mean {str(np.mean(a_np[sta:end])).ljust(24)} , std {str(np.std(a_np[sta:end])).ljust(24)} , np_n0:{np.count_nonzero(a_np[sta:end])}')


Action mean -0.0016099218049734397   , std 0.06800384521402045      , np_n0:80000
Action mean -0.0012099513700995854   , std 0.06336627695561695      , np_n0:69200
Action mean -0.001224900164119664    , std 0.06091529666836372      , np_n0:64160
Action mean -0.0011865434014366534   , std 0.059617588655881744     , np_n0:61360
Action mean -0.0011086552524848586   , std 0.05847316541639366      , np_n0:58960
Action mean -0.001176666952571031    , std 0.057595928864677966     , np_n0:57440
Action mean -0.0009014614902586917   , std 0.05713554097965917      , np_n0:55920
Action mean -0.0009691916217066236   , std 0.05642949655322343      , np_n0:54800
Action mean -0.0011184161804341064   , std 0.05567149021589589      , np_n0:53360
Action mean -0.0010465031426574681   , std 0.05516095610716235      , np_n0:52640
Action mean -0.0011000029909413475   , std 0.05481644296882707      , np_n0:52000
Action mean -0.0009859836601330656   , std 0.05432914567860906      , np_n0:51120
Action mean -0.0

In [8]:
for iter in range(NITER):
    sta = iter * NRUNS 
    end = sta + NRUNS
    print(f'Value mean {str(np.mean(v_np[sta:end])).ljust(24)}, std {str(np.std(v_np[sta:end])).ljust(24)}, np_n0:{np.count_nonzero(v_np[sta:end])}')


Value mean -0.10388006176783933    , std 0.037952140044127496    , np_n0:20000
Value mean -0.09024514980934831    , std 0.05019559353333523     , np_n0:17300
Value mean -0.08279076461486952    , std 0.05335289183934758     , np_n0:16040
Value mean -0.07936966776813915    , std 0.05520727479047126     , np_n0:15340
Value mean -0.07627012278033342    , std 0.05617458074272736     , np_n0:14740
Value mean -0.07488063383459535    , std 0.05722046363519875     , np_n0:14360
Value mean -0.07279123519009911    , std 0.057611334417890035    , np_n0:13980
Value mean -0.07088421570572125    , std 0.05747443103087925     , np_n0:13700
Value mean -0.06922761874884541    , std 0.05792359162564077     , np_n0:13340
Value mean -0.06847755538583729    , std 0.05820797215429472     , np_n0:13160
Value mean -0.06747379084770382    , std 0.058260921450657706    , np_n0:13000
Value mean -0.06620683742305582    , std 0.05835870775287908     , np_n0:12780
Value mean -0.06629590368710638    , std 0.058640146

In [9]:
start = int( NRUNS*NITER*np.random.rand() )
print('Start:', start)
print(a_np[start:start+100])

Start: 66788
[[[-0.02807158 -0.05899622  0.09437485 -0.08354324]
  [-0.10479089 -0.02658083  0.12548955 -0.00767426]
  [ 0.01088764 -0.09267876  0.08064791 -0.07282104]
  ...
  [ 0.01182508 -0.0387336   0.07912141 -0.04185949]
  [-0.07249321 -0.01944485  0.12859831 -0.08721975]
  [ 0.00197023 -0.03933378  0.09335635 -0.06243252]]

 [[-0.08391527 -0.00931178  0.09981989 -0.02017619]
  [-0.01518552 -0.01181839  0.11330858 -0.02737208]
  [ 0.0001643  -0.01027838  0.09928007 -0.05823138]
  ...
  [-0.00971551 -0.05058889  0.09846873 -0.03722656]
  [ 0.01613794 -0.09128964  0.06616402 -0.0499878 ]
  [-0.06181805  0.02585768  0.10797648 -0.04615349]]

 [[ 0.          0.          0.          0.        ]
  [ 0.          0.          0.          0.        ]
  [ 0.          0.          0.          0.        ]
  ...
  [ 0.          0.          0.          0.        ]
  [ 0.          0.          0.          0.        ]
  [ 0.          0.          0.          0.        ]]

 ...

 [[-0.04247103 -0.057