In [1]:
import numpy as np
from time import time
import torch
import pprint as pp

# Import the helper files
from utilities import get_time_string, print_elapsed_time

# Environment
* Observation Space: 33 dimensions of continuous type
* Action Space: 4 dimensions of continuous type
* Either one (1) or twenty (20) agents

In [2]:
INPUTDIM = 33
OUTPUTDIM = 4
NUMAGENTS = 1

In [3]:
#from mya2cnet import A2CNetwork
import mya2cnet

# Thx2: https://emacs.stackexchange.com/a/13483
import imp
imp.reload(mya2cnet)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

tstnet = mya2cnet.A2CNetwork(INPUTDIM, OUTPUTDIM).to(device)

## Test if a sample state of a single agent environment is propagated

In [4]:
tststate_np = np.array([
    [ 0.00000000e+00, -4.00000000e+00,  0.00000000e+00,  1.00000000e+00,
     -0.00000000e+00, -0.00000000e+00, -4.37113883e-08,  0.00000000e+00,
     0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
     0.00000000e+00,  0.00000000e+00, -1.00000000e+01,  0.00000000e+00,
     1.00000000e+00, -0.00000000e+00, -0.00000000e+00, -4.37113883e-08,
     0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
     0.00000000e+00,  0.00000000e+00,  7.90150642e+00, -1.00000000e+00,
     1.25147498e+00,  0.00000000e+00,  1.00000000e+00,  0.00000000e+00,
     -2.99753308e-01]
])

In [5]:
tstnet.forward(tststate_np)

(tensor([[-0.0051,  0.1328,  0.0052, -0.0617]], dtype=torch.float64, device='cuda:0'),
 tensor(1.00000e-02 *
        [[ 4.8203]], dtype=torch.float64, device='cuda:0'))

# Generate Random Numbers to feed into the network

In [6]:
NRUNS = 1000
NITER = 10

a_np = np.zeros( (NRUNS*NITER, NUMAGENTS, OUTPUTDIM) )
v_np = np.zeros( (NRUNS*NITER, NUMAGENTS, 1) )

for itr in range(NITER):
    print(itr+1, end = ' -> ')
    for cnt in range(NRUNS):
        sta = itr * cnt
        randstate_np = np.random.randn(NUMAGENTS, INPUTDIM) * 10
        randstate_np = np.clip(randstate_np, -10, 10)
        a, v, _, _ = tstnet.fullpass(randstate_np)

        a_np[sta+cnt] = a.detach().cpu().numpy()
        v_np[sta+cnt] = v.detach().cpu().numpy()

print('FIN!')

        
 

1 -> 2 -> 3 -> 4 -> 5 -> 6 -> 7 -> 8 -> 9 -> 10 -> FIN!


In [7]:
for iter in range(NITER):
    sta = iter * NRUNS 
    end = sta + NRUNS
    print(f'Action mean {str(np.mean(a_np[sta:end])).ljust(24)} & std {str(np.std(a_np[sta:end])).ljust(24)}')


Action mean 0.1264653792821894       & std 0.49359264222125415     
Action mean 0.09716996034751203      & std 0.43659497419434173     
Action mean 0.08338230299904854      & std 0.4048099259934109      
Action mean 0.07327580515307648      & std 0.3810211078189575      
Action mean 0.06623820915357735      & std 0.362530174164077       
Action mean 0.05666445488549829      & std 0.3362911526248267      
Action mean 0.049323307840203647     & std 0.3148658454544771      
Action mean 0.0366031248950667       & std 0.27156319502341913     
Action mean 0.025329392716284087     & std 0.2263116040282505      
Action mean 0.012692236222705532     & std 0.16076698181259003     


In [8]:
for iter in range(NITER):
    sta = iter * NRUNS 
    end = sta + NRUNS
    print(f'Value mean {str(np.mean(v_np[sta:end])).ljust(24)} & std {str(np.std(v_np[sta:end])).ljust(24)}')


Value mean 0.0476387424429003       & std 0.0009870148371510256   
Value mean 0.036613174067540576     & std 0.020027333313972582    
Value mean 0.03132208159110615      & std 0.022644742685977114    
Value mean 0.027573756256877517     & std 0.023474419542461072    
Value mean 0.0249487317155893       & std 0.023836701604464584    
Value mean 0.02131433412156333      & std 0.02366816490187974     
Value mean 0.018600610548027578     & std 0.023223231079879794    
Value mean 0.01374313971486581      & std 0.021562532137860385    
Value mean 0.009506431971284368     & std 0.01901775941193088     
Value mean 0.0047703468179125215    & std 0.014313360647998452    


In [9]:
start = int( NRUNS*NITER*np.random.rand() )
print('Start:', start)
print(a_np[start:start+100])

Start: 7179
[[[ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00]]

 [[-2.82470902e-02  9.33659208e-01  2.97762210e-02 -4.09845137e-01]]

 [[ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00]]

 [[-4.62647414e-02  9.22291061e-01  2.03932443e-02 -4.09086668e-01]]

 [[ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00]]

 [[-4.17019699e-02  9.36616515e-01  2.27215829e-02 -4.22173637e-01]]

 [[ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00]]

 [[ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00]]

 [[ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00]]

 [[ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00]]

 [[ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00]]

 [[-2.77493952e-02  9.30600384e-01  1.62156727e-02 -4.00560265e-01]]

 [[-5.13901194e-02  9.35744414e-01  1.61314699e-02 -4.05755370e-01]]

 [[-3.16173995e-02  9.31097742e-01  3.98188450e-02 -4.15358557e-01]]

 [[ 0.00