In [1]:
import numpy as np
from time import time
import torch
import pprint as pp

# Import the helper files
from utilities import get_time_string, print_elapsed_time

# Environment
* Observation Space: 33 dimensions of continuous type
* Action Space: 4 dimensions of continuous type
* Either one (1) or twenty (20) agents

In [2]:
INPUTDIM = 33
OUTPUTDIM = 4
NUMAGENTS = 1

In [3]:
#from mya2cnet import A2CNetwork
import mya2cnet

# Thx2: https://emacs.stackexchange.com/a/13483
import imp
imp.reload(mya2cnet)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

tstnet = mya2cnet.A2CNetwork(INPUTDIM, OUTPUTDIM).to(device)

## Test if a sample state of a single agent environment is propagated

In [4]:
tststate_np = np.array([
    [ 0.00000000e+00, -4.00000000e+00,  0.00000000e+00,  1.00000000e+00,
     -0.00000000e+00, -0.00000000e+00, -4.37113883e-08,  0.00000000e+00,
     0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
     0.00000000e+00,  0.00000000e+00, -1.00000000e+01,  0.00000000e+00,
     1.00000000e+00, -0.00000000e+00, -0.00000000e+00, -4.37113883e-08,
     0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
     0.00000000e+00,  0.00000000e+00,  7.90150642e+00, -1.00000000e+00,
     1.25147498e+00,  0.00000000e+00,  1.00000000e+00,  0.00000000e+00,
     -2.99753308e-01]
])

In [5]:
tstnet.forward(tststate_np)

(tensor([[-0.0051,  0.1328,  0.0052, -0.0617]], dtype=torch.float64, device='cuda:0'),
 tensor(1.00000e-02 *
        [[ 4.8203]], dtype=torch.float64, device='cuda:0'))

# Generate Random Numbers to feed into the network

In [6]:
NRUNS = 1000
NITER = 10

a_np = np.zeros( (NRUNS*NITER, NUMAGENTS, OUTPUTDIM) )
v_np = np.zeros( (NRUNS*NITER, NUMAGENTS, 1) )

for itr in range(NITER):
    for cnt in range(NRUNS):
        sta = itr * cnt
        randstate_np = np.random.randn(NUMAGENTS, INPUTDIM) * 10
        randstate_np = np.clip(randstate_np, -10, 10)
        a,v = tstnet.forward(randstate_np)

        a_np[sta+cnt] = a.detach().cpu().numpy()
        v_np[sta+cnt] = v.detach().cpu().numpy()

        
 

In [7]:
for iter in range(NITER):
    sta = iter * NRUNS 
    end = sta + NRUNS
    print(f'Action mean {str(np.mean(a_np[sta:end])).ljust(24)} & std {str(np.std(a_np[sta:end])).ljust(24)}')


Action mean 0.01806992940956553      & std 0.07055920574762753     
Action mean 0.013946419141299295     & std 0.06234564412910799     
Action mean 0.01187139778571822      & std 0.0578098392384888      
Action mean 0.010486230702119109     & std 0.05446580034043345     
Action mean 0.009435397248502547     & std 0.05181894710223405     
Action mean 0.008102069348195315     & std 0.04806598557903131     
Action mean 0.007067731179863056     & std 0.04497354104879684     
Action mean 0.00520536393794932      & std 0.03877280594804015     
Action mean 0.003604039979182432     & std 0.03235964608809426     
Action mean 0.0018184906032108763    & std 0.022929255438498063    


In [8]:
for iter in range(NITER):
    sta = iter * NRUNS 
    end = sta + NRUNS
    print(f'Value mean {str(np.mean(v_np[sta:end])).ljust(24)} & std {str(np.std(v_np[sta:end])).ljust(24)}')


Value mean 0.047655303408181464     & std 0.0009885637855382723   
Value mean 0.03668349388732266      & std 0.02006505138265131     
Value mean 0.031278761237937984     & std 0.022612699816849564    
Value mean 0.02758901156531         & std 0.023487843528357262    
Value mean 0.02489393306238604      & std 0.023784544977438305    
Value mean 0.02134127864920494      & std 0.023698726188102735    
Value mean 0.018634025755492643     & std 0.023262664488863726    
Value mean 0.013740400377345164     & std 0.021557796865836178    
Value mean 0.009512926029381674     & std 0.01903088086369295     
Value mean 0.004763119212104934     & std 0.014293132035872617    
