In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

from src.utils import mlp, MLPGaussianActor, MLPCritic, MLPActorCritic

In [2]:
# Seed torch's global RNG up front so that the action sampling further down
# (and, presumably, the actor's weight initialisation) is repeatable on a
# fresh kernel run.
SEED = 0
torch.manual_seed(SEED)

# Length of one observation vector.
obs_dim = 10
# Only action_space.shape[0] (= 2) is read later, as the actor's action dimension.
action_space = torch.tensor([[1, 0, 1], [0, 1, 3]])
# Hidden layer sizes and activation class handed to the actor constructor.
hidden_sizes = (64, 64)
activation = nn.Tanh

# Fixed (despite the name, not random) batch of two identical observations,
# each holding the values 0..9 as float32.
random_state = torch.tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                             [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]], dtype=torch.float32)

# Core

## Actor

In [36]:
# Build the Gaussian policy under test from the config values defined earlier.
# The second argument evaluates to 2 for the (2, 3) action_space tensor.
# NOTE(review): assumes the constructor order is
# (obs_dim, act_dim, hidden_sizes, activation) — confirm against src.utils.
actor = MLPGaussianActor(obs_dim, action_space.shape[0], hidden_sizes, activation)

In [37]:
# Display the actor's log_std parameter; the recorded output shows a
# two-element Parameter with requires_grad=True.
actor.log_std

Parameter containing:
tensor([-0.5000, -0.5000], requires_grad=True)

In [71]:
# Sample from the distribution the actor builds for the observation batch;
# the recorded output has one row per observation row.
# NOTE(review): _distribution is an underscore-prefixed (private) method of the
# actor — calling it directly is fine for a scratch check but couples this
# notebook to src.utils internals.
action = actor._distribution(random_state).sample()
action

tensor([[-0.2400, -0.4764],
        [-0.7703,  0.8818]])

In [72]:
# Call the actor with observations and the sampled action; it returns a pair
# that is unpacked into the distribution (pi) and the log-probabilities (logp_a).
pi, logp_a = actor(random_state, action)
# One print call, one object per line.
print(pi, logp_a, sep="\n")

Normal(loc: torch.Size([2, 2]), scale: torch.Size([2, 2]))
tensor([-1.3901, -2.4319], grad_fn=<SumBackward1>)


## Critic

_TODO: no critic checks yet — `MLPCritic` is imported above but not exercised here._

## Discounted cumulative sum

In [9]:
# Small integer sequence 1..5 used as input for the discount check.
x = np.arange(1, 6)

In [13]:
from src.utils import discount_cumsum
import scipy.signal

# Reference result: filtering the reversed sequence with
# y[n] = x[n] + 0.9 * y[n-1] and reversing back gives, for each position t,
# x[t] + 0.9 * x[t+1] + 0.9**2 * x[t+2] + ...
a = scipy.signal.lfilter([1], [1, -0.9], x[::-1], axis=0)[::-1]
# Implementation under test, with the same discount factor.
b = discount_cumsum(x, 0.9)
print('spinning up: ', a)
print('mine: ', b)
# Element-wise comparison with a tolerance: the two results come from
# different floating-point computations, so exact `==` could report False
# for values that differ only by rounding.
print(np.isclose(a, b))

spinning up:  [11.4265 11.585  10.65    8.5     5.    ]
mine:  [11.4265 11.585  10.65    8.5     5.    ]
[ True  True  True  True  True]


## Combined shape

In [73]:
def combined_shape(length, shape=None):
    """Return a shape tuple whose first axis is ``length``.

    Args:
        length: Size of the leading axis.
        shape: Optional trailing dimensions. ``None`` adds nothing, a scalar
            (as judged by ``np.isscalar``) adds one axis, and any other
            iterable is unpacked into the remaining axes.

    Returns:
        ``(length,)``, ``(length, shape)`` or ``(length, *shape)`` accordingly.
    """
    leading = (length,)
    if shape is None:
        return leading
    if np.isscalar(shape):
        trailing = (shape,)
    else:
        trailing = tuple(shape)
    return leading + trailing

In [80]:
obs_dim, buffer_size = 10, 20

# With no per-item shape, combined_shape(buffer_size) would describe a 1-D
# buffer; here each of the buffer_size slots holds an obs_dim-long vector.
np.zeros(shape=combined_shape(buffer_size, obs_dim), dtype=np.float32).shape

(20, 10)

# dict

In [1]:
# Scratch dictionary with five keys, each mapped to its own small list,
# displayed to check how such a dict is laid out.
data = {
    "obs": [1, 2, 3],
    "act": [1, 2, 3],
    "ret": [1, 2, 3],
    "adv": [1, 2, 3],
    "logp": [1, 2, 3],
}
data

{'obs': [1, 2, 3],
 'act': [1, 2, 3],
 'ret': [1, 2, 3],
 'adv': [1, 2, 3],
 'logp': [1, 2, 3]}