In [18]:
import torch
import torch.nn as nn
# from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F
import numpy as np

class MLP(nn.Module):
    def __init__(self, input_size, output_size, n_layers, 
            size, activation=torch.tanh, output_activation=None):
        super(MLP, self).__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.activation = activation
        self.size = size
        self.n_layers = n_layers
        self.output_activation = output_activation
        
        
        layers_size = [self.input_size] + ([self.size]*self.n_layers) + [self.output_size]
        self.layers = nn.ModuleList([nn.Linear(layers_size[i], layers_size[i+1]) 
                                    for i in range(len(layers_size)-1)])
        for layer in self.layers:
            nn.init.xavier_uniform_(layer.weight)
        

    def forward(self, x):
        
        out = x
        for layer in self.layers:
            out = self.activation(layer(out))

        if self.output_activation is not None:
            out = self.output_activation(out)
        return out

In [19]:
ac_dim = 4
obs_dim = 10
n_layers = 2
size = 64
learning_rate = 5e-3
criterion = torch.nn.MSELoss(reduction='mean')

In [20]:
torch.manual_seed(0)
np.random.seed(0)

In [21]:
observation = torch.randn(4, obs_dim)
actions = torch.randn(4, ac_dim)

In [22]:
print(observation, actions)

tensor([[-1.1258, -1.1524, -0.2506, -0.4339,  0.8487,  0.6920, -0.3160, -2.1152,
          0.3223, -1.2633],
        [ 0.3500,  0.3081,  0.1198,  1.2377,  1.1168, -0.2473, -1.3527, -1.6959,
          0.5667,  0.7935],
        [ 0.5988, -1.5551, -0.3414,  1.8530, -0.2159, -0.7425,  0.5627,  0.2596,
         -0.1740, -0.6787],
        [ 0.9383,  0.4889,  1.2032,  0.0845, -1.2001, -0.0048, -0.5181, -0.3067,
         -1.5810,  1.7066]]) tensor([[ 0.2055, -0.4503, -0.5731, -0.5554],
        [ 0.5943,  1.5419,  0.5073, -0.5910],
        [-1.3253,  0.1886, -0.0691, -0.4949],
        [-1.4959, -0.1938,  0.4455,  1.3253]])


In [23]:
mean = MLP(obs_dim, output_size=ac_dim, n_layers=n_layers, size=size)
logstd = torch.zeros(ac_dim, requires_grad=True) # BUG
optimizer = optim.Adam([logstd] + list(mean.parameters()), lr=learning_rate)

In [26]:
for param in list(mean.parameters()):
    print(param.data)

tensor([[ 0.0924,  0.0440, -0.2718,  0.1034, -0.0782,  0.2838,  0.2779,  0.0983,
          0.2245,  0.1028],
        [ 0.2694, -0.0263,  0.0704, -0.0470, -0.0050, -0.0859, -0.1127,  0.1647,
          0.1222, -0.1257],
        [-0.2509,  0.1392, -0.2619, -0.0126, -0.2450,  0.2384,  0.2319, -0.2340,
         -0.2647, -0.1684],
        [ 0.0786, -0.1424, -0.0678, -0.0722,  0.0522, -0.1347,  0.1295, -0.2038,
         -0.0253,  0.2387],
        [-0.1572,  0.2185,  0.1482, -0.0920,  0.0987, -0.1924,  0.0229,  0.0807,
          0.2463, -0.1593],
        [-0.1619, -0.1421, -0.2774,  0.0576, -0.2357, -0.0764, -0.1270,  0.0159,
          0.0864, -0.2069],
        [-0.0841, -0.0274, -0.1646,  0.0658,  0.1054, -0.2029,  0.2239,  0.2023,
         -0.0855,  0.0424],
        [ 0.0347, -0.0393, -0.1911,  0.2346,  0.1401, -0.2380, -0.2207,  0.2708,
          0.0454, -0.0654],
        [ 0.1930,  0.0818,  0.1022,  0.2259, -0.0222,  0.2713, -0.2000, -0.1733,
          0.0482, -0.0346],
        [ 0.1625, -

In [31]:
with torch.autograd.set_detect_anomaly(True):
    logits_out = mean(observation)
    sample_ac = logits_out + torch.exp(logstd) * torch.randn(logits_out.size()) # BUG
    loss = criterion(actions, sample_ac)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [32]:
loss

tensor(1.3758, grad_fn=<MeanBackward0>)

In [33]:
logstd

tensor([-0.0118, -0.0124, -0.0125, -0.0145], requires_grad=True)

In [10]:
logstd.detach().numpy()

array([-0.005, -0.005, -0.005, -0.005], dtype=float32)

In [47]:
mean = MLP(obs_dim, output_size=ac_dim, n_layers=n_layers, size=size)
logstd = torch.zeros(ac_dim, requires_grad=True) # BUG

In [48]:
logits_out = mean(observation)
sample_ac = logits_out + torch.exp(logstd) * torch.randn(logits_out.size()) # BUG

In [49]:
loss = criterion(actions, sample_ac)
optimizer = optim.Adam([logstd] + list(mean.parameters()), lr=learning_rate)

# train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

In [52]:
optimizer.zero_grad()
loss.backward(retain_graph=True)
optimizer.step()

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [10, 4]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).

In [32]:
normal = torch.distributions.Normal(torch.tensor([1., -1]), scale=torch.tensor([1, 2.]))
diag = torch.distributions.Independent(normal, 1)

In [39]:
type(torch.tensor([1., 3.], dtype=torch.float))

torch.Tensor

In [30]:
import tensorflow as tf
import tensorflow_probability as tfp

In [36]:
sess = tf.Session()
with sess.as_default():
    mvn = tfp.distributions.MultivariateNormalDiag(
        loc=[1., -1],
        scale_diag=[1, 2.])

    print(mvn.log_prob([1, 3]).eval())

-4.5310245




In [17]:
x = torch.distributions.Categorical(probs=torch.tensor([0.2, 0.5, 0.3]))
x.log_prob(torch.tensor(3))

RuntimeError: index 3 is out of bounds for dimension 0 with size 3

In [65]:
torch.squeeze(torch.multinomial(torch.tensor([.2, .3]), num_samples=1))

tensor(1)

In [39]:
x = torch.Tensor([2,3])
x.type()

'torch.FloatTensor'

In [40]:
torch.Tensor([[2, 3, 6], [1, 4, 8]]) * torch.Tensor([2,3, 1])

tensor([[ 4.,  9.,  6.],
        [ 2., 12.,  8.]])

In [10]:
PATH = 'temp.pth'

In [11]:
torch.save({
            'logstd': logstd,
            'mean_preds': mean.state_dict(),
            }, PATH)

In [12]:
checkpoint = torch.load(PATH)

In [13]:
lg = checkpoint['logstd']

In [94]:
import functools
import torch
import inspect
import numpy as np




def _is_method(func):
    spec = inspect.signature(func)
    return 'self' in spec.parameters

def convert_args_to_tensor(positional_args_list=None, keyword_args_list=None):
    """A decorator which converts args in positional_args_list to torch.Tensor

    Args:
        positional_args_list ([list]): [arguments to be converted to torch.Tensor. If None, 
        it will convert all positional arguments to Tensor]
        keyword_args_list ([list]): [arguments to be converted to torch.Tensor. If None, 
        it will convert all keyword arguments to Tensor]
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            
            
            
            _keyword_args_list = keyword_args_list
            _positional_args_list = positional_args_list
            if keyword_args_list is None:
                _keyword_args_list = list(kwargs.keys())

            if positional_args_list is None:
                _positional_args_list = list(range(len(args)))
                if _is_method(func):
                    _positional_args_list = _positional_args_list[1:]
            
            args = list(args)
            for i, arg in enumerate(args):
                if i in _positional_args_list:
                    if type(arg) == np.ndarray:
                        args[i] = torch.from_numpy(arg).type(torch.FloatTensor)
                    elif type(arg) == torch.Tensor:
                        pass
                    else:
                        raise ValueError('Arguments should be Numpy arrays, but argument in position {} is not'.format(str(i)))
            
            for key, arg in kwargs.items():
                if key in _keyword_args_list:
                    if type(arg) == np.ndarray:
                        kwargs[key] = torch.from_numpy(arg).type(torch.FloatTensor)
                    elif type(arg) == torch.Tensor:
                        pass
                    else:
                        raise ValueError('Arguments should be Numpy arrays, but argument {} is not'.format(str(key)))
            
            return func(*args, **kwargs)

        return wrapper

    return decorator
class X:
    @convert_args_to_tensor()
    def function(self, a, b):
        print('a, b: ', a, b)
        return a, b

In [95]:
import inspect

def is_method(func):
    spec = inspect.signature(func)
    print(spec.parameters)
    return 'self' in spec.parameters

In [96]:
x = X()
is_method(x.function)

OrderedDict([('a', <Parameter "a">), ('b', <Parameter "b">)])


False

In [97]:
a, _ = x.function(np.array([2]),np.array([3]))

a, b:  tensor([2.]) tensor([3.])


In [18]:
a

tensor([3.])

In [21]:
import numpy as np
type(np.array([2.,3.])) == np.ndarray

True

In [25]:
list(a.keys())

['2']

In [23]:
for key, d in a.items():
    print(key, d)

2 3


In [83]:
len(torch.tensor([2, 4, 5]).shape)

1