In [19]:
import torch
import tensorflow.compat.v1 as tf
import numpy as np
import baselines.common.tf_util as U
from tensorflow.python.ops import math_ops

class Pd(object):
    """
    Abstract interface for a single probability distribution.

    Concrete subclasses supply the parameters and override every method
    below except `logp`, which is derived from `neglogp`.
    """

    def flatparam(self):
        """Return the distribution's parameters as one flat tensor."""
        raise NotImplementedError

    def mode(self):
        """Return the most likely value under the distribution."""
        raise NotImplementedError

    def neglogp(self, x):
        """Return the negative log-probability of `x`."""
        # Subclasses implement the negative form because it is usually the
        # easier of the two to write down; `logp` simply negates it.
        raise NotImplementedError

    def kl(self, other):
        """Return the KL divergence from this distribution to `other`."""
        raise NotImplementedError

    def entropy(self):
        """Return the entropy of the distribution."""
        raise NotImplementedError

    def sample(self):
        """Draw a sample from the distribution."""
        raise NotImplementedError

    def logp(self, x):
        """Return the log-probability of `x`, i.e. the negation of `neglogp(x)`."""
        negative_log_prob = self.neglogp(x)
        return -negative_log_prob

class CategoricalPd(Pd):
    """
    Categorical distribution parameterized by unnormalized log-probabilities
    (logits); the last axis of `logits` indexes the categories.
    """
    def __init__(self, logits):
        self.logits = logits
    def flatparam(self):
        """Return the flat parameters, which are the logits themselves."""
        return self.logits
    def mode(self):
        """Return the index of the largest logit along the last axis."""
        return tf.argmax(self.logits, axis=-1)
    def neglogp(self, x):
        """
        Return the negative log-probability of the category indices `x`,
        computed as softmax cross-entropy against a one-hot encoding of `x`.
        """
        # return tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=x)
        # Note: we can't use sparse_softmax_cross_entropy_with_logits because
        #       the implementation does not allow second-order derivatives...
        # The one-hot depth is the static size of the logits' last axis.
        one_hot_actions = tf.one_hot(x, self.logits.get_shape().as_list()[-1])
        return tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=self.logits,
            labels=one_hot_actions)
    def kl(self, other):
        """
        Return KL(self || other) = sum_i p0_i * (log p0_i - log p1_i),
        reduced over the last axis.
        """
        # Subtract the per-row max before exponentiating so exp() cannot overflow.
        a0 = self.logits - tf.reduce_max(self.logits, axis=-1, keepdims=True)
        a1 = other.logits - tf.reduce_max(other.logits, axis=-1, keepdims=True)
        ea0 = tf.exp(a0)
        ea1 = tf.exp(a1)
        z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True)
        z1 = tf.reduce_sum(ea1, axis=-1, keepdims=True)
        p0 = ea0 / z0
        # a - log(z) is the log-softmax, i.e. the log-probability of each category.
        return tf.reduce_sum(p0 * (a0 - tf.log(z0) - a1 + tf.log(z1)), axis=-1)
    def entropy(self):
        """Return the entropy -sum_i p0_i * log p0_i, reduced over the last axis."""
        # Same max-shift as in `kl` for numerical stability.
        a0 = self.logits - tf.reduce_max(self.logits, axis=-1, keepdims=True)
        ea0 = tf.exp(a0)
        z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True)
        p0 = ea0 / z0
        # log(z0) - a0 is -log p0.
        return tf.reduce_sum(p0 * (tf.log(z0) - a0), axis=-1)
    def sample(self):
        """
        Draw category indices via the Gumbel-max trick: add Gumbel noise
        -log(-log(u)), u ~ Uniform(0, 1), to the logits and take the argmax.
        """
        # Draw the noise in the logits' dtype. tf.random_uniform defaults to
        # float32, which cannot be subtracted from logits of another float
        # dtype (e.g. float64) without a dtype-mismatch error.
        u = tf.random_uniform(tf.shape(self.logits), dtype=self.logits.dtype)
        return tf.argmax(self.logits - tf.log(-tf.log(u)), axis=-1)
    @classmethod
    def fromflat(cls, flat):
        """Build a distribution from flat parameters (the logits)."""
        return cls(flat)

In [20]:
# Two categories with logits [-1, 1]; score the action with index 1.
x = tf.constant([[-1.0, 1.0]])
pd = CategoricalPd(x)
# A sampled action (pd.sample()) could be used here instead; a fixed action
# keeps the printed numbers reproducible.
a = tf.constant([1])
neg_log_prob = pd.neglogp(a)
print(neg_log_prob)
# Last expression of the cell: exp(neglogp), shown as the cell's output.
tf.exp(neg_log_prob)

tf.Tensor([0.12692805], shape=(1,), dtype=float32)


<tf.Tensor: id=225, shape=(1,), dtype=float32, numpy=array([1.1353353], dtype=float32)>

In [21]:
import math

import torch
import torch.nn as nn
import torch.nn.functional as F

# Categorical
class FixedCategorical(torch.distributions.Categorical):
    """
    Categorical distribution whose `sample`, `log_probs` and `mode` keep a
    trailing singleton action dimension.

    This is a subclass rather than a set of attributes assigned onto
    torch.distributions.Categorical. Patching the shared class changes the
    behaviour for every user of Categorical in the process, and re-running
    the cell would wrap `sample` again and add one more unsqueeze each time.
    """

    def sample(self, sample_shape=torch.Size()):
        """Sample category indices and append a trailing dimension of size 1."""
        return super().sample(sample_shape).unsqueeze(-1)

    def log_probs(self, actions):
        """
        Return log-probabilities of `actions` with shape (actions.size(0), 1).

        The trailing dimension of `actions` is squeezed before the lookup;
        any remaining per-row entries are summed.
        """
        per_action = super().log_prob(actions.squeeze(-1))
        return per_action.view(actions.size(0), -1).sum(-1).unsqueeze(-1)

    def mode(self):
        """Return the most probable category index, keeping the last dimension."""
        return self.probs.argmax(dim=-1, keepdim=True)


# Same logits and action as the TensorFlow check: logits [-1, 1], action 1.
x = torch.Tensor([[-1.0, 1.0]])
dist = FixedCategorical(logits=x)
a = torch.Tensor([1])
log_prob = dist.log_probs(a)
print(log_prob)
print('More decimals', '{:.10f}'.format(log_prob.item()))


tensor([[-0.1269]])
More decimals -0.1269280910


In [22]:
# exp of the negative log-probability of action `a` under `dist`.
# NOTE(review): the printed label says "Sum grads" although the value is
# exp(-log_prob); the label is kept as-is to match the recorded output.
neg_log_prob = -dist.log_probs(a)
exp = torch.exp(neg_log_prob)
print('Sum grads', format(exp.item(), '.10f'))

Sum grads 1.1353353262


In [23]:
import torch
import torch.nn as nn
# Cross-check with nn.CrossEntropyLoss on the same logits and target index.
x = torch.Tensor([[-1.0, 1.0]])
a = torch.Tensor([1])
criterion1 = nn.CrossEntropyLoss()
# CrossEntropyLoss takes integer class indices, so cast the float target.
target = a.long()
loss1 = criterion1(x, target)
exp_loss = torch.exp(loss1).item()
print('Sum grads', format(exp_loss, '.10f'))

Sum grads 1.1353353262


In [None]:
# Scratch reference value: the exp(neglogp) result reported by the TensorFlow
# and PyTorch cells above for logits [-1, 1] and action 1.
1.1353353