In [1]:
import mxnet as mx
from mxnet import gluon, nd, autograd
import mxnet.ndarray as F

In [3]:
# Print the built-in documentation of gluon's Conv2D layer (description and
# constructor parameters) for quick reference.
help(gluon.nn.Conv2D)

Help on class Conv2D in module mxnet.gluon.nn.conv_layers:

class Conv2D(_Conv)
 |  2D convolution layer (e.g. spatial convolution over images).
 |  
 |  This layer creates a convolution kernel that is convolved
 |  with the layer input to produce a tensor of
 |  outputs. If `use_bias` is True,
 |  a bias vector is created and added to the outputs. Finally, if
 |  `activation` is not `None`, it is applied to the outputs as well.
 |  
 |  If `in_channels` is not specified, `Parameter` initialization will be
 |  deferred to the first time `forward` is called and `in_channels` will be
 |  inferred from the shape of input data.
 |  
 |  Parameters
 |  ----------
 |  channels : int
 |      The dimensionality of the output space, i.e. the number of output
 |      channels (filters) in the convolution.
 |  kernel_size :int or tuple/list of 2 int
 |      Specifies the dimensions of the convolution window.
 |  strides : int or tuple/list of 2 int,
 |      Specify the strides of the convolution.

In [2]:
# Experiment: can autograd back-propagate through softmax followed by argmax?
# Two identical input vectors; attach_grad() marks each one as a variable whose
# gradient should be computed by backward().
a = nd.array([20, 30, 40])
a.attach_grad()

b = nd.array([20, 30, 40])
b.attach_grad()

# Record the forward computation so that autograd can differentiate it.
with autograd.record():
    c = F.softmax(a * b)  # softmax of the element-wise product
    d = nd.argmax(c)  # index of the largest softmax entry
    loss = d.sum()

# NOTE(review): argmax is piecewise constant, so the gradients that reach `a`
# and `b` are presumably all zeros -- confirm by inspecting a.grad / b.grad.
loss.backward()

In [2]:
# Each row of `a` is a categorical distribution over three classes
# (the entries of every row sum to 1).
a = nd.array([[0.2, 0.6, 0.2], [0.4, 0.5, 0.1]])
# Draw one class index per row. The draw is random and no seed is set, so the
# printed indices can differ between runs.
b = nd.sample_multinomial(a)
print(b)


[1 1]
<NDArray 2 @cpu(0)>


In [2]:
# Print the built-in documentation of gluon's SoftmaxCrossEntropyLoss
# (formula, expected label shapes, parameters).
help(gluon.loss.SoftmaxCrossEntropyLoss)

Help on class SoftmaxCrossEntropyLoss in module mxnet.gluon.loss:

class SoftmaxCrossEntropyLoss(Loss)
 |  Computes the softmax cross entropy loss. (alias: SoftmaxCELoss)
 |  
 |  If `sparse_label` is `True` (default), label should contain integer
 |  category indicators:
 |  
 |  .. math::
 |  
 |      \DeclareMathOperator{softmax}{softmax}
 |  
 |      p = \softmax({pred})
 |  
 |      L = -\sum_i \log p_{i,{label}_i}
 |  
 |  `label`'s shape should be `pred`'s shape with the `axis` dimension removed.
 |  i.e. for `pred` with shape (1,2,3,4) and `axis = 2`, `label`'s shape should
 |  be (1,2,4).
 |  
 |  If `sparse_label` is `False`, `label` should contain probability distribution
 |  and `label`'s shape should be the same with `pred`:
 |  
 |  .. math::
 |  
 |      p = \softmax({pred})
 |  
 |      L = -\sum_i \sum_j {label}_j \log p_{ij}
 |  
 |  Parameters
 |  ----------
 |  axis : int, default -1
 |      The axis to sum over when computing softmax and entropy.
 |  sparse_label :

In [2]:
# Unnormalized scores (logits) for three classes.
a = nd.array([10, 2, 5])
# Log-probabilities of the softmax distribution defined by `a`.
b = F.log_softmax(a)
print(b)


[ -7.04850303e-03  -8.00704861e+00  -5.00704861e+00]
<NDArray 3 @cpu(0)>


In [3]:
# Index of the class whose log-probability we want to read out.
action = nd.array([1])

# sparse_label=True: the label is an integer class index.
# from_logits=False: the input is treated as unnormalized scores, i.e. the loss
# applies log-softmax itself.
loss = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=True, from_logits=False)

# `a` is not defined in this cell: it is the logits vector created in the
# log_softmax cell, so that cell must be run first. Negating the cross-entropy
# gives log p[action], which matches entry 1 of the log_softmax output.
print(-loss(a, action))


[-8.00704861]
<NDArray 1 @cpu(0)>


In [2]:
def entropy(logits):
    """Entropy (in nats) of the softmax distribution defined by ``logits``.

    The softmax is taken along the last axis. The per-row maximum is
    subtracted before exponentiating so that large logits do not overflow.

    Parameters
    ----------
    logits : NDArray
        Unnormalized log-probabilities; classes lie along the last axis.

    Returns
    -------
    NDArray
        ``-sum(p * log(p))`` reduced over the last axis, where
        ``p = softmax(logits)``.
    """
    # Shifting by the maximum leaves the softmax unchanged but keeps exp() bounded.
    shifted = logits - nd.max(logits, axis=-1, keepdims=True)
    exp_shifted = nd.exp(shifted)
    normalizer = nd.sum(exp_shifted, axis=-1, keepdims=True)
    probs = exp_shifted / normalizer
    # log(normalizer) - shifted equals -log(probs), computed without dividing.
    neg_log_probs = nd.log(normalizer) - shifted
    return nd.sum(probs * neg_log_probs, axis=-1)

In [3]:
# The values are passed to entropy() as logits (not as probabilities); the
# result is the entropy of the corresponding softmax distribution.
a = nd.array([0.2, 0.4, 0.4])
b = entropy(a)

In [4]:
# Display the entropy. It is close to log(3) ~= 1.0986 because the three
# logits are nearly equal.
b


[ 1.09437883]
<NDArray 1 @cpu(0)>

In [1]:
import tensorflow as tf

def entropy(logits):
    """Entropy (in nats) of the softmax distribution defined by ``logits``.

    The softmax is taken along the last axis. The per-row maximum is
    subtracted before exponentiating so that large logits do not overflow.

    Args:
        logits: Tensor of unnormalized log-probabilities; classes lie along
            the last axis.

    Returns:
        Tensor holding ``-sum(p * log(p))`` reduced over the last axis, where
        ``p = softmax(logits)``.
    """
    # Shifting by the maximum leaves the softmax unchanged but keeps exp() bounded.
    shifted = logits - tf.reduce_max(logits, axis=-1, keep_dims=True)
    exp_shifted = tf.exp(shifted)
    normalizer = tf.reduce_sum(exp_shifted, axis=-1, keep_dims=True)
    probs = exp_shifted / normalizer
    # log(normalizer) - shifted equals -log(probs), computed without dividing.
    neg_log_probs = tf.log(normalizer) - shifted
    return tf.reduce_sum(probs * neg_log_probs, axis=-1)

# Same logits as in the MXNet entropy example, held in a TF variable.
# `b` is a graph tensor; it only gets a concrete value when run in a session.
a = tf.Variable([0.2, 0.4, 0.4])
b = entropy(a)

  return f(*args, **kwds)


In [2]:
# Evaluate the entropy tensor `b` built in the previous cell.
with tf.Session() as sess:
    # Variables have to be initialized before anything that reads them is run.
    sess.run(tf.global_variables_initializer())
    c = sess.run(b)
    
print(c)

1.09438


In [11]:
import tensorflow as tf

def sample(logits):
    """Draw a categorical sample from ``logits`` using the Gumbel-max trick.

    Uniform noise ``u`` with the same shape as ``logits`` is turned into the
    perturbation ``log(-log(u))``, which is subtracted from the logits; the
    index of the largest perturbed value along the last axis is returned.

    Args:
        logits: Tensor of unnormalized log-probabilities; classes lie along
            the last axis.

    Returns:
        Tensor with the sampled class index along the last axis.
    """
    uniform_noise = tf.random_uniform(tf.shape(logits))
    perturbation = tf.log(-tf.log(uniform_noise))
    perturbed_logits = logits - perturbation
    return tf.argmax(perturbed_logits, axis=-1)

# Logits that strongly favor classes 0 and 1 over class 2.
a = tf.Variable([9, 10., 2.])
# Every call to sample() adds its own random op to the graph, so one
# session run of this list yields 100 separate draws.
b = [sample(a) for _ in range(100)]

with tf.Session() as sess:
    # Variables have to be initialized before anything that reads them is run.
    sess.run(tf.global_variables_initializer())
    c = sess.run(b)
    
print(c)

  if d.decorator_argspec is not None), _inspect.getargspec(target))


[1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [15]:
from mxnet import nd

def sample(logits):
    """Draw a categorical sample from ``logits`` using the Gumbel-max trick.

    Uniform noise ``u`` with the same shape as ``logits`` is turned into the
    perturbation ``log(-log(u))``, which is subtracted from the logits; the
    index of the largest perturbed value along the last axis is returned.
    That index is distributed according to ``softmax(logits)``.

    Parameters
    ----------
    logits : NDArray
        Unnormalized log-probabilities; classes lie along the last axis.

    Returns
    -------
    NDArray
        Sampled class index along the last axis.
    """
    # Allocate the noise on the same device as `logits`. Without `ctx` it is
    # created on the default context, and the subtraction below fails when
    # `logits` lives somewhere else (e.g. on a GPU).
    u = nd.random.uniform(shape=logits.shape, ctx=logits.context)
    return nd.argmax(logits - nd.log(-nd.log(u)), axis=-1)

# Same logits as in the TensorFlow sampling experiment: classes 0 and 1 are
# strongly favored over class 2.
a = nd.array([9, 10., 2.])
# Draw 100 samples one at a time, converting each one-element result to a
# Python int. No seed is set, so the printed sequence differs between runs.
b = [int(sample(a).asscalar()) for _ in range(100)]
print(b)

[1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1]
