In [10]:
import numpy as np
import mxnet as mx
from mxnet import gluon, autograd

In [11]:
# Smoke test: build a one-element NDArray and show it.
y = mx.nd.array([1.0])
# print() with a single argument behaves the same on Python 2 and Python 3.
print(y)


[1.]
<NDArray 1 @cpu(0)>


In [35]:
class SwishAct(mx.operator.CustomOp):
    """Swish activation ``y = x * sigmoid(beta * x)`` as an MXNet custom operator.

    Both passes print the arguments they receive so that the way MXNet
    invokes ``forward`` and ``backward`` can be read from the notebook output.
    """

    def __init__(self, beta=1.0):
        """Store the Swish slope parameter.

        beta : number multiplied with the input before the sigmoid.
        """
        super(SwishAct, self).__init__()
        self._beta = beta
        # sigmoid(beta * x) from the most recent forward(); reused by backward().
        self._x_sig = None

    @staticmethod
    def _trace(label, value):
        """Print ``label`` and ``value`` separated by a single space.

        Passing one pre-formatted string keeps the output identical whether
        ``print`` is the Python 2 statement or the Python 3 function.
        """
        print('%s %s' % (label, value))

    def forward(self, is_train, req, in_data, out_data, aux):
        """Implements forward computation.

        is_train : bool, whether forwarding for training or testing.
        req : list of {'null', 'write', 'inplace', 'add'}, how to assign to out_data. 'null' means skip assignment, etc.
        in_data : list of NDArray, input data.
        out_data : list of NDArray, pre-allocated output buffers.
        aux : list of NDArray, mutable auxiliary states. Usually not used.
        """
        print('===> In forward(): ')

        print('---> before calc: ')
        self._trace('is_train: ', is_train)
        self._trace('req: ', req)
        self._trace('in_data: ', in_data)
        # out_data has not been assigned yet at this point.
        self._trace('out_data: ', out_data)
        self._trace('aux: ', aux)

        x = in_data[0]
        # Keep the sigmoid so backward() does not have to recompute it.
        self._x_sig = mx.nd.sigmoid(x * self._beta)
        y = x * self._x_sig
        self.assign(out_data[0], req[0], y)

        print('---> after calc: ')
        self._trace('out_data: ', out_data)

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        """Implements backward computation.

        req : list of {'null', 'write', 'inplace', 'add'}, how to assign to in_grad
        out_grad : list of NDArray, gradient w.r.t. output data.
        in_data : list of NDArray, input data.
        out_data : list of NDArray, output data.
        in_grad : list of NDArray, gradient w.r.t. input data. This is the output buffer.
        aux : list of NDArray, mutable auxiliary states. Usually not used.
        """
        print('===> In backward(): ')

        print('---> before calc: ')
        self._trace('req: ', req)
        self._trace('out_grad: ', out_grad)
        self._trace('in_data: ', in_data)
        self._trace('out_data: ', out_data)
        self._trace('in_grad: ', in_grad)
        self._trace('aux: ', aux)

        x = in_data[0]
        y = out_data[0]
        dy = out_grad[0]
        self._trace('dy in backward(): ', dy)

        x_sig = self._x_sig
        if x_sig is None:
            # forward() has not populated the cache on this instance;
            # rebuild the sigmoid from the input instead of failing on None.
            x_sig = mx.nd.sigmoid(x * self._beta)

        # d/dx [x * s(beta*x)] = s + beta*x*s*(1 - s) = beta*y + s*(1 - beta*y)
        beta_y = y * self._beta
        dx = dy * (beta_y + x_sig * (1.0 - beta_y))
        self.assign(in_grad[0], req[0], dx)

        print('---> after calc: ')
        self._trace('aux: ', aux)

In [36]:
@mx.operator.register("SwishAct")  # registered under the name used as op_type='SwishAct'
class SwishActProp(mx.operator.CustomOpProp):
    """Operator properties for SwishAct: argument/output names, shapes and factory."""

    def __init__(self, beta=1.0):
        # The operator's backward() consumes out_grad, so request the top gradient.
        super(SwishActProp, self).__init__(need_top_grad=True)
        # All operator arguments are passed in string format, so beta is
        # converted back to a float before it is stored.
        self._beta = float(beta)

    def list_arguments(self):
        """Names of the operator inputs."""
        return ['data']

    def list_outputs(self):
        """Names of the operator outputs (may be omitted for a single output)."""
        return ['output']

    def infer_shape(self, in_shapes):
        """Derive output shapes from input shapes.

        Can be omitted when every input and output has the same shape.

        in_shapes : list of shape. Shape is described by a tuple of int.

        Returns three sequences: input shapes, output shapes and
        auxiliary-state shapes.
        """
        shape = in_shapes[0]
        # Element-wise op: the output has the same shape as the input; no aux states.
        return (shape,), (shape,), ()

    def create_operator(self, ctx, in_shapes, in_dtypes):
        """Build the SwishAct instance that performs the computation."""
        return SwishAct(beta=self._beta)

In [37]:
# Training-mode demo: run SwishAct under autograd and inspect the gradient.
x = mx.nd.array([-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6])
x = x.reshape((3,4))

beta = 2.0
# attach gradient buffer to x for autograd
x.attach_grad()

# Each print() receives one pre-formatted string: under Python 2,
# print('label', value) would print a tuple repr instead of the two values.
print('--->input')
print('beta= {}'.format(beta))
print('x= {}'.format(x))
print('x.grad= {}'.format(x.grad))

print('--->forwarding')
# forward in a record() section to save computation graph for backward
# see autograd tutorial to learn more.
with autograd.record():
    y = mx.nd.Custom(x, beta=beta, op_type='SwishAct')

print('--->after forwarding')
print('y= {}'.format(y))
# attach_grad() was only called on x, not on y.
print('y.grad= {}'.format(y.grad))

print('--->backwarding')
# No explicit head gradient is passed to backward().
y.backward()
print('--->after backwarding')
print('x.grad= {}'.format(x.grad))


--->input
('beta=', 2.0)
('x=', 
[[-5. -4. -3. -2.]
 [-1.  0.  1.  2.]
 [ 3.  4.  5.  6.]]
<NDArray 3x4 @cpu(0)>)
('x.grad=', 
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
<NDArray 3x4 @cpu(0)>)
--->forwarding
===> In forward(): 
---> before calc: 
is_train:  1
 --->after forwardingreq:  ['write']
in_data:  
[
[[-5. -4. -3. -2.]
 [-1.  0.  1.  2.]
 [ 3.  4.  5.  6.]]
<NDArray 3x4 @cpu(0)>]
out_data:  [
[[ 0.0000000e+00  0.0000000e+00  2.5223372e-44  0.0000000e+00]
 [           nan  0.0000000e+00  2.8168788e+20  2.9206201e+32]
 [ 4.6280266e+27  7.2151485e+22 -5.6501617e+14  4.5657107e-41]]
<NDArray 3x4 @cpu(0)>]
aux:  []
---> after calc: 
out_data:  [
[[-2.2698936e-04 -1.3414006e-03 -7.4178688e-03 -3.5972420e-02]
 [-1.1920292e-01  0.0000000e+00  8.8079703e-01  1.9640275e+00]
 [ 2.9925823e+00  3.9986587e+00  4.9997730e+00  5.9999628e+00]]
<NDArray 3x4 @cpu(0)>]
('y=', 
[[-2.2698936e-04 -1.3414006e-03 -7.4178688e-03 -3.5972420e-02]
 [-1.1920292e-01  0.0000000e+00  8.8079703e-01  1.964027

In [38]:
# Inference-only check: the op is called outside autograd.record() and x has
# no gradient buffer attached, so only forward() is exercised.
x = mx.nd.array([-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6]).reshape((3, 4))

# Same input evaluated with beta=1.0 (y) and beta=2.0 (y2), in that order.
y, y2 = (mx.nd.Custom(x, beta=slope, op_type='SwishAct') for slope in (1.0, 2.0))

print(y)
print(y2)



===> In forward(): 
---> before calc: 
 is_train:  0
req:  ['write']
in_data:  [
[[-5. -4. -3. -2.]
 [-1.  0.  1.  2.]
 [ 3.  4.  5.  6.]]
<NDArray 3x4 @cpu(0)>]
out_data:  [
[[0.0000000e+00 0.0000000e+00 2.5223372e-44 0.0000000e+00]
 [          nan 4.5655705e-41 2.8168788e+20 2.9206201e+32]
 [4.6280266e+27 7.2151485e+22 2.0073407e+00 6.0000000e+00]]
<NDArray 3x4 @cpu(0)>]
aux:  []
---> after calc: 
out_data:  [
[[-0.03346425 -0.07194484 -0.14227763 -0.23840584]
 [-0.26894143  0.          0.7310586   1.761594  ]
 [ 2.8577223   3.928055    4.966536    5.9851646 ]]
<NDArray 3x4 @cpu(0)>]
===> In forward(): 
---> before calc: 
is_train:  0
req:  
[[-0.03346425 -0.07194484 -0.14227763 -0.23840584]
 [-0.26894143  0.          0.7310586   1.761594  ]
 [ 2.8577223   3.928055    4.966536    5.9851646 ]]
<NDArray 3x4 @cpu(0)>['write']
in_data:  
[
[[-5. -4. -3. -2.]
 [-1.  0.  1.  2.]
 [ 3.  4.  5.  6.]]
<NDArray 3x4 @cpu(0)>]
out_data:  [
[[0.0000000e+00 0.0000000e+00 2.5223372e-44 0.0000000e+0

In [16]:
# Shows None when the current x was created without x.attach_grad().
print(x.grad)

None
