In [1]:
# We will use the Gluon Trainer to update model parameters during network training.
# Import the necessary packages from MXNet.
from mxnet import nd, autograd, optimizer, gluon

In [2]:
# Build the model: a single Dense layer with one output unit,
# then initialize it with the default initializer settings.
net = gluon.nn.Dense(units=1)
net.initialize()

In [4]:
# Set up the inputs for one forward/backward pass.
# The toy batch has 8 samples with 4 features each and one target per sample.
batch_size = 8

# Loss object used to score the predictions against the targets.
loss = gluon.loss.L2Loss()

# Draw the features first and the targets second, both from nd.random.uniform.
X = nd.random.uniform(shape=(batch_size, 4))
y = nd.random.uniform(shape=(batch_size,))

def forward_backward():
    """Run the first two training steps: a forward pass, then a backward pass.

    The loss of ``net`` on the batch ``(X, y)`` is computed inside
    ``autograd.record()`` and then ``backward()`` is called on it.
    Uses the notebook-level ``net``, ``X``, ``y`` and ``loss``; returns nothing.
    """
    with autograd.record():
        predictions = net(X)
        batch_loss = loss(predictions, y)
    batch_loss.backward()


# Run one forward/backward pass.
forward_backward()

In [6]:
# Create the Trainer from the model's parameters and a plain stochastic
# gradient descent ('sgd') optimizer with learning rate 1.
trainer = gluon.Trainer(
    params=net.collect_params(),
    optimizer='sgd',
    optimizer_params={'learning_rate': 1},
)

In [7]:
# Check the current network parameters before any update.
# A copy of the weights is kept in curr_weight; it is reused later to
# recompute the SGD update by hand.
curr_weight = net.weight.data().copy()
print(curr_weight)


[[-0.0196689   0.01582889 -0.00881553  0.0563288 ]]
<NDArray 1x4 @cpu(0)>


In [8]:
# Trainer step: update the parameters from the gradients computed by
# forward_backward(). batch_size is passed to step(); the manual check that
# follows reproduces the result by scaling the gradient by 1/batch_size.
trainer.step(batch_size)
print(net.weight.data())


[[0.20732312 0.20758682 0.370947   0.36136353]]
<NDArray 1x4 @cpu(0)>


In [9]:
# Reproduce the SGD update by hand:
#     new_weight = old_weight - learning_rate * gradient / batch_size
# The learning rate is read from the trainer instead of being hard-coded as 1,
# so this check stays in step with the trainer configured earlier.
# The values are identical to the result of trainer.step() above.
print(curr_weight - trainer.learning_rate * net.weight.grad() / batch_size)


[[0.20732312 0.20758682 0.370947   0.36136353]]
<NDArray 1x4 @cpu(0)>


In [10]:
# Using an Optimizer instance
# Adam is a popular adaptive optimizer for deep learning. Here the optimizer
# object is built first and then handed to a new Trainer.
optim = optimizer.Adam(learning_rate=1)
trainer = gluon.Trainer(params=net.collect_params(), optimizer=optim)

In [11]:
# Recompute the gradients, take one step with the Adam-based trainer,
# and display the updated weights.
forward_backward()
trainer.step(batch_size)
net.weight.data()


[[-0.7926821  -0.79241866 -0.62905896 -0.6386421 ]]
<NDArray 1x4 @cpu(0)>

In [12]:
# Changing the learning rate
# First, read the learning rate currently used by the trainer.
trainer.learning_rate

1

In [13]:
# Set a new learning rate on the trainer, then read it back to confirm.
trainer.set_learning_rate(0.1)
trainer.learning_rate

0.1