In [1]:
# 模型参数
# http://zh.gluon.ai/chapter_gluon-basics/parameters.html

from mxnet import gluon, nd
from mxnet.gluon import nn

def get_net():
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Dense(4, activation="relu"))
        net.add(nn.Dense(2))
    return net

x = nd.random.uniform(shape=(3,5))

  from ._conv import register_converters as _register_converters


In [4]:
net = get_net()
net(x)

RuntimeError: Parameter 'sequential2_dense0_weight' has not been initialized. Note that you should initialize parameters and create Trainer with Block.collect_params() instead of Block.params because the later does not include Parameters of nested child Blocks

In [5]:
net.initialize()
net(x)


[[0.00212593 0.00365805]
 [0.00161272 0.00441845]
 [0.00204872 0.00352518]]
<NDArray 3x2 @cpu(0)>

In [9]:
# 访问模型参数
my_param = gluon.Parameter('exciting_parameter_yay', shape=(3,3))
my_param.initialize()
(my_param.data(), my_param.grad())

(
 [[-0.04391347  0.03839272  0.03316854]
  [-0.00613896 -0.03968295  0.00958075]
  [-0.05106945 -0.06736943 -0.02462026]]
 <NDArray 3x3 @cpu(0)>, 
 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]
 <NDArray 3x3 @cpu(0)>)

In [12]:
from scipy import stats
stats.describe(my_param.data().asnumpy(), axis=None)

DescribeResult(nobs=9, minmax=(-0.06736943, 0.038392715), mean=-0.01685028, variance=0.0014255325, skewness=0.2764521539211273, kurtosis=-1.2858607428774531)

In [13]:
w = net[0].weight
b = net[0].bias
print('name:', net[0].name)
print('weight:', w)
print('bias:', b)

name: sequential2_dense0
weight: Parameter sequential2_dense0_weight (shape=(4, 5), dtype=<class 'numpy.float32'>)
bias: Parameter sequential2_dense0_bias (shape=(4,), dtype=<class 'numpy.float32'>)


In [14]:
print('weight:', w.data())
print('weight gradient', w.grad())
print('bias:', b.data())
print('bias gradient', b.grad())

weight: 
[[-0.06206018  0.06491279 -0.03182812 -0.01631819 -0.00312688]
 [ 0.0408415   0.04370362  0.00404529 -0.0028032   0.00952624]
 [-0.01501013  0.05958354  0.04705103 -0.06005495 -0.02276454]
 [-0.0578019   0.02074406 -0.06716943 -0.01844618  0.04656678]]
<NDArray 4x5 @cpu(0)>
weight gradient 
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
<NDArray 4x5 @cpu(0)>
bias: 
[0. 0. 0. 0.]
<NDArray 4 @cpu(0)>
bias gradient 
[0. 0. 0. 0.]
<NDArray 4 @cpu(0)>


In [15]:
params = net.collect_params()
print(params)
print(params['sequential2_dense0_bias'].data())
print(params.get('dense0_weight').data())

sequential2_ (
  Parameter sequential2_dense0_weight (shape=(4, 5), dtype=<class 'numpy.float32'>)
  Parameter sequential2_dense0_bias (shape=(4,), dtype=<class 'numpy.float32'>)
  Parameter sequential2_dense1_weight (shape=(2, 4), dtype=<class 'numpy.float32'>)
  Parameter sequential2_dense1_bias (shape=(2,), dtype=<class 'numpy.float32'>)
)

[0. 0. 0. 0.]
<NDArray 4 @cpu(0)>

[[-0.06206018  0.06491279 -0.03182812 -0.01631819 -0.00312688]
 [ 0.0408415   0.04370362  0.00404529 -0.0028032   0.00952624]
 [-0.01501013  0.05958354  0.04705103 -0.06005495 -0.02276454]
 [-0.0578019   0.02074406 -0.06716943 -0.01844618  0.04656678]]
<NDArray 4x5 @cpu(0)>


In [16]:
# 使用不同的初始函数来初始化
from mxnet import init
params.initialize(init=init.Normal(sigma=0.02), force_reinit=True)
print(net[0].weight.data(), net[0].bias.data())


[[-0.02084965 -0.0265577  -0.02949932 -0.01048284  0.02532511]
 [ 0.01790128 -0.01203189  0.02408112 -0.01942439 -0.01165124]
 [ 0.00743415  0.01860014 -0.02845151 -0.0103524   0.04017665]
 [ 0.00572617  0.01120919  0.01939519 -0.01057075 -0.0377818 ]]
<NDArray 4x5 @cpu(0)> 
[0. 0. 0. 0.]
<NDArray 4 @cpu(0)>


In [17]:
stats.describe(net[0].weight.data().asnumpy(), axis=None)
# skew and kurtosis
# https://itl.nist.gov/div898/handbook/eda/section3/eda35b.htm

DescribeResult(nobs=20, minmax=(-0.037781797, 0.04017665), mean=-0.002390224, variance=0.00048480355, skewness=0.19391654431819916, kurtosis=-1.080914628439004)

In [25]:
import plotly
# https://plot.ly/python/getting-started/
plotly.tools.set_credentials_file(username='ArisL', api_key='V2d5SAzma9hR3lTW4WZN')
plotly.__version__

'2.5.1'

In [26]:
# https://plot.ly/python/line-and-scatter/
import plotly.plotly as py
import plotly.graph_objs as go

weight_np = net[0].weight.data().asnumpy().reshape(-1)

trace = go.Scatter(x=list(range(weight_np.shape[0])),
                   y=weight_np)
data=[trace]

py.iplot(data, filename)

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~ArisL/0 or inside your plot.ly account where it is named 'basic-scatter'


In [27]:
params.initialize(init=init.One(), force_reinit=True)
print(net[0].weight.data(), net[0].bias.data())


[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]
<NDArray 4x5 @cpu(0)> 
[0. 0. 0. 0.]
<NDArray 4 @cpu(0)>


In [28]:
# 延后的初始化
net = get_net()
net.collect_params()

sequential3_ (
  Parameter sequential3_dense0_weight (shape=(4, 0), dtype=<class 'numpy.float32'>)
  Parameter sequential3_dense0_bias (shape=(4,), dtype=<class 'numpy.float32'>)
  Parameter sequential3_dense1_weight (shape=(2, 0), dtype=<class 'numpy.float32'>)
  Parameter sequential3_dense1_bias (shape=(2,), dtype=<class 'numpy.float32'>)
)

In [29]:
net.initialize()
net.collect_params()

sequential3_ (
  Parameter sequential3_dense0_weight (shape=(4, 0), dtype=<class 'numpy.float32'>)
  Parameter sequential3_dense0_bias (shape=(4,), dtype=<class 'numpy.float32'>)
  Parameter sequential3_dense1_weight (shape=(2, 0), dtype=<class 'numpy.float32'>)
  Parameter sequential3_dense1_bias (shape=(2,), dtype=<class 'numpy.float32'>)
)

In [30]:
net(x)
net.collect_params()

sequential3_ (
  Parameter sequential3_dense0_weight (shape=(4, 5), dtype=<class 'numpy.float32'>)
  Parameter sequential3_dense0_bias (shape=(4,), dtype=<class 'numpy.float32'>)
  Parameter sequential3_dense1_weight (shape=(2, 4), dtype=<class 'numpy.float32'>)
  Parameter sequential3_dense1_bias (shape=(2,), dtype=<class 'numpy.float32'>)
)

In [42]:
# 共享模型参数
net = nn.Sequential()
with net.name_scope():
    net.add(nn.Dense(4, activation='relu'))
    net.add(nn.Dense(4, activation='relu'))
    net.add(nn.Dense(4, activation='relu', params=net[-1].params))
    net.add(nn.Dense(2))

In [43]:
net.initialize()
net(x)
print(net[1].weight.data())
print(net[2].weight.data())
print(net[1].weight.grad())
print(net[2].weight.grad())


[[-0.0144892   0.02456146  0.05335445 -0.03502852]
 [ 0.01137821 -0.02614943  0.05344296  0.06515827]
 [ 0.02695442  0.01238512  0.0315356   0.02235357]
 [ 0.00018541  0.00464887  0.06385171 -0.03772538]]
<NDArray 4x4 @cpu(0)>

[[-0.0144892   0.02456146  0.05335445 -0.03502852]
 [ 0.01137821 -0.02614943  0.05344296  0.06515827]
 [ 0.02695442  0.01238512  0.0315356   0.02235357]
 [ 0.00018541  0.00464887  0.06385171 -0.03772538]]
<NDArray 4x4 @cpu(0)>

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
<NDArray 4x4 @cpu(0)>

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
<NDArray 4x4 @cpu(0)>


In [36]:
# weight and grad shared by reference
print(id(net[1].weight.data()))
print(id(net[2].weight.data()))
print(id(net[1].weight.grad()))
print(id(net[2].weight.grad()))

140480372940696
140480372940696
140480372939632
140480372939632


In [48]:
# 如果两个层共用一个参数，那么求梯度的时候会发生什么？
net = nn.Sequential()
drop_prob1 = 0.2
drop_prob2 = 0.5

with net.name_scope():
    net.add(nn.Flatten())
    # 第一层全连接。
    net.add(nn.Dense(256, activation='relu'))
#     net.add(nn.Dropout(drop_prob1))
    
    # 第二层全连接。
    net.add(nn.Dense(256, activation='relu', params=net[-1].params))
    net.add(nn.Dropout(drop_prob2))
    
    net.add(nn.Dense(10))
net.initialize()

# 读取数据并训练
# 数据获取
import sys
sys.path.append('..')
import utils
from mxnet import gluon

batch_size = 256
def transform(data, label):
#     print(data.shape)
#     print(label.shape)
#     return nd.transpose(data.astype('float32'), (0,3,1,2))/255, label.astype('float32')
    return data.astype('float32') / 255, label.astype('float32')
mnist_train = gluon.data.vision.FashionMNIST(train=True, transform=transform)
mnist_test = gluon.data.vision.FashionMNIST(train=False, transform=transform)
train_data = gluon.data.DataLoader(mnist_train, batch_size, shuffle=True)
test_data = gluon.data.DataLoader(mnist_test, batch_size, shuffle=False)

# 训练
from mxnet import autograd
from mxnet import gluon
from mxnet import nd

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd',{
    'learning_rate': 0.5
})

learning_rate = .5

for epoch in range(1):
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        with autograd.record():
            print(data.shape)
            output = net(data)
            
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        print(net[1].weight.grad())
        print(net[2].weight.grad())
        break
        trainer.step(batch_size)
        
        train_loss += nd.mean(loss).asscalar()
        train_acc += utils.accuracy(output, label)
    
    test_acc = utils.evaluate_accuracy(test_data, net)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch, train_loss/len(train_data),
        train_acc/len(train_data), test_acc))


The binary mode of fromstring is deprecated, as it behaves surprisingly on unicode inputs. Use frombuffer instead



(256, 28, 28, 1)



The binary mode of fromstring is deprecated, as it behaves surprisingly on unicode inputs. Use frombuffer instead



MXNetError: Shape inconsistent, Provided = [256,784], inferred shape=(256,256)

In [50]:
# 自定义初始化方法
class MyInit(init.Initializer):
    def __init__(self):
        super(MyInit, self).__init__()
        self._verbose = True
    def _init_weight(self, _, arr):
        # 初始化权重，使用out=arr后我们不需指定形状
        print('init weight', arr.shape)
        nd.random.uniform(low=5, high=10, out=arr)

net = get_net()
net.initialize(MyInit())
net(x)
net[0].weight.data()

init weight (4, 5)
init weight (2, 4)



[[7.684615  6.5811    5.5523853 9.166578  7.025178 ]
 [8.879016  7.026868  6.4057646 6.605215  8.569636 ]
 [5.1497517 7.807886  8.686272  6.9983    5.5489225]
 [7.6543007 8.031541  5.426165  8.516088  8.328388 ]]
<NDArray 4x5 @cpu(0)>

In [51]:
net = get_net()
net.initialize()
net(x)

print('default weight:', net[1].weight.data())

w = net[1].weight
w.set_data(nd.ones(w.shape))

print('init to all 1s:', net[1].weight.data())

default weight: 
[[-0.03879906 -0.02696734 -0.05872552  0.00521906]
 [-0.05805647 -0.03155949 -0.0390045  -0.06878341]]
<NDArray 2x4 @cpu(0)>
init to all 1s: 
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]]
<NDArray 2x4 @cpu(0)>
