<a href="https://colab.research.google.com/github/anubhavgupta1/Dive-Into-Deep-Learning/blob/main/Deep%20Learning%20Computation/Parameters%20Management/mxnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep Learning Computation: Parameter Management (MXNet)

In [1]:
!pip install d2l==0.16.1
!pip install -U mxnet-cu101==1.7.0

Collecting d2l==0.16.1
[?25l  Downloading https://files.pythonhosted.org/packages/30/2b/3515cd6f2898bf95306a5c58b065aeb045fdc25516f2b68b0f8409e320c3/d2l-0.16.1-py3-none-any.whl (76kB)
[K     |████▎                           | 10kB 13.5MB/s eta 0:00:01[K     |████████▌                       | 20kB 18.0MB/s eta 0:00:01[K     |████████████▉                   | 30kB 9.8MB/s eta 0:00:01[K     |█████████████████               | 40kB 8.6MB/s eta 0:00:01[K     |█████████████████████▍          | 51kB 5.1MB/s eta 0:00:01[K     |█████████████████████████▋      | 61kB 5.5MB/s eta 0:00:01[K     |██████████████████████████████  | 71kB 5.4MB/s eta 0:00:01[K     |████████████████████████████████| 81kB 3.7MB/s 
Installing collected packages: d2l
Successfully installed d2l-0.16.1
Collecting mxnet-cu101==1.7.0
[?25l  Downloading https://files.pythonhosted.org/packages/40/26/9655677b901537f367c3c473376e4106abc72e01a8fc25b1cb6ed9c37e8c/mxnet_cu101-1.7.0-py2.py3-none-manylinux2014_x86_64.w

In [2]:
from mxnet import init, np, npx
from mxnet.gluon import nn
# Switch MXNet into its NumPy-compatible mode so the `np`/`npx` interfaces
# imported above can be used throughout the notebook.
npx.set_np()

In [3]:
# Dummy input batch of shape (2, 4), drawn uniformly at random. No seed is set
# in the visible cells, so the values (and the outputs below) vary between runs.
X = np.random.uniform(size=(2, 4))

### Model

In [4]:
# Two-layer perceptron used for every example below.
net = nn.Sequential()
net.add(
    nn.Dense(8, activation='relu'),  # hidden layer: 8 units with ReLU
    nn.Dense(1),                     # output layer: 1 unit
)
# No arguments: fall back to MXNet's default initialization method.
net.initialize()

In [5]:
# Forward pass on the dummy batch; the printed result has one row per input row.
print(net(X))

[[0.0054572 ]
 [0.00488594]]


### Parameter Access

In [6]:
# `params` is the parameter dictionary of the second (output) layer: it lists
# each parameter's full name, shape and dtype.
print(net[1].params)

dense1_ (
  Parameter dense1_weight (shape=(1, 8), dtype=float32)
  Parameter dense1_bias (shape=(1,), dtype=float32)
)


### Targeted Parameters

In [7]:
# Each parameter is also exposed as an attribute of its layer; the attribute is
# a gluon `Parameter` object rather than a raw array.
print(type(net[1].bias))

<class 'mxnet.gluon.parameter.Parameter'>


In [8]:
# Printing the Parameter itself shows its metadata (name, shape, dtype), not
# its values.
print(net[1].bias)

Parameter dense1_bias (shape=(1,), dtype=float32)


In [9]:
# `data()` returns the values currently stored in the parameter.
print(net[1].bias.data())

[0.]


In [10]:
# Gradient buffer of the output layer's weight. It is all zeros here,
# presumably because no backward pass has been run in the cells above.
net[1].weight.grad()

array([[0., 0., 0., 0., 0., 0., 0., 0.]])

### All Parameters at Once

In [11]:
# `collect_params()` on a single layer returns that layer's parameters, keyed
# by their full (prefixed) names.
print(net[0].collect_params())

dense0_ (
  Parameter dense0_weight (shape=(8, 4), dtype=float32)
  Parameter dense0_bias (shape=(8,), dtype=float32)
)


In [12]:
# Fetch the first layer's weight from the parameter dictionary by its full name.
# The key is derived from the layer's own prefix instead of the literal
# 'dense0_': Gluon numbers auto-generated prefixes per process, so rebuilding
# the model in the same kernel changes the prefix and a hard-coded key would
# raise KeyError.
print(net[0].collect_params()[net[0].prefix + 'weight'].data())

[[-0.01068833  0.01729892  0.02042518 -0.01618656]
 [-0.00873779 -0.02834515  0.05484822 -0.06206018]
 [ 0.06491279 -0.03182812 -0.01631819 -0.00312688]
 [ 0.0408415   0.04370362  0.00404529 -0.0028032 ]
 [ 0.00952624 -0.01501013  0.05958354  0.04705103]
 [-0.06005495 -0.02276454 -0.0578019   0.02074406]
 [-0.06716943 -0.01844618  0.04656678  0.06400172]
 [ 0.03894195 -0.05035089  0.0518017   0.05181222]]


In [13]:
# Fetch the first layer's bias by its full name. The key uses the layer's own
# prefix rather than the literal 'dense0_', which is only valid the first time
# the model is built in a kernel (Gluon's auto-generated prefixes are numbered
# per process, so a rebuilt model gets a different one).
print(net[0].collect_params()[net[0].prefix + 'bias'].data())

[0. 0. 0. 0. 0. 0. 0. 0.]


In [14]:
# Same lookup for the second (output) layer: its parameters keyed by their full
# (prefixed) names.
print(net[1].collect_params())

dense1_ (
  Parameter dense1_weight (shape=(1, 8), dtype=float32)
  Parameter dense1_bias (shape=(1,), dtype=float32)
)


In [15]:
# Fetch the output layer's weight by its full name. The key uses the layer's
# own prefix rather than the literal 'dense1_', which is only valid the first
# time the model is built in a kernel (Gluon's auto-generated prefixes are
# numbered per process, so a rebuilt model gets a different one).
print(net[1].collect_params()[net[1].prefix + 'weight'].data())

[[ 0.06700657 -0.00369488  0.0418822   0.0421275  -0.00539289  0.00286685
   0.03927409  0.02504314]]


In [16]:
# Fetch the output layer's bias by its full name. The key uses the layer's own
# prefix rather than the literal 'dense1_', which is only valid the first time
# the model is built in a kernel (Gluon's auto-generated prefixes are numbered
# per process, so a rebuilt model gets a different one).
print(net[1].collect_params()[net[1].prefix + 'bias'].data())

[0.]


In [17]:
# Called on the whole network, `collect_params()` gathers the parameters of
# every layer into a single dictionary.
print(net.collect_params())

sequential0_ (
  Parameter dense0_weight (shape=(8, 4), dtype=float32)
  Parameter dense0_bias (shape=(8,), dtype=float32)
  Parameter dense1_weight (shape=(1, 8), dtype=float32)
  Parameter dense1_bias (shape=(1,), dtype=float32)
)


### Parameter Initialization

In [18]:
# Re-initialize the whole network with a Gaussian initializer (sigma=0.01).
# `force_reinit=True` makes the call take effect even though the parameters
# were already initialized earlier in the notebook.
net.initialize(force_reinit=True, init=init.Normal(sigma=0.01))

In [19]:
# First row of the first layer's weight matrix, printed to inspect the result
# of the Gaussian re-initialization in the previous cell.
print(net[0].weight.data()[0])

[ 0.00354961 -0.00614133  0.0107317   0.01830765]


In [20]:
# Alternatively, re-initialize the network with a constant initializer (value 1).
# NOTE(review): parameters that carry their own initializer (e.g. Dense biases)
# may not pick up this default — confirm by printing a bias if that matters.
net.initialize(init=init.Constant(1), force_reinit=True)

In [21]:
# First row of the first layer's weight matrix; after the constant
# re-initialization in the previous cell every entry should be 1.
print(net[0].weight.data()[0])

[1. 1. 1. 1.]


In [22]:
# Initializers can also be set for individual blocks or parameters. Here the
# weight of the first layer is re-drawn with the Xavier initializer, and the
# second layer is re-initialized with the constant initializer 42.

net[0].weight.initialize(force_reinit=True, init=init.Xavier())
net[1].initialize(force_reinit=True, init=init.Constant(42))

In [23]:
# First weight row of layer 0 followed by the full weight matrix of layer 1,
# one per line.
print(net[0].weight.data()[0], net[1].weight.data(), sep='\n')

[-0.26102373  0.15249556 -0.19274211 -0.24742058]
[[42. 42. 42. 42. 42. 42. 42. 42.]]
