<a href="https://colab.research.google.com/github/anubhavgupta1/Dive-Into-Deep-Learning/blob/main/Deep%20Learning%20Computation/Parameters%20Management/pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep Learning Computation

In [1]:
!pip install d2l==0.16.1

Collecting d2l==0.16.1
[?25l  Downloading https://files.pythonhosted.org/packages/30/2b/3515cd6f2898bf95306a5c58b065aeb045fdc25516f2b68b0f8409e320c3/d2l-0.16.1-py3-none-any.whl (76kB)
[K     |████▎                           | 10kB 14.5MB/s eta 0:00:01[K     |████████▌                       | 20kB 10.5MB/s eta 0:00:01[K     |████████████▉                   | 30kB 8.4MB/s eta 0:00:01[K     |█████████████████               | 40kB 7.2MB/s eta 0:00:01[K     |█████████████████████▍          | 51kB 4.2MB/s eta 0:00:01[K     |█████████████████████████▋      | 61kB 4.8MB/s eta 0:00:01[K     |██████████████████████████████  | 71kB 4.8MB/s eta 0:00:01[K     |████████████████████████████████| 81kB 3.6MB/s 
Installing collected packages: d2l
Successfully installed d2l-0.16.1


In [2]:
import torch
from torch import nn

In [3]:
# Seed torch's default generator so the random input below (and later random
# draws from the same generator) are reproducible on Restart & Run All.
torch.manual_seed(0)
# Input batch of shape (2, 4), sampled uniformly from [0, 1).
X = torch.rand(size=(2, 4))

### Model

In [4]:
# Small MLP: 4 input features -> 8 hidden units with ReLU -> 1 output.
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    nn.Linear(8, 1),
)
print(net)

Sequential(
  (0): Linear(in_features=4, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=1, bias=True)
)


In [5]:
# Forward pass on the input batch X; the result has one output value per input row.
print(net(X))

tensor([[-0.0707],
        [-0.1749]], grad_fn=<AddmmBackward>)


### Parameter Access

In [6]:
# state_dict() of the first layer: an ordered mapping with its 'weight' and 'bias' tensors.
print(net[0].state_dict())

OrderedDict([('weight', tensor([[ 0.2092,  0.2161, -0.2695,  0.4704],
        [-0.1235, -0.1842, -0.0922, -0.2772],
        [ 0.0442,  0.0949, -0.1404, -0.1510],
        [ 0.4326, -0.1254,  0.4684, -0.0149],
        [ 0.2632,  0.4893, -0.4359, -0.2252],
        [ 0.0066, -0.4961,  0.1482,  0.0482],
        [ 0.3360,  0.3894,  0.1076, -0.1397],
        [-0.1185,  0.4039,  0.3217, -0.4316]])), ('bias', tensor([-0.4466, -0.2632,  0.2700, -0.1697, -0.1912,  0.3978,  0.0123,  0.0688]))])


In [7]:
# Same inspection for the output layer, which sits at index 2 of the Sequential.
print(net[2].state_dict())

OrderedDict([('weight', tensor([[-0.0179, -0.2431, -0.0833,  0.2956, -0.2660, -0.1980,  0.2242, -0.1656]])), ('bias', tensor([-0.1952]))])


### Targeted Parameters

In [8]:
# The bias is stored as an nn.Parameter object, not as a bare tensor.
print(type(net[0].bias))

<class 'torch.nn.parameter.Parameter'>


In [9]:
# The Parameter itself (its repr includes requires_grad) ...
print(net[0].bias)
# ... versus .data, which exposes just the underlying tensor of values.
print(net[0].bias.data)

Parameter containing:
tensor([-0.4466, -0.2632,  0.2700, -0.1697, -0.1912,  0.3978,  0.0123,  0.0688],
       requires_grad=True)
tensor([-0.4466, -0.2632,  0.2700, -0.1697, -0.1912,  0.3978,  0.0123,  0.0688])


In [10]:
# Output-layer bias: first as a Parameter, then as its raw tensor via .data.
print(net[2].bias)
print(net[2].bias.data)

Parameter containing:
tensor([-0.1952], requires_grad=True)
tensor([-0.1952])


In [11]:
# .grad is None here: no backward pass has been run in the cells above.
print(net[2].weight.grad)

None


### All Parameters at Once

In [12]:
# List (name, shape) for each parameter of the first layer only.
print(*((pname, tensor.shape) for pname, tensor in net[0].named_parameters()))

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))


In [13]:
# List (name, shape) for every parameter in the whole network; names are
# prefixed with the index of the layer that owns them.
print(*((pname, tensor.shape) for pname, tensor in net.named_parameters()))

('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [14]:
# Parameters can also be fetched by qualified name ('<layer index>.<parameter>')
# from the network-level state_dict.
print(net.state_dict()['2.bias'].data)

tensor([-0.1952])


### Parameter Initialization

In [15]:
def init_normal(m):
    """Initialise an ``nn.Linear`` with N(0, 0.01^2) weights and a zero bias.

    Intended to be passed to ``nn.Module.apply``. Modules whose type is not
    exactly ``nn.Linear`` are left untouched.

    Args:
        m: The module to (possibly) initialise in place.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.normal_(m.weight, mean=0, std=0.01)
    # A layer built with ``bias=False`` has ``m.bias is None``; skip it rather
    # than crash inside ``nn.init.zeros_``.
    if m.bias is not None:
        nn.init.zeros_(m.bias)

In [16]:
# Module.apply runs init_normal on every submodule of net and on net itself,
# then returns net (hence the module repr shown as the cell output).
net.apply(init_normal)

Sequential(
  (0): Linear(in_features=4, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=1, bias=True)
)

In [17]:
# Spot-check: first weight row and first bias entry of the first layer.
print(net[0].weight.data[0], net[0].bias.data[0])

tensor([ 0.0120, -0.0183, -0.0021,  0.0019]) tensor(0.)


In [18]:
def init_constant(m):
    """Initialise an ``nn.Linear`` with all weights set to 1 and a zero bias.

    Intended to be passed to ``nn.Module.apply``. Modules whose type is not
    exactly ``nn.Linear`` are left untouched.

    Args:
        m: The module to (possibly) initialise in place.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.constant_(m.weight, 1)
    # A layer built with ``bias=False`` has ``m.bias is None``; skip it rather
    # than crash inside ``nn.init.zeros_``.
    if m.bias is not None:
        nn.init.zeros_(m.bias)

In [19]:
# Re-initialise the network with init_constant.
net.apply(init_constant)
# Bare last expression: displays the first weight row and first bias entry of the first layer.
net[0].weight.data[0], net[0].bias.data[0]

(tensor([1., 1., 1., 1.]), tensor(0.))

In [20]:
def xavier(m):
    """Apply Xavier-uniform initialisation to the weight of an ``nn.Linear``.

    Modules of any other type are ignored, and the bias is not modified.

    Args:
        m: The module to (possibly) initialise in place.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.xavier_uniform_(m.weight)

In [21]:
def init_42(m):
    """Fill the weight of an ``nn.Linear`` with the constant 42.

    Modules of any other type are ignored, and the bias is not modified.

    Args:
        m: The module to (possibly) initialise in place.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.constant_(m.weight, 42)

In [22]:
# Different initialisers per layer: xavier on the first layer, init_42 on the output layer.
net[0].apply(xavier)
net[2].apply(init_42)

Linear(in_features=8, out_features=1, bias=True)

In [23]:
# Check the per-layer initialisation: first weight row of the first layer,
# then the full weight tensor of the output layer.
print(net[0].weight.data[0])
print(net[2].weight.data)

tensor([-0.3682, -0.0522,  0.0164,  0.4424])
tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])
